001package org.hl7.fhir.r5.utils;
002
003import java.util.ArrayList;
004import java.util.List;
005import java.util.StringJoiner;
006
007import org.apache.poi.xssf.model.Comments;
008import org.hl7.fhir.exceptions.FHIRException;
009
010/*
011  Copyright (c) 2011+, HL7, Inc.
012  All rights reserved.
013  
014  Redistribution and use in source and binary forms, with or without modification, 
015  are permitted provided that the following conditions are met:
016    
017   * Redistributions of source code must retain the above copyright notice, this 
018     list of conditions and the following disclaimer.
019   * Redistributions in binary form must reproduce the above copyright notice, 
020     this list of conditions and the following disclaimer in the documentation 
021     and/or other materials provided with the distribution.
022   * Neither the name of HL7 nor the names of its contributors may be used to 
023     endorse or promote products derived from this software without specific 
024     prior written permission.
025  
026  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
027  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
028  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
029  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
030  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
031  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
032  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
033  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
034  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
035  POSSIBILITY OF SUCH DAMAGE.
036  
037 */
038
039
040
041import org.hl7.fhir.r5.model.ExpressionNode;
042import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
043import org.hl7.fhir.utilities.SourceLocation;
044import org.hl7.fhir.utilities.Utilities;
045
046// shared lexer for concrete syntaxes 
047// - FluentPath
048// - Mapping language
049
050public class FHIRLexer {
051  public class FHIRLexerException extends FHIRException {
052
053    public FHIRLexerException() {
054      super();
055    }
056
057    public FHIRLexerException(String message, Throwable cause) {
058      super(message, cause);
059    }
060
061    public FHIRLexerException(String message) {
062      super(message);
063    }
064
065    public FHIRLexerException(Throwable cause) {
066      super(cause);
067    }
068
069  }
070  private String source;
071  private int cursor;
072  private int currentStart;
073  private String current;
074  private List<String> comments = new ArrayList<>();
075  private SourceLocation currentLocation;
076  private SourceLocation currentStartLocation;
077  private int id;
078  private String name;
079
080  public FHIRLexer(String source, String name) throws FHIRLexerException {
081    this.source = source == null ? "" : source;
082    this.name = name == null ? "??" : name;
083    currentLocation = new SourceLocation(1, 1);
084    next();
085  }
086  public FHIRLexer(String source, int i) throws FHIRLexerException {
087    this.source = source;
088    this.cursor = i;
089    currentLocation = new SourceLocation(1, 1);
090    next();
091  }
092  public String getCurrent() {
093    return current;
094  }
095  public SourceLocation getCurrentLocation() {
096    return currentLocation;
097  }
098
099  public boolean isConstant() {
100    return !Utilities.noString(current) && ((current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 
101        current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 
102        current.equals("true") || current.equals("false") || current.equals("{}"));
103  }
104
105  public boolean isFixedName() {
106    return current != null && (current.charAt(0) == '`');
107  }
108
109  public boolean isStringConstant() {
110    return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`';
111  }
112
113  public String take() throws FHIRLexerException {
114    String s = current;
115    next();
116    return s;
117  }
118
119  public int takeInt() throws FHIRLexerException {
120    String s = current;
121    if (!Utilities.isInteger(s))
122      throw error("Found "+current+" expecting an integer");
123    next();
124    return Integer.parseInt(s);
125  }
126
127  public boolean isToken() {
128    if (Utilities.noString(current))
129      return false;
130
131    if (current.startsWith("$"))
132      return true;
133
134    if (current.equals("*") || current.equals("**"))
135      return true;
136
137    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
138      for (int i = 1; i < current.length(); i++) 
139        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
140            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
141          return false;
142      return true;
143    }
144    return false;
145  }
146
147  public FHIRLexerException error(String msg) {
148    return error(msg, currentLocation.toString());
149  }
150
151  public FHIRLexerException error(String msg, String location) {
152    return new FHIRLexerException("Error @"+location+": "+msg);
153  }
154
155  public void next() throws FHIRLexerException {
156    skipWhitespaceAndComments();
157    current = null;
158    currentStart = cursor;
159    currentStartLocation = currentLocation;
160    if (cursor < source.length()) {
161      char ch = source.charAt(cursor);
162      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
163        cursor++;
164        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
165          cursor++;
166        current = source.substring(currentStart, cursor);
167      } else if (ch == '.' ) {
168        cursor++;
169        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
170          cursor++;
171        current = source.substring(currentStart, cursor);
172      } else if (ch >= '0' && ch <= '9') {
173          cursor++;
174        boolean dotted = false;
175        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
176          if (source.charAt(cursor) == '.')
177            dotted = true;
178          cursor++;
179        }
180        if (source.charAt(cursor-1) == '.')
181          cursor--;
182        current = source.substring(currentStart, cursor);
183      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
184        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
185            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
186          cursor++;
187        current = source.substring(currentStart, cursor);
188      } else if (ch == '%') {
189        cursor++;
190        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
191          cursor++;
192          while (cursor < source.length() && (source.charAt(cursor) != '`'))
193            cursor++;
194          cursor++;
195        } else
196        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
197            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
198          cursor++;
199        current = source.substring(currentStart, cursor);
200      } else if (ch == '/') {
201        cursor++;
202        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
203          // this is en error - should already have been skipped
204          error("This shoudn't happen?");
205        }
206        current = source.substring(currentStart, cursor);
207      } else if (ch == '$') {
208        cursor++;
209        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
210          cursor++;
211        current = source.substring(currentStart, cursor);
212      } else if (ch == '{') {
213        cursor++;
214        ch = source.charAt(cursor);
215        if (ch == '}')
216          cursor++;
217        current = source.substring(currentStart, cursor);
218      } else if (ch == '"') {
219        cursor++;
220        boolean escape = false;
221        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
222          if (escape)
223            escape = false;
224          else 
225            escape = (source.charAt(cursor) == '\\');
226          cursor++;
227        }
228        if (cursor == source.length())
229          throw error("Unterminated string");
230        cursor++;
231        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
232      } else if (ch == '`') {
233        cursor++;
234        boolean escape = false;
235        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
236          if (escape)
237            escape = false;
238          else 
239            escape = (source.charAt(cursor) == '\\');
240          cursor++;
241        }
242        if (cursor == source.length())
243          throw error("Unterminated string");
244        cursor++;
245        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
246      } else if (ch == '\''){
247        cursor++;
248        char ech = ch;
249        boolean escape = false;
250        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
251          if (escape)
252            escape = false;
253          else 
254            escape = (source.charAt(cursor) == '\\');
255          cursor++;
256        }
257        if (cursor == source.length())
258          throw error("Unterminated string");
259        cursor++;
260        current = source.substring(currentStart, cursor);
261        if (ech == '\'')
262          current = "\'"+current.substring(1, current.length() - 1)+"\'";
263      } else if (ch == '`') {
264        cursor++;
265        boolean escape = false;
266        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
267          if (escape)
268            escape = false;
269          else 
270            escape = (source.charAt(cursor) == '\\');
271          cursor++;
272        }
273        if (cursor == source.length())
274          throw error("Unterminated string");
275        cursor++;
276        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
277      } else if (ch == '@'){
278        int start = cursor;
279        cursor++;
280        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
281          cursor++;          
282        current = source.substring(currentStart, cursor);
283      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
284        cursor++;
285        current = source.substring(currentStart, cursor);
286      }
287    }
288  }
289
290  private void skipWhitespaceAndComments() {
291    comments.clear();
292    boolean last13 = false;
293    boolean done = false;
294    while (cursor < source.length() && !done) {
295      if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2))) {
296        int start = cursor+2;
297        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) { 
298          cursor++;        
299        }
300        comments.add(source.substring(start, cursor).trim());
301      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) {
302        int start = cursor+2;
303        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 
304          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
305          cursor++;        
306        }
307        if (cursor >= source.length() -1) {
308          error("Unfinished comment");
309        } else {
310          comments.add(source.substring(start, cursor).trim());
311          cursor = cursor + 2;
312        }
313      } else if (Character.isWhitespace(source.charAt(cursor))) {
314        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
315        cursor++;
316      } else {
317        done = true;
318      }
319    }
320  }
321  
322  private boolean isDateChar(char ch,int start) {
323    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
324    
325    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
326  }
327  public boolean isOp() {
328    return ExpressionNode.Operation.fromCode(current) != null;
329  }
330  public boolean done() {
331    return currentStart >= source.length();
332  }
333  public int nextId() {
334    id++;
335    return id;
336  }
337  public SourceLocation getCurrentStartLocation() {
338    return currentStartLocation;
339  }
340  
341  // special case use
342  public void setCurrent(String current) {
343    this.current = current;
344  }
345
346  public boolean hasComments() {
347    return comments.size() > 0;
348  }
349
350  public List<String> getComments() {
351    return comments;
352  }
353
354  public String getAllComments() {
355    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
356    b.addAll(comments);
357    comments.clear();
358    return b.toString();
359  }
360
361  public String getFirstComment() {
362    if (hasComments()) {
363      String s = comments.get(0);
364      comments.remove(0);
365      return s;      
366    } else {
367      return null;
368    }
369  }
370
371  public boolean hasToken(String kw) {
372    return !done() && kw.equals(current);
373  }
374  public boolean hasToken(String... names) {
375    if (done()) 
376      return false;
377    for (String s : names)
378      if (s.equals(current))
379        return true;
380    return false;
381  }
382  
383  public void token(String kw) throws FHIRLexerException {
384    if (!kw.equals(current)) 
385      throw error("Found \""+current+"\" expecting \""+kw+"\"");
386    next();
387  }
388  
389  public String readConstant(String desc) throws FHIRLexerException {
390    if (!isStringConstant())
391      throw error("Found "+current+" expecting \"["+desc+"]\"");
392
393    return processConstant(take());
394  }
395
396  public String readFixedName(String desc) throws FHIRLexerException {
397    if (!isFixedName())
398      throw error("Found "+current+" expecting \"["+desc+"]\"");
399
400    return processFixedName(take());
401  }
402
403  public String processConstant(String s) throws FHIRLexerException {
404    StringBuilder b = new StringBuilder();
405    int i = 1;
406    while (i < s.length()-1) {
407      char ch = s.charAt(i);
408      if (ch == '\\') {
409        i++;
410        switch (s.charAt(i)) {
411        case 't': 
412          b.append('\t');
413          break;
414        case 'r':
415          b.append('\r');
416          break;
417        case 'n': 
418          b.append('\n');
419          break;
420        case 'f': 
421          b.append('\f');
422          break;
423        case '\'':
424          b.append('\'');
425          break;
426        case '"':
427          b.append('"');
428          break;
429        case '`':
430          b.append('`');
431          break;
432        case '\\': 
433          b.append('\\');
434          break;
435        case '/': 
436          b.append('/');
437          break;
438        case 'u':
439          i++;
440          int uc = Integer.parseInt(s.substring(i, i+4), 16);
441          b.append((char) uc);
442          i = i + 4;
443          break;
444        default:
445          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
446        }
447      } else {
448        b.append(ch);
449        i++;
450      }
451    }
452    return b.toString();
453  }
454  
455  public String processFixedName(String s) throws FHIRLexerException {
456    StringBuilder b = new StringBuilder();
457    int i = 1;
458    while (i < s.length()-1) {
459      char ch = s.charAt(i);
460      if (ch == '\\') {
461        i++;
462        switch (s.charAt(i)) {
463        case 't': 
464          b.append('\t');
465          break;
466        case 'r':
467          b.append('\r');
468          break;
469        case 'n': 
470          b.append('\n');
471          break;
472        case 'f': 
473          b.append('\f');
474          break;
475        case '\'':
476          b.append('\'');
477          break;
478        case '"':
479          b.append('"');
480          break;
481        case '\\': 
482          b.append('\\');
483          break;
484        case '/': 
485          b.append('/');
486          break;
487        case 'u':
488          i++;
489          int uc = Integer.parseInt(s.substring(i, i+4), 16);
490          b.append((char) uc);
491          i = i + 4;
492          break;
493        default:
494          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
495        }
496      } else {
497        b.append(ch);
498        i++;
499      }
500    }
501    return b.toString();
502  }
503
504  public void skipToken(String token) throws FHIRLexerException {
505    if (getCurrent().equals(token))
506      next();
507    
508  }
509  public String takeDottedToken() throws FHIRLexerException {
510    StringBuilder b = new StringBuilder();
511    b.append(take());
512    while (!done() && getCurrent().equals(".")) {
513      b.append(take());
514      b.append(take());
515    }
516    return b.toString();
517  }
518  
519  public int getCurrentStart() {
520    return currentStart;
521  }
522  public String getSource() {
523    return source;
524  }
525
526}