001package org.hl7.fhir.r4.utils;
002
003import org.hl7.fhir.exceptions.FHIRException;
004
005/*
006  Copyright (c) 2011+, HL7, Inc.
007  All rights reserved.
008  
009  Redistribution and use in source and binary forms, with or without modification, 
010  are permitted provided that the following conditions are met:
011    
012   * Redistributions of source code must retain the above copyright notice, this 
013     list of conditions and the following disclaimer.
014   * Redistributions in binary form must reproduce the above copyright notice, 
015     this list of conditions and the following disclaimer in the documentation 
016     and/or other materials provided with the distribution.
017   * Neither the name of HL7 nor the names of its contributors may be used to 
018     endorse or promote products derived from this software without specific 
019     prior written permission.
020  
021  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
022  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
023  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
024  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
025  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
026  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
027  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
028  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
029  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
030  POSSIBILITY OF SUCH DAMAGE.
031  
032 */
033
034
035
036import org.hl7.fhir.r4.model.ExpressionNode;
037import org.hl7.fhir.utilities.SourceLocation;
038import org.hl7.fhir.utilities.Utilities;
039
040// shared lexer for concrete syntaxes 
041// - FluentPath
042// - Mapping language
043
044public class FHIRLexer {
045  public class FHIRLexerException extends FHIRException {
046
047    public FHIRLexerException() {
048      super();
049    }
050
051    public FHIRLexerException(String message, Throwable cause) {
052      super(message, cause);
053    }
054
055    public FHIRLexerException(String message) {
056      super(message);
057    }
058
059    public FHIRLexerException(Throwable cause) {
060      super(cause);
061    }
062
063  }
064  private String source;
065  private int cursor;
066  private int currentStart;
067  private String current;
068  private SourceLocation currentLocation;
069  private SourceLocation currentStartLocation;
070  private int id;
071  private String name;
072
073  public FHIRLexer(String source, String name) throws FHIRLexerException {
074    this.source = source;
075    this.name = name == null ? "??" : name;
076    currentLocation = new SourceLocation(1, 1);
077    next();
078  }
079  public FHIRLexer(String source, int i) throws FHIRLexerException {
080    this.source = source;
081    this.cursor = i;
082    currentLocation = new SourceLocation(1, 1);
083    next();
084  }
085  public String getCurrent() {
086    return current;
087  }
088  public SourceLocation getCurrentLocation() {
089    return currentLocation;
090  }
091
092  public boolean isConstant() {
093    return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 
094        current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 
095        current.equals("true") || current.equals("false") || current.equals("{}");
096  }
097
098  public boolean isFixedName() {
099    return current != null && (current.charAt(0) == '`');
100  }
101
102  public boolean isStringConstant() {
103    return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`';
104  }
105
106  public String take() throws FHIRLexerException {
107    String s = current;
108    next();
109    return s;
110  }
111
112  public int takeInt() throws FHIRLexerException {
113    String s = current;
114    if (!Utilities.isInteger(s))
115      throw error("Found "+current+" expecting an integer");
116    next();
117    return Integer.parseInt(s);
118  }
119
120  public boolean isToken() {
121    if (Utilities.noString(current))
122      return false;
123
124    if (current.startsWith("$"))
125      return true;
126
127    if (current.equals("*") || current.equals("**"))
128      return true;
129
130    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
131      for (int i = 1; i < current.length(); i++) 
132        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
133            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
134          return false;
135      return true;
136    }
137    return false;
138  }
139
140  public FHIRLexerException error(String msg) {
141    return error(msg, currentLocation.toString());
142  }
143
144  public FHIRLexerException error(String msg, String location) {
145    return new FHIRLexerException("Error in "+name+" at "+location+": "+msg);
146  }
147
148  public void next() throws FHIRLexerException {
149    skipWhitespaceAndComments();
150    current = null;
151    currentStart = cursor;
152    currentStartLocation = currentLocation;
153    if (cursor < source.length()) {
154      char ch = source.charAt(cursor);
155      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
156        cursor++;
157        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
158          cursor++;
159        current = source.substring(currentStart, cursor);
160      } else if (ch == '.' ) {
161        cursor++;
162        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
163          cursor++;
164        current = source.substring(currentStart, cursor);
165      } else if (ch >= '0' && ch <= '9') {
166          cursor++;
167        boolean dotted = false;
168        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
169          if (source.charAt(cursor) == '.')
170            dotted = true;
171          cursor++;
172        }
173        if (source.charAt(cursor-1) == '.')
174          cursor--;
175        current = source.substring(currentStart, cursor);
176      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
177        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
178            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
179          cursor++;
180        current = source.substring(currentStart, cursor);
181      } else if (ch == '%') {
182        cursor++;
183        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
184          cursor++;
185          while (cursor < source.length() && (source.charAt(cursor) != '`'))
186            cursor++;
187          cursor++;
188        } else
189        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
190            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
191          cursor++;
192        current = source.substring(currentStart, cursor);
193      } else if (ch == '/') {
194        cursor++;
195        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
196          // this is en error - should already have been skipped
197          error("This shoudn't happen?");
198        }
199        current = source.substring(currentStart, cursor);
200      } else if (ch == '$') {
201        cursor++;
202        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
203          cursor++;
204        current = source.substring(currentStart, cursor);
205      } else if (ch == '{') {
206        cursor++;
207        ch = source.charAt(cursor);
208        if (ch == '}')
209          cursor++;
210        current = source.substring(currentStart, cursor);
211      } else if (ch == '"') {
212        cursor++;
213        boolean escape = false;
214        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
215          if (escape)
216            escape = false;
217          else 
218            escape = (source.charAt(cursor) == '\\');
219          cursor++;
220        }
221        if (cursor == source.length())
222          throw error("Unterminated string");
223        cursor++;
224        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
225      } else if (ch == '`') {
226        cursor++;
227        boolean escape = false;
228        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
229          if (escape)
230            escape = false;
231          else 
232            escape = (source.charAt(cursor) == '\\');
233          cursor++;
234        }
235        if (cursor == source.length())
236          throw error("Unterminated string");
237        cursor++;
238        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
239      } else if (ch == '\''){
240        cursor++;
241        char ech = ch;
242        boolean escape = false;
243        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
244          if (escape)
245            escape = false;
246          else 
247            escape = (source.charAt(cursor) == '\\');
248          cursor++;
249        }
250        if (cursor == source.length())
251          throw error("Unterminated string");
252        cursor++;
253        current = source.substring(currentStart, cursor);
254        if (ech == '\'')
255          current = "\'"+current.substring(1, current.length() - 1)+"\'";
256      } else if (ch == '`') {
257        cursor++;
258        boolean escape = false;
259        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
260          if (escape)
261            escape = false;
262          else 
263            escape = (source.charAt(cursor) == '\\');
264          cursor++;
265        }
266        if (cursor == source.length())
267          throw error("Unterminated string");
268        cursor++;
269        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
270      } else if (ch == '@'){
271        int start = cursor;
272        cursor++;
273        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
274          cursor++;          
275        current = source.substring(currentStart, cursor);
276      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
277        cursor++;
278        current = source.substring(currentStart, cursor);
279      }
280    }
281  }
282
283
284  private void skipWhitespaceAndComments() {
285    boolean last13 = false;
286    boolean done = false;
287    while (cursor < source.length() && !done) {
288      if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2))) {
289        while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 
290          cursor++;        
291      } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) {
292        while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 
293          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
294          cursor++;        
295        }
296        if (cursor >= source.length() -1) {
297          error("Unfinished comment");
298        } else {
299          cursor = cursor + 2;
300        }
301      } else if (Character.isWhitespace(source.charAt(cursor))) {
302        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
303        cursor++;
304      } else {
305        done = true;
306      }
307    }
308  }
309
310  
311  private boolean isDateChar(char ch,int start) {
312    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
313    
314    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
315  }
316  public boolean isOp() {
317    return ExpressionNode.Operation.fromCode(current) != null;
318  }
319  public boolean done() {
320    return currentStart >= source.length();
321  }
322  public int nextId() {
323    id++;
324    return id;
325  }
326  public SourceLocation getCurrentStartLocation() {
327    return currentStartLocation;
328  }
329  
330  // special case use
331  public void setCurrent(String current) {
332    this.current = current;
333  }
334
335  public boolean hasComment() {
336    return !done() && current.startsWith("//");
337  }
338  public boolean hasToken(String kw) {
339    return !done() && kw.equals(current);
340  }
341  public boolean hasToken(String... names) {
342    if (done()) 
343      return false;
344    for (String s : names)
345      if (s.equals(current))
346        return true;
347    return false;
348  }
349  
350  public void token(String kw) throws FHIRLexerException {
351    if (!kw.equals(current)) 
352      throw error("Found \""+current+"\" expecting \""+kw+"\"");
353    next();
354  }
355  
356  public String readConstant(String desc) throws FHIRLexerException {
357    if (!isStringConstant())
358      throw error("Found "+current+" expecting \"["+desc+"]\"");
359
360    return processConstant(take());
361  }
362
363  public String readFixedName(String desc) throws FHIRLexerException {
364    if (!isFixedName())
365      throw error("Found "+current+" expecting \"["+desc+"]\"");
366
367    return processFixedName(take());
368  }
369
370  public String processConstant(String s) throws FHIRLexerException {
371    StringBuilder b = new StringBuilder();
372    int i = 1;
373    while (i < s.length()-1) {
374      char ch = s.charAt(i);
375      if (ch == '\\') {
376        i++;
377        switch (s.charAt(i)) {
378        case 't': 
379          b.append('\t');
380          break;
381        case 'r':
382          b.append('\r');
383          break;
384        case 'n': 
385          b.append('\n');
386          break;
387        case 'f': 
388          b.append('\f');
389          break;
390        case '\'':
391          b.append('\'');
392          break;
393        case '"':
394          b.append('"');
395          break;
396        case '`':
397          b.append('`');
398          break;
399        case '\\': 
400          b.append('\\');
401          break;
402        case '/': 
403          b.append('/');
404          break;
405        case 'u':
406          i++;
407          int uc = Integer.parseInt(s.substring(i, i+4), 16);
408          b.append((char) uc);
409          i = i + 4;
410          break;
411        default:
412          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
413        }
414      } else {
415        b.append(ch);
416        i++;
417      }
418    }
419    return b.toString();
420  }
421  
422  public String processFixedName(String s) throws FHIRLexerException {
423    StringBuilder b = new StringBuilder();
424    int i = 1;
425    while (i < s.length()-1) {
426      char ch = s.charAt(i);
427      if (ch == '\\') {
428        i++;
429        switch (s.charAt(i)) {
430        case 't': 
431          b.append('\t');
432          break;
433        case 'r':
434          b.append('\r');
435          break;
436        case 'n': 
437          b.append('\n');
438          break;
439        case 'f': 
440          b.append('\f');
441          break;
442        case '\'':
443          b.append('\'');
444          break;
445        case '"':
446          b.append('"');
447          break;
448        case '\\': 
449          b.append('\\');
450          break;
451        case '/': 
452          b.append('/');
453          break;
454        case 'u':
455          i++;
456          int uc = Integer.parseInt(s.substring(i, i+4), 16);
457          b.append((char) uc);
458          i = i + 4;
459          break;
460        default:
461          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
462        }
463      } else {
464        b.append(ch);
465        i++;
466      }
467    }
468    return b.toString();
469  }
470
471  public void skipToken(String token) throws FHIRLexerException {
472    if (getCurrent().equals(token))
473      next();
474    
475  }
476  public String takeDottedToken() throws FHIRLexerException {
477    StringBuilder b = new StringBuilder();
478    b.append(take());
479    while (!done() && getCurrent().equals(".")) {
480      b.append(take());
481      b.append(take());
482    }
483    return b.toString();
484  }
485  
486  void skipComments() throws FHIRLexerException {
487    while (!done() && hasComment())
488      next();
489  }
490  public int getCurrentStart() {
491    return currentStart;
492  }
493
494}