001package org.hl7.fhir.r4.utils;
002
003import org.hl7.fhir.exceptions.FHIRException;
004
005/*
006  Copyright (c) 2011+, HL7, Inc.
007  All rights reserved.
008  
009  Redistribution and use in source and binary forms, with or without modification, 
010  are permitted provided that the following conditions are met:
011    
012   * Redistributions of source code must retain the above copyright notice, this 
013     list of conditions and the following disclaimer.
014   * Redistributions in binary form must reproduce the above copyright notice, 
015     this list of conditions and the following disclaimer in the documentation 
016     and/or other materials provided with the distribution.
017   * Neither the name of HL7 nor the names of its contributors may be used to 
018     endorse or promote products derived from this software without specific 
019     prior written permission.
020  
021  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
022  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
023  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
024  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
025  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
026  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
027  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
028  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
029  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
030  POSSIBILITY OF SUCH DAMAGE.
031  
032 */
033
034
035
036import org.hl7.fhir.r4.model.ExpressionNode;
037import org.hl7.fhir.r4.model.ExpressionNode.SourceLocation;
038import org.hl7.fhir.utilities.Utilities;
039
040// shared lexer for concrete syntaxes 
041// - FluentPath
042// - Mapping language
043
044public class FHIRLexer {
045  public class FHIRLexerException extends FHIRException {
046
047    public FHIRLexerException() {
048      super();
049    }
050
051    public FHIRLexerException(String message, Throwable cause) {
052      super(message, cause);
053    }
054
055    public FHIRLexerException(String message) {
056      super(message);
057    }
058
059    public FHIRLexerException(Throwable cause) {
060      super(cause);
061    }
062
063  }
064  private String source;
065  private int cursor;
066  private int currentStart;
067  private String current;
068  private SourceLocation currentLocation;
069  private SourceLocation currentStartLocation;
070  private int id;
071  private String name;
072
073  public FHIRLexer(String source, String name) throws FHIRLexerException {
074    this.source = source;
075    this.name = name == null ? "??" : name;
076    currentLocation = new SourceLocation(1, 1);
077    next();
078  }
079  public FHIRLexer(String source, int i) throws FHIRLexerException {
080    this.source = source;
081    this.cursor = i;
082    currentLocation = new SourceLocation(1, 1);
083    next();
084  }
085  public String getCurrent() {
086    return current;
087  }
088  public SourceLocation getCurrentLocation() {
089    return currentLocation;
090  }
091
092  public boolean isConstant() {
093    return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 
094        current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 
095        current.equals("true") || current.equals("false") || current.equals("{}");
096  }
097
098  public boolean isFixedName() {
099    return current != null && (current.charAt(0) == '`');
100  }
101
102  public boolean isStringConstant() {
103    return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`';
104  }
105
106  public String take() throws FHIRLexerException {
107    String s = current;
108    next();
109    return s;
110  }
111
112  public int takeInt() throws FHIRLexerException {
113    String s = current;
114    if (!Utilities.isInteger(s))
115      throw error("Found "+current+" expecting an integer");
116    next();
117    return Integer.parseInt(s);
118  }
119
120  public boolean isToken() {
121    if (Utilities.noString(current))
122      return false;
123
124    if (current.startsWith("$"))
125      return true;
126
127    if (current.equals("*") || current.equals("**"))
128      return true;
129
130    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
131      for (int i = 1; i < current.length(); i++) 
132        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
133            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
134          return false;
135      return true;
136    }
137    return false;
138  }
139
140  public FHIRLexerException error(String msg) {
141    return error(msg, currentLocation.toString());
142  }
143
144  public FHIRLexerException error(String msg, String location) {
145    return new FHIRLexerException("Error in "+name+" at "+location+": "+msg);
146  }
147
148  public void next() throws FHIRLexerException {
149    current = null;
150    boolean last13 = false;
151    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
152      if (source.charAt(cursor) == '\r') {
153        currentLocation.setLine(currentLocation.getLine() + 1);
154        currentLocation.setColumn(1);
155        last13 = true;
156      } else if (!last13 && (source.charAt(cursor) == '\n')) {
157        currentLocation.setLine(currentLocation.getLine() + 1);
158        currentLocation.setColumn(1);
159        last13 = false;
160      } else {
161        last13 = false;
162        currentLocation.setColumn(currentLocation.getColumn() + 1);
163      }
164      cursor++;
165    }
166    currentStart = cursor;
167    currentStartLocation = currentLocation;
168    if (cursor < source.length()) {
169      char ch = source.charAt(cursor);
170      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
171        cursor++;
172        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 
173          cursor++;
174        current = source.substring(currentStart, cursor);
175      } else if (ch == '.' ) {
176        cursor++;
177        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
178          cursor++;
179        current = source.substring(currentStart, cursor);
180      } else if (ch >= '0' && ch <= '9') {
181          cursor++;
182        boolean dotted = false;
183        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
184          if (source.charAt(cursor) == '.')
185            dotted = true;
186          cursor++;
187        }
188        if (source.charAt(cursor-1) == '.')
189          cursor--;
190        current = source.substring(currentStart, cursor);
191      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
192        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
193            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
194          cursor++;
195        current = source.substring(currentStart, cursor);
196      } else if (ch == '%') {
197        cursor++;
198        if (cursor < source.length() && (source.charAt(cursor) == '`')) {
199          cursor++;
200          while (cursor < source.length() && (source.charAt(cursor) != '`'))
201            cursor++;
202          cursor++;
203        } else
204        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
205            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
206          cursor++;
207        current = source.substring(currentStart, cursor);
208      } else if (ch == '/') {
209        cursor++;
210        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
211          cursor++;
212          while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 
213            cursor++;
214        }
215        current = source.substring(currentStart, cursor);
216      } else if (ch == '$') {
217        cursor++;
218        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
219          cursor++;
220        current = source.substring(currentStart, cursor);
221      } else if (ch == '{') {
222        cursor++;
223        ch = source.charAt(cursor);
224        if (ch == '}')
225          cursor++;
226        current = source.substring(currentStart, cursor);
227      } else if (ch == '"') {
228        cursor++;
229        boolean escape = false;
230        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
231          if (escape)
232            escape = false;
233          else 
234            escape = (source.charAt(cursor) == '\\');
235          cursor++;
236        }
237        if (cursor == source.length())
238          throw error("Unterminated string");
239        cursor++;
240        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
241      } else if (ch == '`') {
242        cursor++;
243        boolean escape = false;
244        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
245          if (escape)
246            escape = false;
247          else 
248            escape = (source.charAt(cursor) == '\\');
249          cursor++;
250        }
251        if (cursor == source.length())
252          throw error("Unterminated string");
253        cursor++;
254        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
255      } else if (ch == '\''){
256        cursor++;
257        char ech = ch;
258        boolean escape = false;
259        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
260          if (escape)
261            escape = false;
262          else 
263            escape = (source.charAt(cursor) == '\\');
264          cursor++;
265        }
266        if (cursor == source.length())
267          throw error("Unterminated string");
268        cursor++;
269        current = source.substring(currentStart, cursor);
270        if (ech == '\'')
271          current = "\'"+current.substring(1, current.length() - 1)+"\'";
272      } else if (ch == '`') {
273        cursor++;
274        boolean escape = false;
275        while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) {
276          if (escape)
277            escape = false;
278          else 
279            escape = (source.charAt(cursor) == '\\');
280          cursor++;
281        }
282        if (cursor == source.length())
283          throw error("Unterminated string");
284        cursor++;
285        current = "`"+source.substring(currentStart+1, cursor-1)+"`";
286      } else if (ch == '@'){
287        int start = cursor;
288        cursor++;
289        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
290          cursor++;          
291        current = source.substring(currentStart, cursor);
292      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
293        cursor++;
294        current = source.substring(currentStart, cursor);
295      }
296    }
297  }
298
299
300  private boolean isDateChar(char ch,int start) {
301    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
302    
303    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
304  }
305  public boolean isOp() {
306    return ExpressionNode.Operation.fromCode(current) != null;
307  }
308  public boolean done() {
309    return currentStart >= source.length();
310  }
311  public int nextId() {
312    id++;
313    return id;
314  }
315  public SourceLocation getCurrentStartLocation() {
316    return currentStartLocation;
317  }
318  
319  // special case use
320  public void setCurrent(String current) {
321    this.current = current;
322  }
323
324  public boolean hasComment() {
325    return !done() && current.startsWith("//");
326  }
327  public boolean hasToken(String kw) {
328    return !done() && kw.equals(current);
329  }
330  public boolean hasToken(String... names) {
331    if (done()) 
332      return false;
333    for (String s : names)
334      if (s.equals(current))
335        return true;
336    return false;
337  }
338  
339  public void token(String kw) throws FHIRLexerException {
340    if (!kw.equals(current)) 
341      throw error("Found \""+current+"\" expecting \""+kw+"\"");
342    next();
343  }
344  
345  public String readConstant(String desc) throws FHIRLexerException {
346    if (!isStringConstant())
347      throw error("Found "+current+" expecting \"["+desc+"]\"");
348
349    return processConstant(take());
350  }
351
352  public String readFixedName(String desc) throws FHIRLexerException {
353    if (!isFixedName())
354      throw error("Found "+current+" expecting \"["+desc+"]\"");
355
356    return processFixedName(take());
357  }
358
359  public String processConstant(String s) throws FHIRLexerException {
360    StringBuilder b = new StringBuilder();
361    int i = 1;
362    while (i < s.length()-1) {
363      char ch = s.charAt(i);
364      if (ch == '\\') {
365        i++;
366        switch (s.charAt(i)) {
367        case 't': 
368          b.append('\t');
369          break;
370        case 'r':
371          b.append('\r');
372          break;
373        case 'n': 
374          b.append('\n');
375          break;
376        case 'f': 
377          b.append('\f');
378          break;
379        case '\'':
380          b.append('\'');
381          break;
382        case '"':
383          b.append('"');
384          break;
385        case '`':
386          b.append('`');
387          break;
388        case '\\': 
389          b.append('\\');
390          break;
391        case '/': 
392          b.append('/');
393          break;
394        case 'u':
395          i++;
396          int uc = Integer.parseInt(s.substring(i, i+4), 16);
397          b.append((char) uc);
398          i = i + 4;
399          break;
400        default:
401          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
402        }
403      } else {
404        b.append(ch);
405        i++;
406      }
407    }
408    return b.toString();
409  }
410  
411  public String processFixedName(String s) throws FHIRLexerException {
412    StringBuilder b = new StringBuilder();
413    int i = 1;
414    while (i < s.length()-1) {
415      char ch = s.charAt(i);
416      if (ch == '\\') {
417        i++;
418        switch (s.charAt(i)) {
419        case 't': 
420          b.append('\t');
421          break;
422        case 'r':
423          b.append('\r');
424          break;
425        case 'n': 
426          b.append('\n');
427          break;
428        case 'f': 
429          b.append('\f');
430          break;
431        case '\'':
432          b.append('\'');
433          break;
434        case '"':
435          b.append('"');
436          break;
437        case '\\': 
438          b.append('\\');
439          break;
440        case '/': 
441          b.append('/');
442          break;
443        case 'u':
444          i++;
445          int uc = Integer.parseInt(s.substring(i, i+4), 16);
446          b.append((char) uc);
447          i = i + 4;
448          break;
449        default:
450          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
451        }
452      } else {
453        b.append(ch);
454        i++;
455      }
456    }
457    return b.toString();
458  }
459
460  public void skipToken(String token) throws FHIRLexerException {
461    if (getCurrent().equals(token))
462      next();
463    
464  }
465  public String takeDottedToken() throws FHIRLexerException {
466    StringBuilder b = new StringBuilder();
467    b.append(take());
468    while (!done() && getCurrent().equals(".")) {
469      b.append(take());
470      b.append(take());
471    }
472    return b.toString();
473  }
474  
475  void skipComments() throws FHIRLexerException {
476    while (!done() && hasComment())
477      next();
478  }
479  public int getCurrentStart() {
480    return currentStart;
481  }
482
483}