001package org.hl7.fhir.r4.utils;
002
003import org.hl7.fhir.r4.model.ExpressionNode;
004import org.hl7.fhir.r4.model.ExpressionNode.SourceLocation;
005import org.hl7.fhir.exceptions.FHIRException;
006import org.hl7.fhir.utilities.Utilities;
007
008// shared lexer for concrete syntaxes 
009// - FluentPath
010// - Mapping language
011
012public class FHIRLexer {
013  public class FHIRLexerException extends FHIRException {
014
015    public FHIRLexerException() {
016      super();
017    }
018
019    public FHIRLexerException(String message, Throwable cause) {
020      super(message, cause);
021    }
022
023    public FHIRLexerException(String message) {
024      super(message);
025    }
026
027    public FHIRLexerException(Throwable cause) {
028      super(cause);
029    }
030
031  }
032  private String source;
033  private int cursor;
034  private int currentStart;
035  private String current;
036  private SourceLocation currentLocation;
037  private SourceLocation currentStartLocation;
038  private int id;
039
040  public FHIRLexer(String source) throws FHIRLexerException {
041    this.source = source;
042    currentLocation = new SourceLocation(1, 1);
043    next();
044  }
045  public String getCurrent() {
046    return current;
047  }
048  public SourceLocation getCurrentLocation() {
049    return currentLocation;
050  }
051
052  public boolean isConstant(boolean incDoubleQuotes) {
053    return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 
054        current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 
055        current.equals("true") || current.equals("false") || current.equals("{}");
056  }
057
058  public boolean isStringConstant() {
059    return current.charAt(0) == '\'' || current.charAt(0) == '"';
060  }
061
062  public String take() throws FHIRLexerException {
063    String s = current;
064    next();
065    return s;
066  }
067
068  public int takeInt() throws FHIRLexerException {
069    String s = current;
070    if (!Utilities.isInteger(s))
071      throw error("Found "+current+" expecting an integer");
072    next();
073    return Integer.parseInt(s);
074  }
075
076  public boolean isToken() {
077    if (Utilities.noString(current))
078      return false;
079
080    if (current.startsWith("$"))
081      return true;
082
083    if (current.equals("*") || current.equals("**"))
084      return true;
085
086    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
087      for (int i = 1; i < current.length(); i++) 
088        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
089            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
090          return false;
091      return true;
092    }
093    return false;
094  }
095
096  public FHIRLexerException error(String msg) {
097    return error(msg, currentLocation.toString());
098  }
099
100  public FHIRLexerException error(String msg, String location) {
101    return new FHIRLexerException("Error at "+location+": "+msg);
102  }
103
104  public void next() throws FHIRLexerException {
105    current = null;
106    boolean last13 = false;
107    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
108      if (source.charAt(cursor) == '\r') {
109        currentLocation.setLine(currentLocation.getLine() + 1);
110        currentLocation.setColumn(1);
111        last13 = true;
112      } else if (!last13 && (source.charAt(cursor) == '\n')) {
113        currentLocation.setLine(currentLocation.getLine() + 1);
114        currentLocation.setColumn(1);
115        last13 = false;
116      } else {
117        last13 = false;
118        currentLocation.setColumn(currentLocation.getColumn() + 1);
119      }
120      cursor++;
121    }
122    currentStart = cursor;
123    currentStartLocation = currentLocation;
124    if (cursor < source.length()) {
125      char ch = source.charAt(cursor);
126      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
127        cursor++;
128        if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-')) 
129          cursor++;
130        current = source.substring(currentStart, cursor);
131      } else if (ch == '.' ) {
132        cursor++;
133        if (cursor < source.length() && (source.charAt(cursor) == '.')) 
134          cursor++;
135        current = source.substring(currentStart, cursor);
136      } else if (ch >= '0' && ch <= '9') {
137          cursor++;
138        boolean dotted = false;
139        while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) {
140          if (source.charAt(cursor) == '.')
141            dotted = true;
142          cursor++;
143        }
144        if (source.charAt(cursor-1) == '.')
145          cursor--;
146        current = source.substring(currentStart, cursor);
147      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
148        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
149            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 
150          cursor++;
151        current = source.substring(currentStart, cursor);
152      } else if (ch == '%') {
153        cursor++;
154        if (cursor < source.length() && (source.charAt(cursor) == '"')) {
155          cursor++;
156          while (cursor < source.length() && (source.charAt(cursor) != '"'))
157            cursor++;
158          cursor++;
159        } else
160        while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 
161            (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-'))
162          cursor++;
163        current = source.substring(currentStart, cursor);
164      } else if (ch == '/') {
165        cursor++;
166        if (cursor < source.length() && (source.charAt(cursor) == '/')) {
167          cursor++;
168          while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 
169            cursor++;
170        }
171        current = source.substring(currentStart, cursor);
172      } else if (ch == '$') {
173        cursor++;
174        while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z'))
175          cursor++;
176        current = source.substring(currentStart, cursor);
177      } else if (ch == '{') {
178        cursor++;
179        ch = source.charAt(cursor);
180        if (ch == '}')
181          cursor++;
182        current = source.substring(currentStart, cursor);
183      } else if (ch == '"'){
184        cursor++;
185        boolean escape = false;
186        while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) {
187          if (escape)
188            escape = false;
189          else 
190            escape = (source.charAt(cursor) == '\\');
191          cursor++;
192        }
193        if (cursor == source.length())
194          throw error("Unterminated string");
195        cursor++;
196        current = "\""+source.substring(currentStart+1, cursor-1)+"\"";
197      } else if (ch == '\''){
198        cursor++;
199        char ech = ch;
200        boolean escape = false;
201        while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) {
202          if (escape)
203            escape = false;
204          else 
205            escape = (source.charAt(cursor) == '\\');
206          cursor++;
207        }
208        if (cursor == source.length())
209          throw error("Unterminated string");
210        cursor++;
211        current = source.substring(currentStart, cursor);
212        if (ech == '\'')
213          current = "\'"+current.substring(1, current.length() - 1)+"\'";
214      } else if (ch == '@'){
215        int start = cursor;
216        cursor++;
217        while (cursor < source.length() && isDateChar(source.charAt(cursor), start))
218          cursor++;          
219        current = source.substring(currentStart, cursor);
220      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
221        cursor++;
222        current = source.substring(currentStart, cursor);
223      }
224    }
225  }
226
227
228  private boolean isDateChar(char ch,int start) {
229    int eot = source.charAt(start+1) == 'T' ? 10 : 20;
230    
231    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1)));
232  }
233  public boolean isOp() {
234    return ExpressionNode.Operation.fromCode(current) != null;
235  }
236  public boolean done() {
237    return currentStart >= source.length();
238  }
239  public int nextId() {
240    id++;
241    return id;
242  }
243  public SourceLocation getCurrentStartLocation() {
244    return currentStartLocation;
245  }
246  
247  // special case use
248  public void setCurrent(String current) {
249    this.current = current;
250  }
251
252  public boolean hasComment() {
253    return !done() && current.startsWith("//");
254  }
255  public boolean hasToken(String kw) {
256    return !done() && kw.equals(current);
257  }
258  public boolean hasToken(String... names) {
259    if (done()) 
260      return false;
261    for (String s : names)
262      if (s.equals(current))
263        return true;
264    return false;
265  }
266  
267  public void token(String kw) throws FHIRLexerException {
268    if (!kw.equals(current)) 
269      throw error("Found \""+current+"\" expecting \""+kw+"\"");
270    next();
271  }
272  
273  public String readConstant(String desc) throws FHIRLexerException {
274    if (!isStringConstant())
275      throw error("Found "+current+" expecting \"["+desc+"]\"");
276
277    return processConstant(take());
278  }
279
280  public String processConstant(String s) throws FHIRLexerException {
281    StringBuilder b = new StringBuilder();
282    int i = 1;
283    while (i < s.length()-1) {
284      char ch = s.charAt(i);
285      if (ch == '\\') {
286        i++;
287        switch (s.charAt(i)) {
288        case 't': 
289          b.append('\t');
290          break;
291        case 'r':
292          b.append('\r');
293          break;
294        case 'n': 
295          b.append('\n');
296          break;
297        case 'f': 
298          b.append('\f');
299          break;
300        case '\'':
301          b.append('\'');
302          break;
303        case '"':
304          b.append('"');
305          break;
306        case '\\': 
307          b.append('\\');
308          break;
309        case '/': 
310          b.append('\\');
311          break;
312        case 'u':
313          i++;
314          int uc = Integer.parseInt(s.substring(i, i+4), 16);
315          b.append((char) uc);
316          i = i + 4;
317          break;
318        default:
319          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
320        }
321      } else {
322        b.append(ch);
323        i++;
324      }
325    }
326    return b.toString();
327
328  }
329  public void skipToken(String token) throws FHIRLexerException {
330    if (getCurrent().equals(token))
331      next();
332    
333  }
334  public String takeDottedToken() throws FHIRLexerException {
335    StringBuilder b = new StringBuilder();
336    b.append(take());
337    while (!done() && getCurrent().equals(".")) {
338      b.append(take());
339      b.append(take());
340    }
341    return b.toString();
342  }
343  
344  void skipComments() throws FHIRLexerException {
345    while (!done() && hasComment())
346      next();
347  }
348
349}