001package org.hl7.fhir.dstu2016may.utils;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import org.hl7.fhir.dstu2016may.model.ExpressionNode;
035import org.hl7.fhir.dstu2016may.model.ExpressionNode.SourceLocation;
036import org.hl7.fhir.exceptions.FHIRException;
037import org.hl7.fhir.utilities.Utilities;
038
039// shared lexer for concrete syntaxes 
040// - FluentPath
041// - Mapping language
042
043public class FHIRLexer {
044  public class FHIRLexerException extends FHIRException {
045
046    public FHIRLexerException() {
047      super();
048    }
049
050    public FHIRLexerException(String message, Throwable cause) {
051      super(message, cause);
052    }
053
054    public FHIRLexerException(String message) {
055      super(message);
056    }
057
058    public FHIRLexerException(Throwable cause) {
059      super(cause);
060    }
061
062  }
063  private String path;
064  private int cursor;
065  private int currentStart;
066  private String current;
067  private SourceLocation currentLocation;
068  private SourceLocation currentStartLocation;
069  private int id;
070
071  public FHIRLexer(String source) throws FHIRLexerException {
072    this.path = source;
073    currentLocation = new SourceLocation(1, 1);
074    next();
075  }
076  public String getCurrent() {
077    return current;
078  }
079  public SourceLocation getCurrentLocation() {
080    return currentLocation;
081  }
082
083  public boolean isConstant(boolean incDoubleQuotes) {
084    return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || current.charAt(0) == '-' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || current.equals("true") || current.equals("false") || current.equals("{}");
085  }
086
087  public boolean isStringConstant() {
088    return current.charAt(0) == '\'' || current.charAt(0) == '"';
089  }
090
091  public String take() throws FHIRLexerException {
092    String s = current;
093    next();
094    return s;
095  }
096
097  public boolean isToken() {
098    if (Utilities.noString(current))
099      return false;
100
101    if (current.startsWith("$"))
102      return true;
103
104    if (current.equals("*") || current.equals("**"))
105      return true;
106
107    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
108      for (int i = 1; i < current.length(); i++) 
109        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
110            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
111          return false;
112      return true;
113    }
114    return false;
115  }
116
117  public FHIRLexerException error(String msg) {
118    return error(msg, currentLocation.toString());
119  }
120
121  public FHIRLexerException error(String msg, String location) {
122    return new FHIRLexerException("Error in "+path+" at "+location+": "+msg);
123  }
124
125  public void next() throws FHIRLexerException {
126    current = null;
127    boolean last13 = false;
128    while (cursor < path.length() && Character.isWhitespace(path.charAt(cursor))) {
129      if (path.charAt(cursor) == '\r') {
130        currentLocation.setLine(currentLocation.getLine() + 1);
131        currentLocation.setColumn(1);
132        last13 = true;
133      } else if (!last13 && (path.charAt(cursor) == '\n')) {
134        currentLocation.setLine(currentLocation.getLine() + 1);
135        currentLocation.setColumn(1);
136        last13 = false;
137      } else {
138        last13 = false;
139        currentLocation.setColumn(currentLocation.getColumn() + 1);
140      }
141      cursor++;
142    }
143    currentStart = cursor;
144    currentStartLocation = currentLocation;
145    if (cursor < path.length()) {
146      char ch = path.charAt(cursor);
147      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
148        cursor++;
149        if (cursor < path.length() && (path.charAt(cursor) == '=' || path.charAt(cursor) == '~' || path.charAt(cursor) == '-')) 
150          cursor++;
151        current = path.substring(currentStart, cursor);
152      } else if (ch >= '0' && ch <= '9') {
153          cursor++;
154        boolean dotted = false;
155        while (cursor < path.length() && ((path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || (path.charAt(cursor) == '.') && !dotted)) {
156          if (path.charAt(cursor) == '.')
157            dotted = true;
158          cursor++;
159        }
160        if (path.charAt(cursor-1) == '.')
161          cursor--;
162        current = path.substring(currentStart, cursor);
163      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
164        while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 
165            (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == '_')) 
166          cursor++;
167        current = path.substring(currentStart, cursor);
168      } else if (ch == '%') {
169        cursor++;
170        if (cursor < path.length() && (path.charAt(cursor) == '"')) {
171          cursor++;
172          while (cursor < path.length() && (path.charAt(cursor) != '"'))
173            cursor++;
174          cursor++;
175        } else
176        while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 
177            (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == ':' || path.charAt(cursor) == '-'))
178          cursor++;
179        current = path.substring(currentStart, cursor);
180      } else if (ch == '/') {
181        cursor++;
182        if (cursor < path.length() && (path.charAt(cursor) == '/')) {
183          cursor++;
184          while (cursor < path.length() && !((path.charAt(cursor) == '\r') || path.charAt(cursor) == '\n')) 
185            cursor++;
186        }
187        current = path.substring(currentStart, cursor);
188      } else if (ch == '$') {
189        cursor++;
190        while (cursor < path.length() && (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z'))
191          cursor++;
192        current = path.substring(currentStart, cursor);
193      } else if (ch == '{') {
194        cursor++;
195        ch = path.charAt(cursor);
196        if (ch == '}')
197          cursor++;
198        current = path.substring(currentStart, cursor);
199      } else if (ch == '"'){
200        cursor++;
201        boolean escape = false;
202        while (cursor < path.length() && (escape || path.charAt(cursor) != '"')) {
203          if (escape)
204            escape = false;
205          else 
206            escape = (path.charAt(cursor) == '\\');
207          cursor++;
208        }
209        if (cursor == path.length())
210          throw error("Unterminated string");
211        cursor++;
212        current = "\""+path.substring(currentStart+1, cursor-1)+"\"";
213      } else if (ch == '\''){
214        cursor++;
215        char ech = ch;
216        boolean escape = false;
217        while (cursor < path.length() && (escape || path.charAt(cursor) != ech)) {
218          if (escape)
219            escape = false;
220          else 
221            escape = (path.charAt(cursor) == '\\');
222          cursor++;
223        }
224        if (cursor == path.length())
225          throw error("Unterminated string");
226        cursor++;
227        current = path.substring(currentStart, cursor);
228        if (ech == '\'')
229          current = "\'"+current.substring(1, current.length() - 1)+"\'";
230      } else if (ch == '@'){
231        cursor++;
232        while (cursor < path.length() && isDateChar(path.charAt(cursor)))
233          cursor++;          
234        current = path.substring(currentStart, cursor);
235      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
236        cursor++;
237        current = path.substring(currentStart, cursor);
238      }
239    }
240  }
241
242
243  private boolean isDateChar(char ch) {
244    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch);
245  }
246  public boolean isOp() {
247    return ExpressionNode.Operation.fromCode(current) != null;
248  }
249  public boolean done() {
250    return currentStart >= path.length();
251  }
252  public int nextId() {
253    id++;
254    return id;
255  }
256  public SourceLocation getCurrentStartLocation() {
257    return currentStartLocation;
258  }
259  
260  // special case use
261  public void setCurrent(String current) {
262    this.current = current;
263  }
264
265  public boolean hasComment() {
266    return !done() && current.startsWith("//");
267  }
268  public boolean hasToken(String kw) {
269      return !done() && kw.equals(current);
270  }
271  public void token(String kw) throws FHIRLexerException {
272    if (!kw.equals(current)) 
273      throw error("Found \""+current+"\" expecting \""+kw+"\"");
274    next();
275  }
276  public String readConstant(String desc) throws FHIRLexerException {
277    if (!isStringConstant())
278      throw error("Found "+current+" expecting \"["+desc+"]\"");
279
280    return processConstant(take());
281  }
282
283  public String processConstant(String s) throws FHIRLexerException {
284    StringBuilder b = new StringBuilder();
285    int i = 1;
286    while (i < s.length()-1) {
287      char ch = s.charAt(i);
288      if (ch == '\\') {
289        i++;
290        switch (s.charAt(i)) {
291        case 't': 
292          b.append('\t');
293          break;
294        case 'r':
295          b.append('\r');
296          break;
297        case 'n': 
298          b.append('\n');
299          break;
300        case 'f': 
301          b.append('\f');
302          break;
303        case '\'':
304          b.append('\'');
305          break;
306        case '\\': 
307          b.append('\\');
308          break;
309        case '/': 
310          b.append('\\');
311          break;
312        case 'u':
313          i++;
314          int uc = Integer.parseInt(s.substring(i, i+4), 16);
315          b.append((char) uc);
316          i = i + 4;
317          break;
318        default:
319          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
320        }
321      } else {
322        b.append(ch);
323        i++;
324      }
325    }
326    return b.toString();
327
328  }
329  public void skipToken(String token) throws FHIRLexerException {
330    if (getCurrent().equals(token))
331      next();
332    
333  }
334  public String takeDottedToken() throws FHIRLexerException {
335    StringBuilder b = new StringBuilder();
336    b.append(take());
337    while (!done() && getCurrent().equals(".")) {
338      b.append(take());
339      b.append(take());
340    }
341    return b.toString();
342  }
343  
344  void skipComments() throws FHIRLexerException {
345    while (!done() && hasComment())
346      next();
347  }
348
349}