001package org.hl7.fhir.dstu2.utils;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import org.hl7.fhir.dstu2.model.ExpressionNode;
035import org.hl7.fhir.dstu2.model.ExpressionNode.SourceLocation;
036import org.hl7.fhir.exceptions.FHIRException;
037import org.hl7.fhir.utilities.Utilities;
038
039// shared lexer for concrete syntaxes 
040// - FluentPath
041// - Mapping language
042
043public class FHIRLexer {
044  public class FHIRLexerException extends FHIRException {
045
046    public FHIRLexerException() {
047      super();
048    }
049
050    public FHIRLexerException(String message, Throwable cause) {
051      super(message, cause);
052    }
053
054    public FHIRLexerException(String message) {
055      super(message);
056    }
057
058    public FHIRLexerException(Throwable cause) {
059      super(cause);
060    }
061
062  }
  // Full source text being tokenised (assigned from the constructor's argument).
  private String path;
  // Index into path of the next character that next() will examine.
  private int cursor;
  // Index into path at which the current token starts; done() compares it with path.length().
  private int currentStart;
  // Text of the current token; next() resets it to null and leaves it null when no input remains.
  private String current;
  // Line/column tracker that next() updates while it skips whitespace.
  private SourceLocation currentLocation;
  // Location recorded by next() once the whitespace in front of the current token has been skipped.
  private SourceLocation currentStartLocation;
  // Counter behind nextId(); it is incremented before each value is handed out.
  private int id;
070
071  public FHIRLexer(String source) throws FHIRLexerException {
072    this.path = source;
073    currentLocation = new SourceLocation(1, 1);
074    next();
075  }
076  public String getCurrent() {
077    return current;
078  }
079  public SourceLocation getCurrentLocation() {
080    return currentLocation;
081  }
082
083  public boolean isConstant(boolean incDoubleQuotes) {
084    return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 
085        current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 
086        current.equals("true") || current.equals("false") || current.equals("{}");
087  }
088
089  public boolean isStringConstant() {
090    return current.charAt(0) == '\'' || current.charAt(0) == '"';
091  }
092
093  public String take() throws FHIRLexerException {
094    String s = current;
095    next();
096    return s;
097  }
098
099  public boolean isToken() {
100    if (Utilities.noString(current))
101      return false;
102
103    if (current.startsWith("$"))
104      return true;
105
106    if (current.equals("*") || current.equals("**"))
107      return true;
108
109    if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) {
110      for (int i = 1; i < current.length(); i++) 
111        if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') ||
112            (current.charAt(1) >= '0' && current.charAt(1) <= '9')))
113          return false;
114      return true;
115    }
116    return false;
117  }
118
119  public FHIRLexerException error(String msg) {
120    return error(msg, currentLocation.toString());
121  }
122
123  public FHIRLexerException error(String msg, String location) {
124    return new FHIRLexerException("Error in "+path+" at "+location+": "+msg);
125  }
126
127  public void next() throws FHIRLexerException {
128    current = null;
129    boolean last13 = false;
130    while (cursor < path.length() && Character.isWhitespace(path.charAt(cursor))) {
131      if (path.charAt(cursor) == '\r') {
132        currentLocation.setLine(currentLocation.getLine() + 1);
133        currentLocation.setColumn(1);
134        last13 = true;
135      } else if (!last13 && (path.charAt(cursor) == '\n')) {
136        currentLocation.setLine(currentLocation.getLine() + 1);
137        currentLocation.setColumn(1);
138        last13 = false;
139      } else {
140        last13 = false;
141        currentLocation.setColumn(currentLocation.getColumn() + 1);
142      }
143      cursor++;
144    }
145    currentStart = cursor;
146    currentStartLocation = currentLocation;
147    if (cursor < path.length()) {
148      char ch = path.charAt(cursor);
149      if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=')  {
150        cursor++;
151        if (cursor < path.length() && (path.charAt(cursor) == '=' || path.charAt(cursor) == '~' || path.charAt(cursor) == '-')) 
152          cursor++;
153        current = path.substring(currentStart, cursor);
154      } else if (ch == '.' ) {
155        cursor++;
156        if (cursor < path.length() && (path.charAt(cursor) == '.')) 
157          cursor++;
158        current = path.substring(currentStart, cursor);
159      } else if (ch >= '0' && ch <= '9') {
160          cursor++;
161        boolean dotted = false;
162        while (cursor < path.length() && ((path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || (path.charAt(cursor) == '.') && !dotted)) {
163          if (path.charAt(cursor) == '.')
164            dotted = true;
165          cursor++;
166        }
167        if (path.charAt(cursor-1) == '.')
168          cursor--;
169        current = path.substring(currentStart, cursor);
170      }  else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
171        while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 
172            (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == '_')) 
173          cursor++;
174        current = path.substring(currentStart, cursor);
175      } else if (ch == '%') {
176        cursor++;
177        if (cursor < path.length() && (path.charAt(cursor) == '"')) {
178          cursor++;
179          while (cursor < path.length() && (path.charAt(cursor) != '"'))
180            cursor++;
181          cursor++;
182        } else
183        while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 
184            (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == ':' || path.charAt(cursor) == '-'))
185          cursor++;
186        current = path.substring(currentStart, cursor);
187      } else if (ch == '/') {
188        cursor++;
189        if (cursor < path.length() && (path.charAt(cursor) == '/')) {
190          cursor++;
191          while (cursor < path.length() && !((path.charAt(cursor) == '\r') || path.charAt(cursor) == '\n')) 
192            cursor++;
193        }
194        current = path.substring(currentStart, cursor);
195      } else if (ch == '$') {
196        cursor++;
197        while (cursor < path.length() && (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z'))
198          cursor++;
199        current = path.substring(currentStart, cursor);
200      } else if (ch == '{') {
201        cursor++;
202        ch = path.charAt(cursor);
203        if (ch == '}')
204          cursor++;
205        current = path.substring(currentStart, cursor);
206      } else if (ch == '"'){
207        cursor++;
208        boolean escape = false;
209        while (cursor < path.length() && (escape || path.charAt(cursor) != '"')) {
210          if (escape)
211            escape = false;
212          else 
213            escape = (path.charAt(cursor) == '\\');
214          cursor++;
215        }
216        if (cursor == path.length())
217          throw error("Unterminated string");
218        cursor++;
219        current = "\""+path.substring(currentStart+1, cursor-1)+"\"";
220      } else if (ch == '\''){
221        cursor++;
222        char ech = ch;
223        boolean escape = false;
224        while (cursor < path.length() && (escape || path.charAt(cursor) != ech)) {
225          if (escape)
226            escape = false;
227          else 
228            escape = (path.charAt(cursor) == '\\');
229          cursor++;
230        }
231        if (cursor == path.length())
232          throw error("Unterminated string");
233        cursor++;
234        current = path.substring(currentStart, cursor);
235        if (ech == '\'')
236          current = "\'"+current.substring(1, current.length() - 1)+"\'";
237      } else if (ch == '@'){
238        cursor++;
239        while (cursor < path.length() && isDateChar(path.charAt(cursor)))
240          cursor++;          
241        current = path.substring(currentStart, cursor);
242      } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then
243        cursor++;
244        current = path.substring(currentStart, cursor);
245      }
246    }
247  }
248
249
250  private boolean isDateChar(char ch) {
251    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch);
252  }
253  public boolean isOp() {
254    return ExpressionNode.Operation.fromCode(current) != null;
255  }
256  public boolean done() {
257    return currentStart >= path.length();
258  }
259  public int nextId() {
260    id++;
261    return id;
262  }
263  public SourceLocation getCurrentStartLocation() {
264    return currentStartLocation;
265  }
266  
  /**
   * Special case use: overwrites the text of the current token. Only the token
   * text is replaced; the cursor, the token start index and the locations are
   * left untouched.
   *
   * @param current the text to report as the current token
   */
  public void setCurrent(String current) {
    this.current = current;
  }
271
272  public boolean hasComment() {
273    return !done() && current.startsWith("//");
274  }
275  public boolean hasToken(String kw) {
276    return !done() && kw.equals(current);
277  }
278  public void token(String kw) throws FHIRLexerException {
279    if (!kw.equals(current)) 
280      throw error("Found \""+current+"\" expecting \""+kw+"\"");
281    next();
282  }
283  public String readConstant(String desc) throws FHIRLexerException {
284    if (!isStringConstant())
285      throw error("Found "+current+" expecting \"["+desc+"]\"");
286
287    return processConstant(take());
288  }
289
290  public String processConstant(String s) throws FHIRLexerException {
291    StringBuilder b = new StringBuilder();
292    int i = 1;
293    while (i < s.length()-1) {
294      char ch = s.charAt(i);
295      if (ch == '\\') {
296        i++;
297        switch (s.charAt(i)) {
298        case 't': 
299          b.append('\t');
300          break;
301        case 'r':
302          b.append('\r');
303          break;
304        case 'n': 
305          b.append('\n');
306          break;
307        case 'f': 
308          b.append('\f');
309          break;
310        case '\'':
311          b.append('\'');
312          break;
313        case '\\': 
314          b.append('\\');
315          break;
316        case '/': 
317          b.append('\\');
318          break;
319        case 'u':
320          i++;
321          int uc = Integer.parseInt(s.substring(i, i+4), 16);
322          b.append((char) uc);
323          i = i + 4;
324          break;
325        default:
326          throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i));
327        }
328      } else {
329        b.append(ch);
330        i++;
331      }
332    }
333    return b.toString();
334
335  }
336  public void skipToken(String token) throws FHIRLexerException {
337    if (getCurrent().equals(token))
338      next();
339    
340  }
341  public String takeDottedToken() throws FHIRLexerException {
342    StringBuilder b = new StringBuilder();
343    b.append(take());
344    while (!done() && getCurrent().equals(".")) {
345      b.append(take());
346      b.append(take());
347    }
348    return b.toString();
349  }
350  
351  void skipComments() throws FHIRLexerException {
352    while (!done() && hasComment())
353      next();
354  }
355
356}