001package org.hl7.fhir.dstu2016may.utils; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import org.hl7.fhir.dstu2016may.model.ExpressionNode; 035import org.hl7.fhir.dstu2016may.model.ExpressionNode.SourceLocation; 036import org.hl7.fhir.exceptions.FHIRException; 037import org.hl7.fhir.utilities.Utilities; 038 039// shared lexer for concrete syntaxes 040// - FluentPath 041// - Mapping language 042 043public class FHIRLexer { 044 public class FHIRLexerException extends FHIRException { 045 046 public FHIRLexerException() { 047 super(); 048 } 049 050 public FHIRLexerException(String message, Throwable cause) { 051 super(message, cause); 052 } 053 054 public FHIRLexerException(String message) { 055 super(message); 056 } 057 058 public FHIRLexerException(Throwable cause) { 059 super(cause); 060 } 061 062 } 063 private String path; 064 private int cursor; 065 private int currentStart; 066 private String current; 067 private SourceLocation currentLocation; 068 private SourceLocation currentStartLocation; 069 private int id; 070 071 public FHIRLexer(String source) throws FHIRLexerException { 072 this.path = source; 073 currentLocation = new SourceLocation(1, 1); 074 next(); 075 } 076 public String getCurrent() { 077 return current; 078 } 079 public SourceLocation getCurrentLocation() { 080 return currentLocation; 081 } 082 083 public boolean isConstant(boolean incDoubleQuotes) { 084 return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || current.charAt(0) == '-' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || current.equals("true") || current.equals("false") || current.equals("{}"); 085 } 086 087 public boolean isStringConstant() { 088 return current.charAt(0) == '\'' || current.charAt(0) == '"'; 089 } 090 091 public String take() throws FHIRLexerException { 092 String s = current; 093 next(); 094 return s; 095 } 096 097 public boolean isToken() { 098 if (Utilities.noString(current)) 099 return false; 100 101 if (current.startsWith("$")) 102 return true; 103 104 if (current.equals("*") || current.equals("**")) 105 return true; 106 107 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 108 for (int i = 1; i < current.length(); i++) 109 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 110 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 111 return false; 112 return true; 113 } 114 return false; 115 } 116 117 public FHIRLexerException error(String msg) { 118 return error(msg, currentLocation.toString()); 119 } 120 121 public FHIRLexerException error(String msg, String location) { 122 return new FHIRLexerException("Error in "+path+" at "+location+": "+msg); 123 } 124 125 public void next() throws FHIRLexerException { 126 current = null; 127 boolean last13 = false; 128 while (cursor < path.length() && Character.isWhitespace(path.charAt(cursor))) { 129 if (path.charAt(cursor) == '\r') { 130 currentLocation.setLine(currentLocation.getLine() + 1); 131 currentLocation.setColumn(1); 132 last13 = true; 133 } else if (!last13 && (path.charAt(cursor) == '\n')) { 134 currentLocation.setLine(currentLocation.getLine() + 1); 135 currentLocation.setColumn(1); 136 last13 = false; 137 } else { 138 last13 = false; 139 currentLocation.setColumn(currentLocation.getColumn() + 1); 140 } 141 cursor++; 142 } 143 currentStart = cursor; 144 currentStartLocation = currentLocation; 145 if (cursor < path.length()) { 146 char ch = path.charAt(cursor); 147 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 148 cursor++; 149 if (cursor < path.length() && (path.charAt(cursor) == '=' || path.charAt(cursor) == '~' || path.charAt(cursor) == '-')) 150 cursor++; 151 current = path.substring(currentStart, cursor); 152 } else if (ch >= '0' && ch <= '9') { 153 cursor++; 154 boolean dotted = false; 155 while (cursor < path.length() && ((path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || (path.charAt(cursor) == '.') && !dotted)) { 156 if (path.charAt(cursor) == '.') 157 dotted = true; 158 cursor++; 159 } 160 if (path.charAt(cursor-1) == '.') 161 cursor--; 162 current = path.substring(currentStart, cursor); 163 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 164 while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 165 (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == '_')) 166 cursor++; 167 current = path.substring(currentStart, cursor); 168 } else if (ch == '%') { 169 cursor++; 170 if (cursor < path.length() && (path.charAt(cursor) == '"')) { 171 cursor++; 172 while (cursor < path.length() && (path.charAt(cursor) != '"')) 173 cursor++; 174 cursor++; 175 } else 176 while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 177 (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == ':' || path.charAt(cursor) == '-')) 178 cursor++; 179 current = path.substring(currentStart, cursor); 180 } else if (ch == '/') { 181 cursor++; 182 if (cursor < path.length() && (path.charAt(cursor) == '/')) { 183 cursor++; 184 while (cursor < path.length() && !((path.charAt(cursor) == '\r') || path.charAt(cursor) == '\n')) 185 cursor++; 186 } 187 current = path.substring(currentStart, cursor); 188 } else if (ch == '$') { 189 cursor++; 190 while (cursor < path.length() && (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z')) 191 cursor++; 192 current = path.substring(currentStart, cursor); 193 } else if (ch == '{') { 194 cursor++; 195 ch = path.charAt(cursor); 196 if (ch == '}') 197 cursor++; 198 current = path.substring(currentStart, cursor); 199 } else if (ch == '"'){ 200 cursor++; 201 boolean escape = false; 202 while (cursor < path.length() && (escape || path.charAt(cursor) != '"')) { 203 if (escape) 204 escape = false; 205 else 206 escape = (path.charAt(cursor) == '\\'); 207 cursor++; 208 } 209 if (cursor == path.length()) 210 throw error("Unterminated string"); 211 cursor++; 212 current = "\""+path.substring(currentStart+1, cursor-1)+"\""; 213 } else if (ch == '\''){ 214 cursor++; 215 char ech = ch; 216 boolean escape = false; 217 while (cursor < path.length() && (escape || path.charAt(cursor) != ech)) { 218 if (escape) 219 escape = false; 220 else 221 escape = (path.charAt(cursor) == '\\'); 222 cursor++; 223 } 224 if (cursor == path.length()) 225 throw error("Unterminated string"); 226 cursor++; 227 current = path.substring(currentStart, cursor); 228 if (ech == '\'') 229 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 230 } else if (ch == '@'){ 231 cursor++; 232 while (cursor < path.length() && isDateChar(path.charAt(cursor))) 233 cursor++; 234 current = path.substring(currentStart, cursor); 235 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 236 cursor++; 237 current = path.substring(currentStart, cursor); 238 } 239 } 240 } 241 242 243 private boolean isDateChar(char ch) { 244 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch); 245 } 246 public boolean isOp() { 247 return ExpressionNode.Operation.fromCode(current) != null; 248 } 249 public boolean done() { 250 return currentStart >= path.length(); 251 } 252 public int nextId() { 253 id++; 254 return id; 255 } 256 public SourceLocation getCurrentStartLocation() { 257 return currentStartLocation; 258 } 259 260 // special case use 261 public void setCurrent(String current) { 262 this.current = current; 263 } 264 265 public boolean hasComment() { 266 return !done() && current.startsWith("//"); 267 } 268 public boolean hasToken(String kw) { 269 return !done() && kw.equals(current); 270 } 271 public void token(String kw) throws FHIRLexerException { 272 if (!kw.equals(current)) 273 throw error("Found \""+current+"\" expecting \""+kw+"\""); 274 next(); 275 } 276 public String readConstant(String desc) throws FHIRLexerException { 277 if (!isStringConstant()) 278 throw error("Found "+current+" expecting \"["+desc+"]\""); 279 280 return processConstant(take()); 281 } 282 283 public String processConstant(String s) throws FHIRLexerException { 284 StringBuilder b = new StringBuilder(); 285 int i = 1; 286 while (i < s.length()-1) { 287 char ch = s.charAt(i); 288 if (ch == '\\') { 289 i++; 290 switch (s.charAt(i)) { 291 case 't': 292 b.append('\t'); 293 break; 294 case 'r': 295 b.append('\r'); 296 break; 297 case 'n': 298 b.append('\n'); 299 break; 300 case 'f': 301 b.append('\f'); 302 break; 303 case '\'': 304 b.append('\''); 305 break; 306 case '\\': 307 b.append('\\'); 308 break; 309 case '/': 310 b.append('\\'); 311 break; 312 case 'u': 313 i++; 314 int uc = Integer.parseInt(s.substring(i, i+4), 16); 315 b.append((char) uc); 316 i = i + 4; 317 break; 318 default: 319 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 320 } 321 } else { 322 b.append(ch); 323 i++; 324 } 325 } 326 return b.toString(); 327 328 } 329 public void skipToken(String token) throws FHIRLexerException { 330 if (getCurrent().equals(token)) 331 next(); 332 333 } 334 public String takeDottedToken() throws FHIRLexerException { 335 StringBuilder b = new StringBuilder(); 336 b.append(take()); 337 while (!done() && getCurrent().equals(".")) { 338 b.append(take()); 339 b.append(take()); 340 } 341 return b.toString(); 342 } 343 344 void skipComments() throws FHIRLexerException { 345 while (!done() && hasComment()) 346 next(); 347 } 348 349}