001package org.hl7.fhir.dstu2.utils; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import org.hl7.fhir.dstu2.model.ExpressionNode; 035import org.hl7.fhir.dstu2.model.ExpressionNode.SourceLocation; 036import org.hl7.fhir.exceptions.FHIRException; 037import org.hl7.fhir.utilities.Utilities; 038 039// shared lexer for concrete syntaxes 040// - FluentPath 041// - Mapping language 042 043public class FHIRLexer { 044 public class FHIRLexerException extends FHIRException { 045 046 public FHIRLexerException() { 047 super(); 048 } 049 050 public FHIRLexerException(String message, Throwable cause) { 051 super(message, cause); 052 } 053 054 public FHIRLexerException(String message) { 055 super(message); 056 } 057 058 public FHIRLexerException(Throwable cause) { 059 super(cause); 060 } 061 062 } 063 private String path; 064 private int cursor; 065 private int currentStart; 066 private String current; 067 private SourceLocation currentLocation; 068 private SourceLocation currentStartLocation; 069 private int id; 070 071 public FHIRLexer(String source) throws FHIRLexerException { 072 this.path = source; 073 currentLocation = new SourceLocation(1, 1); 074 next(); 075 } 076 public String getCurrent() { 077 return current; 078 } 079 public SourceLocation getCurrentLocation() { 080 return currentLocation; 081 } 082 083 public boolean isConstant(boolean incDoubleQuotes) { 084 return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 085 current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 086 current.equals("true") || current.equals("false") || current.equals("{}"); 087 } 088 089 public boolean isStringConstant() { 090 return current.charAt(0) == '\'' || current.charAt(0) == '"'; 091 } 092 093 public String take() throws FHIRLexerException { 094 String s = current; 095 next(); 096 return s; 097 } 098 099 public boolean isToken() { 100 if (Utilities.noString(current)) 101 return false; 102 103 if (current.startsWith("$")) 104 return true; 105 106 if (current.equals("*") || current.equals("**")) 107 return true; 108 109 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 110 for (int i = 1; i < current.length(); i++) 111 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 112 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 113 return false; 114 return true; 115 } 116 return false; 117 } 118 119 public FHIRLexerException error(String msg) { 120 return error(msg, currentLocation.toString()); 121 } 122 123 public FHIRLexerException error(String msg, String location) { 124 return new FHIRLexerException("Error in "+path+" at "+location+": "+msg); 125 } 126 127 public void next() throws FHIRLexerException { 128 current = null; 129 boolean last13 = false; 130 while (cursor < path.length() && Character.isWhitespace(path.charAt(cursor))) { 131 if (path.charAt(cursor) == '\r') { 132 currentLocation.setLine(currentLocation.getLine() + 1); 133 currentLocation.setColumn(1); 134 last13 = true; 135 } else if (!last13 && (path.charAt(cursor) == '\n')) { 136 currentLocation.setLine(currentLocation.getLine() + 1); 137 currentLocation.setColumn(1); 138 last13 = false; 139 } else { 140 last13 = false; 141 currentLocation.setColumn(currentLocation.getColumn() + 1); 142 } 143 cursor++; 144 } 145 currentStart = cursor; 146 currentStartLocation = currentLocation; 147 if (cursor < path.length()) { 148 char ch = path.charAt(cursor); 149 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 150 cursor++; 151 if (cursor < path.length() && (path.charAt(cursor) == '=' || path.charAt(cursor) == '~' || path.charAt(cursor) == '-')) 152 cursor++; 153 current = path.substring(currentStart, cursor); 154 } else if (ch == '.' ) { 155 cursor++; 156 if (cursor < path.length() && (path.charAt(cursor) == '.')) 157 cursor++; 158 current = path.substring(currentStart, cursor); 159 } else if (ch >= '0' && ch <= '9') { 160 cursor++; 161 boolean dotted = false; 162 while (cursor < path.length() && ((path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || (path.charAt(cursor) == '.') && !dotted)) { 163 if (path.charAt(cursor) == '.') 164 dotted = true; 165 cursor++; 166 } 167 if (path.charAt(cursor-1) == '.') 168 cursor--; 169 current = path.substring(currentStart, cursor); 170 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 171 while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 172 (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == '_')) 173 cursor++; 174 current = path.substring(currentStart, cursor); 175 } else if (ch == '%') { 176 cursor++; 177 if (cursor < path.length() && (path.charAt(cursor) == '"')) { 178 cursor++; 179 while (cursor < path.length() && (path.charAt(cursor) != '"')) 180 cursor++; 181 cursor++; 182 } else 183 while (cursor < path.length() && ((path.charAt(cursor) >= 'A' && path.charAt(cursor) <= 'Z') || (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z') || 184 (path.charAt(cursor) >= '0' && path.charAt(cursor) <= '9') || path.charAt(cursor) == ':' || path.charAt(cursor) == '-')) 185 cursor++; 186 current = path.substring(currentStart, cursor); 187 } else if (ch == '/') { 188 cursor++; 189 if (cursor < path.length() && (path.charAt(cursor) == '/')) { 190 cursor++; 191 while (cursor < path.length() && !((path.charAt(cursor) == '\r') || path.charAt(cursor) == '\n')) 192 cursor++; 193 } 194 current = path.substring(currentStart, cursor); 195 } else if (ch == '$') { 196 cursor++; 197 while (cursor < path.length() && (path.charAt(cursor) >= 'a' && path.charAt(cursor) <= 'z')) 198 cursor++; 199 current = path.substring(currentStart, cursor); 200 } else if (ch == '{') { 201 cursor++; 202 ch = path.charAt(cursor); 203 if (ch == '}') 204 cursor++; 205 current = path.substring(currentStart, cursor); 206 } else if (ch == '"'){ 207 cursor++; 208 boolean escape = false; 209 while (cursor < path.length() && (escape || path.charAt(cursor) != '"')) { 210 if (escape) 211 escape = false; 212 else 213 escape = (path.charAt(cursor) == '\\'); 214 cursor++; 215 } 216 if (cursor == path.length()) 217 throw error("Unterminated string"); 218 cursor++; 219 current = "\""+path.substring(currentStart+1, cursor-1)+"\""; 220 } else if (ch == '\''){ 221 cursor++; 222 char ech = ch; 223 boolean escape = false; 224 while (cursor < path.length() && (escape || path.charAt(cursor) != ech)) { 225 if (escape) 226 escape = false; 227 else 228 escape = (path.charAt(cursor) == '\\'); 229 cursor++; 230 } 231 if (cursor == path.length()) 232 throw error("Unterminated string"); 233 cursor++; 234 current = path.substring(currentStart, cursor); 235 if (ech == '\'') 236 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 237 } else if (ch == '@'){ 238 cursor++; 239 while (cursor < path.length() && isDateChar(path.charAt(cursor))) 240 cursor++; 241 current = path.substring(currentStart, cursor); 242 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 243 cursor++; 244 current = path.substring(currentStart, cursor); 245 } 246 } 247 } 248 249 250 private boolean isDateChar(char ch) { 251 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch); 252 } 253 public boolean isOp() { 254 return ExpressionNode.Operation.fromCode(current) != null; 255 } 256 public boolean done() { 257 return currentStart >= path.length(); 258 } 259 public int nextId() { 260 id++; 261 return id; 262 } 263 public SourceLocation getCurrentStartLocation() { 264 return currentStartLocation; 265 } 266 267 // special case use 268 public void setCurrent(String current) { 269 this.current = current; 270 } 271 272 public boolean hasComment() { 273 return !done() && current.startsWith("//"); 274 } 275 public boolean hasToken(String kw) { 276 return !done() && kw.equals(current); 277 } 278 public void token(String kw) throws FHIRLexerException { 279 if (!kw.equals(current)) 280 throw error("Found \""+current+"\" expecting \""+kw+"\""); 281 next(); 282 } 283 public String readConstant(String desc) throws FHIRLexerException { 284 if (!isStringConstant()) 285 throw error("Found "+current+" expecting \"["+desc+"]\""); 286 287 return processConstant(take()); 288 } 289 290 public String processConstant(String s) throws FHIRLexerException { 291 StringBuilder b = new StringBuilder(); 292 int i = 1; 293 while (i < s.length()-1) { 294 char ch = s.charAt(i); 295 if (ch == '\\') { 296 i++; 297 switch (s.charAt(i)) { 298 case 't': 299 b.append('\t'); 300 break; 301 case 'r': 302 b.append('\r'); 303 break; 304 case 'n': 305 b.append('\n'); 306 break; 307 case 'f': 308 b.append('\f'); 309 break; 310 case '\'': 311 b.append('\''); 312 break; 313 case '\\': 314 b.append('\\'); 315 break; 316 case '/': 317 b.append('\\'); 318 break; 319 case 'u': 320 i++; 321 int uc = Integer.parseInt(s.substring(i, i+4), 16); 322 b.append((char) uc); 323 i = i + 4; 324 break; 325 default: 326 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 327 } 328 } else { 329 b.append(ch); 330 i++; 331 } 332 } 333 return b.toString(); 334 335 } 336 public void skipToken(String token) throws FHIRLexerException { 337 if (getCurrent().equals(token)) 338 next(); 339 340 } 341 public String takeDottedToken() throws FHIRLexerException { 342 StringBuilder b = new StringBuilder(); 343 b.append(take()); 344 while (!done() && getCurrent().equals(".")) { 345 b.append(take()); 346 b.append(take()); 347 } 348 return b.toString(); 349 } 350 351 void skipComments() throws FHIRLexerException { 352 while (!done() && hasComment()) 353 next(); 354 } 355 356}