001package org.hl7.fhir.r4.utils; 002 003import org.hl7.fhir.exceptions.FHIRException; 004 005/* 006 Copyright (c) 2011+, HL7, Inc. 007 All rights reserved. 008 009 Redistribution and use in source and binary forms, with or without modification, 010 are permitted provided that the following conditions are met: 011 012 * Redistributions of source code must retain the above copyright notice, this 013 list of conditions and the following disclaimer. 014 * Redistributions in binary form must reproduce the above copyright notice, 015 this list of conditions and the following disclaimer in the documentation 016 and/or other materials provided with the distribution. 017 * Neither the name of HL7 nor the names of its contributors may be used to 018 endorse or promote products derived from this software without specific 019 prior written permission. 020 021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 022 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 023 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 024 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 025 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 026 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 027 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 028 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 029 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 030 POSSIBILITY OF SUCH DAMAGE. 031 032 */ 033 034 035 036import org.hl7.fhir.r4.model.ExpressionNode; 037import org.hl7.fhir.r4.model.ExpressionNode.SourceLocation; 038import org.hl7.fhir.utilities.Utilities; 039 040// shared lexer for concrete syntaxes 041// - FluentPath 042// - Mapping language 043 044public class FHIRLexer { 045 public class FHIRLexerException extends FHIRException { 046 047 public FHIRLexerException() { 048 super(); 049 } 050 051 public FHIRLexerException(String message, Throwable cause) { 052 super(message, cause); 053 } 054 055 public FHIRLexerException(String message) { 056 super(message); 057 } 058 059 public FHIRLexerException(Throwable cause) { 060 super(cause); 061 } 062 063 } 064 private String source; 065 private int cursor; 066 private int currentStart; 067 private String current; 068 private SourceLocation currentLocation; 069 private SourceLocation currentStartLocation; 070 private int id; 071 private String name; 072 073 public FHIRLexer(String source, String name) throws FHIRLexerException { 074 this.source = source; 075 this.name = name == null ? "??" : name; 076 currentLocation = new SourceLocation(1, 1); 077 next(); 078 } 079 public FHIRLexer(String source, int i) throws FHIRLexerException { 080 this.source = source; 081 this.cursor = i; 082 currentLocation = new SourceLocation(1, 1); 083 next(); 084 } 085 public String getCurrent() { 086 return current; 087 } 088 public SourceLocation getCurrentLocation() { 089 return currentLocation; 090 } 091 092 public boolean isConstant() { 093 return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 094 current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 095 current.equals("true") || current.equals("false") || current.equals("{}"); 096 } 097 098 public boolean isFixedName() { 099 return current != null && (current.charAt(0) == '`'); 100 } 101 102 public boolean isStringConstant() { 103 return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`'; 104 } 105 106 public String take() throws FHIRLexerException { 107 String s = current; 108 next(); 109 return s; 110 } 111 112 public int takeInt() throws FHIRLexerException { 113 String s = current; 114 if (!Utilities.isInteger(s)) 115 throw error("Found "+current+" expecting an integer"); 116 next(); 117 return Integer.parseInt(s); 118 } 119 120 public boolean isToken() { 121 if (Utilities.noString(current)) 122 return false; 123 124 if (current.startsWith("$")) 125 return true; 126 127 if (current.equals("*") || current.equals("**")) 128 return true; 129 130 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 131 for (int i = 1; i < current.length(); i++) 132 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 133 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 134 return false; 135 return true; 136 } 137 return false; 138 } 139 140 public FHIRLexerException error(String msg) { 141 return error(msg, currentLocation.toString()); 142 } 143 144 public FHIRLexerException error(String msg, String location) { 145 return new FHIRLexerException("Error in "+name+" at "+location+": "+msg); 146 } 147 148 public void next() throws FHIRLexerException { 149 current = null; 150 boolean last13 = false; 151 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) { 152 if (source.charAt(cursor) == '\r') { 153 currentLocation.setLine(currentLocation.getLine() + 1); 154 currentLocation.setColumn(1); 155 last13 = true; 156 } else if (!last13 && (source.charAt(cursor) == '\n')) { 157 currentLocation.setLine(currentLocation.getLine() + 1); 158 currentLocation.setColumn(1); 159 last13 = false; 160 } else { 161 last13 = false; 162 currentLocation.setColumn(currentLocation.getColumn() + 1); 163 } 164 cursor++; 165 } 166 currentStart = cursor; 167 currentStartLocation = currentLocation; 168 if (cursor < source.length()) { 169 char ch = source.charAt(cursor); 170 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 171 cursor++; 172 if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 173 cursor++; 174 current = source.substring(currentStart, cursor); 175 } else if (ch == '.' ) { 176 cursor++; 177 if (cursor < source.length() && (source.charAt(cursor) == '.')) 178 cursor++; 179 current = source.substring(currentStart, cursor); 180 } else if (ch >= '0' && ch <= '9') { 181 cursor++; 182 boolean dotted = false; 183 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) { 184 if (source.charAt(cursor) == '.') 185 dotted = true; 186 cursor++; 187 } 188 if (source.charAt(cursor-1) == '.') 189 cursor--; 190 current = source.substring(currentStart, cursor); 191 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 192 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 193 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 194 cursor++; 195 current = source.substring(currentStart, cursor); 196 } else if (ch == '%') { 197 cursor++; 198 if (cursor < source.length() && (source.charAt(cursor) == '`')) { 199 cursor++; 200 while (cursor < source.length() && (source.charAt(cursor) != '`')) 201 cursor++; 202 cursor++; 203 } else 204 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 205 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-')) 206 cursor++; 207 current = source.substring(currentStart, cursor); 208 } else if (ch == '/') { 209 cursor++; 210 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 211 cursor++; 212 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 213 cursor++; 214 } 215 current = source.substring(currentStart, cursor); 216 } else if (ch == '$') { 217 cursor++; 218 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 219 cursor++; 220 current = source.substring(currentStart, cursor); 221 } else if (ch == '{') { 222 cursor++; 223 ch = source.charAt(cursor); 224 if (ch == '}') 225 cursor++; 226 current = source.substring(currentStart, cursor); 227 } else if (ch == '"') { 228 cursor++; 229 boolean escape = false; 230 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 231 if (escape) 232 escape = false; 233 else 234 escape = (source.charAt(cursor) == '\\'); 235 cursor++; 236 } 237 if (cursor == source.length()) 238 throw error("Unterminated string"); 239 cursor++; 240 current = "\""+source.substring(currentStart+1, cursor-1)+"\""; 241 } else if (ch == '`') { 242 cursor++; 243 boolean escape = false; 244 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 245 if (escape) 246 escape = false; 247 else 248 escape = (source.charAt(cursor) == '\\'); 249 cursor++; 250 } 251 if (cursor == source.length()) 252 throw error("Unterminated string"); 253 cursor++; 254 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 255 } else if (ch == '\''){ 256 cursor++; 257 char ech = ch; 258 boolean escape = false; 259 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 260 if (escape) 261 escape = false; 262 else 263 escape = (source.charAt(cursor) == '\\'); 264 cursor++; 265 } 266 if (cursor == source.length()) 267 throw error("Unterminated string"); 268 cursor++; 269 current = source.substring(currentStart, cursor); 270 if (ech == '\'') 271 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 272 } else if (ch == '`') { 273 cursor++; 274 boolean escape = false; 275 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 276 if (escape) 277 escape = false; 278 else 279 escape = (source.charAt(cursor) == '\\'); 280 cursor++; 281 } 282 if (cursor == source.length()) 283 throw error("Unterminated string"); 284 cursor++; 285 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 286 } else if (ch == '@'){ 287 int start = cursor; 288 cursor++; 289 while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) 290 cursor++; 291 current = source.substring(currentStart, cursor); 292 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 293 cursor++; 294 current = source.substring(currentStart, cursor); 295 } 296 } 297 } 298 299 300 private boolean isDateChar(char ch,int start) { 301 int eot = source.charAt(start+1) == 'T' ? 10 : 20; 302 303 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1))); 304 } 305 public boolean isOp() { 306 return ExpressionNode.Operation.fromCode(current) != null; 307 } 308 public boolean done() { 309 return currentStart >= source.length(); 310 } 311 public int nextId() { 312 id++; 313 return id; 314 } 315 public SourceLocation getCurrentStartLocation() { 316 return currentStartLocation; 317 } 318 319 // special case use 320 public void setCurrent(String current) { 321 this.current = current; 322 } 323 324 public boolean hasComment() { 325 return !done() && current.startsWith("//"); 326 } 327 public boolean hasToken(String kw) { 328 return !done() && kw.equals(current); 329 } 330 public boolean hasToken(String... names) { 331 if (done()) 332 return false; 333 for (String s : names) 334 if (s.equals(current)) 335 return true; 336 return false; 337 } 338 339 public void token(String kw) throws FHIRLexerException { 340 if (!kw.equals(current)) 341 throw error("Found \""+current+"\" expecting \""+kw+"\""); 342 next(); 343 } 344 345 public String readConstant(String desc) throws FHIRLexerException { 346 if (!isStringConstant()) 347 throw error("Found "+current+" expecting \"["+desc+"]\""); 348 349 return processConstant(take()); 350 } 351 352 public String readFixedName(String desc) throws FHIRLexerException { 353 if (!isFixedName()) 354 throw error("Found "+current+" expecting \"["+desc+"]\""); 355 356 return processFixedName(take()); 357 } 358 359 public String processConstant(String s) throws FHIRLexerException { 360 StringBuilder b = new StringBuilder(); 361 int i = 1; 362 while (i < s.length()-1) { 363 char ch = s.charAt(i); 364 if (ch == '\\') { 365 i++; 366 switch (s.charAt(i)) { 367 case 't': 368 b.append('\t'); 369 break; 370 case 'r': 371 b.append('\r'); 372 break; 373 case 'n': 374 b.append('\n'); 375 break; 376 case 'f': 377 b.append('\f'); 378 break; 379 case '\'': 380 b.append('\''); 381 break; 382 case '"': 383 b.append('"'); 384 break; 385 case '`': 386 b.append('`'); 387 break; 388 case '\\': 389 b.append('\\'); 390 break; 391 case '/': 392 b.append('/'); 393 break; 394 case 'u': 395 i++; 396 int uc = Integer.parseInt(s.substring(i, i+4), 16); 397 b.append((char) uc); 398 i = i + 4; 399 break; 400 default: 401 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 402 } 403 } else { 404 b.append(ch); 405 i++; 406 } 407 } 408 return b.toString(); 409 } 410 411 public String processFixedName(String s) throws FHIRLexerException { 412 StringBuilder b = new StringBuilder(); 413 int i = 1; 414 while (i < s.length()-1) { 415 char ch = s.charAt(i); 416 if (ch == '\\') { 417 i++; 418 switch (s.charAt(i)) { 419 case 't': 420 b.append('\t'); 421 break; 422 case 'r': 423 b.append('\r'); 424 break; 425 case 'n': 426 b.append('\n'); 427 break; 428 case 'f': 429 b.append('\f'); 430 break; 431 case '\'': 432 b.append('\''); 433 break; 434 case '"': 435 b.append('"'); 436 break; 437 case '\\': 438 b.append('\\'); 439 break; 440 case '/': 441 b.append('/'); 442 break; 443 case 'u': 444 i++; 445 int uc = Integer.parseInt(s.substring(i, i+4), 16); 446 b.append((char) uc); 447 i = i + 4; 448 break; 449 default: 450 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 451 } 452 } else { 453 b.append(ch); 454 i++; 455 } 456 } 457 return b.toString(); 458 } 459 460 public void skipToken(String token) throws FHIRLexerException { 461 if (getCurrent().equals(token)) 462 next(); 463 464 } 465 public String takeDottedToken() throws FHIRLexerException { 466 StringBuilder b = new StringBuilder(); 467 b.append(take()); 468 while (!done() && getCurrent().equals(".")) { 469 b.append(take()); 470 b.append(take()); 471 } 472 return b.toString(); 473 } 474 475 void skipComments() throws FHIRLexerException { 476 while (!done() && hasComment()) 477 next(); 478 } 479 public int getCurrentStart() { 480 return currentStart; 481 } 482 483}