001package org.hl7.fhir.r4.utils; 002 003import org.hl7.fhir.exceptions.FHIRException; 004 005/* 006 Copyright (c) 2011+, HL7, Inc. 007 All rights reserved. 008 009 Redistribution and use in source and binary forms, with or without modification, 010 are permitted provided that the following conditions are met: 011 012 * Redistributions of source code must retain the above copyright notice, this 013 list of conditions and the following disclaimer. 014 * Redistributions in binary form must reproduce the above copyright notice, 015 this list of conditions and the following disclaimer in the documentation 016 and/or other materials provided with the distribution. 017 * Neither the name of HL7 nor the names of its contributors may be used to 018 endorse or promote products derived from this software without specific 019 prior written permission. 020 021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 022 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 023 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 024 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 025 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 026 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 027 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 028 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 029 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 030 POSSIBILITY OF SUCH DAMAGE. 031 032 */ 033 034 035 036import org.hl7.fhir.r4.model.ExpressionNode; 037import org.hl7.fhir.utilities.SourceLocation; 038import org.hl7.fhir.utilities.Utilities; 039 040// shared lexer for concrete syntaxes 041// - FluentPath 042// - Mapping language 043 044public class FHIRLexer { 045 public class FHIRLexerException extends FHIRException { 046 047 public FHIRLexerException() { 048 super(); 049 } 050 051 public FHIRLexerException(String message, Throwable cause) { 052 super(message, cause); 053 } 054 055 public FHIRLexerException(String message) { 056 super(message); 057 } 058 059 public FHIRLexerException(Throwable cause) { 060 super(cause); 061 } 062 063 } 064 private String source; 065 private int cursor; 066 private int currentStart; 067 private String current; 068 private SourceLocation currentLocation; 069 private SourceLocation currentStartLocation; 070 private int id; 071 private String name; 072 073 public FHIRLexer(String source, String name) throws FHIRLexerException { 074 this.source = source; 075 this.name = name == null ? "??" : name; 076 currentLocation = new SourceLocation(1, 1); 077 next(); 078 } 079 public FHIRLexer(String source, int i) throws FHIRLexerException { 080 this.source = source; 081 this.cursor = i; 082 currentLocation = new SourceLocation(1, 1); 083 next(); 084 } 085 public String getCurrent() { 086 return current; 087 } 088 public SourceLocation getCurrentLocation() { 089 return currentLocation; 090 } 091 092 public boolean isConstant() { 093 return current != null && (current.charAt(0) == '\'' || current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 094 current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 095 current.equals("true") || current.equals("false") || current.equals("{}"); 096 } 097 098 public boolean isFixedName() { 099 return current != null && (current.charAt(0) == '`'); 100 } 101 102 public boolean isStringConstant() { 103 return current.charAt(0) == '\'' || current.charAt(0) == '"' || current.charAt(0) == '`'; 104 } 105 106 public String take() throws FHIRLexerException { 107 String s = current; 108 next(); 109 return s; 110 } 111 112 public int takeInt() throws FHIRLexerException { 113 String s = current; 114 if (!Utilities.isInteger(s)) 115 throw error("Found "+current+" expecting an integer"); 116 next(); 117 return Integer.parseInt(s); 118 } 119 120 public boolean isToken() { 121 if (Utilities.noString(current)) 122 return false; 123 124 if (current.startsWith("$")) 125 return true; 126 127 if (current.equals("*") || current.equals("**")) 128 return true; 129 130 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 131 for (int i = 1; i < current.length(); i++) 132 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 133 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 134 return false; 135 return true; 136 } 137 return false; 138 } 139 140 public FHIRLexerException error(String msg) { 141 return error(msg, currentLocation.toString()); 142 } 143 144 public FHIRLexerException error(String msg, String location) { 145 return new FHIRLexerException("Error in "+name+" at "+location+": "+msg); 146 } 147 148 public void next() throws FHIRLexerException { 149 skipWhitespaceAndComments(); 150 current = null; 151 currentStart = cursor; 152 currentStartLocation = currentLocation; 153 if (cursor < source.length()) { 154 char ch = source.charAt(cursor); 155 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 156 cursor++; 157 if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-') || (ch == '-' && source.charAt(cursor) == '>')) 158 cursor++; 159 current = source.substring(currentStart, cursor); 160 } else if (ch == '.' ) { 161 cursor++; 162 if (cursor < source.length() && (source.charAt(cursor) == '.')) 163 cursor++; 164 current = source.substring(currentStart, cursor); 165 } else if (ch >= '0' && ch <= '9') { 166 cursor++; 167 boolean dotted = false; 168 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) { 169 if (source.charAt(cursor) == '.') 170 dotted = true; 171 cursor++; 172 } 173 if (source.charAt(cursor-1) == '.') 174 cursor--; 175 current = source.substring(currentStart, cursor); 176 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 177 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 178 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 179 cursor++; 180 current = source.substring(currentStart, cursor); 181 } else if (ch == '%') { 182 cursor++; 183 if (cursor < source.length() && (source.charAt(cursor) == '`')) { 184 cursor++; 185 while (cursor < source.length() && (source.charAt(cursor) != '`')) 186 cursor++; 187 cursor++; 188 } else 189 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 190 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-')) 191 cursor++; 192 current = source.substring(currentStart, cursor); 193 } else if (ch == '/') { 194 cursor++; 195 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 196 // this is en error - should already have been skipped 197 error("This shoudn't happen?"); 198 } 199 current = source.substring(currentStart, cursor); 200 } else if (ch == '$') { 201 cursor++; 202 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 203 cursor++; 204 current = source.substring(currentStart, cursor); 205 } else if (ch == '{') { 206 cursor++; 207 ch = source.charAt(cursor); 208 if (ch == '}') 209 cursor++; 210 current = source.substring(currentStart, cursor); 211 } else if (ch == '"') { 212 cursor++; 213 boolean escape = false; 214 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 215 if (escape) 216 escape = false; 217 else 218 escape = (source.charAt(cursor) == '\\'); 219 cursor++; 220 } 221 if (cursor == source.length()) 222 throw error("Unterminated string"); 223 cursor++; 224 current = "\""+source.substring(currentStart+1, cursor-1)+"\""; 225 } else if (ch == '`') { 226 cursor++; 227 boolean escape = false; 228 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 229 if (escape) 230 escape = false; 231 else 232 escape = (source.charAt(cursor) == '\\'); 233 cursor++; 234 } 235 if (cursor == source.length()) 236 throw error("Unterminated string"); 237 cursor++; 238 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 239 } else if (ch == '\''){ 240 cursor++; 241 char ech = ch; 242 boolean escape = false; 243 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 244 if (escape) 245 escape = false; 246 else 247 escape = (source.charAt(cursor) == '\\'); 248 cursor++; 249 } 250 if (cursor == source.length()) 251 throw error("Unterminated string"); 252 cursor++; 253 current = source.substring(currentStart, cursor); 254 if (ech == '\'') 255 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 256 } else if (ch == '`') { 257 cursor++; 258 boolean escape = false; 259 while (cursor < source.length() && (escape || source.charAt(cursor) != '`')) { 260 if (escape) 261 escape = false; 262 else 263 escape = (source.charAt(cursor) == '\\'); 264 cursor++; 265 } 266 if (cursor == source.length()) 267 throw error("Unterminated string"); 268 cursor++; 269 current = "`"+source.substring(currentStart+1, cursor-1)+"`"; 270 } else if (ch == '@'){ 271 int start = cursor; 272 cursor++; 273 while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) 274 cursor++; 275 current = source.substring(currentStart, cursor); 276 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 277 cursor++; 278 current = source.substring(currentStart, cursor); 279 } 280 } 281 } 282 283 284 private void skipWhitespaceAndComments() { 285 boolean last13 = false; 286 boolean done = false; 287 while (cursor < source.length() && !done) { 288 if (cursor < source.length() -1 && "//".equals(source.substring(cursor, cursor+2))) { 289 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 290 cursor++; 291 } else if (cursor < source.length() - 1 && "/*".equals(source.substring(cursor, cursor+2))) { 292 while (cursor < source.length() - 1 && !"*/".equals(source.substring(cursor, cursor+2))) { 293 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 294 cursor++; 295 } 296 if (cursor >= source.length() -1) { 297 error("Unfinished comment"); 298 } else { 299 cursor = cursor + 2; 300 } 301 } else if (Character.isWhitespace(source.charAt(cursor))) { 302 last13 = currentLocation.checkChar(source.charAt(cursor), last13); 303 cursor++; 304 } else { 305 done = true; 306 } 307 } 308 } 309 310 311 private boolean isDateChar(char ch,int start) { 312 int eot = source.charAt(start+1) == 'T' ? 10 : 20; 313 314 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1))); 315 } 316 public boolean isOp() { 317 return ExpressionNode.Operation.fromCode(current) != null; 318 } 319 public boolean done() { 320 return currentStart >= source.length(); 321 } 322 public int nextId() { 323 id++; 324 return id; 325 } 326 public SourceLocation getCurrentStartLocation() { 327 return currentStartLocation; 328 } 329 330 // special case use 331 public void setCurrent(String current) { 332 this.current = current; 333 } 334 335 public boolean hasComment() { 336 return !done() && current.startsWith("//"); 337 } 338 public boolean hasToken(String kw) { 339 return !done() && kw.equals(current); 340 } 341 public boolean hasToken(String... names) { 342 if (done()) 343 return false; 344 for (String s : names) 345 if (s.equals(current)) 346 return true; 347 return false; 348 } 349 350 public void token(String kw) throws FHIRLexerException { 351 if (!kw.equals(current)) 352 throw error("Found \""+current+"\" expecting \""+kw+"\""); 353 next(); 354 } 355 356 public String readConstant(String desc) throws FHIRLexerException { 357 if (!isStringConstant()) 358 throw error("Found "+current+" expecting \"["+desc+"]\""); 359 360 return processConstant(take()); 361 } 362 363 public String readFixedName(String desc) throws FHIRLexerException { 364 if (!isFixedName()) 365 throw error("Found "+current+" expecting \"["+desc+"]\""); 366 367 return processFixedName(take()); 368 } 369 370 public String processConstant(String s) throws FHIRLexerException { 371 StringBuilder b = new StringBuilder(); 372 int i = 1; 373 while (i < s.length()-1) { 374 char ch = s.charAt(i); 375 if (ch == '\\') { 376 i++; 377 switch (s.charAt(i)) { 378 case 't': 379 b.append('\t'); 380 break; 381 case 'r': 382 b.append('\r'); 383 break; 384 case 'n': 385 b.append('\n'); 386 break; 387 case 'f': 388 b.append('\f'); 389 break; 390 case '\'': 391 b.append('\''); 392 break; 393 case '"': 394 b.append('"'); 395 break; 396 case '`': 397 b.append('`'); 398 break; 399 case '\\': 400 b.append('\\'); 401 break; 402 case '/': 403 b.append('/'); 404 break; 405 case 'u': 406 i++; 407 int uc = Integer.parseInt(s.substring(i, i+4), 16); 408 b.append((char) uc); 409 i = i + 4; 410 break; 411 default: 412 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 413 } 414 } else { 415 b.append(ch); 416 i++; 417 } 418 } 419 return b.toString(); 420 } 421 422 public String processFixedName(String s) throws FHIRLexerException { 423 StringBuilder b = new StringBuilder(); 424 int i = 1; 425 while (i < s.length()-1) { 426 char ch = s.charAt(i); 427 if (ch == '\\') { 428 i++; 429 switch (s.charAt(i)) { 430 case 't': 431 b.append('\t'); 432 break; 433 case 'r': 434 b.append('\r'); 435 break; 436 case 'n': 437 b.append('\n'); 438 break; 439 case 'f': 440 b.append('\f'); 441 break; 442 case '\'': 443 b.append('\''); 444 break; 445 case '"': 446 b.append('"'); 447 break; 448 case '\\': 449 b.append('\\'); 450 break; 451 case '/': 452 b.append('/'); 453 break; 454 case 'u': 455 i++; 456 int uc = Integer.parseInt(s.substring(i, i+4), 16); 457 b.append((char) uc); 458 i = i + 4; 459 break; 460 default: 461 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 462 } 463 } else { 464 b.append(ch); 465 i++; 466 } 467 } 468 return b.toString(); 469 } 470 471 public void skipToken(String token) throws FHIRLexerException { 472 if (getCurrent().equals(token)) 473 next(); 474 475 } 476 public String takeDottedToken() throws FHIRLexerException { 477 StringBuilder b = new StringBuilder(); 478 b.append(take()); 479 while (!done() && getCurrent().equals(".")) { 480 b.append(take()); 481 b.append(take()); 482 } 483 return b.toString(); 484 } 485 486 void skipComments() throws FHIRLexerException { 487 while (!done() && hasComment()) 488 next(); 489 } 490 public int getCurrentStart() { 491 return currentStart; 492 } 493 494}