package org.hl7.fhir.r5.utils;

import java.util.ArrayList;
import java.util.List;
import java.util.StringJoiner;

import org.apache.poi.xssf.model.Comments;
import org.hl7.fhir.exceptions.FHIRException;

/*
  Copyright (c) 2011+, HL7, Inc.
  All rights reserved.
  
  Redistribution and use in source and binary forms, with or without modification, 
  are permitted provided that the following conditions are met:
    
   * Redistributions of source code must retain the above copyright notice, this 
     list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice, 
     this list of conditions and the following disclaimer in the documentation 
     and/or other materials provided with the distribution.
   * Neither the name of HL7 nor the names of its contributors may be used to 
     endorse or promote products derived from this software without specific 
     prior written permission.
  
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  POSSIBILITY OF SUCH DAMAGE.
  
 */



import org.hl7.fhir.r5.model.ExpressionNode;
import org.hl7.fhir.utilities.CommaSeparatedStringBuilder;
import org.hl7.fhir.utilities.SourceLocation;
import org.hl7.fhir.utilities.Utilities;

// shared lexer for concrete syntaxes 
// - FluentPath
// - Mapping language

/**
 * Hand-written tokeniser. The constructors read the first token immediately;
 * after that {@link #getCurrent()} exposes the current token and {@link #next()}
 * (or one of the {@code take...}/{@code token} helpers) moves to the following one.
 * Once the input is exhausted the current token is {@code null} and {@link #done()}
 * returns {@code true}.
 */
public class FHIRLexer {

  /**
   * Exception type used for every lexical error reported by this class.
   * Deliberately left as a (non-static) inner class so that existing callers
   * that instantiate it through a lexer instance keep compiling.
   */
  public class FHIRLexerException extends FHIRException {

    public FHIRLexerException() {
      super();
    }

    public FHIRLexerException(String message, Throwable cause) {
      super(message, cause);
    }

    public FHIRLexerException(String message) {
      super(message);
    }

    public FHIRLexerException(Throwable cause) {
      super(cause);
    }

  }

  private final String source;
  private int cursor;
  private int currentStart;
  private String current;
  private final List<String> comments = new ArrayList<>();
  private SourceLocation currentLocation;
  private SourceLocation currentStartLocation;
  private int id;
  private String name;

  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source text to tokenise; {@code null} is treated as the empty string
   * @param name   label for the source; {@code null} is stored as {@code "??"}
   * @throws FHIRLexerException if the first token is malformed
   */
  public FHIRLexer(String source, String name) throws FHIRLexerException {
    this.source = source == null ? "" : source;
    this.name = name == null ? "??" : name;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /**
   * Creates a lexer over {@code source} that starts reading at offset {@code i}
   * and reads the first token.
   *
   * @param source text to tokenise; {@code null} is treated as the empty string
   *               (same rule as the other constructor)
   * @param i      offset of the first character to examine
   * @throws FHIRLexerException if the first token is malformed
   */
  public FHIRLexer(String source, int i) throws FHIRLexerException {
    this.source = source == null ? "" : source;
    this.cursor = i;
    currentLocation = new SourceLocation(1, 1);
    next();
  }

  /** @return the current token, or {@code null} once the input is exhausted */
  public String getCurrent() {
    return current;
  }

  public SourceLocation getCurrentLocation() {
    return currentLocation;
  }

  /**
   * @return {@code true} when the current token starts with a quote, {@code @},
   *         {@code %}, a sign or a digit, or is exactly {@code true},
   *         {@code false} or <code>{}</code>
   */
  public boolean isConstant() {
    if (Utilities.noString(current)) {
      return false;
    }
    char first = current.charAt(0);
    return first == '\'' || first == '"' || first == '@' || first == '%'
        || first == '-' || first == '+' || isAsciiDigit(first)
        || current.equals("true") || current.equals("false") || current.equals("{}");
  }

  /** @return {@code true} when the current token starts with a backtick; {@code false} for a null/empty token */
  public boolean isFixedName() {
    return !Utilities.noString(current) && current.charAt(0) == '`';
  }

  /**
   * @return {@code true} when the current token starts with a single quote, double
   *         quote or backtick; {@code false} (rather than an exception) when there
   *         is no current token
   */
  public boolean isStringConstant() {
    if (Utilities.noString(current)) {
      return false;
    }
    char first = current.charAt(0);
    return first == '\'' || first == '"' || first == '`';
  }

  /**
   * Returns the current token and advances to the next one.
   *
   * @return the token that was current before advancing (may be {@code null} at end of input)
   * @throws FHIRLexerException if the following token is malformed
   */
  public String take() throws FHIRLexerException {
    String s = current;
    next();
    return s;
  }

  /**
   * Consumes the current token as an integer.
   *
   * @return the integer value of the consumed token
   * @throws FHIRLexerException if the current token is not an integer
   */
  public int takeInt() throws FHIRLexerException {
    String s = current;
    if (!Utilities.isInteger(s)) {
      throw error("Found "+current+" expecting an integer");
    }
    next();
    return Integer.parseInt(s);
  }

  /**
   * @return {@code true} when the current token starts with {@code $}, is
   *         {@code *} or {@code **}, or is an identifier: an ASCII letter followed
   *         by ASCII letters, digits or underscores (the same shape {@link #next()}
   *         produces for identifiers)
   */
  public boolean isToken() {
    if (Utilities.noString(current)) {
      return false;
    }
    if (current.startsWith("$")) {
      return true;
    }
    if (current.equals("*") || current.equals("**")) {
      return true;
    }
    if (!isAsciiLetter(current.charAt(0))) {
      return false;
    }
    // Every trailing character is checked (index i, not a fixed index).
    for (int i = 1; i < current.length(); i++) {
      if (!isIdentifierPart(current.charAt(i))) {
        return false;
      }
    }
    return true;
  }

  /** Builds (does not throw) a lexer exception located at the current location. */
  public FHIRLexerException error(String msg) {
    return error(msg, currentLocation.toString());
  }

  /** Builds (does not throw) a lexer exception with the message {@code "Error @<location>: <msg>"}. */
  public FHIRLexerException error(String msg, String location) {
    return new FHIRLexerException("Error @"+location+": "+msg);
  }

  /**
   * Skips whitespace and comments, then reads the next token into {@code current}.
   * At end of input {@code current} becomes {@code null}.
   *
   * @throws FHIRLexerException on an unterminated string, delimited name or block comment
   */
  public void next() throws FHIRLexerException {
    skipWhitespaceAndComments();
    current = null;
    currentStart = cursor;
    currentStartLocation = currentLocation;
    if (cursor >= source.length()) {
      return;
    }

    char ch = source.charAt(cursor);
    if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') {
      cursor++;
      // The bounds check guards every look-ahead, including the "->" case.
      if (cursor < source.length()) {
        char following = source.charAt(cursor);
        if (following == '=' || following == '~' || following == '-' || (ch == '-' && following == '>')) {
          cursor++;
        }
      }
    } else if (ch == '.') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '.') {
        cursor++;
      }
    } else if (isAsciiDigit(ch)) {
      cursor++;
      boolean dotted = false;
      while (cursor < source.length()
          && (isAsciiDigit(source.charAt(cursor)) || (source.charAt(cursor) == '.' && !dotted))) {
        if (source.charAt(cursor) == '.') {
          dotted = true;
        }
        cursor++;
      }
      // A trailing '.' is not part of the number; leave it for the next token.
      if (source.charAt(cursor - 1) == '.') {
        cursor--;
      }
    } else if (isAsciiLetter(ch)) {
      while (cursor < source.length() && isIdentifierPart(source.charAt(cursor))) {
        cursor++;
      }
    } else if (ch == '%') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '`') {
        cursor++;
        while (cursor < source.length() && source.charAt(cursor) != '`') {
          cursor++;
        }
        // Without this check the cursor would run past the end of the source.
        if (cursor >= source.length()) {
          throw error("Unterminated string");
        }
        cursor++;
      } else {
        while (cursor < source.length() && isPercentNamePart(source.charAt(cursor))) {
          cursor++;
        }
      }
    } else if (ch == '/') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '/') {
        // this is an error - line comments should already have been skipped
        throw error("This shoudn't happen?");
      }
    } else if (ch == '$') {
      cursor++;
      while (cursor < source.length() && source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') {
        cursor++;
      }
    } else if (ch == '{') {
      cursor++;
      if (cursor < source.length() && source.charAt(cursor) == '}') {
        cursor++;
      }
    } else if (ch == '"' || ch == '`' || ch == '\'') {
      skipDelimited(ch);
    } else if (ch == '@') {
      int start = cursor;
      cursor++;
      while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) {
        cursor++;
      }
    } else { // any other character is a single-character token
      cursor++;
    }
    current = source.substring(currentStart, cursor);
  }

  /**
   * Advances the cursor from an opening delimiter to just past the matching
   * closing delimiter. A backslash makes the following character literal.
   */
  private void skipDelimited(char delimiter) throws FHIRLexerException {
    cursor++; // opening delimiter
    boolean escape = false;
    while (cursor < source.length() && (escape || source.charAt(cursor) != delimiter)) {
      escape = !escape && source.charAt(cursor) == '\\';
      cursor++;
    }
    if (cursor == source.length()) {
      throw error("Unterminated string");
    }
    cursor++; // closing delimiter
  }

  private static boolean isAsciiLetter(char c) {
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
  }

  private static boolean isAsciiDigit(char c) {
    return c >= '0' && c <= '9';
  }

  /** Characters allowed after the first letter of an identifier token. */
  private static boolean isIdentifierPart(char c) {
    return isAsciiLetter(c) || isAsciiDigit(c) || c == '_';
  }

  /** Characters allowed in an undelimited name following '%'. */
  private static boolean isPercentNamePart(char c) {
    return isAsciiLetter(c) || isAsciiDigit(c) || c == ':' || c == '-';
  }

  /**
   * Clears the collected comments, then moves the cursor past whitespace, line
   * comments and block comments, recording the trimmed text of each comment.
   *
   * @throws FHIRLexerException if a block comment is not terminated
   */
  private void skipWhitespaceAndComments() throws FHIRLexerException {
    comments.clear();
    boolean last13 = false;
    boolean done = false;
    while (cursor < source.length() && !done) {
      if (source.startsWith("//", cursor)) {
        int start = cursor + 2;
        while (cursor < source.length() && source.charAt(cursor) != '\r' && source.charAt(cursor) != '\n') {
          cursor++;
        }
        comments.add(source.substring(start, cursor).trim());
      } else if (source.startsWith("/*", cursor)) {
        int start = cursor + 2;
        // Step over the opener first so that its '*' cannot be taken as the
        // start of the terminator (input such as "/*/"); the opener characters
        // are still passed to the location tracker.
        while (cursor < start) {
          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
          cursor++;
        }
        while (cursor < source.length() - 1 && !source.startsWith("*/", cursor)) {
          last13 = currentLocation.checkChar(source.charAt(cursor), last13);
          cursor++;
        }
        if (cursor >= source.length() - 1) {
          throw error("Unfinished comment");
        }
        comments.add(source.substring(start, cursor).trim());
        cursor = cursor + 2;
      } else if (Character.isWhitespace(source.charAt(cursor))) {
        last13 = currentLocation.checkChar(source.charAt(cursor), last13);
        cursor++;
      } else {
        done = true;
      }
    }
  }

  /**
   * Decides whether {@code ch}, found at the current cursor, continues the
   * date/time literal whose '@' is at offset {@code start}. A '.' is accepted
   * only at one fixed offset from the '@' (10 when the literal starts with 'T',
   * otherwise 20) and only when a digit follows it.
   */
  private boolean isDateChar(char ch,int start) {
    int eot = source.charAt(start+1) == 'T' ? 10 : 20;

    return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch)
        || (cursor-start == eot && ch == '.' && cursor < source.length()-1 && Character.isDigit(source.charAt(cursor+1)));
  }

  /** @return {@code true} when {@code ExpressionNode.Operation.fromCode} recognises the current token */
  public boolean isOp() {
    return ExpressionNode.Operation.fromCode(current) != null;
  }

  /** @return {@code true} once the start of the current token is at or past the end of the source */
  public boolean done() {
    return currentStart >= source.length();
  }

  /** Increments the internal counter and returns its new value. */
  public int nextId() {
    id++;
    return id;
  }

  public SourceLocation getCurrentStartLocation() {
    return currentStartLocation;
  }

  // special case use
  public void setCurrent(String current) {
    this.current = current;
  }

  /** @return {@code true} when comments were collected ahead of the current token and not yet consumed */
  public boolean hasComments() {
    return !comments.isEmpty();
  }

  /** @return the live list of collected comments (not a copy) */
  public List<String> getComments() {
    return comments;
  }

  /** Joins the collected comments with CR LF, clears the list, and returns the joined text. */
  public String getAllComments() {
    CommaSeparatedStringBuilder b = new CommaSeparatedStringBuilder("\r\n");
    b.addAll(comments);
    comments.clear();
    return b.toString();
  }

  /** Removes and returns the first collected comment, or returns {@code null} when there is none. */
  public String getFirstComment() {
    return hasComments() ? comments.remove(0) : null;
  }

  /** @return {@code true} when input remains and the current token equals {@code kw} */
  public boolean hasToken(String kw) {
    return !done() && kw.equals(current);
  }

  /** @return {@code true} when input remains and the current token equals any of {@code names} */
  public boolean hasToken(String... names) {
    if (done()) {
      return false;
    }
    for (String s : names) {
      if (s.equals(current)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Consumes the current token, which must equal {@code kw}.
   *
   * @throws FHIRLexerException if the current token is anything else
   */
  public void token(String kw) throws FHIRLexerException {
    if (!kw.equals(current)) {
      throw error("Found \""+current+"\" expecting \""+kw+"\"");
    }
    next();
  }

  /**
   * Consumes a quoted token and returns its unescaped content.
   *
   * @param desc description of what was expected, used in the error message
   * @throws FHIRLexerException if the current token is not quoted (including end of input)
   */
  public String readConstant(String desc) throws FHIRLexerException {
    if (!isStringConstant()) {
      throw error("Found "+current+" expecting \"["+desc+"]\"");
    }
    return processConstant(take());
  }

  /**
   * Consumes a backtick-delimited token and returns its unescaped content.
   *
   * @param desc description of what was expected, used in the error message
   * @throws FHIRLexerException if the current token is not backtick-delimited
   */
  public String readFixedName(String desc) throws FHIRLexerException {
    if (!isFixedName()) {
      throw error("Found "+current+" expecting \"["+desc+"]\"");
    }
    return processFixedName(take());
  }

  /**
   * Strips the first and last character of {@code s} (its delimiters) and
   * resolves backslash escapes in between.
   *
   * @throws FHIRLexerException on an unknown or malformed escape
   */
  public String processConstant(String s) throws FHIRLexerException {
    return unescape(s);
  }

  /**
   * Same processing as {@link #processConstant(String)}, applied to a
   * backtick-delimited name; an escaped backtick is therefore accepted here too.
   *
   * @throws FHIRLexerException on an unknown or malformed escape
   */
  public String processFixedName(String s) throws FHIRLexerException {
    return unescape(s);
  }

  /** Shared escape processing for {@link #processConstant} and {@link #processFixedName}. */
  private String unescape(String s) throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    int i = 1;
    while (i < s.length() - 1) {
      char ch = s.charAt(i);
      if (ch != '\\') {
        b.append(ch);
        i++;
        continue;
      }
      i++; // now on the character that selects the escape
      char selector = s.charAt(i);
      switch (selector) {
      case 't':
        b.append('\t');
        break;
      case 'r':
        b.append('\r');
        break;
      case 'n':
        b.append('\n');
        break;
      case 'f':
        b.append('\f');
        break;
      case '\'':
        b.append('\'');
        break;
      case '"':
        b.append('"');
        break;
      case '`':
        b.append('`');
        break;
      case '\\':
        b.append('\\');
        break;
      case '/':
        b.append('/');
        break;
      case 'u':
        b.append(decodeHexEscape(s, i + 1));
        i = i + 4; // the four hex digits
        break;
      default:
        throw new FHIRLexerException("Unknown character escape \\"+selector);
      }
      i++; // step past the selector (or the last hex digit) so it is not emitted again
    }
    return b.toString();
  }

  /**
   * Decodes the four hex digits starting at {@code from}; they must all lie
   * before the closing delimiter of {@code s}.
   */
  private char decodeHexEscape(String s, int from) throws FHIRLexerException {
    int to = from + 4;
    if (to > s.length() - 1) {
      throw new FHIRLexerException("Malformed unicode escape: expected 4 hex digits");
    }
    String hex = s.substring(from, to);
    try {
      return (char) Integer.parseInt(hex, 16);
    } catch (NumberFormatException e) {
      throw new FHIRLexerException("Malformed unicode escape: '"+hex+"' is not 4 hex digits", e);
    }
  }

  /** Advances past the current token if it equals {@code token}; otherwise (including end of input) does nothing. */
  public void skipToken(String token) throws FHIRLexerException {
    if (token != null && token.equals(getCurrent())) {
      next();
    }
  }

  /**
   * Consumes the current token plus every following {@code .}/token pair and
   * returns them concatenated.
   */
  public String takeDottedToken() throws FHIRLexerException {
    StringBuilder b = new StringBuilder();
    b.append(take());
    while (!done() && ".".equals(getCurrent())) {
      b.append(take()); // the dot
      b.append(take()); // the token after it
    }
    return b.toString();
  }

  /** @return offset in the source at which the current token starts */
  public int getCurrentStart() {
    return currentStart;
  }

  public String getSource() {
    return source;
  }

}