001package org.hl7.fhir.r4.utils; 002 003import org.hl7.fhir.r4.model.ExpressionNode; 004import org.hl7.fhir.r4.model.ExpressionNode.SourceLocation; 005import org.hl7.fhir.exceptions.FHIRException; 006import org.hl7.fhir.utilities.Utilities; 007 008// shared lexer for concrete syntaxes 009// - FluentPath 010// - Mapping language 011 012public class FHIRLexer { 013 public class FHIRLexerException extends FHIRException { 014 015 public FHIRLexerException() { 016 super(); 017 } 018 019 public FHIRLexerException(String message, Throwable cause) { 020 super(message, cause); 021 } 022 023 public FHIRLexerException(String message) { 024 super(message); 025 } 026 027 public FHIRLexerException(Throwable cause) { 028 super(cause); 029 } 030 031 } 032 private String source; 033 private int cursor; 034 private int currentStart; 035 private String current; 036 private SourceLocation currentLocation; 037 private SourceLocation currentStartLocation; 038 private int id; 039 040 public FHIRLexer(String source) throws FHIRLexerException { 041 this.source = source; 042 currentLocation = new SourceLocation(1, 1); 043 next(); 044 } 045 public String getCurrent() { 046 return current; 047 } 048 public SourceLocation getCurrentLocation() { 049 return currentLocation; 050 } 051 052 public boolean isConstant(boolean incDoubleQuotes) { 053 return current.charAt(0) == '\'' || (incDoubleQuotes && current.charAt(0) == '"') || current.charAt(0) == '@' || current.charAt(0) == '%' || 054 current.charAt(0) == '-' || current.charAt(0) == '+' || (current.charAt(0) >= '0' && current.charAt(0) <= '9') || 055 current.equals("true") || current.equals("false") || current.equals("{}"); 056 } 057 058 public boolean isStringConstant() { 059 return current.charAt(0) == '\'' || current.charAt(0) == '"'; 060 } 061 062 public String take() throws FHIRLexerException { 063 String s = current; 064 next(); 065 return s; 066 } 067 068 public int takeInt() throws FHIRLexerException { 069 String s = current; 070 if (!Utilities.isInteger(s)) 071 throw error("Found "+current+" expecting an integer"); 072 next(); 073 return Integer.parseInt(s); 074 } 075 076 public boolean isToken() { 077 if (Utilities.noString(current)) 078 return false; 079 080 if (current.startsWith("$")) 081 return true; 082 083 if (current.equals("*") || current.equals("**")) 084 return true; 085 086 if ((current.charAt(0) >= 'A' && current.charAt(0) <= 'Z') || (current.charAt(0) >= 'a' && current.charAt(0) <= 'z')) { 087 for (int i = 1; i < current.length(); i++) 088 if (!( (current.charAt(1) >= 'A' && current.charAt(1) <= 'Z') || (current.charAt(1) >= 'a' && current.charAt(1) <= 'z') || 089 (current.charAt(1) >= '0' && current.charAt(1) <= '9'))) 090 return false; 091 return true; 092 } 093 return false; 094 } 095 096 public FHIRLexerException error(String msg) { 097 return error(msg, currentLocation.toString()); 098 } 099 100 public FHIRLexerException error(String msg, String location) { 101 return new FHIRLexerException("Error at "+location+": "+msg); 102 } 103 104 public void next() throws FHIRLexerException { 105 current = null; 106 boolean last13 = false; 107 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) { 108 if (source.charAt(cursor) == '\r') { 109 currentLocation.setLine(currentLocation.getLine() + 1); 110 currentLocation.setColumn(1); 111 last13 = true; 112 } else if (!last13 && (source.charAt(cursor) == '\n')) { 113 currentLocation.setLine(currentLocation.getLine() + 1); 114 currentLocation.setColumn(1); 115 last13 = false; 116 } else { 117 last13 = false; 118 currentLocation.setColumn(currentLocation.getColumn() + 1); 119 } 120 cursor++; 121 } 122 currentStart = cursor; 123 currentStartLocation = currentLocation; 124 if (cursor < source.length()) { 125 char ch = source.charAt(cursor); 126 if (ch == '!' || ch == '>' || ch == '<' || ch == ':' || ch == '-' || ch == '=') { 127 cursor++; 128 if (cursor < source.length() && (source.charAt(cursor) == '=' || source.charAt(cursor) == '~' || source.charAt(cursor) == '-')) 129 cursor++; 130 current = source.substring(currentStart, cursor); 131 } else if (ch == '.' ) { 132 cursor++; 133 if (cursor < source.length() && (source.charAt(cursor) == '.')) 134 cursor++; 135 current = source.substring(currentStart, cursor); 136 } else if (ch >= '0' && ch <= '9') { 137 cursor++; 138 boolean dotted = false; 139 while (cursor < source.length() && ((source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || (source.charAt(cursor) == '.') && !dotted)) { 140 if (source.charAt(cursor) == '.') 141 dotted = true; 142 cursor++; 143 } 144 if (source.charAt(cursor-1) == '.') 145 cursor--; 146 current = source.substring(currentStart, cursor); 147 } else if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { 148 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 149 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == '_')) 150 cursor++; 151 current = source.substring(currentStart, cursor); 152 } else if (ch == '%') { 153 cursor++; 154 if (cursor < source.length() && (source.charAt(cursor) == '"')) { 155 cursor++; 156 while (cursor < source.length() && (source.charAt(cursor) != '"')) 157 cursor++; 158 cursor++; 159 } else 160 while (cursor < source.length() && ((source.charAt(cursor) >= 'A' && source.charAt(cursor) <= 'Z') || (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z') || 161 (source.charAt(cursor) >= '0' && source.charAt(cursor) <= '9') || source.charAt(cursor) == ':' || source.charAt(cursor) == '-')) 162 cursor++; 163 current = source.substring(currentStart, cursor); 164 } else if (ch == '/') { 165 cursor++; 166 if (cursor < source.length() && (source.charAt(cursor) == '/')) { 167 cursor++; 168 while (cursor < source.length() && !((source.charAt(cursor) == '\r') || source.charAt(cursor) == '\n')) 169 cursor++; 170 } 171 current = source.substring(currentStart, cursor); 172 } else if (ch == '$') { 173 cursor++; 174 while (cursor < source.length() && (source.charAt(cursor) >= 'a' && source.charAt(cursor) <= 'z')) 175 cursor++; 176 current = source.substring(currentStart, cursor); 177 } else if (ch == '{') { 178 cursor++; 179 ch = source.charAt(cursor); 180 if (ch == '}') 181 cursor++; 182 current = source.substring(currentStart, cursor); 183 } else if (ch == '"'){ 184 cursor++; 185 boolean escape = false; 186 while (cursor < source.length() && (escape || source.charAt(cursor) != '"')) { 187 if (escape) 188 escape = false; 189 else 190 escape = (source.charAt(cursor) == '\\'); 191 cursor++; 192 } 193 if (cursor == source.length()) 194 throw error("Unterminated string"); 195 cursor++; 196 current = "\""+source.substring(currentStart+1, cursor-1)+"\""; 197 } else if (ch == '\''){ 198 cursor++; 199 char ech = ch; 200 boolean escape = false; 201 while (cursor < source.length() && (escape || source.charAt(cursor) != ech)) { 202 if (escape) 203 escape = false; 204 else 205 escape = (source.charAt(cursor) == '\\'); 206 cursor++; 207 } 208 if (cursor == source.length()) 209 throw error("Unterminated string"); 210 cursor++; 211 current = source.substring(currentStart, cursor); 212 if (ech == '\'') 213 current = "\'"+current.substring(1, current.length() - 1)+"\'"; 214 } else if (ch == '@'){ 215 int start = cursor; 216 cursor++; 217 while (cursor < source.length() && isDateChar(source.charAt(cursor), start)) 218 cursor++; 219 current = source.substring(currentStart, cursor); 220 } else { // if CharInSet(ch, ['.', ',', '(', ')', '=', '$']) then 221 cursor++; 222 current = source.substring(currentStart, cursor); 223 } 224 } 225 } 226 227 228 private boolean isDateChar(char ch,int start) { 229 int eot = source.charAt(start+1) == 'T' ? 10 : 20; 230 231 return ch == '-' || ch == ':' || ch == 'T' || ch == '+' || ch == 'Z' || Character.isDigit(ch) || (cursor-start == eot && ch == '.' && cursor < source.length()-1&& Character.isDigit(source.charAt(cursor+1))); 232 } 233 public boolean isOp() { 234 return ExpressionNode.Operation.fromCode(current) != null; 235 } 236 public boolean done() { 237 return currentStart >= source.length(); 238 } 239 public int nextId() { 240 id++; 241 return id; 242 } 243 public SourceLocation getCurrentStartLocation() { 244 return currentStartLocation; 245 } 246 247 // special case use 248 public void setCurrent(String current) { 249 this.current = current; 250 } 251 252 public boolean hasComment() { 253 return !done() && current.startsWith("//"); 254 } 255 public boolean hasToken(String kw) { 256 return !done() && kw.equals(current); 257 } 258 public boolean hasToken(String... names) { 259 if (done()) 260 return false; 261 for (String s : names) 262 if (s.equals(current)) 263 return true; 264 return false; 265 } 266 267 public void token(String kw) throws FHIRLexerException { 268 if (!kw.equals(current)) 269 throw error("Found \""+current+"\" expecting \""+kw+"\""); 270 next(); 271 } 272 273 public String readConstant(String desc) throws FHIRLexerException { 274 if (!isStringConstant()) 275 throw error("Found "+current+" expecting \"["+desc+"]\""); 276 277 return processConstant(take()); 278 } 279 280 public String processConstant(String s) throws FHIRLexerException { 281 StringBuilder b = new StringBuilder(); 282 int i = 1; 283 while (i < s.length()-1) { 284 char ch = s.charAt(i); 285 if (ch == '\\') { 286 i++; 287 switch (s.charAt(i)) { 288 case 't': 289 b.append('\t'); 290 break; 291 case 'r': 292 b.append('\r'); 293 break; 294 case 'n': 295 b.append('\n'); 296 break; 297 case 'f': 298 b.append('\f'); 299 break; 300 case '\'': 301 b.append('\''); 302 break; 303 case '"': 304 b.append('"'); 305 break; 306 case '\\': 307 b.append('\\'); 308 break; 309 case '/': 310 b.append('\\'); 311 break; 312 case 'u': 313 i++; 314 int uc = Integer.parseInt(s.substring(i, i+4), 16); 315 b.append((char) uc); 316 i = i + 4; 317 break; 318 default: 319 throw new FHIRLexerException("Unknown character escape \\"+s.charAt(i)); 320 } 321 } else { 322 b.append(ch); 323 i++; 324 } 325 } 326 return b.toString(); 327 328 } 329 public void skipToken(String token) throws FHIRLexerException { 330 if (getCurrent().equals(token)) 331 next(); 332 333 } 334 public String takeDottedToken() throws FHIRLexerException { 335 StringBuilder b = new StringBuilder(); 336 b.append(take()); 337 while (!done() && getCurrent().equals(".")) { 338 b.append(take()); 339 b.append(take()); 340 } 341 return b.toString(); 342 } 343 344 void skipComments() throws FHIRLexerException { 345 while (!done() && hasComment()) 346 next(); 347 } 348 349}