001package org.hl7.fhir.r4.utils.formats; 002 003import java.io.IOException; 004import java.io.OutputStream; 005import java.io.OutputStreamWriter; 006import java.io.UnsupportedEncodingException; 007import java.util.ArrayList; 008import java.util.Collections; 009import java.util.HashMap; 010import java.util.HashSet; 011import java.util.List; 012import java.util.Map; 013import java.util.Set; 014import java.util.UUID; 015 016import org.hl7.fhir.exceptions.FHIRFormatError; 017import org.hl7.fhir.utilities.Utilities; 018 019public class Turtle { 020 021 public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE"; 022 023 public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+"; 024 public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?"; 025 026 // Object model 027 public abstract class Triple { 028 private String uri; 029 } 030 031 public class StringType extends Triple { 032 private String value; 033 034 public StringType(String value) { 035 super(); 036 this.value = value; 037 } 038 } 039 040 public class Complex extends Triple { 041 protected List<Predicate> predicates = new ArrayList<Predicate>(); 042 043 public Complex predicate(String predicate, String object) { 044 predicateSet.add(predicate); 045 objectSet.add(object); 046 return predicate(predicate, new StringType(object)); 047 } 048 049 public Complex linkedPredicate(String predicate, String object, String link) { 050 predicateSet.add(predicate); 051 objectSet.add(object); 052 return linkedPredicate(predicate, new StringType(object), link); 053 } 054 055 public Complex predicate(String predicate, Triple object) { 056 Predicate p = getPredicate(predicate); 057 if (p == null) { 058 p = new Predicate(); 059 p.predicate = predicate; 060 predicateSet.add(predicate); 061 predicates.add(p); 062 } 063 if (object instanceof StringType) 064 objectSet.add(((StringType) object).value); 065 p.objects.add(object); 066 return this; 067 } 068 069 protected Predicate getPredicate(String predicate) { 070 for (Predicate p : predicates) 071 if (p.predicate.equals(predicate)) 072 return p; 073 return null; 074 } 075 076 public Complex linkedPredicate(String predicate, Triple object, String link) { 077 Predicate p = getPredicate(predicate); 078 if (p == null) { 079 p = new Predicate(); 080 p.predicate = predicate; 081 p.link = link; 082 predicateSet.add(predicate); 083 predicates.add(p); 084 } 085 if (object instanceof StringType) 086 objectSet.add(((StringType) object).value); 087 p.objects.add(object); 088 return this; 089 } 090 091 public Complex predicate(String predicate) { 092 predicateSet.add(predicate); 093 Complex c = complex(); 094 predicate(predicate, c); 095 return c; 096 } 097 098 public Complex linkedPredicate(String predicate, String link) { 099 predicateSet.add(predicate); 100 Complex c = complex(); 101 linkedPredicate(predicate, c, link); 102 return c; 103 } 104 105 public void prefix(String code, String url) { 106 Turtle.this.prefix(code, url); 107 } 108 } 109 110 private class Predicate { 111 protected String predicate; 112 protected String link; 113 protected List<Triple> objects = new ArrayList<Turtle.Triple>(); 114 protected String comment; 115 116 public String getPredicate() { 117 return predicate; 118 } 119 public String makelink() { 120 if (link == null) 121 return predicate; 122 else 123 return "<a href=\""+link+"\">"+Utilities.escapeXml(predicate)+"</a>"; 124 } 125 126 public List<Triple> getObjects() { 127 return objects; 128 } 129 public String getComment() { 130 return comment; 131 } 132 } 133 134 public class Subject extends Complex { 135 private String id; 136 137 public Predicate predicate(String predicate, Triple object, String comment) { 138 Predicate p = getPredicate(predicate); 139 if (p == null) { 140 p = new Predicate(); 141 p.predicate = predicate; 142 predicateSet.add(predicate); 143 predicates.add(p); 144 p.comment = comment; 145 } 146 if (object instanceof StringType) 147 objectSet.add(((StringType) object).value); 148 p.objects.add(object); 149 return p; 150 } 151 152 public void comment(String comment) { 153 if (!Utilities.noString(comment)) { 154 predicate("rdfs:comment", literal(comment)); 155 predicate("dcterms:description", literal(comment)); 156 } 157 } 158 159 public void label(String label) { 160 if (!Utilities.noString(label)) { 161 predicate("rdfs:label", literal(label)); 162 predicate("dc:title", literal(label)); 163 } 164 } 165 166 } 167 168 public class Section { 169 private String name; 170 private List<Subject> subjects = new ArrayList<Subject>(); 171 172 public Subject triple(String subject, String predicate, String object, String comment) { 173 return triple(subject, predicate, new StringType(object), comment); 174 } 175 176 public Subject triple(String subject, String predicate, String object) { 177 return triple(subject, predicate, new StringType(object)); 178 } 179 180 public Subject triple(String subject, String predicate, Triple object) { 181 return triple(subject, predicate, object, null); 182 } 183 184 public Subject triple(String subject, String predicate, Triple object, String comment) { 185 Subject s = subject(subject); 186 s.predicate(predicate, object, comment); 187 return s; 188 } 189 190 public void comment(String subject, String comment) { 191 triple(subject, "rdfs:comment", literal(comment)); 192 triple(subject, "dcterms:description", literal(comment)); 193 } 194 195 public void label(String subject, String comment) { 196 triple(subject, "rdfs:label", literal(comment)); 197 triple(subject, "dc:title", literal(comment)); 198 } 199 200 public Subject subject(String subject) { 201 for (Subject ss : subjects) 202 if (ss.id.equals(subject)) 203 return ss; 204 Subject s = new Subject(); 205 s.id = subject; 206 subjects.add(s); 207 return s; 208 } 209 210 public boolean hasSubject(String subject) { 211 for (Subject ss : subjects) 212 if (ss.id.equals(subject)) 213 return true; 214 return false; 215 } 216 } 217 218 private List<Section> sections = new ArrayList<Section>(); 219 protected Set<String> subjectSet = new HashSet<String>(); 220 protected Set<String> predicateSet = new HashSet<String>(); 221 protected Set<String> objectSet = new HashSet<String>(); 222 protected Map<String, String> prefixes = new HashMap<String, String>(); 223 224 public void prefix(String code, String url) { 225 prefixes.put(code, url); 226 } 227 228 protected boolean hasSection(String sn) { 229 for (Section s : sections) 230 if (s.name.equals(sn)) 231 return true; 232 return false; 233 234 } 235 236 public Section section(String sn) { 237 if (hasSection(sn)) 238 throw new Error("Duplicate section name "+sn); 239 Section s = new Section(); 240 s.name = sn; 241 sections.add(s); 242 return s; 243 } 244 245 protected String matches(String url, String prefixUri, String prefix) { 246 if (url.startsWith(prefixUri)) { 247 prefixes.put(prefix, prefixUri); 248 return prefix+":"+escape(url.substring(prefixUri.length()), false); 249 } 250 return null; 251 } 252 253 protected Complex complex() { 254 return new Complex(); 255 } 256 257 private void checkPrefix(Triple object) { 258 if (object instanceof StringType) 259 checkPrefix(((StringType) object).value); 260 else { 261 Complex obj = (Complex) object; 262 for (Predicate po : obj.predicates) { 263 checkPrefix(po.getPredicate()); 264 for (Triple o : po.getObjects()) 265 checkPrefix(o); 266 } 267 } 268 } 269 270 protected void checkPrefix(String pname) { 271 if (pname.startsWith("(")) 272 return; 273 if (pname.startsWith("\"")) 274 return; 275 if (pname.startsWith("<")) 276 return; 277 278 if (pname.contains(":")) { 279 String prefix = pname.substring(0, pname.indexOf(":")); 280 if (!prefixes.containsKey(prefix) && !prefix.equals("http")&& !prefix.equals("urn")) 281 throw new Error("undefined prefix "+prefix); 282 } 283 } 284 285 protected StringType literal(String s) { 286 return new StringType("\""+escape(s, true)+"\""); 287 } 288 289 protected StringType literalTyped(String s, String t) { 290 return new StringType("\""+escape(s, true)+"\"^^xs:"+t); 291 } 292 293 public static String escape(String s, boolean string) { 294 if (s == null) 295 return ""; 296 297 StringBuilder b = new StringBuilder(); 298 for (char c : s.toCharArray()) { 299 if (c == '\r') 300 b.append("\\r"); 301 else if (c == '\n') 302 b.append("\\n"); 303 else if (c == '"') 304 b.append("\\\""); 305 else if (c == '\\') 306 b.append("\\\\"); 307 else if (c == '/' && !string) 308 b.append("\\/"); 309 else 310 b.append(c); 311 } 312 return b.toString(); 313 } 314 315 protected String pctEncode(String s) { 316 if (s == null) 317 return ""; 318 319 StringBuilder b = new StringBuilder(); 320 for (char c : s.toCharArray()) { 321 if (c >= 'A' && c <= 'Z') 322 b.append(c); 323 else if (c >= 'a' && c <= 'z') 324 b.append(c); 325 else if (c >= '0' && c <= '9') 326 b.append(c); 327 else if (c == '.') 328 b.append(c); 329 else 330 b.append("%"+Integer.toHexString(c)); 331 } 332 return b.toString(); 333 } 334 335 protected List<String> sorted(Set<String> keys) { 336 List<String> names = new ArrayList<String>(); 337 names.addAll(keys); 338 Collections.sort(names); 339 return names; 340 } 341 342 public void commit(OutputStream destination, boolean header) throws IOException { 343 LineOutputStreamWriter writer = new LineOutputStreamWriter(destination); 344 commitPrefixes(writer, header); 345 for (Section s : sections) { 346 commitSection(writer, s); 347 } 348 writer.ln("# -------------------------------------------------------------------------------------"); 349 writer.ln(); 350 writer.flush(); 351 writer.close(); 352 } 353 354 public String asHtml() throws Exception { 355 StringBuilder b = new StringBuilder(); 356 b.append("<pre class=\"rdf\">\r\n"); 357 commitPrefixes(b); 358 for (Section s : sections) { 359 commitSection(b, s); 360 } 361 b.append("</pre>\r\n"); 362 b.append("\r\n"); 363 return b.toString(); 364 } 365 366 private void commitPrefixes(LineOutputStreamWriter writer, boolean header) throws IOException { 367 if (header) { 368 writer.ln("# FHIR Sub-definitions"); 369 writer.write("# This is work in progress, and may change rapidly \r\n"); 370 writer.ln(); 371 writer.write("# A note about policy: the focus here is providing the knowledge from \r\n"); 372 writer.write("# the FHIR specification as a set of triples for knowledge processing. \r\n"); 373 writer.write("# Where appopriate, predicates defined external to FHIR are used. \"Where \r\n"); 374 writer.write("# appropriate\" means that the predicates are a faithful representation \r\n"); 375 writer.write("# of the FHIR semantics, and do not involve insane (or owful) syntax. \r\n"); 376 writer.ln(); 377 writer.write("# Where the community agrees on additional predicate statements (such \r\n"); 378 writer.write("# as OWL constraints) these are added in addition to the direct FHIR \r\n"); 379 writer.write("# predicates \r\n"); 380 writer.ln(); 381 writer.write("# This it not a formal ontology, though it is possible it may start to become one eventually\r\n"); 382 writer.ln(); 383 writer.write("# this file refers to concepts defined in rim.ttl and to others defined elsewhere outside HL7 \r\n"); 384 writer.ln(); 385 } 386 for (String p : sorted(prefixes.keySet())) 387 writer.ln("@prefix "+p+": <"+prefixes.get(p)+"> ."); 388 writer.ln(); 389 if (header) { 390 writer.ln("# Predicates used in this file:"); 391 for (String s : sorted(predicateSet)) 392 writer.ln(" # "+s); 393 writer.ln(); 394 } 395 } 396 397 private void commitPrefixes(StringBuilder b) throws Exception { 398 for (String p : sorted(prefixes.keySet())) 399 b.append("@prefix "+p+": <"+prefixes.get(p)+"> .\r\n"); 400 b.append("\r\n"); 401 } 402 403 // private String lastSubject = null; 404 // private String lastComment = ""; 405 406 private void commitSection(LineOutputStreamWriter writer, Section section) throws IOException { 407 writer.ln("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())); 408 writer.ln(); 409 for (Subject sbj : section.subjects) { 410 if (Utilities.noString(sbj.id)) { 411 writer.write("["); 412 } else { 413 writer.write(sbj.id); 414 writer.write(" "); 415 } 416 int i = 0; 417 418 for (Predicate p : sbj.predicates) { 419 writer.write(p.getPredicate()); 420 writer.write(" "); 421 boolean first = true; 422 for (Triple o : p.getObjects()) { 423 if (first) 424 first = false; 425 else 426 writer.write(", "); 427 if (o instanceof StringType) 428 writer.write(((StringType) o).value); 429 else { 430 writer.write("["); 431 if (write((Complex) o, writer, 4)) 432 writer.write("\r\n ]"); 433 else 434 writer.write("]"); 435 } 436 } 437 String comment = p.comment == null? "" : " # "+p.comment; 438 i++; 439 if (i < sbj.predicates.size()) 440 writer.write(";"+comment+"\r\n "); 441 else { 442 if (Utilities.noString(sbj.id)) 443 writer.write("]"); 444 writer.write(" ."+comment+"\r\n\r\n"); 445 } 446 } 447 } 448 } 449 450 private void commitSection(StringBuilder b, Section section) throws Exception { 451 b.append("# - "+section.name+" "+Utilities.padLeft("", '-', 75-section.name.length())+"\r\n"); 452 b.append("\r\n"); 453 for (Subject sbj : section.subjects) { 454 b.append(Utilities.escapeXml(sbj.id)); 455 b.append(" "); 456 int i = 0; 457 458 for (Predicate p : sbj.predicates) { 459 b.append(p.makelink()); 460 b.append(" "); 461 boolean first = true; 462 for (Triple o : p.getObjects()) { 463 if (first) 464 first = false; 465 else 466 b.append(", "); 467 if (o instanceof StringType) 468 b.append(Utilities.escapeXml(((StringType) o).value)); 469 else { 470 b.append("["); 471 if (write((Complex) o, b, 4)) 472 b.append("\r\n ]"); 473 else 474 b.append("]"); 475 } 476 } 477 String comment = p.comment == null? "" : " # "+p.comment; 478 i++; 479 if (i < sbj.predicates.size()) 480 b.append(";"+Utilities.escapeXml(comment)+"\r\n "); 481 else 482 b.append("."+Utilities.escapeXml(comment)+"\r\n\r\n"); 483 } 484 } 485 } 486 487 protected class LineOutputStreamWriter extends OutputStreamWriter { 488 private LineOutputStreamWriter(OutputStream out) throws UnsupportedEncodingException { 489 super(out, "UTF-8"); 490 } 491 492 private void ln() throws IOException { 493 write("\r\n"); 494 } 495 496 private void ln(String s) throws IOException { 497 write(s); 498 write("\r\n"); 499 } 500 } 501 502 public boolean write(Complex complex, LineOutputStreamWriter writer, int indent) throws IOException { 503 if (complex.predicates.isEmpty()) 504 return false; 505 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 506 writer.write(" "+complex.predicates.get(0).predicate+" "+((StringType) complex.predicates.get(0).getObjects().get(0)).value); 507 return false; 508 } 509 String left = Utilities.padLeft("", ' ', indent); 510 int i = 0; 511 for (Predicate po : complex.predicates) { 512 writer.write("\r\n"); 513 boolean first = true; 514 for (Triple o : po.getObjects()) { 515 if (first) { 516 first = false; 517 writer.write(left+" "+po.getPredicate()+" "); 518 } else 519 writer.write(", "); 520 if (o instanceof StringType) 521 writer.write(((StringType) o).value); 522 else { 523 writer.write("["); 524 if (write((Complex) o, writer, indent+2)) 525 writer.write("\r\n"+left+" ]"); 526 else 527 writer.write(" ]"); 528 } 529 } 530 i++; 531 if (i < complex.predicates.size()) 532 writer.write(";"); 533 if (!Utilities.noString(po.comment)) 534 writer.write(" # "+escape(po.comment, false)); 535 } 536 return true; 537 } 538 539 public boolean write(Complex complex, StringBuilder b, int indent) throws Exception { 540 if (complex.predicates.isEmpty()) 541 return false; 542 if (complex.predicates.size() == 1 && complex.predicates.get(0).getObjects().size()== 1 && complex.predicates.get(0).getObjects().get(0) instanceof StringType && Utilities.noString(complex.predicates.get(0).comment)) { 543 b.append(" "+complex.predicates.get(0).makelink()+" "+Utilities.escapeXml(((StringType) complex.predicates.get(0).getObjects().get(0)).value)); 544 return false; 545 } 546 String left = Utilities.padLeft("", ' ', indent); 547 int i = 0; 548 for (Predicate po : complex.predicates) { 549 b.append("\r\n"); 550 boolean first = true; 551 for (Triple o : po.getObjects()) { 552 if (first) { 553 first = false; 554 b.append(left+" "+po.makelink()+" "); 555 } else 556 b.append(", "); 557 if (o instanceof StringType) 558 b.append(Utilities.escapeXml(((StringType) o).value)); 559 else { 560 b.append("["); 561 if (write((Complex) o, b, indent+2)) 562 b.append(left+" ]"); 563 else 564 b.append(" ]"); 565 } 566 } 567 i++; 568 if (i < complex.predicates.size()) 569 b.append(";"); 570 if (!Utilities.noString(po.comment)) 571 b.append(" # "+Utilities.escapeXml(escape(po.comment, false))); 572 } 573 return true; 574 } 575 576 577 public abstract class TTLObject { 578 protected int line; 579 protected int col; 580 581 abstract public boolean hasValue(String value); 582 583 public int getLine() { 584 return line; 585 } 586 587 public int getCol() { 588 return col; 589 } 590 591 592 } 593 594 595 public class TTLLiteral extends TTLObject { 596 597 private String value; 598 private String type; 599 protected TTLLiteral(int line, int col) { 600 this.line = line; 601 this.col = col; 602 } 603 @Override 604 public boolean hasValue(String value) { 605 return value.equals(this.value); 606 } 607 public String getValue() { 608 return value; 609 } 610 public String getType() { 611 return type; 612 } 613 614 } 615 616 public class TTLURL extends TTLObject { 617 private String uri; 618 619 protected TTLURL(int line, int col) { 620 this.line = line; 621 this.col = col; 622 } 623 624 public String getUri() { 625 return uri; 626 } 627 628 public void setUri(String uri) throws FHIRFormatError { 629 if (!uri.matches(IRI_URL)) 630 throw new FHIRFormatError("Illegal URI "+uri); 631 this.uri = uri; 632 } 633 634 @Override 635 public boolean hasValue(String value) { 636 return value.equals(this.uri); 637 } 638 } 639 640 public class TTLList extends TTLObject { 641 private List<TTLObject> list = new ArrayList<Turtle.TTLObject>(); 642 643 public TTLList(TTLObject obj) { 644 super(); 645 list.add(obj); 646 } 647 648 @Override 649 public boolean hasValue(String value) { 650 for (TTLObject obj : list) 651 if (obj.hasValue(value)) 652 return true; 653 return false; 654 } 655 656 public List<TTLObject> getList() { 657 return list; 658 } 659 660 } 661 public class TTLComplex extends TTLObject { 662 private Map<String, TTLObject> predicates = new HashMap<String, Turtle.TTLObject>(); 663 protected TTLComplex(int line, int col) { 664 this.line = line; 665 this.col = col; 666 } 667 public Map<String, TTLObject> getPredicates() { 668 return predicates; 669 } 670 @Override 671 public boolean hasValue(String value) { 672 return false; 673 } 674 public void addPredicate(String uri, TTLObject obj) { 675 if (!predicates.containsKey(uri)) 676 predicates.put(uri, obj); 677 else { 678 TTLObject eo = predicates.get(uri); 679 TTLList list = null; 680 if (eo instanceof TTLList) 681 list = (TTLList) eo; 682 else { 683 list = new TTLList(eo); 684 predicates.put(uri, list); 685 } 686 list.list.add(obj); 687 } 688 } 689 public void addPredicates(Map<String, TTLObject> values) { 690 for (String s : values.keySet()) { 691 addPredicate(s, values.get(s)); 692 } 693 } 694 } 695 696 private Map<TTLURL, TTLComplex> objects = new HashMap<TTLURL, Turtle.TTLComplex>(); 697 698 private Object base; 699 700 public enum LexerTokenType { 701 TOKEN, // [, ], :, @ 702 WORD, // a word 703 URI, // a URI <> 704 LITERAL // "..." 705 } 706 707 public class Lexer { 708 709 710 private String source; 711 private LexerTokenType type; 712 private int cursor, line, col, startLine, startCol; 713 private String token; 714 715 public Lexer(String source) throws FHIRFormatError { 716 this.source = source; 717 cursor = 0; 718 line = 1; 719 col = 1; 720 readNext(false); 721 } 722 723 private void skipWhitespace() { 724 while (cursor < source.length()) { 725 char ch = source.charAt(cursor); 726 if (Character.isWhitespace(ch)) 727 grab(); 728 else if (ch == '#') { 729 ch = grab(); 730 while (cursor < source.length()) { 731 ch = grab(); 732 if (ch == '\r' || ch == '\n') { 733 break; 734 } 735 } 736 } else 737 break; 738 } 739 } 740 741 private char grab() { 742 char c = source.charAt(cursor); 743 if (c == '\n') { 744 line++; 745 col = 1; 746 } else 747 col++; 748 749 cursor++; 750 return c; 751 } 752 753 private void readNext(boolean postColon) throws FHIRFormatError { 754 token = null; 755 type = null; 756 skipWhitespace(); 757 if (cursor >= source.length()) 758 return; 759 startLine = line; 760 startCol = col; 761 char ch = grab(); 762 StringBuilder b = new StringBuilder(); 763 switch (ch) { 764 case '@': 765 case '.': 766 case ':': 767 case ';': 768 case '^': 769 case ',': 770 case ']': 771 case '[': 772 case '(': 773 case ')': 774 type = LexerTokenType.TOKEN; 775 b.append(ch); 776 token = b.toString(); 777 return; 778 case '<': 779 while (cursor < source.length()) { 780 ch = grab(); 781 if (ch == '>') 782 break; 783 b.append(ch); 784 } 785 type = LexerTokenType.URI; 786 token = unescape(b.toString(), true); 787 return; 788 case '"': 789 b.append(ch); 790 String end = "\""; 791 while (cursor < source.length()) { 792 ch = grab(); 793 if (b.length() == 2 && ch != '"' && b.equals("\"\"")) { 794 cursor--; 795 break; 796 } 797 b.append(ch); 798 if (ch == '"') 799 if (b.toString().equals("\"\"\"")) 800 end = "\"\"\""; 801 else if (!b.toString().equals("\"\"") && b.toString().endsWith(end) && !b.toString().endsWith("\\"+end)) 802 break; 803 } 804 type = LexerTokenType.LITERAL; 805 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 806 return; 807 case '\'': 808 b.append(ch); 809 end = "'"; 810 while (cursor < source.length()) { 811 ch = grab(); 812 if (b.equals("''") && ch != '\'') { 813 cursor--; 814 break; 815 } 816 b.append(ch); 817 if (b.toString().equals("'''")) 818 end = "'''"; 819 else if (!b.toString().equals("''") && b.toString().endsWith(end)) 820 break; 821 } 822 type = LexerTokenType.LITERAL; 823 token = unescape(b.toString().substring(end.length(), b.length()-end.length()), false); 824 return; 825 default: 826 if (Utilities.charInRange(ch, '0', '9') || Utilities.charInRange(ch, 'a', 'z') || Utilities.charInRange(ch, 'A', 'Z') || Utilities.charInSet(ch, '_', '-', '+', '%')) { 827 b.append(ch); 828 while (cursor < source.length()) { 829 ch = grab(); 830 // if (!Utilities.charInRange(ch, '0', '9') && !Utilities.charInRange(ch, 'a', 'z') && !Utilities.charInRange(ch, 'A', 'Z') && !Utilities.charInSet(ch, '_', '-', '+', '.', '\\', '#')) 831 if (Character.isWhitespace(ch) || Utilities.charInSet(ch, ';', ']', ')', '~') || (( ch == ':') && !postColon)) 832 break; 833 b.append(ch); 834 } 835 type = LexerTokenType.WORD; 836 token = b.toString(); 837 cursor--; 838 return; 839 } else 840 throw error("unexpected lexer char "+ch); 841 } 842 } 843 844 private String unescape(String s, boolean isUri) throws FHIRFormatError { 845 StringBuilder b = new StringBuilder(); 846 int i = 0; 847 while (i < s.length()) { 848 char ch = s.charAt(i); 849 if (ch == '\\' && i < s.length()-1) { 850 i++; 851 switch (s.charAt(i)) { 852 case 't': 853 b.append('\t'); 854 break; 855 case 'r': 856 b.append('\r'); 857 break; 858 case 'n': 859 b.append('\n'); 860 break; 861 case 'f': 862 b.append('\f'); 863 break; 864 case '\'': 865 b.append('\''); 866 break; 867 case '\"': 868 b.append('\"'); 869 break; 870 case '\\': 871 b.append('\\'); 872 break; 873 case '/': 874 b.append('\\'); 875 break; 876 case 'U': 877 case 'u': 878 i++; 879 int l = 4; 880 int uc = Integer.parseInt(s.substring(i, i+l), 16); 881 if (uc < (isUri ? 33 : 32)) { 882 l = 8; 883 uc = Integer.parseInt(s.substring(i, i+8), 16); 884 } 885 if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E))) 886 throw new FHIRFormatError("Illegal unicode character"); 887 b.append((char) uc); 888 i = i + l; 889 break; 890 default: 891 throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i)); 892 } 893 } else { 894 b.append(ch); 895 } 896 i++; 897 } 898 return b.toString(); 899 } 900 901 public boolean done() { 902 return type == null; 903 } 904 905 public String next(LexerTokenType type, boolean postColon) throws FHIRFormatError { 906 if (type != null && this.type != type) 907 throw error("Unexpected type. Found "+this.type.toString()+" looking for a "+type.toString()); 908 String res = token; 909 readNext(postColon); 910 return res; 911 } 912 913 public String peek() throws Exception { 914 return token; 915 } 916 917 public LexerTokenType peekType() { 918 return type; 919 } 920 921 public void token(String token) throws FHIRFormatError { 922 if (!token.equals(this.token)) 923 throw error("Unexpected word "+this.token+" looking for "+token); 924 next(LexerTokenType.TOKEN, token.equals(":")); 925 } 926 927 public void word(String word) throws Exception { 928 if (!word.equals(this.token)) 929 throw error("Unexpected word "+this.token+" looking for "+word); 930 next(LexerTokenType.WORD, false); 931 } 932 933 public String word() throws FHIRFormatError { 934 String t = token; 935 next(LexerTokenType.WORD, false); 936 return t; 937 } 938 939 public String uri() throws FHIRFormatError { 940 if (this.type != LexerTokenType.URI) 941 throw error("Unexpected type. Found "+this.type.toString()+" looking for a URI"); 942 String t = token; 943 next(LexerTokenType.URI, false); 944 return t; 945 } 946 947 public String literal() throws FHIRFormatError { 948 if (this.type != LexerTokenType.LITERAL) 949 throw error("Unexpected type. Found "+this.type.toString()+" looking for a Literal"); 950 String t = token; 951 next(LexerTokenType.LITERAL, false); 952 return t; 953 } 954 955 public boolean peek(LexerTokenType type, String token) { 956 return this.type == type && this.token.equals(token); 957 } 958 959 public FHIRFormatError error(String message) { 960 return new FHIRFormatError("Syntax Error parsing Turtle on line "+Integer.toString(line)+" col "+Integer.toString(col)+": "+message); 961 } 962 963 } 964 // 965 // public void importTtl(Section sct, String ttl) throws Exception { 966 // if (!Utilities.noString(ttl)) { 967 // // System.out.println("import ttl: "+ttl); 968 // Lexer lexer = new Lexer(ttl); 969 // String subject = null; 970 // String predicate = null; 971 // while (!lexer.done()) { 972 // if (subject == null) 973 // subject = lexer.next(); 974 // if (predicate == null) 975 // predicate = lexer.next(); 976 // if (lexer.peekType() == null) { 977 // throw new Error("Unexpected end of input parsing turtle"); 978 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 979 // sct.triple(subject, predicate, lexer.next()); 980 // } else if (lexer.peek() == null) { 981 // throw new Error("Unexected - turtle lexer found no token"); 982 // } else if (lexer.peek().equals("[")) { 983 // sct.triple(subject, predicate, importComplex(lexer)); 984 // } else 985 // throw new Exception("Not done yet"); 986 // String n = lexer.next(); 987 // if (Utilities.noString(n)) 988 // break; 989 // if (n.equals(".")) { 990 // subject = null; 991 // predicate = null; 992 // } else if (n.equals(";")) { 993 // predicate = null; 994 // } else if (!n.equals(",")) 995 // throw new Exception("Unexpected token "+n); 996 // } 997 // } 998 // } 999 1000 public void parse(String source) throws FHIRFormatError { 1001 prefixes.clear(); 1002 prefixes.put("_", "urn:uuid:4425b440-2c33-4488-b9fc-cf9456139995#"); 1003 parse(new Lexer(source)); 1004 } 1005 1006 private void parse(Lexer lexer) throws FHIRFormatError { 1007 boolean doPrefixes = true; 1008 while (!lexer.done()) { 1009 if (doPrefixes && (lexer.peek(LexerTokenType.TOKEN, "@") || lexer.peek(LexerTokenType.WORD, "PREFIX") || lexer.peek(LexerTokenType.WORD, "BASE"))) { 1010 boolean sparqlStyle = false; 1011 boolean base = false; 1012 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1013 lexer.token("@"); 1014 String p = lexer.word(); 1015 if (p.equals("base")) 1016 base = true; 1017 else if (!p.equals("prefix")) 1018 throw new FHIRFormatError("Unexpected token "+p); 1019 } else { 1020 sparqlStyle = true; 1021 String p = lexer.word(); 1022 if (p.equals("BASE")) 1023 base = true; 1024 else if (!p.equals("PREFIX")) 1025 throw new FHIRFormatError("Unexpected token "+p); 1026 } 1027 String prefix = null; 1028 if (!base) { 1029 prefix = lexer.peekType() == LexerTokenType.WORD ? lexer.next(LexerTokenType.WORD, false) : null; 1030 lexer.token(":"); 1031 } 1032 String url = lexer.next(LexerTokenType.URI, false); 1033 if (!sparqlStyle) 1034 lexer.token("."); 1035 if (!base) 1036 prefix(prefix, url); 1037 else if (this.base == null) 1038 this.base = url; 1039 else 1040 throw new FHIRFormatError("Duplicate @base"); 1041 } else if (lexer.peekType() == LexerTokenType.URI) { 1042 doPrefixes = false; 1043 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1044 uri.setUri(lexer.uri()); 1045 TTLComplex complex = parseComplex(lexer); 1046 objects.put(uri, complex); 1047 lexer.token("."); 1048 } else if (lexer.peekType() == LexerTokenType.WORD) { 1049 doPrefixes = false; 1050 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1051 String pfx = lexer.word(); 1052 if (!prefixes.containsKey(pfx)) 1053 throw new FHIRFormatError("Unknown prefix "+pfx); 1054 lexer.token(":"); 1055 uri.setUri(prefixes.get(pfx)+lexer.word()); 1056 TTLComplex complex = parseComplex(lexer); 1057 objects.put(uri, complex); 1058 lexer.token("."); 1059 } else if (lexer.peek(LexerTokenType.TOKEN, ":")) { 1060 doPrefixes = false; 1061 TTLURL uri = new TTLURL(lexer.startLine, lexer.startCol); 1062 lexer.token(":"); 1063 if (!prefixes.containsKey(null)) 1064 throw new FHIRFormatError("Unknown prefix ''"); 1065 uri.setUri(prefixes.get(null)+lexer.word()); 1066 TTLComplex complex = parseComplex(lexer); 1067 objects.put(uri, complex); 1068 lexer.token("."); 1069 } else if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1070 doPrefixes = false; 1071 lexer.token("["); 1072 TTLComplex bnode = parseComplex(lexer); 1073 lexer.token("]"); 1074 TTLComplex complex = null; 1075 if (!lexer.peek(LexerTokenType.TOKEN, ".")) { 1076 complex = parseComplex(lexer); 1077 // at this point, we collapse bnode and complex, and give bnode a fictional identity 1078 bnode.addPredicates(complex.predicates); 1079 } 1080 1081 objects.put(anonymousId(), bnode); 1082 lexer.token("."); 1083 } else 1084 throw lexer.error("Unknown token "+lexer.token); 1085 } 1086 } 1087 1088 private TTLURL anonymousId() throws FHIRFormatError { 1089 TTLURL url = new TTLURL(-1, -1); 1090 url.setUri("urn:uuid:"+UUID.randomUUID().toString().toLowerCase()); 1091 return url; 1092 } 1093 1094 private TTLComplex parseComplex(Lexer lexer) throws FHIRFormatError { 1095 TTLComplex result = new TTLComplex(lexer.startLine, lexer.startCol); 1096 1097 boolean done = lexer.peek(LexerTokenType.TOKEN, "]"); 1098 while (!done) { 1099 String uri = null; 1100 if (lexer.peekType() == LexerTokenType.URI) 1101 uri = lexer.uri(); 1102 else { 1103 String t = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1104 if (lexer.type == LexerTokenType.TOKEN && lexer.token.equals(":")) { 1105 lexer.token(":"); 1106 if (!prefixes.containsKey(t)) 1107 throw new FHIRFormatError("unknown prefix "+t); 1108 uri = prefixes.get(t)+lexer.word(); 1109 } else if (t.equals("a")) 1110 uri = prefixes.get("rdfs")+"type"; 1111 else 1112 throw lexer.error("unexpected token"); 1113 } 1114 1115 boolean inlist = false; 1116 if (lexer.peek(LexerTokenType.TOKEN, "(")) { 1117 inlist = true; 1118 lexer.token("("); 1119 } 1120 1121 boolean rpt = false; 1122 do { 1123 if (lexer.peek(LexerTokenType.TOKEN, "[")) { 1124 lexer.token("["); 1125 result.addPredicate(uri, parseComplex(lexer)); 1126 lexer.token("]"); 1127 } else if (lexer.peekType() == LexerTokenType.URI) { 1128 TTLURL u = new TTLURL(lexer.startLine, lexer.startCol); 1129 u.setUri(lexer.uri()); 1130 result.addPredicate(uri, u); 1131 } else if (lexer.peekType() == LexerTokenType.LITERAL) { 1132 TTLLiteral u = new TTLLiteral(lexer.startLine, lexer.startCol); 1133 u.value = lexer.literal(); 1134 if (lexer.peek(LexerTokenType.TOKEN, "^")) { 1135 lexer.token("^"); 1136 lexer.token("^"); 1137 if (lexer.peekType() == LexerTokenType.URI) { 1138 u.type = lexer.uri(); 1139 } else { 1140 String l = lexer.word(); 1141 lexer.token(":"); 1142 u.type = prefixes.get(l)+ lexer.word(); 1143 } 1144 } 1145 if (lexer.peek(LexerTokenType.TOKEN, "@")) { 1146 //lang tag - skip it 1147 lexer.token("@"); 1148 String lang = lexer.word(); 1149 if (!lang.matches(LANG_REGEX)) { 1150 throw new FHIRFormatError("Invalid Language tag "+lang); 1151 } 1152 } 1153 result.addPredicate(uri, u); 1154 } else if (lexer.peekType() == LexerTokenType.WORD || lexer.peek(LexerTokenType.TOKEN, ":")) { 1155 int sl = lexer.startLine; 1156 int sc = lexer.startCol; 1157 String pfx = lexer.peekType() == LexerTokenType.WORD ? lexer.word() : null; 1158 if (Utilities.isDecimal(pfx) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1159 TTLLiteral u = new TTLLiteral(sl, sc); 1160 u.value = pfx; 1161 result.addPredicate(uri, u); 1162 } else if (("false".equals(pfx) || "true".equals(pfx)) && !lexer.peek(LexerTokenType.TOKEN, ":")) { 1163 TTLLiteral u = new TTLLiteral(sl, sc); 1164 u.value = pfx; 1165 result.addPredicate(uri, u); 1166 } else { 1167 if (!prefixes.containsKey(pfx)) 1168 throw new FHIRFormatError("Unknown prefix "+(pfx == null ? "''" : pfx)); 1169 TTLURL u = new TTLURL(sl, sc); 1170 lexer.token(":"); 1171 u.setUri(prefixes.get(pfx)+lexer.word()); 1172 result.addPredicate(uri, u); 1173 } 1174 } else if (!lexer.peek(LexerTokenType.TOKEN, ";") && (!inlist || !lexer.peek(LexerTokenType.TOKEN, ")"))) { 1175 throw new FHIRFormatError("unexpected token "+lexer.token); 1176 } 1177 1178 if (inlist) 1179 rpt = !lexer.peek(LexerTokenType.TOKEN, ")"); 1180 else { 1181 rpt = lexer.peek(LexerTokenType.TOKEN, ","); 1182 if (rpt) 1183 lexer.readNext(false); 1184 } 1185 } while (rpt); 1186 if (inlist) 1187 lexer.token(")"); 1188 1189 if (lexer.peek(LexerTokenType.TOKEN, ";")) { 1190 while ((lexer.peek(LexerTokenType.TOKEN, ";"))) 1191 lexer.token(";"); 1192 done = lexer.peek(LexerTokenType.TOKEN, ".") || lexer.peek(LexerTokenType.TOKEN, "]"); 1193 } else { 1194 done = true; 1195 } 1196 } 1197 return result; 1198 } 1199 1200 public Map<TTLURL, TTLComplex> getObjects() { 1201 return objects; 1202 } 1203 1204 public TTLComplex getObject(String url) { 1205 for (TTLURL t : objects.keySet()) { 1206 if (t.getUri().equals(url)) 1207 return objects.get(t); 1208 } 1209 return null; 1210 } 1211 1212 // public void parseFragment(Lexer lexer) throws Exception { 1213 // lexer.next(); // read [ 1214 // Complex obj = new Complex(); 1215 // while (!lexer.peek().equals("]")) { 1216 // String predicate = lexer.next(); 1217 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1218 // obj.predicate(predicate, lexer.next()); 1219 // } else if (lexer.peek().equals("[")) { 1220 // obj.predicate(predicate, importComplex(lexer)); 1221 // } else 1222 // throw new Exception("Not done yet"); 1223 // if (lexer.peek().equals(";")) 1224 // lexer.next(); 1225 // } 1226 // lexer.next(); // read ] 1227 // //return obj; 1228 // } 1229 // 1230 // public void importTtl(Section sct, String ttl) throws Exception { 1231 // if (!Utilities.noString(ttl)) { 1232 // // System.out.println("import ttl: "+ttl); 1233 // Lexer lexer = new Lexer(ttl); 1234 // String subject = null; 1235 // String predicate = null; 1236 // while (!lexer.done()) { 1237 // if (subject == null) 1238 // subject = lexer.next(); 1239 // if (predicate == null) 1240 // predicate = lexer.next(); 1241 // if (lexer.peekType() == null) { 1242 // throw new Error("Unexpected end of input parsing turtle"); 1243 // } if (lexer.peekType() == LexerTokenType.TOKEN) { 1244 // sct.triple(subject, predicate, lexer.next()); 1245 // } else if (lexer.peek() == null) { 1246 // throw new Error("Unexected - turtle lexer found no token"); 1247 // } else if (lexer.peek().equals("[")) { 1248 // sct.triple(subject, predicate, importComplex(lexer)); 1249 // } else 1250 // throw new Exception("Not done yet"); 1251 // String n = lexer.next(); 1252 // if (Utilities.noString(n)) 1253 // break; 1254 // if (n.equals(".")) { 1255 // subject = null; 1256 // predicate = null; 1257 // } else if (n.equals(";")) { 1258 // predicate = null; 1259 // } else if (!n.equals(",")) 1260 // throw new Exception("Unexpected token "+n); 1261 // } 1262 // } 1263 //} 1264 1265 // private Complex importComplex(Lexer lexer) throws Exception { 1266 // lexer.next(); // read [ 1267 // Complex obj = new Complex(); 1268 // while (!lexer.peek().equals("]")) { 1269 // String predicate = lexer.next(); 1270 // if (lexer.peekType() == LexerTokenType.TOKEN || lexer.peekType() == LexerTokenType.LITERAL) { 1271 // obj.predicate(predicate, lexer.next()); 1272 // } else if (lexer.peek().equals("[")) { 1273 // obj.predicate(predicate, importComplex(lexer)); 1274 // } else 1275 // throw new Exception("Not done yet"); 1276 // if (lexer.peek().equals(";")) 1277 // lexer.next(); 1278 // } 1279 // lexer.next(); // read ] 1280 // return obj; 1281 // } 1282 1283}