001package org.hl7.fhir.dstu3.elementmodel; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import java.io.IOException; 035import java.io.InputStream; 036import java.io.OutputStream; 037import java.util.ArrayList; 038import java.util.Collections; 039import java.util.Comparator; 040import java.util.List; 041 042import javax.xml.parsers.DocumentBuilder; 043import javax.xml.parsers.DocumentBuilderFactory; 044import javax.xml.parsers.SAXParser; 045import javax.xml.parsers.SAXParserFactory; 046import javax.xml.transform.Transformer; 047import javax.xml.transform.TransformerFactory; 048import javax.xml.transform.dom.DOMResult; 049import javax.xml.transform.sax.SAXSource; 050 051import org.hl7.fhir.dstu3.context.IWorkerContext; 052import org.hl7.fhir.dstu3.elementmodel.Element.SpecialElement; 053import org.hl7.fhir.dstu3.formats.FormatUtilities; 054import org.hl7.fhir.dstu3.formats.IParser.OutputStyle; 055import org.hl7.fhir.dstu3.model.DateTimeType; 056import org.hl7.fhir.dstu3.model.ElementDefinition.PropertyRepresentation; 057import org.hl7.fhir.dstu3.model.Enumeration; 058import org.hl7.fhir.dstu3.model.StructureDefinition; 059import org.hl7.fhir.dstu3.utils.ToolingExtensions; 060import org.hl7.fhir.dstu3.utils.formats.XmlLocationAnnotator; 061import org.hl7.fhir.dstu3.utils.formats.XmlLocationData; 062import org.hl7.fhir.exceptions.DefinitionException; 063import org.hl7.fhir.exceptions.FHIRException; 064import org.hl7.fhir.exceptions.FHIRFormatError; 065import org.hl7.fhir.utilities.Utilities; 066import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity; 067import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType; 068import org.hl7.fhir.utilities.xhtml.XhtmlComposer; 069import org.hl7.fhir.utilities.xhtml.XhtmlNode; 070import org.hl7.fhir.utilities.xhtml.XhtmlParser; 071import org.hl7.fhir.utilities.xml.IXMLWriter; 072import org.hl7.fhir.utilities.xml.XMLUtil; 073import org.hl7.fhir.utilities.xml.XMLWriter; 074import org.w3c.dom.Document; 075import org.w3c.dom.Node; 076import org.xml.sax.InputSource; 077import org.xml.sax.XMLReader; 078 079public class XmlParser extends ParserBase { 080 private boolean allowXsiLocation; 081 082 public XmlParser(IWorkerContext context) { 083 super(context); 084 } 085 086 087 public boolean isAllowXsiLocation() { 088 return allowXsiLocation; 089 } 090 091 public void setAllowXsiLocation(boolean allowXsiLocation) { 092 this.allowXsiLocation = allowXsiLocation; 093 } 094 095 096 public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 097 Document doc = null; 098 try { 099 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 100 // xxe protection 101 factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 102 factory.setFeature("http://xml.org/sax/features/external-general-entities", false); 103 factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 104 factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 105 factory.setXIncludeAware(false); 106 factory.setExpandEntityReferences(false); 107 108 factory.setNamespaceAware(true); 109 if (policy == ValidationPolicy.EVERYTHING) { 110 // use a slower parser that keeps location data 111 TransformerFactory transformerFactory = TransformerFactory.newInstance(); 112 Transformer nullTransformer = transformerFactory.newTransformer(); 113 DocumentBuilder docBuilder = factory.newDocumentBuilder(); 114 doc = docBuilder.newDocument(); 115 DOMResult domResult = new DOMResult(doc); 116 SAXParserFactory spf = SAXParserFactory.newInstance(); 117 spf.setNamespaceAware(true); 118 spf.setValidating(false); 119 // xxe protection 120 spf.setFeature("http://xml.org/sax/features/external-general-entities", false); 121 spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 122 SAXParser saxParser = spf.newSAXParser(); 123 XMLReader xmlReader = saxParser.getXMLReader(); 124 // xxe protection 125 xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false); 126 xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 127 128 XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc); 129 InputSource inputSource = new InputSource(stream); 130 SAXSource saxSource = new SAXSource(locationAnnotator, inputSource); 131 nullTransformer.transform(saxSource, domResult); 132 } else { 133 DocumentBuilder builder = factory.newDocumentBuilder(); 134 doc = builder.parse(stream); 135 } 136 } catch (Exception e) { 137 logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL); 138 doc = null; 139 } 140 if (doc == null) 141 return null; 142 else 143 return parse(doc); 144 } 145 146 private void checkForProcessingInstruction(Document document) throws FHIRFormatError { 147 if (policy == ValidationPolicy.EVERYTHING) { 148 Node node = document.getFirstChild(); 149 while (node != null) { 150 if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) 151 logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR); 152 node = node.getNextSibling(); 153 } 154 } 155 } 156 157 158 private int line(Node node) { 159 XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 160 return loc == null ? 0 : loc.getStartLine(); 161 } 162 163 private int col(Node node) { 164 XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 165 return loc == null ? 0 : loc.getStartColumn(); 166 } 167 168 public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 169 checkForProcessingInstruction(doc); 170 org.w3c.dom.Element element = doc.getDocumentElement(); 171 return parse(element); 172 } 173 174 public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 175 String ns = element.getNamespaceURI(); 176 String name = element.getLocalName(); 177 String path = "/"+pathPrefix(ns)+name; 178 179 StructureDefinition sd = getDefinition(line(element), col(element), ns, name); 180 if (sd == null) 181 return null; 182 183 Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd)); 184 checkElement(element, path, result.getProperty()); 185 result.markLocation(line(element), col(element)); 186 result.setType(element.getLocalName()); 187 parseChildren(path, element, result); 188 result.numberChildren(); 189 return result; 190 } 191 192 private String pathPrefix(String ns) { 193 if (Utilities.noString(ns)) 194 return ""; 195 if (ns.equals(FormatUtilities.FHIR_NS)) 196 return "f:"; 197 if (ns.equals(FormatUtilities.XHTML_NS)) 198 return "h:"; 199 if (ns.equals("urn:hl7-org:v3")) 200 return "v3:"; 201 return "?:"; 202 } 203 204 private boolean empty(org.w3c.dom.Element element) { 205 for (int i = 0; i < element.getAttributes().getLength(); i++) { 206 String n = element.getAttributes().item(i).getNodeName(); 207 if (!n.equals("xmlns") && !n.startsWith("xmlns:")) 208 return false; 209 } 210 if (!Utilities.noString(element.getTextContent().trim())) 211 return false; 212 213 Node n = element.getFirstChild(); 214 while (n != null) { 215 if (n.getNodeType() == Node.ELEMENT_NODE) 216 return false; 217 n = n.getNextSibling(); 218 } 219 return true; 220 } 221 222 private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError { 223 if (policy == ValidationPolicy.EVERYTHING) { 224 if (empty(element)) 225 logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR); 226 String ns = FormatUtilities.FHIR_NS; 227 if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace")) 228 ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"); 229 else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace")) 230 ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"); 231 if (!element.getNamespaceURI().equals(ns)) 232 logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR); 233 } 234 } 235 236 public Element parse(org.w3c.dom.Element base, String type) throws Exception { 237 StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type); 238 Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd)); 239 String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName(); 240 checkElement(base, path, result.getProperty()); 241 result.setType(base.getLocalName()); 242 parseChildren(path, base, result); 243 result.numberChildren(); 244 return result; 245 } 246 247 private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException { 248 // this parsing routine retains the original order in a the XML file, to support validation 249 reapComments(node, context); 250 List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node)); 251 252 String text = XMLUtil.getDirectText(node).trim(); 253 if (!Utilities.noString(text)) { 254 Property property = getTextProp(properties); 255 if (property != null) { 256 context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node))); 257 } else { 258 logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR); 259 } 260 } 261 262 for (int i = 0; i < node.getAttributes().getLength(); i++) { 263 Node attr = node.getAttributes().item(i); 264 if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) { 265 Property property = getAttrProp(properties, attr.getNodeName()); 266 if (property != null) { 267 String av = attr.getNodeValue(); 268 if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat")) 269 av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av); 270 if (property.getName().equals("value") && context.isPrimitive()) 271 context.setValue(av); 272 else 273 context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node))); 274 } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) { 275 logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName(), IssueSeverity.ERROR); 276 } 277 } 278 } 279 280 Node child = node.getFirstChild(); 281 while (child != null) { 282 if (child.getNodeType() == Node.ELEMENT_NODE) { 283 Property property = getElementProp(properties, child.getLocalName()); 284 if (property != null) { 285 if (!property.isChoice() && "xhtml".equals(property.getType())) { 286 XhtmlNode xhtml = new XhtmlParser().setValidatorMode(true).parseHtmlNode((org.w3c.dom.Element) child); 287 context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child))); 288 } else { 289 String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName(); 290 Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child)); 291 checkElement((org.w3c.dom.Element) child, npath, n.getProperty()); 292 boolean ok = true; 293 if (property.isChoice()) { 294 if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) { 295 String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type"); 296 if (xsiType == null) { 297 logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR); 298 ok = false; 299 } else { 300 if (xsiType.contains(":")) 301 xsiType = xsiType.substring(xsiType.indexOf(":")+1); 302 n.setType(xsiType); 303 } 304 } else 305 n.setType(n.getType()); 306 } 307 context.getChildren().add(n); 308 if (ok) { 309 if (property.isResource()) 310 parseResource(npath, (org.w3c.dom.Element) child, n, property); 311 else 312 parseChildren(npath, (org.w3c.dom.Element) child, n); 313 } 314 } 315 } else 316 logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR); 317 } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){ 318 logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR); 319 } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) { 320 logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR); 321 } 322 child = child.getNextSibling(); 323 } 324 } 325 326 private Property getElementProp(List<Property> properties, String nodeName) { 327 List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties); 328 // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x] 329 // and therefore the longer property names get evaluated first 330 Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() { 331 @Override 332 public int compare(Property o1, Property o2) { 333 return o2.getName().length() - o1.getName().length(); 334 } 335 }); 336 for (Property p : propsSortedByLongestFirst) 337 if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) { 338 if (p.getName().equals(nodeName)) 339 return p; 340 if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 341 return p; 342 } 343 return null; 344 } 345 346 private Property getAttrProp(List<Property> properties, String nodeName) { 347 for (Property p : properties) 348 if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 349 return p; 350 return null; 351 } 352 353 private Property getTextProp(List<Property> properties) { 354 for (Property p : properties) 355 if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 356 return p; 357 return null; 358 } 359 360 private String convertForDateFormat(String fmt, String av) throws FHIRException { 361 if ("v3".equals(fmt)) { 362 DateTimeType d = DateTimeType.parseV3(av); 363 return d.asStringValue(); 364 } else 365 throw new FHIRException("Unknown Data format '"+fmt+"'"); 366 } 367 368 private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 369 org.w3c.dom.Element res = XMLUtil.getFirstChild(container); 370 String name = res.getLocalName(); 371 StructureDefinition sd = context.fetchTypeDefinition(name); 372 if (sd == null) 373 throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')"); 374 parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty); 375 parent.setType(name); 376 parseChildren(res.getLocalName(), res, parent); 377 } 378 379 private void reapComments(org.w3c.dom.Element element, Element context) { 380 Node node = element.getPreviousSibling(); 381 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 382 if (node.getNodeType() == Node.COMMENT_NODE) 383 context.getComments().add(0, node.getTextContent()); 384 node = node.getPreviousSibling(); 385 } 386 node = element.getLastChild(); 387 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 388 node = node.getPreviousSibling(); 389 } 390 while (node != null) { 391 if (node.getNodeType() == Node.COMMENT_NODE) 392 context.getComments().add(node.getTextContent()); 393 node = node.getNextSibling(); 394 } 395 } 396 397 private boolean isAttr(Property property) { 398 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 399 if (r.getValue() == PropertyRepresentation.XMLATTR) { 400 return true; 401 } 402 } 403 return false; 404 } 405 406 private boolean isText(Property property) { 407 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 408 if (r.getValue() == PropertyRepresentation.XMLTEXT) { 409 return true; 410 } 411 } 412 return false; 413 } 414 415 @Override 416 public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException { 417 XMLWriter xml = new XMLWriter(stream, "UTF-8"); 418 xml.setPretty(style == OutputStyle.PRETTY); 419 xml.start(); 420 xml.setDefaultNamespace(e.getProperty().getNamespace()); 421 composeElement(xml, e, e.getType()); 422 xml.end(); 423 424 } 425 426 public void compose(Element e, IXMLWriter xml) throws Exception { 427 xml.start(); 428 xml.setDefaultNamespace(e.getProperty().getNamespace()); 429 composeElement(xml, e, e.getType()); 430 xml.end(); 431 } 432 433 private void composeElement(IXMLWriter xml, Element element, String elementName) throws IOException { 434 for (String s : element.getComments()) { 435 xml.comment(s, true); 436 } 437 if (isText(element.getProperty())) { 438 if (linkResolver != null) 439 xml.link(linkResolver.resolveProperty(element.getProperty())); 440 xml.enter(elementName); 441 xml.text(element.getValue()); 442 xml.exit(elementName); 443 } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) { 444 if (element.getType().equals("xhtml")) { 445 xml.escapedText(element.getValue()); 446 } else if (isText(element.getProperty())) { 447 if (linkResolver != null) 448 xml.link(linkResolver.resolveProperty(element.getProperty())); 449 xml.text(element.getValue()); 450 } else { 451 if (element.hasValue()) { 452 if (linkResolver != null) 453 xml.link(linkResolver.resolveType(element.getType())); 454 xml.attribute("value", element.getValue()); 455 } 456 if (linkResolver != null) 457 xml.link(linkResolver.resolveProperty(element.getProperty())); 458 if (element.hasChildren()) { 459 xml.enter(elementName); 460 for (Element child : element.getChildren()) 461 composeElement(xml, child, child.getName()); 462 xml.exit(elementName); 463 } else 464 xml.element(elementName); 465 } 466 } else { 467 for (Element child : element.getChildren()) { 468 if (isAttr(child.getProperty())) { 469 if (linkResolver != null) 470 xml.link(linkResolver.resolveType(child.getType())); 471 xml.attribute(child.getName(), child.getValue()); 472 } 473 } 474 if (linkResolver != null) 475 xml.link(linkResolver.resolveProperty(element.getProperty())); 476 xml.enter(elementName); 477 if (element.getSpecial() != null) { 478 if (linkResolver != null) 479 xml.link(linkResolver.resolveProperty(element.getProperty())); 480 xml.enter(element.getType()); 481 } 482 for (Element child : element.getChildren()) { 483 if (isText(child.getProperty())) { 484 if (linkResolver != null) 485 xml.link(linkResolver.resolveProperty(element.getProperty())); 486 xml.text(child.getValue()); 487 } else if (!isAttr(child.getProperty())) 488 composeElement(xml, child, child.getName()); 489 } 490 if (element.getSpecial() != null) 491 xml.exit(element.getType()); 492 xml.exit(elementName); 493 } 494 } 495 496}