001package org.hl7.fhir.r4.elementmodel; 002 003import java.io.IOException; 004import java.io.InputStream; 005import java.io.OutputStream; 006import java.io.UnsupportedEncodingException; 007import java.util.Collections; 008import java.util.Comparator; 009import java.util.List; 010import java.util.ArrayList; 011 012import javax.xml.parsers.DocumentBuilder; 013import javax.xml.parsers.DocumentBuilderFactory; 014import javax.xml.parsers.SAXParser; 015import javax.xml.parsers.SAXParserFactory; 016import javax.xml.transform.Transformer; 017import javax.xml.transform.TransformerFactory; 018import javax.xml.transform.dom.DOMResult; 019import javax.xml.transform.sax.SAXSource; 020 021import org.hl7.fhir.r4.conformance.ProfileUtilities; 022import org.hl7.fhir.r4.context.IWorkerContext; 023import org.hl7.fhir.r4.elementmodel.Element.SpecialElement; 024import org.hl7.fhir.r4.formats.FormatUtilities; 025import org.hl7.fhir.r4.formats.IParser.OutputStyle; 026import org.hl7.fhir.r4.model.DateTimeType; 027import org.hl7.fhir.r4.model.ElementDefinition.PropertyRepresentation; 028import org.hl7.fhir.r4.model.Enumeration; 029import org.hl7.fhir.r4.model.StructureDefinition; 030import org.hl7.fhir.r4.utils.ToolingExtensions; 031import org.hl7.fhir.r4.utils.formats.XmlLocationAnnotator; 032import org.hl7.fhir.r4.utils.formats.XmlLocationData; 033import org.hl7.fhir.exceptions.DefinitionException; 034import org.hl7.fhir.exceptions.FHIRException; 035import org.hl7.fhir.exceptions.FHIRFormatError; 036import org.hl7.fhir.utilities.Utilities; 037import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity; 038import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType; 039import org.hl7.fhir.utilities.xhtml.XhtmlComposer; 040import org.hl7.fhir.utilities.xhtml.XhtmlNode; 041import org.hl7.fhir.utilities.xhtml.XhtmlParser; 042import org.hl7.fhir.utilities.xml.IXMLWriter; 043import org.hl7.fhir.utilities.xml.XMLUtil; 044import org.hl7.fhir.utilities.xml.XMLWriter; 045import org.w3c.dom.Document; 046import org.w3c.dom.Node; 047import org.xml.sax.InputSource; 048import org.xml.sax.XMLReader; 049 050public class XmlParser extends ParserBase { 051 private boolean allowXsiLocation; 052 053 public XmlParser(IWorkerContext context) { 054 super(context); 055 } 056 057 058 public boolean isAllowXsiLocation() { 059 return allowXsiLocation; 060 } 061 062 public void setAllowXsiLocation(boolean allowXsiLocation) { 063 this.allowXsiLocation = allowXsiLocation; 064 } 065 066 067 public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 068 Document doc = null; 069 try { 070 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 071 // xxe protection 072 factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 073 factory.setFeature("http://xml.org/sax/features/external-general-entities", false); 074 factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 075 factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 076 factory.setXIncludeAware(false); 077 factory.setExpandEntityReferences(false); 078 079 factory.setNamespaceAware(true); 080 if (policy == ValidationPolicy.EVERYTHING) { 081 // use a slower parser that keeps location data 082 TransformerFactory transformerFactory = TransformerFactory.newInstance(); 083 Transformer nullTransformer = transformerFactory.newTransformer(); 084 DocumentBuilder docBuilder = factory.newDocumentBuilder(); 085 doc = docBuilder.newDocument(); 086 DOMResult domResult = new DOMResult(doc); 087 SAXParserFactory spf = SAXParserFactory.newInstance(); 088 spf.setNamespaceAware(true); 089 spf.setValidating(false); 090 // xxe protection 091 spf.setFeature("http://xml.org/sax/features/external-general-entities", false); 092 spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 093 SAXParser saxParser = spf.newSAXParser(); 094 XMLReader xmlReader = saxParser.getXMLReader(); 095 // xxe protection 096 xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false); 097 xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 098 099 XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc); 100 InputSource inputSource = new InputSource(stream); 101 SAXSource saxSource = new SAXSource(locationAnnotator, inputSource); 102 nullTransformer.transform(saxSource, domResult); 103 } else { 104 DocumentBuilder builder = factory.newDocumentBuilder(); 105 doc = builder.parse(stream); 106 } 107 } catch (Exception e) { 108 logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL); 109 doc = null; 110 } 111 if (doc == null) 112 return null; 113 else 114 return parse(doc); 115 } 116 117 private void checkForProcessingInstruction(Document document) throws FHIRFormatError { 118 if (policy == ValidationPolicy.EVERYTHING) { 119 Node node = document.getFirstChild(); 120 while (node != null) { 121 if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) 122 logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR); 123 node = node.getNextSibling(); 124 } 125 } 126 } 127 128 129 private int line(Node node) { 130 XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 131 return loc == null ? 0 : loc.getStartLine(); 132 } 133 134 private int col(Node node) { 135 XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 136 return loc == null ? 0 : loc.getStartColumn(); 137 } 138 139 public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 140 checkForProcessingInstruction(doc); 141 org.w3c.dom.Element element = doc.getDocumentElement(); 142 return parse(element); 143 } 144 145 public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 146 String ns = element.getNamespaceURI(); 147 String name = element.getLocalName(); 148 String path = "/"+pathPrefix(ns)+name; 149 150 StructureDefinition sd = getDefinition(line(element), col(element), ns, name); 151 if (sd == null) 152 return null; 153 154 Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd)); 155 checkElement(element, path, result.getProperty()); 156 result.markLocation(line(element), col(element)); 157 result.setType(element.getLocalName()); 158 parseChildren(path, element, result); 159 result.numberChildren(); 160 return result; 161 } 162 163 private String pathPrefix(String ns) { 164 if (Utilities.noString(ns)) 165 return ""; 166 if (ns.equals(FormatUtilities.FHIR_NS)) 167 return "f:"; 168 if (ns.equals(FormatUtilities.XHTML_NS)) 169 return "h:"; 170 if (ns.equals("urn:hl7-org:v3")) 171 return "v3:"; 172 return "?:"; 173 } 174 175 private boolean empty(org.w3c.dom.Element element) { 176 for (int i = 0; i < element.getAttributes().getLength(); i++) { 177 String n = element.getAttributes().item(i).getNodeName(); 178 if (!n.equals("xmlns") && !n.startsWith("xmlns:")) 179 return false; 180 } 181 if (!Utilities.noString(element.getTextContent().trim())) 182 return false; 183 184 Node n = element.getFirstChild(); 185 while (n != null) { 186 if (n.getNodeType() == Node.ELEMENT_NODE) 187 return false; 188 n = n.getNextSibling(); 189 } 190 return true; 191 } 192 193 private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError { 194 if (policy == ValidationPolicy.EVERYTHING) { 195 if (empty(element)) 196 logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR); 197 String ns = FormatUtilities.FHIR_NS; 198 if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace")) 199 ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"); 200 else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace")) 201 ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"); 202 if (!element.getNamespaceURI().equals(ns)) 203 logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR); 204 } 205 } 206 207 public Element parse(org.w3c.dom.Element base, String type) throws Exception { 208 StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type); 209 Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd)); 210 String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName(); 211 checkElement(base, path, result.getProperty()); 212 result.setType(base.getLocalName()); 213 parseChildren(path, base, result); 214 result.numberChildren(); 215 return result; 216 } 217 218 private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException { 219 // this parsing routine retains the original order in a the XML file, to support validation 220 reapComments(node, context); 221 List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node)); 222 223 String text = XMLUtil.getDirectText(node).trim(); 224 if (!Utilities.noString(text)) { 225 Property property = getTextProp(properties); 226 if (property != null) { 227 context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node))); 228 } else { 229 logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR); 230 } 231 } 232 233 for (int i = 0; i < node.getAttributes().getLength(); i++) { 234 Node attr = node.getAttributes().item(i); 235 if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) { 236 Property property = getAttrProp(properties, attr.getNodeName()); 237 if (property != null) { 238 String av = attr.getNodeValue(); 239 if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat")) 240 av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av); 241 if (property.getName().equals("value") && context.isPrimitive()) 242 context.setValue(av); 243 else 244 context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node))); 245 } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) { 246 logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName()+" for type "+context.fhirType()+" (properties = "+properties+")", IssueSeverity.ERROR); 247 } 248 } 249 } 250 251 Node child = node.getFirstChild(); 252 while (child != null) { 253 if (child.getNodeType() == Node.ELEMENT_NODE) { 254 Property property = getElementProp(properties, child.getLocalName()); 255 if (property != null) { 256 if (!property.isChoice() && "xhtml".equals(property.getType())) { 257 XhtmlNode xhtml = new XhtmlParser().setValidatorMode(true).parseHtmlNode((org.w3c.dom.Element) child); 258 context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML, false).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child))); 259 } else { 260 String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName(); 261 Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child)); 262 checkElement((org.w3c.dom.Element) child, npath, n.getProperty()); 263 boolean ok = true; 264 if (property.isChoice()) { 265 if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) { 266 String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type"); 267 if (xsiType == null) { 268 logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR); 269 ok = false; 270 } else { 271 if (xsiType.contains(":")) 272 xsiType = xsiType.substring(xsiType.indexOf(":")+1); 273 n.setType(xsiType); 274 } 275 } else 276 n.setType(n.getType()); 277 } 278 context.getChildren().add(n); 279 if (ok) { 280 if (property.isResource()) 281 parseResource(npath, (org.w3c.dom.Element) child, n, property); 282 else 283 parseChildren(npath, (org.w3c.dom.Element) child, n); 284 } 285 } 286 } else 287 logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR); 288 } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){ 289 logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR); 290 } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) { 291 logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR); 292 } 293 child = child.getNextSibling(); 294 } 295 } 296 297 private Property getElementProp(List<Property> properties, String nodeName) { 298 List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties); 299 // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x] 300 // and therefore the longer property names get evaluated first 301 Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() { 302 @Override 303 public int compare(Property o1, Property o2) { 304 return o2.getName().length() - o1.getName().length(); 305 } 306 }); 307 for (Property p : propsSortedByLongestFirst) 308 if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) { 309 if (p.getName().equals(nodeName)) 310 return p; 311 if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 312 return p; 313 } 314 return null; 315 } 316 317 private Property getAttrProp(List<Property> properties, String nodeName) { 318 for (Property p : properties) 319 if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 320 return p; 321 return null; 322 } 323 324 private Property getTextProp(List<Property> properties) { 325 for (Property p : properties) 326 if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 327 return p; 328 return null; 329 } 330 331 private String convertForDateFormat(String fmt, String av) throws FHIRException { 332 if ("v3".equals(fmt)) { 333 DateTimeType d = DateTimeType.parseV3(av); 334 return d.asStringValue(); 335 } else 336 throw new FHIRException("Unknown Data format '"+fmt+"'"); 337 } 338 339 private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException { 340 org.w3c.dom.Element res = XMLUtil.getFirstChild(container); 341 String name = res.getLocalName(); 342 StructureDefinition sd = context.fetchResource(StructureDefinition.class, ProfileUtilities.sdNs(name)); 343 if (sd == null) 344 throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')"); 345 parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty); 346 parent.setType(name); 347 parseChildren(res.getLocalName(), res, parent); 348 } 349 350 private void reapComments(org.w3c.dom.Element element, Element context) { 351 Node node = element.getPreviousSibling(); 352 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 353 if (node.getNodeType() == Node.COMMENT_NODE) 354 context.getComments().add(0, node.getTextContent()); 355 node = node.getPreviousSibling(); 356 } 357 node = element.getLastChild(); 358 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 359 node = node.getPreviousSibling(); 360 } 361 while (node != null) { 362 if (node.getNodeType() == Node.COMMENT_NODE) 363 context.getComments().add(node.getTextContent()); 364 node = node.getNextSibling(); 365 } 366 } 367 368 private boolean isAttr(Property property) { 369 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 370 if (r.getValue() == PropertyRepresentation.XMLATTR) { 371 return true; 372 } 373 } 374 return false; 375 } 376 377 private boolean isText(Property property) { 378 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 379 if (r.getValue() == PropertyRepresentation.XMLTEXT) { 380 return true; 381 } 382 } 383 return false; 384 } 385 386 @Override 387 public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException { 388 XMLWriter xml = new XMLWriter(stream, "UTF-8"); 389 xml.setPretty(style == OutputStyle.PRETTY); 390 xml.start(); 391 xml.setDefaultNamespace(e.getProperty().getNamespace()); 392 composeElement(xml, e, e.getType(), true); 393 xml.end(); 394 395 } 396 397 public void compose(Element e, IXMLWriter xml) throws Exception { 398 xml.start(); 399 xml.setDefaultNamespace(e.getProperty().getNamespace()); 400 composeElement(xml, e, e.getType(), true); 401 xml.end(); 402 } 403 404 private void composeElement(IXMLWriter xml, Element element, String elementName, boolean root) throws IOException { 405 for (String s : element.getComments()) { 406 xml.comment(s, true); 407 } 408 if (isText(element.getProperty())) { 409 if (linkResolver != null) 410 xml.link(linkResolver.resolveProperty(element.getProperty())); 411 xml.enter(elementName); 412 xml.text(element.getValue()); 413 xml.exit(elementName); 414 } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) { 415 if (element.getType().equals("xhtml")) { 416 xml.escapedText(element.getValue()); 417 } else if (isText(element.getProperty())) { 418 if (linkResolver != null) 419 xml.link(linkResolver.resolveProperty(element.getProperty())); 420 xml.text(element.getValue()); 421 } else { 422 if (element.hasValue()) { 423 if (linkResolver != null) 424 xml.link(linkResolver.resolveType(element.getType())); 425 xml.attribute("value", element.getValue()); 426 } 427 if (linkResolver != null) 428 xml.link(linkResolver.resolveProperty(element.getProperty())); 429 if (element.hasChildren()) { 430 xml.enter(elementName); 431 for (Element child : element.getChildren()) 432 composeElement(xml, child, child.getName(), false); 433 xml.exit(elementName); 434 } else 435 xml.element(elementName); 436 } 437 } else { 438 for (Element child : element.getChildren()) { 439 if (isAttr(child.getProperty())) { 440 if (linkResolver != null) 441 xml.link(linkResolver.resolveType(child.getType())); 442 xml.attribute(child.getName(), child.getValue()); 443 } 444 } 445 if (linkResolver != null) 446 xml.link(linkResolver.resolveProperty(element.getProperty())); 447 xml.enter(elementName); 448 if (!root && element.getSpecial() != null) { 449 if (linkResolver != null) 450 xml.link(linkResolver.resolveProperty(element.getProperty())); 451 xml.enter(element.getType()); 452 } 453 for (Element child : element.getChildren()) { 454 if (isText(child.getProperty())) { 455 if (linkResolver != null) 456 xml.link(linkResolver.resolveProperty(element.getProperty())); 457 xml.text(child.getValue()); 458 } else if (!isAttr(child.getProperty())) 459 composeElement(xml, child, child.getName(), false); 460 } 461 if (!root && element.getSpecial() != null) 462 xml.exit(element.getType()); 463 xml.exit(elementName); 464 } 465 } 466 467}