001package org.hl7.fhir.dstu2016may.metamodel; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import java.io.IOException; 035import java.io.InputStream; 036import java.io.OutputStream; 037import java.util.List; 038 039import javax.xml.parsers.DocumentBuilder; 040import javax.xml.parsers.DocumentBuilderFactory; 041import javax.xml.parsers.SAXParser; 042import javax.xml.parsers.SAXParserFactory; 043import javax.xml.transform.Transformer; 044import javax.xml.transform.TransformerFactory; 045import javax.xml.transform.dom.DOMResult; 046import javax.xml.transform.sax.SAXSource; 047 048import org.hl7.fhir.dstu2016may.formats.FormatUtilities; 049import org.hl7.fhir.dstu2016may.formats.IParser.OutputStyle; 050import org.hl7.fhir.dstu2016may.metamodel.Element.SpecialElement; 051import org.hl7.fhir.dstu2016may.model.DateTimeType; 052import org.hl7.fhir.dstu2016may.model.ElementDefinition.PropertyRepresentation; 053import org.hl7.fhir.dstu2016may.model.Enumeration; 054import org.hl7.fhir.dstu2016may.model.StructureDefinition; 055import org.hl7.fhir.dstu2016may.utils.IWorkerContext; 056import org.hl7.fhir.dstu2016may.utils.ToolingExtensions; 057import org.hl7.fhir.dstu2016may.utils.XmlLocationAnnotator; 058import org.hl7.fhir.dstu2016may.utils.XmlLocationData; 059import org.hl7.fhir.exceptions.FHIRException; 060import org.hl7.fhir.exceptions.FHIRFormatError; 061import org.hl7.fhir.utilities.Utilities; 062import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity; 063import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType; 064import org.hl7.fhir.utilities.xhtml.XhtmlComposer; 065import org.hl7.fhir.utilities.xhtml.XhtmlNode; 066import org.hl7.fhir.utilities.xhtml.XhtmlParser; 067import org.hl7.fhir.utilities.xml.XMLUtil; 068import org.hl7.fhir.utilities.xml.XMLWriter; 069import org.w3c.dom.Document; 070import org.w3c.dom.Node; 071import org.xml.sax.InputSource; 072import org.xml.sax.XMLReader; 073 074public class XmlParser extends ParserBase { 075 public XmlParser(IWorkerContext context) { 076 super(context); 077 } 078 079 public Element parse(InputStream stream) throws Exception { 080 Document doc = null; 081 try { 082 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 083 // xxe protection 084 factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 085 factory.setFeature("http://xml.org/sax/features/external-general-entities", false); 086 factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); 087 factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); 088 factory.setXIncludeAware(false); 089 factory.setExpandEntityReferences(false); 090 091 factory.setNamespaceAware(true); 092 if (policy == ValidationPolicy.EVERYTHING) { 093 // use a slower parser that keeps location data 094 TransformerFactory transformerFactory = TransformerFactory.newInstance(); 095 Transformer nullTransformer = transformerFactory.newTransformer(); 096 DocumentBuilder docBuilder = factory.newDocumentBuilder(); 097 doc = docBuilder.newDocument(); 098 DOMResult domResult = new DOMResult(doc); 099 SAXParserFactory spf = SAXParserFactory.newInstance(); 100 spf.setNamespaceAware(true); 101 spf.setValidating(false); 102 SAXParser saxParser = spf.newSAXParser(); 103 XMLReader xmlReader = saxParser.getXMLReader(); 104 // xxe protection 105 spf.setFeature("http://xml.org/sax/features/external-general-entities", false); 106 spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 107 xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false); 108 xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); 109 110 XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc); 111 InputSource inputSource = new InputSource(stream); 112 SAXSource saxSource = new SAXSource(locationAnnotator, inputSource); 113 nullTransformer.transform(saxSource, domResult); 114 } else { 115 DocumentBuilder builder = factory.newDocumentBuilder(); 116 doc = builder.parse(stream); 117 } 118 } catch (Exception e) { 119 logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL); 120 doc = null; 121 } 122 if (doc == null) 123 return null; 124 else 125 return parse(doc); 126 } 127 128 private void checkForProcessingInstruction(Document document) throws FHIRFormatError { 129 if (policy == ValidationPolicy.EVERYTHING) { 130 Node node = document.getFirstChild(); 131 while (node != null) { 132 if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE) 133 logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR); 134 node = node.getNextSibling(); 135 } 136 } 137 } 138 139 140 private int line(Node node) { 141 XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 142 return loc == null ? 0 : loc.getStartLine(); 143 } 144 145 private int col(Node node) { 146 XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY); 147 return loc == null ? 0 : loc.getStartColumn(); 148 } 149 150 public Element parse(Document doc) throws Exception { 151 checkForProcessingInstruction(doc); 152 org.w3c.dom.Element element = doc.getDocumentElement(); 153 return parse(element); 154 } 155 156 public Element parse(org.w3c.dom.Element element) throws Exception { 157 String ns = element.getNamespaceURI(); 158 String name = element.getLocalName(); 159 String path = "/"+pathPrefix(ns)+name; 160 161 StructureDefinition sd = getDefinition(line(element), col(element), ns, name); 162 if (sd == null) 163 return null; 164 165 Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd)); 166 checkElement(element, path, result.getProperty()); 167 result.markLocation(line(element), col(element)); 168 result.setType(element.getLocalName()); 169 parseChildren(path, element, result); 170 result.numberChildren(); 171 return result; 172 } 173 174 private String pathPrefix(String ns) { 175 if (Utilities.noString(ns)) 176 return ""; 177 if (ns.equals(FormatUtilities.FHIR_NS)) 178 return "f:"; 179 if (ns.equals(FormatUtilities.XHTML_NS)) 180 return "h:"; 181 if (ns.equals("urn:hl7-org:v3")) 182 return "v3:"; 183 return "?:"; 184 } 185 186 private boolean empty(org.w3c.dom.Element element) { 187 for (int i = 0; i < element.getAttributes().getLength(); i++) { 188 String n = element.getAttributes().item(i).getNodeName(); 189 if (!n.equals("xmlns") && !n.startsWith("xmlns:")) 190 return false; 191 } 192 if (!Utilities.noString(element.getTextContent().trim())) 193 return false; 194 195 Node n = element.getFirstChild(); 196 while (n != null) { 197 if (n.getNodeType() == Node.ELEMENT_NODE) 198 return false; 199 n = n.getNextSibling(); 200 } 201 return true; 202 } 203 204 private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError { 205 if (policy == ValidationPolicy.EVERYTHING) { 206 if (empty(element)) 207 logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR); 208 String ns = FormatUtilities.FHIR_NS; 209 if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace")) 210 ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"); 211 else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace")) 212 ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"); 213 if (!element.getNamespaceURI().equals(ns)) 214 logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR); 215 } 216 } 217 218 public Element parse(org.w3c.dom.Element base, String type) throws Exception { 219 StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type); 220 Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd)); 221 String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName(); 222 checkElement(base, path, result.getProperty()); 223 result.setType(base.getLocalName()); 224 parseChildren(path, base, result); 225 result.numberChildren(); 226 return result; 227 } 228 229 private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws Exception { 230 // this parsing routine retains the original order in a the XML file, to support validation 231 reapComments(node, context); 232 List<Property> properties = getChildProperties(context.getProperty(), context.getName(), XMLUtil.getXsiType(node)); 233 234 String text = XMLUtil.getDirectText(node).trim(); 235 if (!Utilities.noString(text)) { 236 Property property = getTextProp(properties); 237 if (property != null) { 238 context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node))); 239 } else { 240 logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR); 241 } 242 } 243 244 for (int i = 0; i < node.getAttributes().getLength(); i++) { 245 Node attr = node.getAttributes().item(i); 246 if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) { 247 Property property = getAttrProp(properties, attr.getNodeName()); 248 if (property != null) { 249 String av = attr.getNodeValue(); 250 if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat")) 251 av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av); 252 if (property.getName().equals("value") && context.isPrimitive()) 253 context.setValue(av); 254 else 255 context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node))); 256 } else { 257 logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"'", IssueSeverity.ERROR); 258 } 259 } 260 } 261 262 Node child = node.getFirstChild(); 263 while (child != null) { 264 if (child.getNodeType() == Node.ELEMENT_NODE) { 265 Property property = getElementProp(properties, child.getLocalName()); 266 if (property != null) { 267 if (!property.isChoice() && "xhtml".equals(property.getType())) { 268 XhtmlNode xhtml = new XhtmlParser().setValidatorMode(true).parseHtmlNode((org.w3c.dom.Element) child); 269 context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(true, false).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child))); 270 } else { 271 String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName(); 272 Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child)); 273 checkElement((org.w3c.dom.Element) child, npath, n.getProperty()); 274 boolean ok = true; 275 if (property.isChoice()) { 276 if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) { 277 String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type"); 278 if (xsiType == null) { 279 logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR); 280 ok = false; 281 } else { 282 if (xsiType.contains(":")) 283 xsiType = xsiType.substring(xsiType.indexOf(":")+1); 284 n.setType(xsiType); 285 } 286 } else 287 n.setType(n.getType()); 288 } 289 context.getChildren().add(n); 290 if (ok) { 291 if (property.isResource()) 292 parseResource(npath, (org.w3c.dom.Element) child, n); 293 else 294 parseChildren(npath, (org.w3c.dom.Element) child, n); 295 } 296 } 297 } else 298 logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR); 299 } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){ 300 logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR); 301 } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) { 302 logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR); 303 } 304 child = child.getNextSibling(); 305 } 306 } 307 308 private Property getElementProp(List<Property> properties, String nodeName) { 309 for (Property p : properties) 310 if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) { 311 if (p.getName().equals(nodeName)) 312 return p; 313 if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 314 return p; 315 } 316 return null; 317 } 318 319 private Property getAttrProp(List<Property> properties, String nodeName) { 320 for (Property p : properties) 321 if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 322 return p; 323 return null; 324 } 325 326 private Property getTextProp(List<Property> properties) { 327 for (Property p : properties) 328 if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 329 return p; 330 return null; 331 } 332 333 private String convertForDateFormat(String fmt, String av) throws FHIRException { 334 if ("v3".equals(fmt)) { 335 DateTimeType d = DateTimeType.parseV3(av); 336 return d.asStringValue(); 337 } else 338 throw new FHIRException("Unknown Data format '"+fmt+"'"); 339 } 340 341 private void parseResource(String string, org.w3c.dom.Element container, Element parent) throws Exception { 342 org.w3c.dom.Element res = XMLUtil.getFirstChild(container); 343 String name = res.getLocalName(); 344 StructureDefinition sd = context.fetchTypeDefinition(name); 345 if (sd == null) 346 throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')"); 347 parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), parent.getProperty().getName().equals("contained") ? SpecialElement.CONTAINED : SpecialElement.BUNDLE_ENTRY); 348 parent.setType(name); 349 parseChildren(res.getLocalName(), res, parent); 350 } 351 352 private void reapComments(org.w3c.dom.Element element, Element context) { 353 Node node = element.getPreviousSibling(); 354 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 355 if (node.getNodeType() == Node.COMMENT_NODE) 356 context.getComments().add(0, node.getTextContent()); 357 node = node.getPreviousSibling(); 358 } 359 node = element.getLastChild(); 360 while (node != null && node.getNodeType() != Node.ELEMENT_NODE) { 361 node = node.getPreviousSibling(); 362 } 363 while (node != null) { 364 if (node.getNodeType() == Node.COMMENT_NODE) 365 context.getComments().add(node.getTextContent()); 366 node = node.getNextSibling(); 367 } 368 } 369 370 private boolean isAttr(Property property) { 371 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 372 if (r.getValue() == PropertyRepresentation.XMLATTR) { 373 return true; 374 } 375 } 376 return false; 377 } 378 379 private boolean isText(Property property) { 380 for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) { 381 if (r.getValue() == PropertyRepresentation.XMLTEXT) { 382 return true; 383 } 384 } 385 return false; 386 } 387 388 @Override 389 public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws Exception { 390 XMLWriter xml = new XMLWriter(stream, "UTF-8"); 391 xml.setPretty(style == OutputStyle.PRETTY); 392 xml.start(); 393 xml.setDefaultNamespace(e.getProperty().getNamespace()); 394 composeElement(xml, e, e.getType()); 395 xml.end(); 396 397 } 398 399 private void composeElement(XMLWriter xml, Element element, String elementName) throws IOException { 400 for (String s : element.getComments()) { 401 xml.comment(s, true); 402 } 403 if (isText(element.getProperty())) { 404 xml.enter(elementName); 405 xml.text(element.getValue()); 406 xml.exit(elementName); 407 } else if (element.isPrimitive() || (element.hasType() && ParserBase.isPrimitive(element.getType()))) { 408 if (element.getType().equals("xhtml")) { 409 xml.escapedText(element.getValue()); 410 } else if (isText(element.getProperty())) { 411 xml.text(element.getValue()); 412 } else { 413 if (element.hasValue()) 414 xml.attribute("value", element.getValue()); 415 if (element.hasChildren()) { 416 xml.enter(elementName); 417 for (Element child : element.getChildren()) 418 composeElement(xml, child, child.getName()); 419 xml.exit(elementName); 420 } else 421 xml.element(elementName); 422 } 423 } else { 424 for (Element child : element.getChildren()) { 425 if (isAttr(child.getProperty())) 426 xml.attribute(child.getName(), child.getValue()); 427 } 428 xml.enter(elementName); 429 if (element.getSpecial() != null) 430 xml.enter(element.getType()); 431 for (Element child : element.getChildren()) { 432 if (isText(child.getProperty())) 433 xml.text(child.getValue()); 434 else if (!isAttr(child.getProperty())) 435 composeElement(xml, child, child.getName()); 436 } 437 if (element.getSpecial() != null) 438 xml.exit(element.getType()); 439 xml.exit(elementName); 440 } 441 } 442 443}