001package org.hl7.fhir.r4.elementmodel;
002
003import java.io.IOException;
004import java.io.InputStream;
005import java.io.OutputStream;
006import java.io.UnsupportedEncodingException;
007import java.util.Collections;
008import java.util.Comparator;
009import java.util.List;
010import java.util.ArrayList;
011
012import javax.xml.parsers.DocumentBuilder;
013import javax.xml.parsers.DocumentBuilderFactory;
014import javax.xml.parsers.SAXParser;
015import javax.xml.parsers.SAXParserFactory;
016import javax.xml.transform.Transformer;
017import javax.xml.transform.TransformerFactory;
018import javax.xml.transform.dom.DOMResult;
019import javax.xml.transform.sax.SAXSource;
020
021import org.hl7.fhir.r4.conformance.ProfileUtilities;
022import org.hl7.fhir.r4.context.IWorkerContext;
023import org.hl7.fhir.r4.elementmodel.Element.SpecialElement;
024import org.hl7.fhir.r4.formats.FormatUtilities;
025import org.hl7.fhir.r4.formats.IParser.OutputStyle;
026import org.hl7.fhir.r4.model.DateTimeType;
027import org.hl7.fhir.r4.model.ElementDefinition.PropertyRepresentation;
028import org.hl7.fhir.r4.model.Enumeration;
029import org.hl7.fhir.r4.model.StructureDefinition;
030import org.hl7.fhir.r4.utils.ToolingExtensions;
031import org.hl7.fhir.r4.utils.formats.XmlLocationAnnotator;
032import org.hl7.fhir.r4.utils.formats.XmlLocationData;
033import org.hl7.fhir.exceptions.DefinitionException;
034import org.hl7.fhir.exceptions.FHIRException;
035import org.hl7.fhir.exceptions.FHIRFormatError;
036import org.hl7.fhir.utilities.Utilities;
037import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
038import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
039import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
040import org.hl7.fhir.utilities.xhtml.XhtmlNode;
041import org.hl7.fhir.utilities.xhtml.XhtmlParser;
042import org.hl7.fhir.utilities.xml.IXMLWriter;
043import org.hl7.fhir.utilities.xml.XMLUtil;
044import org.hl7.fhir.utilities.xml.XMLWriter;
045import org.w3c.dom.Document;
046import org.w3c.dom.Node;
047import org.xml.sax.InputSource;
048import org.xml.sax.XMLReader;
049
050public class XmlParser extends ParserBase {
051  private boolean allowXsiLocation;
052
053  public XmlParser(IWorkerContext context) {
054    super(context);
055  }
056
057  
058  public boolean isAllowXsiLocation() {
059    return allowXsiLocation;
060  }
061
062  public void setAllowXsiLocation(boolean allowXsiLocation) {
063    this.allowXsiLocation = allowXsiLocation;
064  }
065
066
067  public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
068                Document doc = null;
069        try {
070                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
071                // xxe protection
072                factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
073                factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
074                factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
075                factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
076                factory.setXIncludeAware(false);
077                factory.setExpandEntityReferences(false);
078                        
079                factory.setNamespaceAware(true);
080                if (policy == ValidationPolicy.EVERYTHING) {
081                        // use a slower parser that keeps location data
082                        TransformerFactory transformerFactory = TransformerFactory.newInstance();
083                        Transformer nullTransformer = transformerFactory.newTransformer();
084                        DocumentBuilder docBuilder = factory.newDocumentBuilder();
085                        doc = docBuilder.newDocument();
086                        DOMResult domResult = new DOMResult(doc);
087                        SAXParserFactory spf = SAXParserFactory.newInstance();
088                        spf.setNamespaceAware(true);
089                        spf.setValidating(false);
090                // xxe protection
091                  spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
092        spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
093                        SAXParser saxParser = spf.newSAXParser();
094                        XMLReader xmlReader = saxParser.getXMLReader();
095                // xxe protection
096                  xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
097                  xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
098                                
099                        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
100                        InputSource inputSource = new InputSource(stream);
101                        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
102                        nullTransformer.transform(saxSource, domResult);
103                } else {
104                        DocumentBuilder builder = factory.newDocumentBuilder();
105                        doc = builder.parse(stream);
106                }
107        } catch (Exception e) {
108      logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
109      doc = null;
110        }
111        if (doc == null)
112                return null;
113        else
114      return parse(doc);
115  }
116
117  private void checkForProcessingInstruction(Document document) throws FHIRFormatError {
118    if (policy == ValidationPolicy.EVERYTHING) {
119      Node node = document.getFirstChild();
120      while (node != null) {
121        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
122          logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR);
123        node = node.getNextSibling();
124      }
125    }
126  }
127
128  
129  private int line(Node node) {
130                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
131                return loc == null ? 0 : loc.getStartLine();
132  }
133
134  private int col(Node node) {
135                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
136                return loc == null ? 0 : loc.getStartColumn();
137  }
138
139  public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
140    checkForProcessingInstruction(doc);
141    org.w3c.dom.Element element = doc.getDocumentElement();
142    return parse(element);
143  }
144  
145  public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
146    String ns = element.getNamespaceURI();
147    String name = element.getLocalName();
148    String path = "/"+pathPrefix(ns)+name;
149    
150    StructureDefinition sd = getDefinition(line(element), col(element), ns, name);
151    if (sd == null)
152      return null;
153
154    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
155    checkElement(element, path, result.getProperty());
156    result.markLocation(line(element), col(element));
157    result.setType(element.getLocalName());
158    parseChildren(path, element, result);
159    result.numberChildren();
160    return result;
161  }
162
163  private String pathPrefix(String ns) {
164    if (Utilities.noString(ns))
165      return "";
166    if (ns.equals(FormatUtilities.FHIR_NS))
167      return "f:";
168    if (ns.equals(FormatUtilities.XHTML_NS))
169      return "h:";
170    if (ns.equals("urn:hl7-org:v3"))
171      return "v3:";
172    return "?:";
173  }
174
175  private boolean empty(org.w3c.dom.Element element) {
176    for (int i = 0; i < element.getAttributes().getLength(); i++) {
177      String n = element.getAttributes().item(i).getNodeName();
178      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
179        return false;
180    }
181    if (!Utilities.noString(element.getTextContent().trim()))
182      return false;
183    
184    Node n = element.getFirstChild();
185    while (n != null) {
186      if (n.getNodeType() == Node.ELEMENT_NODE)
187        return false;
188      n = n.getNextSibling();
189    }
190    return true;
191  }
192  
193  private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError {
194    if (policy == ValidationPolicy.EVERYTHING) {
195      if (empty(element))
196        logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR);
197      String ns = FormatUtilities.FHIR_NS;
198      if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
199        ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
200      else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
201        ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
202      if (!element.getNamespaceURI().equals(ns))
203        logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR);
204    }
205  }
206
207  public Element parse(org.w3c.dom.Element base, String type) throws Exception {
208    StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type);
209    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
210    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
211    checkElement(base, path, result.getProperty());
212    result.setType(base.getLocalName());
213    parseChildren(path, base, result);
214    result.numberChildren();
215    return result;
216  }
217
218  private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
219        // this parsing routine retains the original order in a the XML file, to support validation
220        reapComments(node, context);
221    List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node));
222
223        String text = XMLUtil.getDirectText(node).trim();
224    if (!Utilities.noString(text)) {
225        Property property = getTextProp(properties);
226        if (property != null) {
227            context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node)));
228        } else {
229        logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR);
230        }               
231    }
232    
233    for (int i = 0; i < node.getAttributes().getLength(); i++) {
234        Node attr = node.getAttributes().item(i);
235        if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
236        Property property = getAttrProp(properties, attr.getNodeName());
237        if (property != null) {
238                  String av = attr.getNodeValue();
239                  if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"))
240                        av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av);
241                        if (property.getName().equals("value") && context.isPrimitive())
242                                context.setValue(av);
243                        else
244                    context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node)));
245        } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) {
246          logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName()+" for type "+context.fhirType()+" (properties = "+properties+")", IssueSeverity.ERROR);                     
247        }
248        }
249    }
250    
251    Node child = node.getFirstChild();
252    while (child != null) {
253        if (child.getNodeType() == Node.ELEMENT_NODE) {
254                Property property = getElementProp(properties, child.getLocalName());
255                if (property != null) {
256                        if (!property.isChoice() && "xhtml".equals(property.getType())) {
257                XhtmlNode xhtml = new XhtmlParser().setValidatorMode(true).parseHtmlNode((org.w3c.dom.Element) child);
258                                                context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML, false).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child)));
259                        } else {
260                          String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
261                                Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child));
262                                checkElement((org.w3c.dom.Element) child, npath, n.getProperty());
263                                boolean ok = true;
264                                if (property.isChoice()) {
265                                        if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
266                                                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
267                                                if (xsiType == null) {
268                          logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR);
269                          ok = false;
270                                                } else {
271                                                        if (xsiType.contains(":"))
272                                                                xsiType = xsiType.substring(xsiType.indexOf(":")+1);
273                                                        n.setType(xsiType);
274                                                }
275                                        } else
276                                          n.setType(n.getType());
277                                }
278                                context.getChildren().add(n);
279                                if (ok) {
280                                        if (property.isResource())
281                parseResource(npath, (org.w3c.dom.Element) child, n, property);
282                                        else
283                                                parseChildren(npath, (org.w3c.dom.Element) child, n);
284                                }
285                        }
286        } else
287          logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR);                    
288        } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){
289        logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR);                      
290        } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
291        logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR);
292        }
293        child = child.getNextSibling();
294    }
295  }
296
297  private Property getElementProp(List<Property> properties, String nodeName) {
298                List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
299                // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
300                // and therefore the longer property names get evaluated first
301                Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
302                        @Override
303                        public int compare(Property o1, Property o2) {
304                                return o2.getName().length() - o1.getName().length();
305                        }
306                });
307        for (Property p : propsSortedByLongestFirst)
308                if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
309                  if (p.getName().equals(nodeName)) 
310                                  return p;
311                  if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 
312                                  return p;
313                }
314        return null;
315        }
316
317        private Property getAttrProp(List<Property> properties, String nodeName) {
318        for (Property p : properties)
319                if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 
320                                return p;
321        return null;
322  }
323
324        private Property getTextProp(List<Property> properties) {
325        for (Property p : properties)
326                if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 
327                                return p;
328        return null;
329        }
330
331        private String convertForDateFormat(String fmt, String av) throws FHIRException {
332        if ("v3".equals(fmt)) {
333                DateTimeType d = DateTimeType.parseV3(av);
334                return d.asStringValue();
335        } else
336                throw new FHIRException("Unknown Data format '"+fmt+"'");
337        }
338
339  private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
340        org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
341    String name = res.getLocalName();
342    StructureDefinition sd = context.fetchResource(StructureDefinition.class, ProfileUtilities.sdNs(name));
343    if (sd == null)
344      throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')");
345    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
346    parent.setType(name);
347    parseChildren(res.getLocalName(), res, parent);
348        }
349
350        private void reapComments(org.w3c.dom.Element element, Element context) {
351          Node node = element.getPreviousSibling();
352          while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
353                if (node.getNodeType() == Node.COMMENT_NODE)
354                        context.getComments().add(0, node.getTextContent());
355                node = node.getPreviousSibling();
356          }
357                node = element.getLastChild();
358                while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
359                        node = node.getPreviousSibling();
360                }
361                while (node != null) {
362                        if (node.getNodeType() == Node.COMMENT_NODE)
363                                context.getComments().add(node.getTextContent());
364                        node = node.getNextSibling();
365                }
366        }
367
368        private boolean isAttr(Property property) {
369                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
370                        if (r.getValue() == PropertyRepresentation.XMLATTR) {
371                                return true;
372                        }
373                }
374                return false;
375        }
376
377  private boolean isText(Property property) {
378                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
379                        if (r.getValue() == PropertyRepresentation.XMLTEXT) {
380                                return true;
381                        }
382                }
383                return false;
384  }
385
386        @Override
387  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException {
388    XMLWriter xml = new XMLWriter(stream, "UTF-8");
389    xml.setPretty(style == OutputStyle.PRETTY);
390    xml.start();
391    xml.setDefaultNamespace(e.getProperty().getNamespace());
392    composeElement(xml, e, e.getType(), true);
393    xml.end();
394
395  }
396
397  public void compose(Element e, IXMLWriter xml) throws Exception {
398    xml.start();
399    xml.setDefaultNamespace(e.getProperty().getNamespace());
400    composeElement(xml, e, e.getType(), true);
401    xml.end();
402  }
403
404  private void composeElement(IXMLWriter xml, Element element, String elementName, boolean root) throws IOException {
405    for (String s : element.getComments()) {
406      xml.comment(s, true);
407    }
408    if (isText(element.getProperty())) {
409      if (linkResolver != null)
410        xml.link(linkResolver.resolveProperty(element.getProperty()));
411      xml.enter(elementName);
412      xml.text(element.getValue());
413      xml.exit(elementName);      
414    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
415      if (element.getType().equals("xhtml")) {
416        xml.escapedText(element.getValue());
417      } else if (isText(element.getProperty())) {
418        if (linkResolver != null)
419          xml.link(linkResolver.resolveProperty(element.getProperty()));
420        xml.text(element.getValue());
421      } else {
422        if (element.hasValue()) {
423          if (linkResolver != null)
424            xml.link(linkResolver.resolveType(element.getType()));
425        xml.attribute("value", element.getValue());
426        }
427        if (linkResolver != null)
428          xml.link(linkResolver.resolveProperty(element.getProperty()));
429                                if (element.hasChildren()) {
430                                        xml.enter(elementName);
431                                        for (Element child : element.getChildren()) 
432                                                composeElement(xml, child, child.getName(), false);
433                                        xml.exit(elementName);
434                                } else
435        xml.element(elementName);
436      }
437    } else {
438      for (Element child : element.getChildren()) {
439        if (isAttr(child.getProperty())) {
440          if (linkResolver != null)
441            xml.link(linkResolver.resolveType(child.getType()));
442          xml.attribute(child.getName(), child.getValue());
443      }
444      }
445      if (linkResolver != null)
446        xml.link(linkResolver.resolveProperty(element.getProperty()));
447      xml.enter(elementName);
448      if (!root && element.getSpecial() != null) {
449        if (linkResolver != null)
450          xml.link(linkResolver.resolveProperty(element.getProperty()));
451        xml.enter(element.getType());
452      }
453      for (Element child : element.getChildren()) {
454        if (isText(child.getProperty())) {
455          if (linkResolver != null)
456            xml.link(linkResolver.resolveProperty(element.getProperty()));
457          xml.text(child.getValue());
458        } else if (!isAttr(child.getProperty()))
459          composeElement(xml, child, child.getName(), false);
460      }
461            if (!root && element.getSpecial() != null)
462        xml.exit(element.getType());
463      xml.exit(elementName);
464    }
465  }
466
467}