001package org.hl7.fhir.dstu3.elementmodel;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.OutputStream;
037import java.util.ArrayList;
038import java.util.Collections;
039import java.util.Comparator;
040import java.util.List;
041
042import javax.xml.parsers.DocumentBuilder;
043import javax.xml.parsers.DocumentBuilderFactory;
044import javax.xml.parsers.SAXParser;
045import javax.xml.parsers.SAXParserFactory;
046import javax.xml.transform.Transformer;
047import javax.xml.transform.TransformerFactory;
048import javax.xml.transform.dom.DOMResult;
049import javax.xml.transform.sax.SAXSource;
050
051import org.hl7.fhir.dstu3.context.IWorkerContext;
052import org.hl7.fhir.dstu3.elementmodel.Element.SpecialElement;
053import org.hl7.fhir.dstu3.formats.FormatUtilities;
054import org.hl7.fhir.dstu3.formats.IParser.OutputStyle;
055import org.hl7.fhir.dstu3.model.DateTimeType;
056import org.hl7.fhir.dstu3.model.ElementDefinition.PropertyRepresentation;
057import org.hl7.fhir.dstu3.model.Enumeration;
058import org.hl7.fhir.dstu3.model.StructureDefinition;
059import org.hl7.fhir.dstu3.utils.ToolingExtensions;
060import org.hl7.fhir.dstu3.utils.formats.XmlLocationAnnotator;
061import org.hl7.fhir.dstu3.utils.formats.XmlLocationData;
062import org.hl7.fhir.exceptions.DefinitionException;
063import org.hl7.fhir.exceptions.FHIRException;
064import org.hl7.fhir.exceptions.FHIRFormatError;
065import org.hl7.fhir.utilities.Utilities;
066import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
067import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
068import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
069import org.hl7.fhir.utilities.xhtml.XhtmlNode;
070import org.hl7.fhir.utilities.xhtml.XhtmlParser;
071import org.hl7.fhir.utilities.xml.IXMLWriter;
072import org.hl7.fhir.utilities.xml.XMLUtil;
073import org.hl7.fhir.utilities.xml.XMLWriter;
074import org.w3c.dom.Document;
075import org.w3c.dom.Node;
076import org.xml.sax.InputSource;
077import org.xml.sax.XMLReader;
078
079public class XmlParser extends ParserBase {
080  private boolean allowXsiLocation;
081
082  public XmlParser(IWorkerContext context) {
083    super(context);
084  }
085
086  
087  public boolean isAllowXsiLocation() {
088    return allowXsiLocation;
089  }
090
091  public void setAllowXsiLocation(boolean allowXsiLocation) {
092    this.allowXsiLocation = allowXsiLocation;
093  }
094
095
096  public Element parse(InputStream stream) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
097                Document doc = null;
098        try {
099                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
100                // xxe protection
101                factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
102                factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
103                factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
104                factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
105                factory.setXIncludeAware(false);
106                factory.setExpandEntityReferences(false);
107                        
108                factory.setNamespaceAware(true);
109                if (policy == ValidationPolicy.EVERYTHING) {
110                        // use a slower parser that keeps location data
111                        TransformerFactory transformerFactory = TransformerFactory.newInstance();
112                        Transformer nullTransformer = transformerFactory.newTransformer();
113                        DocumentBuilder docBuilder = factory.newDocumentBuilder();
114                        doc = docBuilder.newDocument();
115                        DOMResult domResult = new DOMResult(doc);
116                        SAXParserFactory spf = SAXParserFactory.newInstance();
117                        spf.setNamespaceAware(true);
118                        spf.setValidating(false);
119                // xxe protection
120                  spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
121        spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
122                        SAXParser saxParser = spf.newSAXParser();
123                        XMLReader xmlReader = saxParser.getXMLReader();
124                // xxe protection
125                  xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
126                  xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
127                                
128                        XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
129                        InputSource inputSource = new InputSource(stream);
130                        SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
131                        nullTransformer.transform(saxSource, domResult);
132                } else {
133                        DocumentBuilder builder = factory.newDocumentBuilder();
134                        doc = builder.parse(stream);
135                }
136        } catch (Exception e) {
137      logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
138      doc = null;
139        }
140        if (doc == null)
141                return null;
142        else
143      return parse(doc);
144  }
145
146  private void checkForProcessingInstruction(Document document) throws FHIRFormatError {
147    if (policy == ValidationPolicy.EVERYTHING) {
148      Node node = document.getFirstChild();
149      while (node != null) {
150        if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
151          logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR);
152        node = node.getNextSibling();
153      }
154    }
155  }
156
157  
158  private int line(Node node) {
159                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
160                return loc == null ? 0 : loc.getStartLine();
161  }
162
163  private int col(Node node) {
164                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
165                return loc == null ? 0 : loc.getStartColumn();
166  }
167
168  public Element parse(Document doc) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
169    checkForProcessingInstruction(doc);
170    org.w3c.dom.Element element = doc.getDocumentElement();
171    return parse(element);
172  }
173  
174  public Element parse(org.w3c.dom.Element element) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
175    String ns = element.getNamespaceURI();
176    String name = element.getLocalName();
177    String path = "/"+pathPrefix(ns)+name;
178    
179    StructureDefinition sd = getDefinition(line(element), col(element), ns, name);
180    if (sd == null)
181      return null;
182
183    Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
184    checkElement(element, path, result.getProperty());
185    result.markLocation(line(element), col(element));
186    result.setType(element.getLocalName());
187    parseChildren(path, element, result);
188    result.numberChildren();
189    return result;
190  }
191
192  private String pathPrefix(String ns) {
193    if (Utilities.noString(ns))
194      return "";
195    if (ns.equals(FormatUtilities.FHIR_NS))
196      return "f:";
197    if (ns.equals(FormatUtilities.XHTML_NS))
198      return "h:";
199    if (ns.equals("urn:hl7-org:v3"))
200      return "v3:";
201    return "?:";
202  }
203
204  private boolean empty(org.w3c.dom.Element element) {
205    for (int i = 0; i < element.getAttributes().getLength(); i++) {
206      String n = element.getAttributes().item(i).getNodeName();
207      if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
208        return false;
209    }
210    if (!Utilities.noString(element.getTextContent().trim()))
211      return false;
212    
213    Node n = element.getFirstChild();
214    while (n != null) {
215      if (n.getNodeType() == Node.ELEMENT_NODE)
216        return false;
217      n = n.getNextSibling();
218    }
219    return true;
220  }
221  
222  private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError {
223    if (policy == ValidationPolicy.EVERYTHING) {
224      if (empty(element))
225        logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR);
226      String ns = FormatUtilities.FHIR_NS;
227      if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
228        ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
229      else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
230        ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
231      if (!element.getNamespaceURI().equals(ns))
232        logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR);
233    }
234  }
235
236  public Element parse(org.w3c.dom.Element base, String type) throws Exception {
237    StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type);
238    Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
239    String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
240    checkElement(base, path, result.getProperty());
241    result.setType(base.getLocalName());
242    parseChildren(path, base, result);
243    result.numberChildren();
244    return result;
245  }
246
247  private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws FHIRFormatError, FHIRException, IOException, DefinitionException {
248        // this parsing routine retains the original order in a the XML file, to support validation
249        reapComments(node, context);
250    List<Property> properties = context.getProperty().getChildProperties(context.getName(), XMLUtil.getXsiType(node));
251
252        String text = XMLUtil.getDirectText(node).trim();
253    if (!Utilities.noString(text)) {
254        Property property = getTextProp(properties);
255        if (property != null) {
256            context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node)));
257        } else {
258        logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR);
259        }               
260    }
261    
262    for (int i = 0; i < node.getAttributes().getLength(); i++) {
263        Node attr = node.getAttributes().item(i);
264        if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
265        Property property = getAttrProp(properties, attr.getNodeName());
266        if (property != null) {
267                  String av = attr.getNodeValue();
268                  if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"))
269                        av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av);
270                        if (property.getName().equals("value") && context.isPrimitive())
271                                context.setValue(av);
272                        else
273                    context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node)));
274        } else if (!allowXsiLocation || !attr.getNodeName().endsWith(":schemaLocation") ) {
275          logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"' on "+node.getNodeName(), IssueSeverity.ERROR);                      
276        }
277        }
278    }
279    
280    Node child = node.getFirstChild();
281    while (child != null) {
282        if (child.getNodeType() == Node.ELEMENT_NODE) {
283                Property property = getElementProp(properties, child.getLocalName());
284                if (property != null) {
285                        if (!property.isChoice() && "xhtml".equals(property.getType())) {
286                XhtmlNode xhtml = new XhtmlParser().setValidatorMode(true).parseHtmlNode((org.w3c.dom.Element) child);
287                                                context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(XhtmlComposer.XML).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child)));
288                        } else {
289                          String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
290                                Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child));
291                                checkElement((org.w3c.dom.Element) child, npath, n.getProperty());
292                                boolean ok = true;
293                                if (property.isChoice()) {
294                                        if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
295                                                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
296                                                if (xsiType == null) {
297                          logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR);
298                          ok = false;
299                                                } else {
300                                                        if (xsiType.contains(":"))
301                                                                xsiType = xsiType.substring(xsiType.indexOf(":")+1);
302                                                        n.setType(xsiType);
303                                                }
304                                        } else
305                                          n.setType(n.getType());
306                                }
307                                context.getChildren().add(n);
308                                if (ok) {
309                                        if (property.isResource())
310                parseResource(npath, (org.w3c.dom.Element) child, n, property);
311                                        else
312                                                parseChildren(npath, (org.w3c.dom.Element) child, n);
313                                }
314                        }
315        } else
316          logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR);                    
317        } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){
318        logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR);                      
319        } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
320        logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR);
321        }
322        child = child.getNextSibling();
323    }
324  }
325
326  private Property getElementProp(List<Property> properties, String nodeName) {
327                List<Property> propsSortedByLongestFirst = new ArrayList<Property>(properties);
328                // sort properties according to their name longest first, so .requestOrganizationReference comes first before .request[x]
329                // and therefore the longer property names get evaluated first
330                Collections.sort(propsSortedByLongestFirst, new Comparator<Property>() {
331                        @Override
332                        public int compare(Property o1, Property o2) {
333                                return o2.getName().length() - o1.getName().length();
334                        }
335                });
336        for (Property p : propsSortedByLongestFirst)
337                if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
338                  if (p.getName().equals(nodeName)) 
339                                  return p;
340                  if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 
341                                  return p;
342                }
343        return null;
344        }
345
346        private Property getAttrProp(List<Property> properties, String nodeName) {
347        for (Property p : properties)
348                if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 
349                                return p;
350        return null;
351  }
352
353        private Property getTextProp(List<Property> properties) {
354        for (Property p : properties)
355                if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 
356                                return p;
357        return null;
358        }
359
360        private String convertForDateFormat(String fmt, String av) throws FHIRException {
361        if ("v3".equals(fmt)) {
362                DateTimeType d = DateTimeType.parseV3(av);
363                return d.asStringValue();
364        } else
365                throw new FHIRException("Unknown Data format '"+fmt+"'");
366        }
367
368  private void parseResource(String string, org.w3c.dom.Element container, Element parent, Property elementProperty) throws FHIRFormatError, DefinitionException, FHIRException, IOException {
369        org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
370    String name = res.getLocalName();
371    StructureDefinition sd = context.fetchTypeDefinition(name);
372    if (sd == null)
373      throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')");
374    parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), SpecialElement.fromProperty(parent.getProperty()), elementProperty);
375    parent.setType(name);
376    parseChildren(res.getLocalName(), res, parent);
377        }
378
379        private void reapComments(org.w3c.dom.Element element, Element context) {
380          Node node = element.getPreviousSibling();
381          while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
382                if (node.getNodeType() == Node.COMMENT_NODE)
383                        context.getComments().add(0, node.getTextContent());
384                node = node.getPreviousSibling();
385          }
386                node = element.getLastChild();
387                while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
388                        node = node.getPreviousSibling();
389                }
390                while (node != null) {
391                        if (node.getNodeType() == Node.COMMENT_NODE)
392                                context.getComments().add(node.getTextContent());
393                        node = node.getNextSibling();
394                }
395        }
396
397        private boolean isAttr(Property property) {
398                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
399                        if (r.getValue() == PropertyRepresentation.XMLATTR) {
400                                return true;
401                        }
402                }
403                return false;
404        }
405
406  private boolean isText(Property property) {
407                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
408                        if (r.getValue() == PropertyRepresentation.XMLTEXT) {
409                                return true;
410                        }
411                }
412                return false;
413  }
414
415        @Override
416  public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws IOException {
417    XMLWriter xml = new XMLWriter(stream, "UTF-8");
418    xml.setPretty(style == OutputStyle.PRETTY);
419    xml.start();
420    xml.setDefaultNamespace(e.getProperty().getNamespace());
421    composeElement(xml, e, e.getType());
422    xml.end();
423
424  }
425
426  public void compose(Element e, IXMLWriter xml) throws Exception {
427    xml.start();
428    xml.setDefaultNamespace(e.getProperty().getNamespace());
429    composeElement(xml, e, e.getType());
430    xml.end();
431  }
432
433  private void composeElement(IXMLWriter xml, Element element, String elementName) throws IOException {
434    for (String s : element.getComments()) {
435      xml.comment(s, true);
436    }
437    if (isText(element.getProperty())) {
438      if (linkResolver != null)
439        xml.link(linkResolver.resolveProperty(element.getProperty()));
440      xml.enter(elementName);
441      xml.text(element.getValue());
442      xml.exit(elementName);      
443    } else if (element.isPrimitive() || (element.hasType() && isPrimitive(element.getType()))) {
444      if (element.getType().equals("xhtml")) {
445        xml.escapedText(element.getValue());
446      } else if (isText(element.getProperty())) {
447        if (linkResolver != null)
448          xml.link(linkResolver.resolveProperty(element.getProperty()));
449        xml.text(element.getValue());
450      } else {
451        if (element.hasValue()) {
452          if (linkResolver != null)
453            xml.link(linkResolver.resolveType(element.getType()));
454        xml.attribute("value", element.getValue());
455        }
456        if (linkResolver != null)
457          xml.link(linkResolver.resolveProperty(element.getProperty()));
458                                if (element.hasChildren()) {
459                                        xml.enter(elementName);
460                                        for (Element child : element.getChildren()) 
461                                                composeElement(xml, child, child.getName());
462                                        xml.exit(elementName);
463                                } else
464        xml.element(elementName);
465      }
466    } else {
467      for (Element child : element.getChildren()) {
468        if (isAttr(child.getProperty())) {
469          if (linkResolver != null)
470            xml.link(linkResolver.resolveType(child.getType()));
471          xml.attribute(child.getName(), child.getValue());
472      }
473      }
474      if (linkResolver != null)
475        xml.link(linkResolver.resolveProperty(element.getProperty()));
476      xml.enter(elementName);
477      if (element.getSpecial() != null) {
478        if (linkResolver != null)
479          xml.link(linkResolver.resolveProperty(element.getProperty()));
480        xml.enter(element.getType());
481      }
482      for (Element child : element.getChildren()) {
483        if (isText(child.getProperty())) {
484          if (linkResolver != null)
485            xml.link(linkResolver.resolveProperty(element.getProperty()));
486          xml.text(child.getValue());
487        } else if (!isAttr(child.getProperty()))
488          composeElement(xml, child, child.getName());
489      }
490            if (element.getSpecial() != null)
491        xml.exit(element.getType());
492      xml.exit(elementName);
493    }
494  }
495
496}