001package org.hl7.fhir.dstu2016may.metamodel;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.OutputStream;
037import java.util.List;
038
039import javax.xml.parsers.DocumentBuilder;
040import javax.xml.parsers.DocumentBuilderFactory;
041import javax.xml.parsers.SAXParser;
042import javax.xml.parsers.SAXParserFactory;
043import javax.xml.transform.Transformer;
044import javax.xml.transform.TransformerFactory;
045import javax.xml.transform.dom.DOMResult;
046import javax.xml.transform.sax.SAXSource;
047
048import org.hl7.fhir.dstu2016may.formats.FormatUtilities;
049import org.hl7.fhir.dstu2016may.formats.IParser.OutputStyle;
050import org.hl7.fhir.dstu2016may.metamodel.Element.SpecialElement;
051import org.hl7.fhir.dstu2016may.model.DateTimeType;
052import org.hl7.fhir.dstu2016may.model.ElementDefinition.PropertyRepresentation;
053import org.hl7.fhir.dstu2016may.model.Enumeration;
054import org.hl7.fhir.dstu2016may.model.StructureDefinition;
055import org.hl7.fhir.dstu2016may.utils.IWorkerContext;
056import org.hl7.fhir.dstu2016may.utils.ToolingExtensions;
057import org.hl7.fhir.dstu2016may.utils.XmlLocationAnnotator;
058import org.hl7.fhir.dstu2016may.utils.XmlLocationData;
059import org.hl7.fhir.exceptions.FHIRException;
060import org.hl7.fhir.exceptions.FHIRFormatError;
061import org.hl7.fhir.utilities.Utilities;
062import org.hl7.fhir.utilities.validation.ValidationMessage.IssueSeverity;
063import org.hl7.fhir.utilities.validation.ValidationMessage.IssueType;
064import org.hl7.fhir.utilities.xhtml.XhtmlComposer;
065import org.hl7.fhir.utilities.xhtml.XhtmlNode;
066import org.hl7.fhir.utilities.xhtml.XhtmlParser;
067import org.hl7.fhir.utilities.xml.XMLUtil;
068import org.hl7.fhir.utilities.xml.XMLWriter;
069import org.w3c.dom.Document;
070import org.w3c.dom.Node;
071import org.xml.sax.InputSource;
072import org.xml.sax.XMLReader;
073
074public class XmlParser extends ParserBase {
075        public XmlParser(IWorkerContext context) {
076                super(context);
077        }
078
079        public Element parse(InputStream stream) throws Exception {
080                Document doc = null;
081                try {
082                        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
083                        // xxe protection
084                        factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
085                        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
086                        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
087                        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
088                        factory.setXIncludeAware(false);
089                        factory.setExpandEntityReferences(false);
090
091                        factory.setNamespaceAware(true);
092                        if (policy == ValidationPolicy.EVERYTHING) {
093                                // use a slower parser that keeps location data
094                                TransformerFactory transformerFactory = TransformerFactory.newInstance();
095                                Transformer nullTransformer = transformerFactory.newTransformer();
096                                DocumentBuilder docBuilder = factory.newDocumentBuilder();
097                                doc = docBuilder.newDocument();
098                                DOMResult domResult = new DOMResult(doc);
099                                SAXParserFactory spf = SAXParserFactory.newInstance();
100                                spf.setNamespaceAware(true);
101                                spf.setValidating(false);
102                                SAXParser saxParser = spf.newSAXParser();
103                                XMLReader xmlReader = saxParser.getXMLReader();
104                                // xxe protection
105                                spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
106                                spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
107                                xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
108                                xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
109
110                                XmlLocationAnnotator locationAnnotator = new XmlLocationAnnotator(xmlReader, doc);
111                                InputSource inputSource = new InputSource(stream);
112                                SAXSource saxSource = new SAXSource(locationAnnotator, inputSource);
113                                nullTransformer.transform(saxSource, domResult);
114                        } else {
115                                DocumentBuilder builder = factory.newDocumentBuilder();
116                                doc = builder.parse(stream);
117                        }
118                } catch (Exception e) {
119                        logError(0, 0, "(syntax)", IssueType.INVALID, e.getMessage(), IssueSeverity.FATAL);
120                        doc = null;
121                }
122                if (doc == null)
123                        return null;
124                else
125                        return parse(doc);
126        }
127
128        private void checkForProcessingInstruction(Document document) throws FHIRFormatError {
129                if (policy == ValidationPolicy.EVERYTHING) {
130                        Node node = document.getFirstChild();
131                        while (node != null) {
132                                if (node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE)
133                                        logError(line(document), col(document), "(document)", IssueType.INVALID, "No processing instructions allowed in resources", IssueSeverity.ERROR);
134                                node = node.getNextSibling();
135                        }
136                }
137        }
138
139
140        private int line(Node node) {
141                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
142                return loc == null ? 0 : loc.getStartLine();
143        }
144
145        private int col(Node node) {
146                XmlLocationData loc = (XmlLocationData) node.getUserData(XmlLocationData.LOCATION_DATA_KEY);
147                return loc == null ? 0 : loc.getStartColumn();
148        }
149
150        public Element parse(Document doc) throws Exception {
151                checkForProcessingInstruction(doc);
152                org.w3c.dom.Element element = doc.getDocumentElement();
153                return parse(element);
154        }
155
156        public Element parse(org.w3c.dom.Element element) throws Exception {
157                String ns = element.getNamespaceURI();
158                String name = element.getLocalName();
159                String path = "/"+pathPrefix(ns)+name;
160
161                StructureDefinition sd = getDefinition(line(element), col(element), ns, name);
162                if (sd == null)
163                        return null;
164
165                Element result = new Element(element.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
166                checkElement(element, path, result.getProperty());
167                result.markLocation(line(element), col(element));
168                result.setType(element.getLocalName());
169                parseChildren(path, element, result);
170                result.numberChildren();
171                return result;
172        }
173
174        private String pathPrefix(String ns) {
175                if (Utilities.noString(ns))
176                        return "";
177                if (ns.equals(FormatUtilities.FHIR_NS))
178                        return "f:";
179                if (ns.equals(FormatUtilities.XHTML_NS))
180                        return "h:";
181                if (ns.equals("urn:hl7-org:v3"))
182                        return "v3:";
183                return "?:";
184        }
185
186        private boolean empty(org.w3c.dom.Element element) {
187                for (int i = 0; i < element.getAttributes().getLength(); i++) {
188                        String n = element.getAttributes().item(i).getNodeName();
189                        if (!n.equals("xmlns") && !n.startsWith("xmlns:"))
190                                return false;
191                }
192                if (!Utilities.noString(element.getTextContent().trim()))
193                        return false;
194
195                Node n = element.getFirstChild();
196                while (n != null) {
197                        if (n.getNodeType() == Node.ELEMENT_NODE)
198                                return false;
199                        n = n.getNextSibling();
200                }
201                return true;
202        }
203
204        private void checkElement(org.w3c.dom.Element element, String path, Property prop) throws FHIRFormatError {
205                if (policy == ValidationPolicy.EVERYTHING) {
206                        if (empty(element))
207                                logError(line(element), col(element), path, IssueType.INVALID, "Element must have some content", IssueSeverity.ERROR);
208                        String ns = FormatUtilities.FHIR_NS;
209                        if (ToolingExtensions.hasExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
210                                ns = ToolingExtensions.readStringExtension(prop.getDefinition(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
211                        else if (ToolingExtensions.hasExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace"))
212                                ns = ToolingExtensions.readStringExtension(prop.getStructure(), "http://hl7.org/fhir/StructureDefinition/elementdefinition-namespace");
213                        if (!element.getNamespaceURI().equals(ns))
214                                logError(line(element), col(element), path, IssueType.INVALID, "Wrong namespace - expected '"+ns+"'", IssueSeverity.ERROR);
215                }
216        }
217
218        public Element parse(org.w3c.dom.Element base, String type) throws Exception {
219                StructureDefinition sd = getDefinition(0, 0, FormatUtilities.FHIR_NS, type);
220                Element result = new Element(base.getLocalName(), new Property(context, sd.getSnapshot().getElement().get(0), sd));
221                String path = "/"+pathPrefix(base.getNamespaceURI())+base.getLocalName();
222                checkElement(base, path, result.getProperty());
223                result.setType(base.getLocalName());
224                parseChildren(path, base, result);
225                result.numberChildren();
226                return result;
227        }
228
229        private void parseChildren(String path, org.w3c.dom.Element node, Element context) throws Exception {
230                // this parsing routine retains the original order in a the XML file, to support validation
231                reapComments(node, context);
232                List<Property> properties = getChildProperties(context.getProperty(), context.getName(), XMLUtil.getXsiType(node));
233
234                String text = XMLUtil.getDirectText(node).trim();
235                if (!Utilities.noString(text)) {
236                        Property property = getTextProp(properties);
237                        if (property != null) {
238                                context.getChildren().add(new Element(property.getName(), property, property.getType(), text).markLocation(line(node), col(node)));
239                        } else {
240                                logError(line(node), col(node), path, IssueType.STRUCTURE, "Text should not be present", IssueSeverity.ERROR);
241                        }               
242                }
243
244                for (int i = 0; i < node.getAttributes().getLength(); i++) {
245                        Node attr = node.getAttributes().item(i);
246                        if (!(attr.getNodeName().equals("xmlns") || attr.getNodeName().startsWith("xmlns:"))) {
247                                Property property = getAttrProp(properties, attr.getNodeName());
248                                if (property != null) {
249                                        String av = attr.getNodeValue();
250                                        if (ToolingExtensions.hasExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"))
251                                                av = convertForDateFormat(ToolingExtensions.readStringExtension(property.getDefinition(), "http://www.healthintersections.com.au/fhir/StructureDefinition/elementdefinition-dateformat"), av);
252                                        if (property.getName().equals("value") && context.isPrimitive())
253                                                context.setValue(av);
254                                        else
255                                                context.getChildren().add(new Element(property.getName(), property, property.getType(), av).markLocation(line(node), col(node)));
256                                } else {
257                                        logError(line(node), col(node), path, IssueType.STRUCTURE, "Undefined attribute '@"+attr.getNodeName()+"'", IssueSeverity.ERROR);               
258                                }
259                        }
260                }
261
262                Node child = node.getFirstChild();
263                while (child != null) {
264                        if (child.getNodeType() == Node.ELEMENT_NODE) {
265                                Property property = getElementProp(properties, child.getLocalName());
266                                if (property != null) {
267                                        if (!property.isChoice() && "xhtml".equals(property.getType())) {
268                                                XhtmlNode xhtml = new XhtmlParser().setValidatorMode(true).parseHtmlNode((org.w3c.dom.Element) child);
269                                                context.getChildren().add(new Element("div", property, "xhtml", new XhtmlComposer(true, false).compose(xhtml)).setXhtml(xhtml).markLocation(line(child), col(child)));
270                                        } else {
271                                                String npath = path+"/"+pathPrefix(child.getNamespaceURI())+child.getLocalName();
272                                                Element n = new Element(child.getLocalName(), property).markLocation(line(child), col(child));
273                                                checkElement((org.w3c.dom.Element) child, npath, n.getProperty());
274                                                boolean ok = true;
275                                                if (property.isChoice()) {
276                                                        if (property.getDefinition().hasRepresentation(PropertyRepresentation.TYPEATTR)) {
277                                                                String xsiType = ((org.w3c.dom.Element) child).getAttributeNS(FormatUtilities.NS_XSI, "type");
278                                                                if (xsiType == null) {
279                                                                        logError(line(child), col(child), path, IssueType.STRUCTURE, "No type found on '"+child.getLocalName()+'"', IssueSeverity.ERROR);
280                                                                        ok = false;
281                                                                } else {
282                                                                        if (xsiType.contains(":"))
283                                                                                xsiType = xsiType.substring(xsiType.indexOf(":")+1);
284                                                                        n.setType(xsiType);
285                                                                }
286                                                        } else
287                                                                n.setType(n.getType());
288                                                }
289                                                context.getChildren().add(n);
290                                                if (ok) {
291                                                        if (property.isResource())
292                                                                parseResource(npath, (org.w3c.dom.Element) child, n);
293                                                        else
294                                                                parseChildren(npath, (org.w3c.dom.Element) child, n);
295                                                }
296                                        }
297                                } else
298                                        logError(line(child), col(child), path, IssueType.STRUCTURE, "Undefined element '"+child.getLocalName()+"'", IssueSeverity.ERROR);              
299                        } else if (child.getNodeType() == Node.CDATA_SECTION_NODE){
300                                logError(line(child), col(child), path, IssueType.STRUCTURE, "CDATA is not allowed", IssueSeverity.ERROR);                      
301                        } else if (!Utilities.existsInList(child.getNodeType(), 3, 8)) {
302                                logError(line(child), col(child), path, IssueType.STRUCTURE, "Node type "+Integer.toString(child.getNodeType())+" is not allowed", IssueSeverity.ERROR);
303                        }
304                        child = child.getNextSibling();
305                }
306        }
307
308        private Property getElementProp(List<Property> properties, String nodeName) {
309                for (Property p : properties)
310                        if (!p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR) && !p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) {
311                                if (p.getName().equals(nodeName)) 
312                                        return p;
313                                if (p.getName().endsWith("[x]") && nodeName.length() > p.getName().length()-3 && p.getName().substring(0, p.getName().length()-3).equals(nodeName.substring(0, p.getName().length()-3))) 
314                                        return p;
315                        }
316                return null;
317        }
318
319        private Property getAttrProp(List<Property> properties, String nodeName) {
320                for (Property p : properties)
321                        if (p.getName().equals(nodeName) && p.getDefinition().hasRepresentation(PropertyRepresentation.XMLATTR)) 
322                                return p;
323                return null;
324        }
325
326        private Property getTextProp(List<Property> properties) {
327                for (Property p : properties)
328                        if (p.getDefinition().hasRepresentation(PropertyRepresentation.XMLTEXT)) 
329                                return p;
330                return null;
331        }
332
333        private String convertForDateFormat(String fmt, String av) throws FHIRException {
334                if ("v3".equals(fmt)) {
335                        DateTimeType d = DateTimeType.parseV3(av);
336                        return d.asStringValue();
337                } else
338                        throw new FHIRException("Unknown Data format '"+fmt+"'");
339        }
340
341        private void parseResource(String string, org.w3c.dom.Element container, Element parent) throws Exception {
342                org.w3c.dom.Element res = XMLUtil.getFirstChild(container);
343                String name = res.getLocalName();
344                StructureDefinition sd = context.fetchTypeDefinition(name);
345                if (sd == null)
346                        throw new FHIRFormatError("Contained resource does not appear to be a FHIR resource (unknown name '"+res.getLocalName()+"')");
347                parent.updateProperty(new Property(context, sd.getSnapshot().getElement().get(0), sd), parent.getProperty().getName().equals("contained") ? SpecialElement.CONTAINED : SpecialElement.BUNDLE_ENTRY);
348                parent.setType(name);
349                parseChildren(res.getLocalName(), res, parent);
350        }
351
352        private void reapComments(org.w3c.dom.Element element, Element context) {
353                Node node = element.getPreviousSibling();
354                while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
355                        if (node.getNodeType() == Node.COMMENT_NODE)
356                                context.getComments().add(0, node.getTextContent());
357                        node = node.getPreviousSibling();
358                }
359                node = element.getLastChild();
360                while (node != null && node.getNodeType() != Node.ELEMENT_NODE) {
361                        node = node.getPreviousSibling();
362                }
363                while (node != null) {
364                        if (node.getNodeType() == Node.COMMENT_NODE)
365                                context.getComments().add(node.getTextContent());
366                        node = node.getNextSibling();
367                }
368        }
369
370        private boolean isAttr(Property property) {
371                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
372                        if (r.getValue() == PropertyRepresentation.XMLATTR) {
373                                return true;
374                        }
375                }
376                return false;
377        }
378
379        private boolean isText(Property property) {
380                for (Enumeration<PropertyRepresentation> r : property.getDefinition().getRepresentation()) {
381                        if (r.getValue() == PropertyRepresentation.XMLTEXT) {
382                                return true;
383                        }
384                }
385                return false;
386        }
387
388        @Override
389        public void compose(Element e, OutputStream stream, OutputStyle style, String base) throws Exception {
390                XMLWriter xml = new XMLWriter(stream, "UTF-8");
391                xml.setPretty(style == OutputStyle.PRETTY);
392                xml.start();
393                xml.setDefaultNamespace(e.getProperty().getNamespace());
394                composeElement(xml, e, e.getType());
395                xml.end();
396
397        }
398
399        private void composeElement(XMLWriter xml, Element element, String elementName) throws IOException {
400                for (String s : element.getComments()) {
401                        xml.comment(s, true);
402                }
403                if (isText(element.getProperty())) {
404                        xml.enter(elementName);
405                        xml.text(element.getValue());
406                        xml.exit(elementName);      
407                } else if (element.isPrimitive() || (element.hasType() && ParserBase.isPrimitive(element.getType()))) {
408                        if (element.getType().equals("xhtml")) {
409                                xml.escapedText(element.getValue());
410                        } else if (isText(element.getProperty())) {
411                                xml.text(element.getValue());
412                        } else {
413                                if (element.hasValue())
414                                  xml.attribute("value", element.getValue());
415                                if (element.hasChildren()) {
416                                        xml.enter(elementName);
417                                        for (Element child : element.getChildren()) 
418                                                composeElement(xml, child, child.getName());
419                                        xml.exit(elementName);
420                                } else
421                                xml.element(elementName);
422                        }
423                } else {
424                        for (Element child : element.getChildren()) {
425                                if (isAttr(child.getProperty()))
426                                        xml.attribute(child.getName(), child.getValue());
427                        }
428                        xml.enter(elementName);
429            if (element.getSpecial() != null)
430        xml.enter(element.getType());
431                        for (Element child : element.getChildren()) {
432                                if (isText(child.getProperty()))
433                                        xml.text(child.getValue());
434                                else if (!isAttr(child.getProperty()))
435                                        composeElement(xml, child, child.getName());
436                        }
437            if (element.getSpecial() != null)
438        xml.exit(element.getType());
439                        xml.exit(elementName);
440                }
441        }
442
443}