001package org.hl7.fhir.utilities.xml;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.ByteArrayInputStream;
035import java.io.File;
036import java.io.FileInputStream;
037import java.io.IOException;
038import java.io.InputStream;
039import java.io.InputStreamReader;
040import java.io.OutputStream;
041import java.util.ArrayList;
042import java.util.List;
043import java.util.Set;
044
045import javax.xml.parsers.DocumentBuilder;
046import javax.xml.parsers.DocumentBuilderFactory;
047import javax.xml.parsers.ParserConfigurationException;
048import javax.xml.transform.Result;
049import javax.xml.transform.Source;
050import javax.xml.transform.Transformer;
051import javax.xml.transform.TransformerException;
052import javax.xml.transform.TransformerFactory;
053import javax.xml.transform.dom.DOMSource;
054import javax.xml.transform.stream.StreamResult;
055
056import org.hl7.fhir.exceptions.FHIRException;
057import org.hl7.fhir.utilities.Utilities;
058import org.w3c.dom.Attr;
059import org.w3c.dom.Document;
060import org.w3c.dom.Element;
061import org.w3c.dom.Node;
062import org.w3c.dom.ls.DOMImplementationLS;
063import org.w3c.dom.ls.LSSerializer;
064import org.xml.sax.SAXException;
065
066public class XMLUtil {
067
        /** One-character string holding the no-break space, code point U+00A0. */
068        public static final String SPACE_CHAR = "\u00A0";
069
070  public static boolean isNMToken(String name) {
071                if (name == null)
072                        return false;
073                for (int i = 0; i < name.length(); i++) 
074                        if (!isNMTokenChar(name.charAt(i)))
075                                return false;   
076                return name.length() > 0;
077        }
078
079        public static boolean isNMTokenChar(char c) {
080                return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c);
081        }
082
083        private static boolean isDigit(char c) {
084                return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') || 
085                        (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') || 
086                        (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') || 
087                        (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') || 
088                        (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
089        }
090
        /**
         * Tests whether a character falls in one of the hard-coded ranges (or equals
         * one of the single code points) listed in the expression below. All range
         * bounds are inclusive. Used by {@link #isNMTokenChar(char)}.
         *
         * NOTE(review): the table looks like the CombiningChar production from
         * Appendix B of XML 1.0 - confirm against the specification before editing
         * any entry.
         *
         * @param c the character to classify
         * @return true when c matches any entry of the table
         */
091        private static boolean isCombiningChar(char c) {
092                return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || 
093                        (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') || 
094                        c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') || 
095                        c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') || 
096                        (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' || 
097                        (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || 
098                        (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') || 
099                        (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') || 
100                        c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') || 
101                        (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') || 
102                        (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || 
103                        (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') || 
104                        (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') || 
105                        (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') || 
106                        (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || 
107                        (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || 
108                        (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || 
109                        (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || 
110                        (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' || 
111                        c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' || 
112                        (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') || 
113                        (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' || 
114                        (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' || 
115                        (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') ||
116                        c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A';
117        }
118
119        private static boolean isExtender(char c) {
120                return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' || 
121                        c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || 
122                        (c >= '\u30FC' && c <= '\u30FE');
123        }
124
125        private static boolean isLetter(char c) {
126                return isBaseChar(c) || isIdeographic(c);
127        }
128
        /**
         * Tests whether a character falls in one of the hard-coded ranges (or equals
         * one of the single code points) listed in the expression below. All range
         * bounds are inclusive. Together with {@code isIdeographic} this defines what
         * {@code isLetter} accepts.
         *
         * NOTE(review): the table looks like the BaseChar production from Appendix B
         * of XML 1.0 - confirm against the specification before editing any entry.
         *
         * @param c the character to classify
         * @return true when c matches any entry of the table
         */
129        private static boolean isBaseChar(char c) {
130                return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') || 
131                        (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') || 
132                        (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') || 
133                        (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') || 
134                        (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') || 
135                        c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') || 
136                        (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' || 
137                        c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') || 
138                        (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') || 
139                        (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') || 
140                        (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') || 
141                        c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') || 
142                        (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') || 
143                        (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') || 
144                        c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' || 
145                        (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') || 
146                        (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' || 
147                        (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') || 
148                        (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || 
149                        (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || 
150                        (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') || 
151                        c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' || 
152                        (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') || 
153                        (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' || 
154                        (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') || 
155                        (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') || 
156                        c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || 
157                        (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || 
158                        (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') || 
159                        (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') || 
160                        (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') || 
161                        (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') || 
162                        (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') || 
163                        (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') || 
164                        c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || 
165                        (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || 
166                        (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' || 
167                        (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') || 
168                        c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') || 
169                        (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' || 
170                        (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' || 
171                        (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') || 
172                        (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') || 
173                        (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') || 
174                        (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') || 
175                        (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' || 
176                        c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' || 
177                        (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' || 
178                        (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' || 
179                        c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') || 
180                        (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') || 
181                        c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || 
182                        (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || 
183                        (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' || 
184                        (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') || 
185                        c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || 
186                        (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || 
187                        (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' || 
188                        (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') || 
189                        (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') || 
190                        (c >= '\uAC00' && c <= '\uD7A3');
191        }
192
193        private static boolean isIdeographic(char c) {
194                return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
195        }
196
197        public static String determineEncoding(InputStream stream) throws IOException {
198                stream.mark(20000);
199                try {
200                        int b0 = stream.read();
201                        int b1 = stream.read();
202                        int b2 = stream.read();
203                        int b3 = stream.read();
204
205                        if (b0 == 0xFE && b1 == 0xFF)
206                                return "UTF-16BE";
207                        else if (b0 == 0xFF && b1 == 0xFE)
208                                return "UTF-16LE";
209                        else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF )
210                                return "UTF-8";
211                        else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F)
212                                return "UTF-16BE";
213                        else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00)
214                                return "UTF-16LE";
215                        else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) {
216//                              UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding 
217//                              which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding 
218//                              declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns 
219//                              for the relevant ASCII characters, the encoding declaration itself may be read reliably
220                                InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII");
221                                String hdr = readFirstLine(rdr);
222                                return extractEncoding(hdr); 
223                        } else
224                                return null;
225                } finally {
226                        stream.reset();
227                }
228        }
229
230        private static String extractEncoding(String hdr) {
231                int i = hdr.indexOf("encoding=");
232                if (i == -1)
233                        return null;
234                hdr = hdr.substring(i+9);
235                char sep = hdr.charAt(0);
236                hdr = hdr.substring(1);
237                i = hdr.indexOf(sep);
238                if (i == -1)
239                        return null;
240                return hdr.substring(0, i);
241        }
242
243        private static String readFirstLine(InputStreamReader rdr) throws IOException {
244                char[] buf = new char[1];
245                StringBuffer bldr = new StringBuffer();
246                rdr.read(buf);
247                while (buf[0] != '>') {
248                        bldr.append(buf[0]);
249                        rdr.read(buf);
250                }
251                return bldr.toString();
252        }
253
254        
255    public static boolean charSetImpliesAscii(String charset) {
256                return charset.equals("ISO-8859-1") || charset.equals("US-ASCII");
257        }
258
259        
260        /**
261         * Converts the raw characters to XML escape characters.
262         * 
263         * @param rawContent
264         * @param charset Null when charset is not known, so we assume it's unicode
265         * @param isNoLines
266         * @return escape string
267         */
268        public static String escapeXML(String rawContent, String charset, boolean isNoLines) {
269                if (rawContent == null)
270                        return "";
271                else {
272                        StringBuffer sb = new StringBuffer();
273
274                        for (int i = 0; i < rawContent.length(); i++) {
275                                char ch = rawContent.charAt(i);
276                                if (ch == '\'')
277                                        sb.append("&#39;");
278                                else if (ch == '&')
279                                        sb.append("&amp;");
280                                else if (ch == '"')
281                                        sb.append("&quot;");
282                                else if (ch == '<')
283                                        sb.append("&lt;");
284                                else if (ch == '>')
285                                        sb.append("&gt;");
286                                else if (ch > '~' && charset != null && charSetImpliesAscii(charset)) 
287                                        // TODO - why is hashcode the only way to get the unicode number for the character
288                                        // in jre 5.0?
289                                        sb.append("&#x"+Integer.toHexString(ch).toUpperCase()+";");
290                                else if (isNoLines) {
291                                        if (ch == '\r')
292                                                sb.append("&#xA;");
293                                        else if (ch != '\n')
294                                                sb.append(ch);
295                                }
296                                else
297                                        sb.append(ch);
298                        }
299                        return sb.toString();
300                }
301        }
302
303  public static Element getFirstChild(Element e) {
304    if (e == null)
305      return null;
306    Node n = e.getFirstChild();
307    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
308      n = n.getNextSibling();
309    return (Element) n;
310  }
311
312  public static Element getNamedChild(Element e, String name) {
313    Element c = getFirstChild(e);
314    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
315      c = getNextSibling(c);
316    return c;
317  }
318
319  public static Element getNamedChildByAttribute(Element e, String name, String nname, String nvalue) {
320    Element c = getFirstChild(e);
321    while (c != null && !((name.equals(c.getLocalName()) || name.equals(c.getNodeName())) && nvalue.equals(c.getAttribute(nname))))
322      c = getNextSibling(c);
323    return c;
324  }
325
326  public static Element getNextSibling(Element e) {
327    Node n = e.getNextSibling();
328    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
329      n = n.getNextSibling();
330    return (Element) n;
331  }
332
333  public static void getNamedChildren(Element e, String name, List<Element> set) {
334    Element c = getFirstChild(e);
335    while (c != null) {
336      if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) )
337        set.add(c);
338      c = getNextSibling(c);
339    }
340  }
341
342  public static List<Element> getNamedChildren(Element e, String name) {
343    List<Element> res = new ArrayList<Element>();
344    Element c = getFirstChild(e);
345    while (c != null) {
346      if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) )
347        res.add(c);
348      c = getNextSibling(c);
349    }
350    return res;
351  }
352
353  public static String htmlToXmlEscapedPlainText(Element r) {
354    StringBuilder s = new StringBuilder();
355    Node n = r.getFirstChild();
356    boolean ws = false;
357    while (n != null) {
358      if (n.getNodeType() == Node.TEXT_NODE) {
359        String t = n.getTextContent().trim();
360        if (Utilities.noString(t))
361          ws = true;
362        else {
363          if (ws)
364            s.append(" ");
365          ws = false;
366          s.append(t);
367        }
368      }
369      if (n.getNodeType() == Node.ELEMENT_NODE) {
370        if (ws)
371          s.append(" ");
372        ws = false;
373        s.append(htmlToXmlEscapedPlainText((Element) n));
374        if (r.getNodeName().equals("br") || r.getNodeName().equals("p"))
375          s.append("\r\n");
376      }
377      n = n.getNextSibling();      
378    }
379    return s.toString();
380  }
381
382  public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException  {
383    return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement());
384  }
385
386  public static String elementToString(Element el) {
387    if (el == null)
388      return "";
389    Document document = el.getOwnerDocument();
390    DOMImplementationLS domImplLS = (DOMImplementationLS) document
391        .getImplementation();
392    LSSerializer serializer = domImplLS.createLSSerializer();
393    return serializer.writeToString(el);
394  }
395
396  public static String getNamedChildValue(Element element, String name) {
397    Element e = getNamedChild(element, name);
398    return e == null ? null : e.getAttribute("value");
399  }
400
401  public static void setNamedChildValue(Element element, String name, String value) throws FHIRException  {
402    Element e = getNamedChild(element, name);
403    if (e == null)
404      throw new FHIRException("unable to find element "+name);
405    e.setAttribute("value", value);
406  }
407
408
409        public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) {
410    Element c = getFirstChild(focus);
411    while (c != null) {
412        String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
413      if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3))))
414        children.add(c);
415      c = getNextSibling(c);
416    }
417  }
418
419        public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) {
420    Element c = getFirstChild(focus);
421    while (c != null) {
422      String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
423      if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length())))))
424        children.add(c);
425      c = getNextSibling(c);
426    }
427  }
428        
429  public static boolean hasNamedChild(Element e, String name) {
430    Element c = getFirstChild(e);
431    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
432      c = getNextSibling(c);
433    return c != null;
434  }
435
436  public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException  {
437    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
438    factory.setNamespaceAware(false);
439    DocumentBuilder builder = factory.newDocumentBuilder();
440    return builder.parse(new ByteArrayInputStream(content.getBytes()));
441  }
442
443  public static Document parseToDom(byte[] content) throws ParserConfigurationException, SAXException, IOException  {
444    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
445    factory.setNamespaceAware(false);
446    DocumentBuilder builder = factory.newDocumentBuilder();
447    return builder.parse(new ByteArrayInputStream(content));
448  }
449
450  public static Document parseToDom(byte[] content, boolean ns) throws ParserConfigurationException, SAXException, IOException  {
451    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
452    factory.setNamespaceAware(ns);
453    DocumentBuilder builder = factory.newDocumentBuilder();
454    return builder.parse(new ByteArrayInputStream(content));
455  }
456
457  public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException  {
458    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
459    factory.setNamespaceAware(false);
460    DocumentBuilder builder = factory.newDocumentBuilder();
461    return builder.parse(new FileInputStream(filename));
462  }
463
464  public static Element getLastChild(Element e) {
465    if (e == null)
466      return null;
467    Node n = e.getLastChild();
468    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
469      n = n.getPreviousSibling();
470    return (Element) n;
471  }
472
473  public static Element getPrevSibling(Element e) {
474    Node n = e.getPreviousSibling();
475    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
476      n = n.getPreviousSibling();
477    return (Element) n;
478  }
479
480  public static String getNamedChildAttribute(Element element, String name, String aname) {
481    Element e = getNamedChild(element, name);
482    return e == null ? null : e.getAttribute(aname);
483  }
484
485  public static void writeDomToFile(Document doc, String filename) throws TransformerException {
486    TransformerFactory transformerFactory = TransformerFactory.newInstance();
487    Transformer transformer = transformerFactory.newTransformer();
488    DOMSource source = new DOMSource(doc);
489    StreamResult streamResult =  new StreamResult(new File(filename));
490    transformer.transform(source, streamResult);    
491  }
492
493  public static String getXsiType(org.w3c.dom.Element element) {
494    Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type");
495    return (a == null ? null : a.getTextContent());
496    
497  }
498
499        public static String getDirectText(org.w3c.dom.Element node) {
500    Node n = node.getFirstChild();
501    StringBuilder b = new StringBuilder();
502    while (n != null) {
503        if (n.getNodeType() == Node.TEXT_NODE) 
504                b.append(n.getTextContent());
505        n = n.getNextSibling();
506    }
507          return b.toString().trim();
508        }
509
510  public static void deleteByName(Element e, String name) {
511    List<Element> matches = getNamedChildren(e, name);
512    for (Element m : matches)
513      e.removeChild(m);    
514  }
515
516  public static void deleteAttr(Element e, String namespaceURI, String localName) {
517    if (e.hasAttributeNS(namespaceURI, localName))
518      e.removeAttributeNS(namespaceURI, localName);
519    
520  }
521
522  public static Node[] children(Element ed) {
523    Node[] res = new Node[ed.getChildNodes().getLength()];
524    for (int i = 0; i < ed.getChildNodes().getLength(); i++)
525      res[i] = ed.getChildNodes().item(i);
526    return res;
527  }
528
529  public static Element insertChild(Document doc, Element element, String name, String namespace, int indent) {
530    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
531    Element child = doc.createElementNS(namespace, name);
532    element.insertBefore(child, element.getFirstChild());
533    element.insertBefore(node, element.getFirstChild());
534    return child;
535  }
536
537  public static Element insertChild(Document doc, Element element, String name, String namespace, Node before, int indent) {
538    if (before == null) {
539      Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
540      element.insertBefore(node, before);
541    }
542    Element child = doc.createElementNS(namespace, name);
543    element.insertBefore(child, before);
544    if (before != null) {
545      Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
546      element.insertBefore(node, before);
547    }
548    return child;
549  }
550
551  public static void addTextTag(Document doc, Element element, String name, String namespace, String text, int indent) {
552    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
553    element.appendChild(node);
554    Element child = doc.createElementNS(namespace, name);
555    element.appendChild(child);
556    child.setAttribute("value", text);    
557  }
558
559  public static void saveToFile(Element root, OutputStream stream) throws TransformerException {
560    Transformer transformer = TransformerFactory.newInstance().newTransformer();
561    Result output = new StreamResult(stream);
562    Source input = new DOMSource(root);
563
564    transformer.transform(input, output);
565  }
566
567  public static void spacer(Document doc, Element element, int indent) {
568    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
569    element.appendChild(node);
570   
571  }
572
573  public static String getNamedChildText(Element element, String name) {
574    Element e = getNamedChild(element, name);
575    return e == null ? null : e.getTextContent();
576  }
577
578        
579}