001/*
002Copyright (c) 2011+, HL7, Inc
003All rights reserved.
004
005Redistribution and use in source and binary forms, with or without modification, 
006are permitted provided that the following conditions are met:
007
008 * Redistributions of source code must retain the above copyright notice, this 
009   list of conditions and the following disclaimer.
010 * Redistributions in binary form must reproduce the above copyright notice, 
011   this list of conditions and the following disclaimer in the documentation 
012   and/or other materials provided with the distribution.
013 * Neither the name of HL7 nor the names of its contributors may be used to 
014   endorse or promote products derived from this software without specific 
015   prior written permission.
016
017THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
018ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
019WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
020IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
021INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
022NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
023PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
024WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
025ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
026POSSIBILITY OF SUCH DAMAGE.
027
028*/
029package org.hl7.fhir.utilities.xml;
030
031import java.io.ByteArrayInputStream;
032import java.io.File;
033import java.io.FileInputStream;
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.InputStreamReader;
037import java.io.OutputStream;
038import java.util.ArrayList;
039import java.util.List;
040import java.util.Set;
041
042import javax.xml.parsers.DocumentBuilder;
043import javax.xml.parsers.DocumentBuilderFactory;
044import javax.xml.parsers.ParserConfigurationException;
045import javax.xml.transform.Result;
046import javax.xml.transform.Source;
047import javax.xml.transform.Transformer;
048import javax.xml.transform.TransformerException;
049import javax.xml.transform.TransformerFactory;
050import javax.xml.transform.dom.DOMSource;
051import javax.xml.transform.stream.StreamResult;
052
053import org.hl7.fhir.exceptions.FHIRException;
054import org.hl7.fhir.utilities.Utilities;
055import org.w3c.dom.Attr;
056import org.w3c.dom.Document;
057import org.w3c.dom.Element;
058import org.w3c.dom.Node;
059import org.w3c.dom.ls.DOMImplementationLS;
060import org.w3c.dom.ls.LSSerializer;
061import org.xml.sax.SAXException;
062
063public class XMLUtil {
064
065        public static final String SPACE_CHAR = "\u00A0";
066
067  public static boolean isNMToken(String name) {
068                if (name == null)
069                        return false;
070                for (int i = 0; i < name.length(); i++) 
071                        if (!isNMTokenChar(name.charAt(i)))
072                                return false;   
073                return name.length() > 0;
074        }
075
076        public static boolean isNMTokenChar(char c) {
077                return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c);
078        }
079
080        private static boolean isDigit(char c) {
081                return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') || 
082                        (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') || 
083                        (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') || 
084                        (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') || 
085                        (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
086        }
087
088        private static boolean isCombiningChar(char c) {
089                return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || 
090                        (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') || 
091                        c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') || 
092                        c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') || 
093                        (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' || 
094                        (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || 
095                        (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') || 
096                        (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') || 
097                        c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') || 
098                        (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') || 
099                        (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || 
100                        (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') || 
101                        (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') || 
102                        (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') || 
103                        (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || 
104                        (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || 
105                        (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || 
106                        (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || 
107                        (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' || 
108                        c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' || 
109                        (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') || 
110                        (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' || 
111                        (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' || 
112                        (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') ||
113                        c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A';
114        }
115
116        private static boolean isExtender(char c) {
117                return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' || 
118                        c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || 
119                        (c >= '\u30FC' && c <= '\u30FE');
120        }
121
122        private static boolean isLetter(char c) {
123                return isBaseChar(c) || isIdeographic(c);
124        }
125
126        private static boolean isBaseChar(char c) {
127                return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') || 
128                        (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') || 
129                        (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') || 
130                        (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') || 
131                        (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') || 
132                        c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') || 
133                        (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' || 
134                        c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') || 
135                        (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') || 
136                        (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') || 
137                        (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') || 
138                        c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') || 
139                        (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') || 
140                        (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') || 
141                        c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' || 
142                        (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') || 
143                        (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' || 
144                        (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') || 
145                        (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || 
146                        (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || 
147                        (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') || 
148                        c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' || 
149                        (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') || 
150                        (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' || 
151                        (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') || 
152                        (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') || 
153                        c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || 
154                        (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || 
155                        (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') || 
156                        (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') || 
157                        (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') || 
158                        (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') || 
159                        (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') || 
160                        (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') || 
161                        c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || 
162                        (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || 
163                        (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' || 
164                        (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') || 
165                        c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') || 
166                        (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' || 
167                        (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' || 
168                        (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') || 
169                        (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') || 
170                        (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') || 
171                        (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') || 
172                        (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' || 
173                        c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' || 
174                        (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' || 
175                        (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' || 
176                        c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') || 
177                        (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') || 
178                        c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || 
179                        (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || 
180                        (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' || 
181                        (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') || 
182                        c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || 
183                        (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || 
184                        (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' || 
185                        (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') || 
186                        (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') || 
187                        (c >= '\uAC00' && c <= '\uD7A3');
188        }
189
190        private static boolean isIdeographic(char c) {
191                return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
192        }
193
194        public static String determineEncoding(InputStream stream) throws IOException {
195                stream.mark(20000);
196                try {
197                        int b0 = stream.read();
198                        int b1 = stream.read();
199                        int b2 = stream.read();
200                        int b3 = stream.read();
201
202                        if (b0 == 0xFE && b1 == 0xFF)
203                                return "UTF-16BE";
204                        else if (b0 == 0xFF && b1 == 0xFE)
205                                return "UTF-16LE";
206                        else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF )
207                                return "UTF-8";
208                        else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F)
209                                return "UTF-16BE";
210                        else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00)
211                                return "UTF-16LE";
212                        else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) {
213//                              UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding 
214//                              which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding 
215//                              declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns 
216//                              for the relevant ASCII characters, the encoding declaration itself may be read reliably
217                                InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII");
218                                String hdr = readFirstLine(rdr);
219                                return extractEncoding(hdr); 
220                        } else
221                                return null;
222                } finally {
223                        stream.reset();
224                }
225        }
226
227        private static String extractEncoding(String hdr) {
228                int i = hdr.indexOf("encoding=");
229                if (i == -1)
230                        return null;
231                hdr = hdr.substring(i+9);
232                char sep = hdr.charAt(0);
233                hdr = hdr.substring(1);
234                i = hdr.indexOf(sep);
235                if (i == -1)
236                        return null;
237                return hdr.substring(0, i);
238        }
239
240        private static String readFirstLine(InputStreamReader rdr) throws IOException {
241                char[] buf = new char[1];
242                StringBuffer bldr = new StringBuffer();
243                rdr.read(buf);
244                while (buf[0] != '>') {
245                        bldr.append(buf[0]);
246                        rdr.read(buf);
247                }
248                return bldr.toString();
249        }
250
251        
252    public static boolean charSetImpliesAscii(String charset) {
253                return charset.equals("ISO-8859-1") || charset.equals("US-ASCII");
254        }
255
256        
257        /**
258         * Converts the raw characters to XML escape characters.
259         * 
260         * @param rawContent
261         * @param charset Null when charset is not known, so we assume it's unicode
262         * @param isNoLines
263         * @return escape string
264         */
265        public static String escapeXML(String rawContent, String charset, boolean isNoLines) {
266                if (rawContent == null)
267                        return "";
268                else {
269                        StringBuffer sb = new StringBuffer();
270
271                        for (int i = 0; i < rawContent.length(); i++) {
272                                char ch = rawContent.charAt(i);
273                                if (ch == '\'')
274                                        sb.append("&#39;");
275                                else if (ch == '&')
276                                        sb.append("&amp;");
277                                else if (ch == '"')
278                                        sb.append("&quot;");
279                                else if (ch == '<')
280                                        sb.append("&lt;");
281                                else if (ch == '>')
282                                        sb.append("&gt;");
283                                else if (ch > '~' && charset != null && charSetImpliesAscii(charset)) 
284                                        // TODO - why is hashcode the only way to get the unicode number for the character
285                                        // in jre 5.0?
286                                        sb.append("&#x"+Integer.toHexString(ch).toUpperCase()+";");
287                                else if (isNoLines) {
288                                        if (ch == '\r')
289                                                sb.append("&#xA;");
290                                        else if (ch != '\n')
291                                                sb.append(ch);
292                                }
293                                else
294                                        sb.append(ch);
295                        }
296                        return sb.toString();
297                }
298        }
299
300  public static Element getFirstChild(Element e) {
301    if (e == null)
302      return null;
303    Node n = e.getFirstChild();
304    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
305      n = n.getNextSibling();
306    return (Element) n;
307  }
308
309  public static Element getNamedChild(Element e, String name) {
310    Element c = getFirstChild(e);
311    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
312      c = getNextSibling(c);
313    return c;
314  }
315
316  public static Element getNamedChildByAttribute(Element e, String name, String nname, String nvalue) {
317    Element c = getFirstChild(e);
318    while (c != null && !((name.equals(c.getLocalName()) || name.equals(c.getNodeName())) && nvalue.equals(c.getAttribute(nname))))
319      c = getNextSibling(c);
320    return c;
321  }
322
323  public static Element getNextSibling(Element e) {
324    Node n = e.getNextSibling();
325    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
326      n = n.getNextSibling();
327    return (Element) n;
328  }
329
330  public static void getNamedChildren(Element e, String name, List<Element> set) {
331    Element c = getFirstChild(e);
332    while (c != null) {
333      if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) )
334        set.add(c);
335      c = getNextSibling(c);
336    }
337  }
338
339  public static List<Element> getNamedChildren(Element e, String name) {
340    List<Element> res = new ArrayList<Element>();
341    Element c = getFirstChild(e);
342    while (c != null) {
343      if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) )
344        res.add(c);
345      c = getNextSibling(c);
346    }
347    return res;
348  }
349
350  public static String htmlToXmlEscapedPlainText(Element r) {
351    StringBuilder s = new StringBuilder();
352    Node n = r.getFirstChild();
353    boolean ws = false;
354    while (n != null) {
355      if (n.getNodeType() == Node.TEXT_NODE) {
356        String t = n.getTextContent().trim();
357        if (Utilities.noString(t))
358          ws = true;
359        else {
360          if (ws)
361            s.append(" ");
362          ws = false;
363          s.append(t);
364        }
365      }
366      if (n.getNodeType() == Node.ELEMENT_NODE) {
367        if (ws)
368          s.append(" ");
369        ws = false;
370        s.append(htmlToXmlEscapedPlainText((Element) n));
371        if (r.getNodeName().equals("br") || r.getNodeName().equals("p"))
372          s.append("\r\n");
373      }
374      n = n.getNextSibling();      
375    }
376    return s.toString();
377  }
378
379  public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException  {
380    return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement());
381  }
382
383  public static String elementToString(Element el) {
384    if (el == null)
385      return "";
386    Document document = el.getOwnerDocument();
387    DOMImplementationLS domImplLS = (DOMImplementationLS) document
388        .getImplementation();
389    LSSerializer serializer = domImplLS.createLSSerializer();
390    return serializer.writeToString(el);
391  }
392
393  public static String getNamedChildValue(Element element, String name) {
394    Element e = getNamedChild(element, name);
395    return e == null ? null : e.getAttribute("value");
396  }
397
398  public static void setNamedChildValue(Element element, String name, String value) throws FHIRException  {
399    Element e = getNamedChild(element, name);
400    if (e == null)
401      throw new FHIRException("unable to find element "+name);
402    e.setAttribute("value", value);
403  }
404
405
406        public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) {
407    Element c = getFirstChild(focus);
408    while (c != null) {
409        String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
410      if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3))))
411        children.add(c);
412      c = getNextSibling(c);
413    }
414  }
415
416        public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) {
417    Element c = getFirstChild(focus);
418    while (c != null) {
419      String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
420      if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length())))))
421        children.add(c);
422      c = getNextSibling(c);
423    }
424  }
425        
426  public static boolean hasNamedChild(Element e, String name) {
427    Element c = getFirstChild(e);
428    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
429      c = getNextSibling(c);
430    return c != null;
431  }
432
433  public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException  {
434    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
435    factory.setNamespaceAware(false);
436    DocumentBuilder builder = factory.newDocumentBuilder();
437    return builder.parse(new ByteArrayInputStream(content.getBytes()));
438  }
439
440  public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException  {
441    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
442    factory.setNamespaceAware(false);
443    DocumentBuilder builder = factory.newDocumentBuilder();
444    return builder.parse(new FileInputStream(filename));
445  }
446
447  public static Element getLastChild(Element e) {
448    if (e == null)
449      return null;
450    Node n = e.getLastChild();
451    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
452      n = n.getPreviousSibling();
453    return (Element) n;
454  }
455
456  public static Element getPrevSibling(Element e) {
457    Node n = e.getPreviousSibling();
458    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
459      n = n.getPreviousSibling();
460    return (Element) n;
461  }
462
463  public static String getNamedChildAttribute(Element element, String name, String aname) {
464    Element e = getNamedChild(element, name);
465    return e == null ? null : e.getAttribute(aname);
466  }
467
468  public static void writeDomToFile(Document doc, String filename) throws TransformerException {
469    TransformerFactory transformerFactory = TransformerFactory.newInstance();
470    Transformer transformer = transformerFactory.newTransformer();
471    DOMSource source = new DOMSource(doc);
472    StreamResult streamResult =  new StreamResult(new File(filename));
473    transformer.transform(source, streamResult);    
474  }
475
476  public static String getXsiType(org.w3c.dom.Element element) {
477    Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type");
478    return (a == null ? null : a.getTextContent());
479    
480  }
481
482        public static String getDirectText(org.w3c.dom.Element node) {
483    Node n = node.getFirstChild();
484    StringBuilder b = new StringBuilder();
485    while (n != null) {
486        if (n.getNodeType() == Node.TEXT_NODE) 
487                b.append(n.getTextContent());
488        n = n.getNextSibling();
489    }
490          return b.toString().trim();
491        }
492
493  public static void deleteByName(Element e, String name) {
494    List<Element> matches = getNamedChildren(e, name);
495    for (Element m : matches)
496      e.removeChild(m);    
497  }
498
499  public static void deleteAttr(Element e, String namespaceURI, String localName) {
500    if (e.hasAttributeNS(namespaceURI, localName))
501      e.removeAttributeNS(namespaceURI, localName);
502    
503  }
504
505  public static Node[] children(Element ed) {
506    Node[] res = new Node[ed.getChildNodes().getLength()];
507    for (int i = 0; i < ed.getChildNodes().getLength(); i++)
508      res[i] = ed.getChildNodes().item(i);
509    return res;
510  }
511
512  public static Element insertChild(Document doc, Element element, String name, String namespace, int indent) {
513    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
514    Element child = doc.createElementNS(namespace, name);
515    element.insertBefore(child, element.getFirstChild());
516    element.insertBefore(node, element.getFirstChild());
517    return child;
518  }
519
520  public static Element insertChild(Document doc, Element element, String name, String namespace, Node before, int indent) {
521    if (before == null) {
522      Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
523      element.insertBefore(node, before);
524    }
525    Element child = doc.createElementNS(namespace, name);
526    element.insertBefore(child, before);
527    if (before != null) {
528      Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
529      element.insertBefore(node, before);
530    }
531    return child;
532  }
533
534  public static void addTextTag(Document doc, Element element, String name, String namespace, String text, int indent) {
535    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
536    element.appendChild(node);
537    Element child = doc.createElementNS(namespace, name);
538    element.appendChild(child);
539    child.setAttribute("value", text);    
540  }
541
542  public static void saveToFile(Element root, OutputStream stream) throws TransformerException {
543    Transformer transformer = TransformerFactory.newInstance().newTransformer();
544    Result output = new StreamResult(stream);
545    Source input = new DOMSource(root);
546
547    transformer.transform(input, output);
548  }
549
550  public static void spacer(Document doc, Element element, int indent) {
551    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
552    element.appendChild(node);
553   
554  }
555
556  public static String getNamedChildText(Element element, String name) {
557    Element e = getNamedChild(element, name);
558    return e == null ? null : e.getTextContent();
559  }
560
561        
562}