001/*
002Copyright (c) 2011+, HL7, Inc
003All rights reserved.
004
005Redistribution and use in source and binary forms, with or without modification, 
006are permitted provided that the following conditions are met:
007
008 * Redistributions of source code must retain the above copyright notice, this 
009   list of conditions and the following disclaimer.
010 * Redistributions in binary form must reproduce the above copyright notice, 
011   this list of conditions and the following disclaimer in the documentation 
012   and/or other materials provided with the distribution.
013 * Neither the name of HL7 nor the names of its contributors may be used to 
014   endorse or promote products derived from this software without specific 
015   prior written permission.
016
017THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
018ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
019WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
020IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
021INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
022NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
023PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
024WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
025ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
026POSSIBILITY OF SUCH DAMAGE.
027
028*/
029package org.hl7.fhir.utilities.xml;
030
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.Utilities;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
059
060public class XMLUtil {
061
062        public static final String SPACE_CHAR = "\u00A0";
063
064  public static boolean isNMToken(String name) {
065                if (name == null)
066                        return false;
067                for (int i = 0; i < name.length(); i++) 
068                        if (!isNMTokenChar(name.charAt(i)))
069                                return false;   
070                return name.length() > 0;
071        }
072
073        public static boolean isNMTokenChar(char c) {
074                return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c);
075        }
076
077        private static boolean isDigit(char c) {
078                return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') || 
079                        (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') || 
080                        (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') || 
081                        (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') || 
082                        (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
083        }
084
085        private static boolean isCombiningChar(char c) {
086                return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || 
087                        (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') || 
088                        c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') || 
089                        c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') || 
090                        (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' || 
091                        (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || 
092                        (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') || 
093                        (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') || 
094                        c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') || 
095                        (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') || 
096                        (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || 
097                        (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') || 
098                        (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') || 
099                        (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') || 
100                        (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || 
101                        (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || 
102                        (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || 
103                        (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || 
104                        (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' || 
105                        c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' || 
106                        (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') || 
107                        (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' || 
108                        (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' || 
109                        (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') ||
110                        c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A';
111        }
112
113        private static boolean isExtender(char c) {
114                return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' || 
115                        c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || 
116                        (c >= '\u30FC' && c <= '\u30FE');
117        }
118
119        private static boolean isLetter(char c) {
120                return isBaseChar(c) || isIdeographic(c);
121        }
122
123        private static boolean isBaseChar(char c) {
124                return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') || 
125                        (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') || 
126                        (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') || 
127                        (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') || 
128                        (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') || 
129                        c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') || 
130                        (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' || 
131                        c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') || 
132                        (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') || 
133                        (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') || 
134                        (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') || 
135                        c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') || 
136                        (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') || 
137                        (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') || 
138                        c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' || 
139                        (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') || 
140                        (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' || 
141                        (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') || 
142                        (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || 
143                        (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || 
144                        (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') || 
145                        c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' || 
146                        (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') || 
147                        (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' || 
148                        (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') || 
149                        (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') || 
150                        c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || 
151                        (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || 
152                        (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') || 
153                        (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') || 
154                        (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') || 
155                        (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') || 
156                        (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') || 
157                        (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') || 
158                        c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || 
159                        (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || 
160                        (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' || 
161                        (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') || 
162                        c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') || 
163                        (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' || 
164                        (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' || 
165                        (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') || 
166                        (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') || 
167                        (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') || 
168                        (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') || 
169                        (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' || 
170                        c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' || 
171                        (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' || 
172                        (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' || 
173                        c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') || 
174                        (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') || 
175                        c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || 
176                        (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || 
177                        (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' || 
178                        (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') || 
179                        c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || 
180                        (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || 
181                        (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' || 
182                        (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') || 
183                        (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') || 
184                        (c >= '\uAC00' && c <= '\uD7A3');
185        }
186
187        private static boolean isIdeographic(char c) {
188                return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
189        }
190
191        public static String determineEncoding(InputStream stream) throws IOException {
192                stream.mark(20000);
193                try {
194                        int b0 = stream.read();
195                        int b1 = stream.read();
196                        int b2 = stream.read();
197                        int b3 = stream.read();
198
199                        if (b0 == 0xFE && b1 == 0xFF)
200                                return "UTF-16BE";
201                        else if (b0 == 0xFF && b1 == 0xFE)
202                                return "UTF-16LE";
203                        else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF )
204                                return "UTF-8";
205                        else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F)
206                                return "UTF-16BE";
207                        else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00)
208                                return "UTF-16LE";
209                        else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) {
210//                              UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding 
211//                              which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding 
212//                              declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns 
213//                              for the relevant ASCII characters, the encoding declaration itself may be read reliably
214                                InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII");
215                                String hdr = readFirstLine(rdr);
216                                return extractEncoding(hdr); 
217                        } else
218                                return null;
219                } finally {
220                        stream.reset();
221                }
222        }
223
224        private static String extractEncoding(String hdr) {
225                int i = hdr.indexOf("encoding=");
226                if (i == -1)
227                        return null;
228                hdr = hdr.substring(i+9);
229                char sep = hdr.charAt(0);
230                hdr = hdr.substring(1);
231                i = hdr.indexOf(sep);
232                if (i == -1)
233                        return null;
234                return hdr.substring(0, i);
235        }
236
237        private static String readFirstLine(InputStreamReader rdr) throws IOException {
238                char[] buf = new char[1];
239                StringBuffer bldr = new StringBuffer();
240                rdr.read(buf);
241                while (buf[0] != '>') {
242                        bldr.append(buf[0]);
243                        rdr.read(buf);
244                }
245                return bldr.toString();
246        }
247
248        
249    public static boolean charSetImpliesAscii(String charset) {
250                return charset.equals("ISO-8859-1") || charset.equals("US-ASCII");
251        }
252
253        
254        /**
255         * Converts the raw characters to XML escape characters.
256         * 
257         * @param rawContent
258         * @param charset Null when charset is not known, so we assume it's unicode
259         * @param isNoLines
260         * @return escape string
261         */
262        public static String escapeXML(String rawContent, String charset, boolean isNoLines) {
263                if (rawContent == null)
264                        return "";
265                else {
266                        StringBuffer sb = new StringBuffer();
267
268                        for (int i = 0; i < rawContent.length(); i++) {
269                                char ch = rawContent.charAt(i);
270                                if (ch == '\'')
271                                        sb.append("&#39;");
272                                else if (ch == '&')
273                                        sb.append("&amp;");
274                                else if (ch == '"')
275                                        sb.append("&quot;");
276                                else if (ch == '<')
277                                        sb.append("&lt;");
278                                else if (ch == '>')
279                                        sb.append("&gt;");
280                                else if (ch > '~' && charset != null && charSetImpliesAscii(charset)) 
281                                        // TODO - why is hashcode the only way to get the unicode number for the character
282                                        // in jre 5.0?
283                                        sb.append("&#x"+Integer.toHexString(ch).toUpperCase()+";");
284                                else if (isNoLines) {
285                                        if (ch == '\r')
286                                                sb.append("&#xA;");
287                                        else if (ch != '\n')
288                                                sb.append(ch);
289                                }
290                                else
291                                        sb.append(ch);
292                        }
293                        return sb.toString();
294                }
295        }
296
297  public static Element getFirstChild(Element e) {
298    if (e == null)
299      return null;
300    Node n = e.getFirstChild();
301    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
302      n = n.getNextSibling();
303    return (Element) n;
304  }
305
306  public static Element getNamedChild(Element e, String name) {
307    Element c = getFirstChild(e);
308    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
309      c = getNextSibling(c);
310    return c;
311  }
312
313  public static Element getNamedChildByAttribute(Element e, String name, String nname, String nvalue) {
314    Element c = getFirstChild(e);
315    while (c != null && !((name.equals(c.getLocalName()) || name.equals(c.getNodeName())) && nvalue.equals(c.getAttribute(nname))))
316      c = getNextSibling(c);
317    return c;
318  }
319
320  public static Element getNextSibling(Element e) {
321    Node n = e.getNextSibling();
322    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
323      n = n.getNextSibling();
324    return (Element) n;
325  }
326
327  public static void getNamedChildren(Element e, String name, List<Element> set) {
328    Element c = getFirstChild(e);
329    while (c != null) {
330      if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) )
331        set.add(c);
332      c = getNextSibling(c);
333    }
334  }
335
336  public static List<Element> getNamedChildren(Element e, String name) {
337    List<Element> res = new ArrayList<Element>();
338    Element c = getFirstChild(e);
339    while (c != null) {
340      if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) )
341        res.add(c);
342      c = getNextSibling(c);
343    }
344    return res;
345  }
346
347  public static String htmlToXmlEscapedPlainText(Element r) {
348    StringBuilder s = new StringBuilder();
349    Node n = r.getFirstChild();
350    boolean ws = false;
351    while (n != null) {
352      if (n.getNodeType() == Node.TEXT_NODE) {
353        String t = n.getTextContent().trim();
354        if (Utilities.noString(t))
355          ws = true;
356        else {
357          if (ws)
358            s.append(" ");
359          ws = false;
360          s.append(t);
361        }
362      }
363      if (n.getNodeType() == Node.ELEMENT_NODE) {
364        if (ws)
365          s.append(" ");
366        ws = false;
367        s.append(htmlToXmlEscapedPlainText((Element) n));
368        if (r.getNodeName().equals("br") || r.getNodeName().equals("p"))
369          s.append("\r\n");
370      }
371      n = n.getNextSibling();      
372    }
373    return s.toString();
374  }
375
376  public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException  {
377    return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement());
378  }
379
380  public static String elementToString(Element el) {
381    if (el == null)
382      return "";
383    Document document = el.getOwnerDocument();
384    DOMImplementationLS domImplLS = (DOMImplementationLS) document
385        .getImplementation();
386    LSSerializer serializer = domImplLS.createLSSerializer();
387    return serializer.writeToString(el);
388  }
389
390  public static String getNamedChildValue(Element element, String name) {
391    Element e = getNamedChild(element, name);
392    return e == null ? null : e.getAttribute("value");
393  }
394
395  public static void setNamedChildValue(Element element, String name, String value) throws FHIRException  {
396    Element e = getNamedChild(element, name);
397    if (e == null)
398      throw new FHIRException("unable to find element "+name);
399    e.setAttribute("value", value);
400  }
401
402
403        public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) {
404    Element c = getFirstChild(focus);
405    while (c != null) {
406        String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
407      if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3))))
408        children.add(c);
409      c = getNextSibling(c);
410    }
411  }
412
413        public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) {
414    Element c = getFirstChild(focus);
415    while (c != null) {
416      String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
417      if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length())))))
418        children.add(c);
419      c = getNextSibling(c);
420    }
421  }
422        
423  public static boolean hasNamedChild(Element e, String name) {
424    Element c = getFirstChild(e);
425    while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName()))
426      c = getNextSibling(c);
427    return c != null;
428  }
429
430  public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException  {
431    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
432    factory.setNamespaceAware(false);
433    DocumentBuilder builder = factory.newDocumentBuilder();
434    return builder.parse(new ByteArrayInputStream(content.getBytes()));
435  }
436
437  public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException  {
438    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
439    factory.setNamespaceAware(false);
440    DocumentBuilder builder = factory.newDocumentBuilder();
441    return builder.parse(new FileInputStream(filename));
442  }
443
444  public static Element getLastChild(Element e) {
445    if (e == null)
446      return null;
447    Node n = e.getLastChild();
448    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
449      n = n.getPreviousSibling();
450    return (Element) n;
451  }
452
453  public static Element getPrevSibling(Element e) {
454    Node n = e.getPreviousSibling();
455    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
456      n = n.getPreviousSibling();
457    return (Element) n;
458  }
459
460  public static String getNamedChildAttribute(Element element, String name, String aname) {
461    Element e = getNamedChild(element, name);
462    return e == null ? null : e.getAttribute(aname);
463  }
464
465  public static void writeDomToFile(Document doc, String filename) throws TransformerException {
466    TransformerFactory transformerFactory = TransformerFactory.newInstance();
467    Transformer transformer = transformerFactory.newTransformer();
468    DOMSource source = new DOMSource(doc);
469    StreamResult streamResult =  new StreamResult(new File(filename));
470    transformer.transform(source, streamResult);    
471  }
472
473  public static String getXsiType(org.w3c.dom.Element element) {
474    Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type");
475    return (a == null ? null : a.getTextContent());
476    
477  }
478
479        public static String getDirectText(org.w3c.dom.Element node) {
480    Node n = node.getFirstChild();
481    StringBuilder b = new StringBuilder();
482    while (n != null) {
483        if (n.getNodeType() == Node.TEXT_NODE) 
484                b.append(n.getTextContent());
485        n = n.getNextSibling();
486    }
487          return b.toString().trim();
488        }
489
490  public static void deleteByName(Element e, String name) {
491    List<Element> matches = getNamedChildren(e, name);
492    for (Element m : matches)
493      e.removeChild(m);    
494  }
495
496  public static void deleteAttr(Element e, String namespaceURI, String localName) {
497    if (e.hasAttributeNS(namespaceURI, localName))
498      e.removeAttributeNS(namespaceURI, localName);
499    
500  }
501
502        
503}