/*
Copyright (c) 2011+, HL7, Inc
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of HL7 nor the names of its contributors may be used to
   endorse or promote products derived from this software without specific
   prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

*/
package org.hl7.fhir.utilities.xml;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.Utilities;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Static helpers for working with XML: NMTOKEN character classification, encoding sniffing,
 * text escaping, and navigation / mutation of W3C DOM trees.
 */
public class XMLUtil {

  /** A string holding the single character U+00A0. */
  public static final String SPACE_CHAR = "\u00A0";

  /**
   * Tests whether a string is non-empty and consists only of characters accepted by
   * {@link #isNMTokenChar(char)}.
   *
   * @param name the candidate token; may be null
   * @return true if {@code name} is non-null, non-empty and every char passes
   *         {@link #isNMTokenChar(char)}
   */
  public static boolean isNMToken(String name) {
    if (name == null)
      return false;
    for (int i = 0; i < name.length(); i++)
      if (!isNMTokenChar(name.charAt(i)))
        return false;
    return name.length() > 0;
  }

  /**
   * Tests whether a char is a letter, a digit, one of {@code . - _ :}, a combining character or an
   * extender, according to the range tables in this class.
   *
   * @param c the char to classify
   * @return true if the char is allowed in a name token
   */
  public static boolean isNMTokenChar(char c) {
    return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c);
  }

  /** Range table of digit characters. */
  private static boolean isDigit(char c) {
    return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') ||
      (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') ||
      (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') ||
      (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') ||
      (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
  }

  /** Range table of combining characters. */
  private static boolean isCombiningChar(char c) {
    return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') ||
      (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') ||
      c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') ||
      c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') ||
      (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' ||
      (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') ||
      (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') ||
      (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') ||
      c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') ||
      (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') ||
      (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') ||
      (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') ||
      (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') ||
      (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') ||
      (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') ||
      (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') ||
      (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') ||
      (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') ||
      (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' ||
      c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' ||
      (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') ||
      (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' ||
      (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' ||
      (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') ||
      c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A';
  }

  /** Range table of extender characters. */
  private static boolean isExtender(char c) {
    return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' ||
      c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') ||
      (c >= '\u30FC' && c <= '\u30FE');
  }

  /** A letter is either a base character or an ideographic character. */
  private static boolean isLetter(char c) {
    return isBaseChar(c) || isIdeographic(c);
  }

  /** Range table of base (alphabetic / syllabic) characters. */
  private static boolean isBaseChar(char c) {
    return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') ||
      (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') ||
      (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') ||
      (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') ||
      (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') ||
      c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') ||
      (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' ||
      c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') ||
      (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') ||
      (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') ||
      (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') ||
      c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') ||
      (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') ||
      (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') ||
      c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' ||
      (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') ||
      (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' ||
      (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') ||
      (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') ||
      (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') ||
      (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') ||
      c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' ||
      (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') ||
      (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' ||
      (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') ||
      (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') ||
      c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') ||
      (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') ||
      (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') ||
      (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') ||
      (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') ||
      (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') ||
      (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') ||
      (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') ||
      c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') ||
      (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') ||
      (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' ||
      (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') ||
      c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') ||
      (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' ||
      (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' ||
      (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') ||
      (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') ||
      (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') ||
      (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') ||
      (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' ||
      c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' ||
      (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' ||
      (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' ||
      c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') ||
      (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') ||
      c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') ||
      (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') ||
      (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' ||
      (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') ||
      c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') ||
      (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') ||
      (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' ||
      (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') ||
      (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') ||
      (c >= '\uAC00' && c <= '\uD7A3');
  }

  /** Range table of ideographic characters. */
  private static boolean isIdeographic(char c) {
    return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
  }

  /**
   * Sniffs the character encoding of an XML stream from its first bytes (byte-order mark, or the
   * byte pattern of {@code <?} / {@code <?xm}), reading the XML declaration when needed. The stream
   * is reset to its starting position before returning.
   *
   * @param stream the stream to inspect; must support mark/reset
   * @return the encoding name, or null if it cannot be determined from the leading bytes or the
   *         declaration carries no encoding
   * @throws IOException if the stream does not support mark/reset or reading fails
   */
  public static String determineEncoding(InputStream stream) throws IOException {
    // Fail before consuming anything: without mark support the reset() below would throw anyway,
    // but only after bytes had already been eaten from the caller's stream.
    if (!stream.markSupported())
      throw new IOException("determineEncoding requires a stream that supports mark/reset");
    stream.mark(20000);
    try {
      int b0 = stream.read();
      int b1 = stream.read();
      int b2 = stream.read();
      int b3 = stream.read();

      if (b0 == 0xFE && b1 == 0xFF)
        return "UTF-16BE";
      else if (b0 == 0xFF && b1 == 0xFE)
        return "UTF-16LE";
      else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF)
        return "UTF-8";
      else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F)
        return "UTF-16BE";
      else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00)
        return "UTF-16LE";
      else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) {
        // UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding
        // which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding
        // declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns
        // for the relevant ASCII characters, the encoding declaration itself may be read reliably
        InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII");
        String hdr = readFirstLine(rdr);
        return extractEncoding(hdr);
      } else
        return null;
    } finally {
      stream.reset();
    }
  }

  /**
   * Pulls the quoted value that follows {@code encoding=} out of an XML declaration.
   *
   * @param hdr the declaration text
   * @return the encoding value, or null if there is no complete quoted {@code encoding=} value
   */
  private static String extractEncoding(String hdr) {
    final String key = "encoding=";
    int start = hdr.indexOf(key);
    if (start == -1)
      return null;
    String rest = hdr.substring(start + key.length());
    // A truncated header can end right after "encoding="; there is no quote character to read.
    if (rest.length() == 0)
      return null;
    char sep = rest.charAt(0);
    int end = rest.indexOf(sep, 1);
    if (end == -1)
      return null;
    return rest.substring(1, end);
  }

  /**
   * Reads characters up to, but not including, the first {@code >}.
   *
   * @param rdr the reader to consume
   * @return the characters read; if the input ends before a {@code >} is seen, everything that was read
   * @throws IOException if reading fails
   */
  private static String readFirstLine(InputStreamReader rdr) throws IOException {
    StringBuilder bldr = new StringBuilder();
    int c = rdr.read();
    // Stop at end of stream as well as at '>' so a truncated document cannot loop forever.
    while (c != -1 && c != '>') {
      bldr.append((char) c);
      c = rdr.read();
    }
    return bldr.toString();
  }


  /**
   * Tests whether a charset name is one for which {@link #escapeXML} writes characters above
   * {@code '~'} as numeric character references.
   *
   * @param charset the charset name; may be null
   * @return true for exactly {@code "ISO-8859-1"} and {@code "US-ASCII"}; false otherwise, including null
   */
  public static boolean charSetImpliesAscii(String charset) {
    return "ISO-8859-1".equals(charset) || "US-ASCII".equals(charset);
  }


  /**
   * Converts the raw characters to XML escape characters.
   *
   * @param rawContent the text to escape; null yields an empty string
   * @param charset Null when charset is not known, so we assume it's unicode
   * @param isNoLines when true, carriage returns are written as a character reference and line
   *        feeds are dropped
   * @return escape string
   */
  public static String escapeXML(String rawContent, String charset, boolean isNoLines) {
    if (rawContent == null)
      return "";
    else {
      // NOTE(review): the entity literals below were reconstructed from a listing in which they
      // had been entity-decoded; confirm the "&#39;" and "&#xA;" spellings against the canonical source.
      StringBuilder sb = new StringBuilder();

      for (int i = 0; i < rawContent.length(); i++) {
        char ch = rawContent.charAt(i);
        if (ch == '\'')
          sb.append("&#39;");
        else if (ch == '&')
          sb.append("&amp;");
        else if (ch == '"')
          sb.append("&quot;");
        else if (ch == '<')
          sb.append("&lt;");
        else if (ch == '>')
          sb.append("&gt;");
        else if (ch > '~' && charset != null && charSetImpliesAscii(charset)) {
          // Escape by code point: a surrogate pair must become ONE reference, since a
          // reference to a lone surrogate is not a legal XML character reference.
          int codePoint = rawContent.codePointAt(i);
          if (Character.isSupplementaryCodePoint(codePoint))
            i++; // the low surrogate was consumed as part of this code point
          sb.append("&#x"+Integer.toHexString(codePoint).toUpperCase()+";");
        }
        else if (isNoLines) {
          if (ch == '\r')
            sb.append("&#xA;");
          else if (ch != '\n')
            sb.append(ch);
        }
        else
          sb.append(ch);
      }
      return sb.toString();
    }
  }

  /** True when the element's local name or node name equals {@code name}. */
  private static boolean matchesName(Element c, String name) {
    return name.equals(c.getLocalName()) || name.equals(c.getNodeName());
  }

  /** Walks forward from {@code n} (inclusive) to the first element node, or null. */
  private static Element firstElementForward(Node n) {
    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
      n = n.getNextSibling();
    return (Element) n;
  }

  /** Walks backward from {@code n} (inclusive) to the first element node, or null. */
  private static Element firstElementBackward(Node n) {
    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
      n = n.getPreviousSibling();
    return (Element) n;
  }

  /** Creates the text node (newline plus {@code indent} spaces of padding) used to indent inserted children. */
  private static Node indentNode(Document doc, int indent) {
    return doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
  }

  /**
   * Returns the first child of {@code e} that is an element.
   *
   * @param e the parent; may be null
   * @return the first element child, or null if {@code e} is null or has no element children
   */
  public static Element getFirstChild(Element e) {
    if (e == null)
      return null;
    return firstElementForward(e.getFirstChild());
  }

  /**
   * Returns the first element child of {@code e} whose local name or node name equals {@code name}.
   *
   * @param e the parent; may be null
   * @param name the name to look for
   * @return the matching child, or null if none
   */
  public static Element getNamedChild(Element e, String name) {
    Element c = getFirstChild(e);
    while (c != null && !matchesName(c, name))
      c = getNextSibling(c);
    return c;
  }

  /**
   * Returns the first element child of {@code e} that matches {@code name} and whose attribute
   * {@code nname} equals {@code nvalue}.
   *
   * @param e the parent; may be null
   * @param name the element name to look for
   * @param nname the attribute name to test
   * @param nvalue the required attribute value
   * @return the matching child, or null if none
   */
  public static Element getNamedChildByAttribute(Element e, String name, String nname, String nvalue) {
    Element c = getFirstChild(e);
    while (c != null && !(matchesName(c, name) && nvalue.equals(c.getAttribute(nname))))
      c = getNextSibling(c);
    return c;
  }

  /**
   * Returns the next sibling of {@code e} that is an element.
   *
   * @param e the starting element; must not be null
   * @return the next element sibling, or null if there is none
   */
  public static Element getNextSibling(Element e) {
    return firstElementForward(e.getNextSibling());
  }

  /**
   * Appends to {@code set} every element child of {@code e} whose local name or node name equals
   * {@code name}, in document order.
   *
   * @param e the parent; may be null
   * @param name the name to look for
   * @param set the list that receives the matches
   */
  public static void getNamedChildren(Element e, String name, List<Element> set) {
    Element c = getFirstChild(e);
    while (c != null) {
      if (matchesName(c, name))
        set.add(c);
      c = getNextSibling(c);
    }
  }

  /**
   * Returns every element child of {@code e} whose local name or node name equals {@code name}.
   *
   * @param e the parent; may be null
   * @param name the name to look for
   * @return a new list of the matches, in document order; empty if there are none
   */
  public static List<Element> getNamedChildren(Element e, String name) {
    List<Element> res = new ArrayList<Element>();
    getNamedChildren(e, name, res);
    return res;
  }

  /**
   * Flattens the text beneath {@code r} into a single string: text nodes are trimmed,
   * whitespace-only text between pieces becomes a single space, and child elements are flattened
   * recursively.
   *
   * @param r the element to flatten
   * @return the concatenated text
   */
  public static String htmlToXmlEscapedPlainText(Element r) {
    StringBuilder s = new StringBuilder();
    Node n = r.getFirstChild();
    boolean ws = false;
    while (n != null) {
      if (n.getNodeType() == Node.TEXT_NODE) {
        String t = n.getTextContent().trim();
        if (Utilities.noString(t))
          ws = true;
        else {
          if (ws)
            s.append(" ");
          ws = false;
          s.append(t);
        }
      }
      if (n.getNodeType() == Node.ELEMENT_NODE) {
        if (ws)
          s.append(" ");
        ws = false;
        s.append(htmlToXmlEscapedPlainText((Element) n));
        // NOTE(review): this tests the PARENT (r), not the child just processed (n), so a line
        // break is added after every child element of a <p>, and the "br" case looks unreachable
        // in practice. Possibly n was intended; left as-is because it changes output -- confirm.
        if (r.getNodeName().equals("br") || r.getNodeName().equals("p"))
          s.append("\r\n");
      }
      n = n.getNextSibling();
    }
    return s.toString();
  }

  /**
   * Wraps {@code definition} in a {@code <div>}, parses it as XML and flattens it with
   * {@link #htmlToXmlEscapedPlainText(Element)}.
   *
   * @param definition the markup fragment
   * @return the concatenated text
   * @throws ParserConfigurationException if a parser cannot be created
   * @throws SAXException if the wrapped fragment is not well-formed
   * @throws IOException if reading the content fails
   */
  public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException {
    return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement());
  }

  /**
   * Serializes an element with the DOM Load/Save serializer of its owner document.
   *
   * @param el the element; may be null
   * @return the serialized form, or an empty string if {@code el} is null
   */
  public static String elementToString(Element el) {
    if (el == null)
      return "";
    Document document = el.getOwnerDocument();
    DOMImplementationLS domImplLS = (DOMImplementationLS) document.getImplementation();
    LSSerializer serializer = domImplLS.createLSSerializer();
    return serializer.writeToString(el);
  }

  /**
   * Returns the {@code value} attribute of the first child named {@code name}.
   *
   * @param element the parent; may be null
   * @param name the child name
   * @return the attribute value, or null if there is no such child
   */
  public static String getNamedChildValue(Element element, String name) {
    return getNamedChildAttribute(element, name, "value");
  }

  /**
   * Sets the {@code value} attribute on the first child named {@code name}.
   *
   * @param element the parent
   * @param name the child name
   * @param value the value to set
   * @throws FHIRException if there is no child with that name
   */
  public static void setNamedChildValue(Element element, String name, String value) throws FHIRException {
    Element e = getNamedChild(element, name);
    if (e == null)
      throw new FHIRException("unable to find element "+name);
    e.setAttribute("value", value);
  }


  /**
   * Appends to {@code children} every element child of {@code focus} whose name equals
   * {@code name}, or, when {@code name} ends with {@code [x]}, whose name starts with the part of
   * {@code name} before that suffix.
   *
   * @param focus the parent; may be null
   * @param name the name, optionally ending in {@code [x]}
   * @param children the list that receives the matches
   */
  public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) {
    Element c = getFirstChild(focus);
    while (c != null) {
      String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName();
      if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3))))
        children.add(c);
      c = getNextSibling(c);
    }
  }

  /**
   * Appends to {@code children} every element child of {@code focus} whose name equals
   * {@code name}, or starts with {@code name} and has a remainder contained in {@code typeTails}.
   * A child named {@code responseCode} is never matched through its tail.
   *
   * @param focus the parent; may be null
   * @param name the base name
   * @param children the list that receives the matches
   * @param typeTails the accepted suffixes
   */
  public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) {
    Element c = getFirstChild(focus);
    while (c != null) {
      String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName();
      if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length())))))
        children.add(c);
      c = getNextSibling(c);
    }
  }

  /**
   * Tests whether {@code e} has an element child whose local name or node name equals {@code name}.
   *
   * @param e the parent; may be null
   * @param name the name to look for
   * @return true if such a child exists
   */
  public static boolean hasNamedChild(Element e, String name) {
    return getNamedChild(e, name) != null;
  }

  /**
   * Parses XML text into a DOM document using a parser that is not namespace-aware.
   *
   * @param content the XML text
   * @return the parsed document
   * @throws ParserConfigurationException if a parser cannot be created
   * @throws SAXException if the content is not well-formed
   * @throws IOException if reading the content fails
   */
  public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException {
    // NOTE(review): the factory is used with default settings (no DTD / external-entity
    // restrictions); harden it if this is ever fed untrusted input.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(false);
    DocumentBuilder builder = factory.newDocumentBuilder();
    // Parse from characters rather than content.getBytes(): the latter encodes with the platform
    // default charset, which corrupts non-ASCII text whenever that default is not what the parser assumes.
    return builder.parse(new InputSource(new StringReader(content)));
  }

  /**
   * Parses the named file into a DOM document using a parser that is not namespace-aware.
   *
   * @param filename the path of the file to read
   * @return the parsed document
   * @throws ParserConfigurationException if a parser cannot be created
   * @throws SAXException if the file is not well-formed
   * @throws IOException if the file cannot be read
   */
  public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(false);
    DocumentBuilder builder = factory.newDocumentBuilder();
    InputStream in = new FileInputStream(filename);
    try {
      return builder.parse(in);
    } finally {
      // Always release the file handle, whether or not parsing succeeded.
      in.close();
    }
  }

  /**
   * Returns the last child of {@code e} that is an element.
   *
   * @param e the parent; may be null
   * @return the last element child, or null if {@code e} is null or has no element children
   */
  public static Element getLastChild(Element e) {
    if (e == null)
      return null;
    return firstElementBackward(e.getLastChild());
  }

  /**
   * Returns the previous sibling of {@code e} that is an element.
   *
   * @param e the starting element; must not be null
   * @return the previous element sibling, or null if there is none
   */
  public static Element getPrevSibling(Element e) {
    return firstElementBackward(e.getPreviousSibling());
  }

  /**
   * Returns attribute {@code aname} of the first child named {@code name}.
   *
   * @param element the parent; may be null
   * @param name the child name
   * @param aname the attribute name
   * @return the attribute value, or null if there is no such child
   */
  public static String getNamedChildAttribute(Element element, String name, String aname) {
    Element e = getNamedChild(element, name);
    return e == null ? null : e.getAttribute(aname);
  }

  /**
   * Writes a document to the named file using a default (identity) transformer.
   *
   * @param doc the document to write
   * @param filename the destination path
   * @throws TransformerException if the transformation fails
   */
  public static void writeDomToFile(Document doc, String filename) throws TransformerException {
    TransformerFactory transformerFactory = TransformerFactory.newInstance();
    Transformer transformer = transformerFactory.newTransformer();
    DOMSource source = new DOMSource(doc);
    StreamResult streamResult = new StreamResult(new File(filename));
    transformer.transform(source, streamResult);
  }

  /**
   * Returns the text of the element's {@code type} attribute in the XML Schema instance namespace.
   *
   * @param element the element to inspect
   * @return the attribute text, or null if the attribute is absent
   */
  public static String getXsiType(org.w3c.dom.Element element) {
    Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type");
    return (a == null ? null : a.getTextContent());
  }

  /**
   * Concatenates the text of the element's immediate text-node children.
   *
   * @param node the element to read
   * @return the concatenated text, trimmed
   */
  public static String getDirectText(org.w3c.dom.Element node) {
    Node n = node.getFirstChild();
    StringBuilder b = new StringBuilder();
    while (n != null) {
      if (n.getNodeType() == Node.TEXT_NODE)
        b.append(n.getTextContent());
      n = n.getNextSibling();
    }
    return b.toString().trim();
  }

  /**
   * Removes every element child of {@code e} whose local name or node name equals {@code name}.
   *
   * @param e the parent
   * @param name the name of the children to remove
   */
  public static void deleteByName(Element e, String name) {
    // Collect first, then remove, so the sibling walk is not disturbed by the removals.
    List<Element> matches = getNamedChildren(e, name);
    for (Element m : matches)
      e.removeChild(m);
  }

  /**
   * Removes a namespaced attribute from {@code e} if it is present.
   *
   * @param e the element
   * @param namespaceURI the attribute namespace
   * @param localName the attribute local name
   */
  public static void deleteAttr(Element e, String namespaceURI, String localName) {
    if (e.hasAttributeNS(namespaceURI, localName))
      e.removeAttributeNS(namespaceURI, localName);
  }

  /**
   * Copies the child nodes of {@code ed} into an array.
   *
   * @param ed the parent element
   * @return a new array holding all child nodes (of any node type) in order
   */
  public static Node[] children(Element ed) {
    NodeList nodes = ed.getChildNodes();
    Node[] res = new Node[nodes.getLength()];
    for (int i = 0; i < res.length; i++)
      res[i] = nodes.item(i);
    return res;
  }

  /**
   * Creates a new element and inserts it, preceded by an indent text node, as the first child of
   * {@code element}.
   *
   * @param doc the owner document
   * @param element the parent
   * @param name the qualified name of the new element
   * @param namespace the namespace of the new element
   * @param indent the amount of indentation padding
   * @return the new element
   */
  public static Element insertChild(Document doc, Element element, String name, String namespace, int indent) {
    Node node = indentNode(doc, indent);
    Element child = doc.createElementNS(namespace, name);
    element.insertBefore(child, element.getFirstChild());
    // Inserted second so the indent text ends up in front of the new element.
    element.insertBefore(node, element.getFirstChild());
    return child;
  }

  /**
   * Creates a new element and inserts it before {@code before}. When {@code before} is null the
   * indent text node and then the element are appended; otherwise the element is inserted and the
   * indent text node is placed between it and {@code before}.
   *
   * @param doc the owner document
   * @param element the parent
   * @param name the qualified name of the new element
   * @param namespace the namespace of the new element
   * @param before the existing child to insert in front of, or null to append
   * @param indent the amount of indentation padding
   * @return the new element
   */
  public static Element insertChild(Document doc, Element element, String name, String namespace, Node before, int indent) {
    if (before == null)
      element.insertBefore(indentNode(doc, indent), before);
    Element child = doc.createElementNS(namespace, name);
    element.insertBefore(child, before);
    if (before != null)
      element.insertBefore(indentNode(doc, indent), before);
    return child;
  }

  /**
   * Appends an indent text node followed by a new element whose {@code value} attribute is
   * {@code text}.
   *
   * @param doc the owner document
   * @param element the parent
   * @param name the qualified name of the new element
   * @param namespace the namespace of the new element
   * @param text the value attribute to set
   * @param indent the amount of indentation padding
   */
  public static void addTextTag(Document doc, Element element, String name, String namespace, String text, int indent) {
    element.appendChild(indentNode(doc, indent));
    Element child = doc.createElementNS(namespace, name);
    element.appendChild(child);
    child.setAttribute("value", text);
  }

  /**
   * Writes an element to a stream using a default (identity) transformer. The stream is not closed.
   *
   * @param root the element to write
   * @param stream the destination
   * @throws TransformerException if the transformation fails
   */
  public static void saveToFile(Element root, OutputStream stream) throws TransformerException {
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    Result output = new StreamResult(stream);
    Source input = new DOMSource(root);

    transformer.transform(input, output);
  }

  /**
   * Appends an indent text node to {@code element}.
   *
   * @param doc the owner document
   * @param element the parent
   * @param indent the amount of indentation padding
   */
  public static void spacer(Document doc, Element element, int indent) {
    element.appendChild(indentNode(doc, indent));
  }

  /**
   * Returns the text content of the first child named {@code name}.
   *
   * @param element the parent; may be null
   * @param name the child name
   * @return the text content, or null if there is no such child
   */
  public static String getNamedChildText(Element element, String name) {
    Element e = getNamedChild(element, name);
    return e == null ? null : e.getTextContent();
  }


}