001/* 002Copyright (c) 2011+, HL7, Inc 003All rights reserved. 004 005Redistribution and use in source and binary forms, with or without modification, 006are permitted provided that the following conditions are met: 007 008 * Redistributions of source code must retain the above copyright notice, this 009 list of conditions and the following disclaimer. 010 * Redistributions in binary form must reproduce the above copyright notice, 011 this list of conditions and the following disclaimer in the documentation 012 and/or other materials provided with the distribution. 013 * Neither the name of HL7 nor the names of its contributors may be used to 014 endorse or promote products derived from this software without specific 015 prior written permission. 016 017THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 018ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 019WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 020IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 021INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 022NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 023PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 024WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 025ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 026POSSIBILITY OF SUCH DAMAGE. 027 028*/ 029package org.hl7.fhir.utilities.xml; 030 031import java.io.ByteArrayInputStream; 032import java.io.File; 033import java.io.FileInputStream; 034import java.io.IOException; 035import java.io.InputStream; 036import java.io.InputStreamReader; 037import java.util.ArrayList; 038import java.util.List; 039import java.util.Set; 040 041import javax.xml.parsers.DocumentBuilder; 042import javax.xml.parsers.DocumentBuilderFactory; 043import javax.xml.parsers.ParserConfigurationException; 044import javax.xml.transform.Transformer; 045import javax.xml.transform.TransformerException; 046import javax.xml.transform.TransformerFactory; 047import javax.xml.transform.dom.DOMSource; 048import javax.xml.transform.stream.StreamResult; 049 050import org.hl7.fhir.exceptions.FHIRException; 051import org.hl7.fhir.utilities.Utilities; 052import org.w3c.dom.Attr; 053import org.w3c.dom.Document; 054import org.w3c.dom.Element; 055import org.w3c.dom.Node; 056import org.w3c.dom.ls.DOMImplementationLS; 057import org.w3c.dom.ls.LSSerializer; 058import org.xml.sax.SAXException; 059 060public class XMLUtil { 061 062 public static final String SPACE_CHAR = "\u00A0"; 063 064 public static boolean isNMToken(String name) { 065 if (name == null) 066 return false; 067 for (int i = 0; i < name.length(); i++) 068 if (!isNMTokenChar(name.charAt(i))) 069 return false; 070 return name.length() > 0; 071 } 072 073 public static boolean isNMTokenChar(char c) { 074 return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c); 075 } 076 077 private static boolean isDigit(char c) { 078 return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') || 079 (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') || 080 (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') || 081 (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') || 082 (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29'); 083 } 084 085 private static boolean isCombiningChar(char c) { 086 return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') || 087 (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') || 088 c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') || 089 c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') || 090 (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' || 091 (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') || 092 (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') || 093 (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') || 094 c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') || 095 (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') || 096 (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') || 097 (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') || 098 (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') || 099 (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') || 100 (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') || 101 (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') || 102 (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') || 103 (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') || 104 (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' || 105 c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' || 106 (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') || 107 (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' || 108 (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' || 109 (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') || 110 c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A'; 111 } 112 113 private static boolean isExtender(char c) { 114 return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' || 115 c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') || 116 (c >= '\u30FC' && c <= '\u30FE'); 117 } 118 119 private static boolean isLetter(char c) { 120 return isBaseChar(c) || isIdeographic(c); 121 } 122 123 private static boolean isBaseChar(char c) { 124 return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') || 125 (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') || 126 (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') || 127 (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') || 128 (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') || 129 c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') || 130 (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' || 131 c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') || 132 (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') || 133 (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') || 134 (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') || 135 c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') || 136 (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') || 137 (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') || 138 c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' || 139 (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') || 140 (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' || 141 (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') || 142 (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') || 143 (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') || 144 (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') || 145 c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' || 146 (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') || 147 (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' || 148 (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') || 149 (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') || 150 c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') || 151 (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') || 152 (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') || 153 (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') || 154 (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') || 155 (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') || 156 (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') || 157 (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') || 158 c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') || 159 (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') || 160 (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' || 161 (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') || 162 c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') || 163 (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' || 164 (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' || 165 (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') || 166 (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') || 167 (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') || 168 (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') || 169 (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' || 170 c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' || 171 (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' || 172 (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' || 173 c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') || 174 (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') || 175 c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') || 176 (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') || 177 (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' || 178 (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') || 179 c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') || 180 (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') || 181 (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' || 182 (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') || 183 (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') || 184 (c >= '\uAC00' && c <= '\uD7A3'); 185 } 186 187 private static boolean isIdeographic(char c) { 188 return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029'); 189 } 190 191 public static String determineEncoding(InputStream stream) throws IOException { 192 stream.mark(20000); 193 try { 194 int b0 = stream.read(); 195 int b1 = stream.read(); 196 int b2 = stream.read(); 197 int b3 = stream.read(); 198 199 if (b0 == 0xFE && b1 == 0xFF) 200 return "UTF-16BE"; 201 else if (b0 == 0xFF && b1 == 0xFE) 202 return "UTF-16LE"; 203 else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF ) 204 return "UTF-8"; 205 else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) 206 return "UTF-16BE"; 207 else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) 208 return "UTF-16LE"; 209 else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) { 210// UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding 211// which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding 212// declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns 213// for the relevant ASCII characters, the encoding declaration itself may be read reliably 214 InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII"); 215 String hdr = readFirstLine(rdr); 216 return extractEncoding(hdr); 217 } else 218 return null; 219 } finally { 220 stream.reset(); 221 } 222 } 223 224 private static String extractEncoding(String hdr) { 225 int i = hdr.indexOf("encoding="); 226 if (i == -1) 227 return null; 228 hdr = hdr.substring(i+9); 229 char sep = hdr.charAt(0); 230 hdr = hdr.substring(1); 231 i = hdr.indexOf(sep); 232 if (i == -1) 233 return null; 234 return hdr.substring(0, i); 235 } 236 237 private static String readFirstLine(InputStreamReader rdr) throws IOException { 238 char[] buf = new char[1]; 239 StringBuffer bldr = new StringBuffer(); 240 rdr.read(buf); 241 while (buf[0] != '>') { 242 bldr.append(buf[0]); 243 rdr.read(buf); 244 } 245 return bldr.toString(); 246 } 247 248 249 public static boolean charSetImpliesAscii(String charset) { 250 return charset.equals("ISO-8859-1") || charset.equals("US-ASCII"); 251 } 252 253 254 /** 255 * Converts the raw characters to XML escape characters. 256 * 257 * @param rawContent 258 * @param charset Null when charset is not known, so we assume it's unicode 259 * @param isNoLines 260 * @return escape string 261 */ 262 public static String escapeXML(String rawContent, String charset, boolean isNoLines) { 263 if (rawContent == null) 264 return ""; 265 else { 266 StringBuffer sb = new StringBuffer(); 267 268 for (int i = 0; i < rawContent.length(); i++) { 269 char ch = rawContent.charAt(i); 270 if (ch == '\'') 271 sb.append("'"); 272 else if (ch == '&') 273 sb.append("&"); 274 else if (ch == '"') 275 sb.append("""); 276 else if (ch == '<') 277 sb.append("<"); 278 else if (ch == '>') 279 sb.append(">"); 280 else if (ch > '~' && charset != null && charSetImpliesAscii(charset)) 281 // TODO - why is hashcode the only way to get the unicode number for the character 282 // in jre 5.0? 283 sb.append("&#x"+Integer.toHexString(ch).toUpperCase()+";"); 284 else if (isNoLines) { 285 if (ch == '\r') 286 sb.append("
"); 287 else if (ch != '\n') 288 sb.append(ch); 289 } 290 else 291 sb.append(ch); 292 } 293 return sb.toString(); 294 } 295 } 296 297 public static Element getFirstChild(Element e) { 298 if (e == null) 299 return null; 300 Node n = e.getFirstChild(); 301 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 302 n = n.getNextSibling(); 303 return (Element) n; 304 } 305 306 public static Element getNamedChild(Element e, String name) { 307 Element c = getFirstChild(e); 308 while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName())) 309 c = getNextSibling(c); 310 return c; 311 } 312 313 public static Element getNamedChildByAttribute(Element e, String name, String nname, String nvalue) { 314 Element c = getFirstChild(e); 315 while (c != null && !((name.equals(c.getLocalName()) || name.equals(c.getNodeName())) && nvalue.equals(c.getAttribute(nname)))) 316 c = getNextSibling(c); 317 return c; 318 } 319 320 public static Element getNextSibling(Element e) { 321 Node n = e.getNextSibling(); 322 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 323 n = n.getNextSibling(); 324 return (Element) n; 325 } 326 327 public static void getNamedChildren(Element e, String name, List<Element> set) { 328 Element c = getFirstChild(e); 329 while (c != null) { 330 if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) ) 331 set.add(c); 332 c = getNextSibling(c); 333 } 334 } 335 336 public static List<Element> getNamedChildren(Element e, String name) { 337 List<Element> res = new ArrayList<Element>(); 338 Element c = getFirstChild(e); 339 while (c != null) { 340 if (name.equals(c.getLocalName()) || name.equals(c.getNodeName()) ) 341 res.add(c); 342 c = getNextSibling(c); 343 } 344 return res; 345 } 346 347 public static String htmlToXmlEscapedPlainText(Element r) { 348 StringBuilder s = new StringBuilder(); 349 Node n = r.getFirstChild(); 350 boolean ws = false; 351 while (n != null) { 352 if (n.getNodeType() == Node.TEXT_NODE) { 353 String t = n.getTextContent().trim(); 354 if (Utilities.noString(t)) 355 ws = true; 356 else { 357 if (ws) 358 s.append(" "); 359 ws = false; 360 s.append(t); 361 } 362 } 363 if (n.getNodeType() == Node.ELEMENT_NODE) { 364 if (ws) 365 s.append(" "); 366 ws = false; 367 s.append(htmlToXmlEscapedPlainText((Element) n)); 368 if (r.getNodeName().equals("br") || r.getNodeName().equals("p")) 369 s.append("\r\n"); 370 } 371 n = n.getNextSibling(); 372 } 373 return s.toString(); 374 } 375 376 public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException { 377 return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement()); 378 } 379 380 public static String elementToString(Element el) { 381 if (el == null) 382 return ""; 383 Document document = el.getOwnerDocument(); 384 DOMImplementationLS domImplLS = (DOMImplementationLS) document 385 .getImplementation(); 386 LSSerializer serializer = domImplLS.createLSSerializer(); 387 return serializer.writeToString(el); 388 } 389 390 public static String getNamedChildValue(Element element, String name) { 391 Element e = getNamedChild(element, name); 392 return e == null ? null : e.getAttribute("value"); 393 } 394 395 public static void setNamedChildValue(Element element, String name, String value) throws FHIRException { 396 Element e = getNamedChild(element, name); 397 if (e == null) 398 throw new FHIRException("unable to find element "+name); 399 e.setAttribute("value", value); 400 } 401 402 403 public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) { 404 Element c = getFirstChild(focus); 405 while (c != null) { 406 String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 407 if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3)))) 408 children.add(c); 409 c = getNextSibling(c); 410 } 411 } 412 413 public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) { 414 Element c = getFirstChild(focus); 415 while (c != null) { 416 String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 417 if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length()))))) 418 children.add(c); 419 c = getNextSibling(c); 420 } 421 } 422 423 public static boolean hasNamedChild(Element e, String name) { 424 Element c = getFirstChild(e); 425 while (c != null && !name.equals(c.getLocalName()) && !name.equals(c.getNodeName())) 426 c = getNextSibling(c); 427 return c != null; 428 } 429 430 public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException { 431 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 432 factory.setNamespaceAware(false); 433 DocumentBuilder builder = factory.newDocumentBuilder(); 434 return builder.parse(new ByteArrayInputStream(content.getBytes())); 435 } 436 437 public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException { 438 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 439 factory.setNamespaceAware(false); 440 DocumentBuilder builder = factory.newDocumentBuilder(); 441 return builder.parse(new FileInputStream(filename)); 442 } 443 444 public static Element getLastChild(Element e) { 445 if (e == null) 446 return null; 447 Node n = e.getLastChild(); 448 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 449 n = n.getPreviousSibling(); 450 return (Element) n; 451 } 452 453 public static Element getPrevSibling(Element e) { 454 Node n = e.getPreviousSibling(); 455 while (n != null && n.getNodeType() != Node.ELEMENT_NODE) 456 n = n.getPreviousSibling(); 457 return (Element) n; 458 } 459 460 public static String getNamedChildAttribute(Element element, String name, String aname) { 461 Element e = getNamedChild(element, name); 462 return e == null ? null : e.getAttribute(aname); 463 } 464 465 public static void writeDomToFile(Document doc, String filename) throws TransformerException { 466 TransformerFactory transformerFactory = TransformerFactory.newInstance(); 467 Transformer transformer = transformerFactory.newTransformer(); 468 DOMSource source = new DOMSource(doc); 469 StreamResult streamResult = new StreamResult(new File(filename)); 470 transformer.transform(source, streamResult); 471 } 472 473 public static String getXsiType(org.w3c.dom.Element element) { 474 Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type"); 475 return (a == null ? null : a.getTextContent()); 476 477 } 478 479 public static String getDirectText(org.w3c.dom.Element node) { 480 Node n = node.getFirstChild(); 481 StringBuilder b = new StringBuilder(); 482 while (n != null) { 483 if (n.getNodeType() == Node.TEXT_NODE) 484 b.append(n.getTextContent()); 485 n = n.getNextSibling(); 486 } 487 return b.toString().trim(); 488 } 489 490 public static void deleteByName(Element e, String name) { 491 List<Element> matches = getNamedChildren(e, name); 492 for (Element m : matches) 493 e.removeChild(m); 494 } 495 496 public static void deleteAttr(Element e, String namespaceURI, String localName) { 497 if (e.hasAttributeNS(namespaceURI, localName)) 498 e.removeAttributeNS(namespaceURI, localName); 499 500 } 501 502 503}