package org.hl7.fhir.utilities.xml;

/*
  Copyright (c) 2011+, HL7, Inc.
  All rights reserved.
  
  Redistribution and use in source and binary forms, with or without modification, 
  are permitted provided that the following conditions are met:
    
   * Redistributions of source code must retain the above copyright notice, this 
     list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice, 
     this list of conditions and the following disclaimer in the documentation 
     and/or other materials provided with the distribution.
   * Neither the name of HL7 nor the names of its contributors may be used to 
     endorse or promote products derived from this software without specific 
     prior written permission.
  
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  POSSIBILITY OF SUCH DAMAGE.
  
 */



import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.Utilities;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.SAXException;

/**
 * Static helpers for working with XML: name-token character checks, encoding
 * sniffing, text escaping, W3C DOM navigation/editing, and parsing/serialising.
 */
public class XMLUtil {

  /** A string holding the single character U+00A0. */
  public static final String SPACE_CHAR = "\u00A0";

  /**
   * Tests whether a string is made up only of name-token characters.
   *
   * @param name the candidate token; may be null
   * @return true if {@code name} is non-null, non-empty and every character
   *         satisfies {@link #isNMTokenChar(char)}
   */
  public static boolean isNMToken(String name) {
    if (name == null)
      return false;
    for (int i = 0; i < name.length(); i++)
      if (!isNMTokenChar(name.charAt(i)))
        return false;
    return name.length() > 0;
  }

  /**
   * Tests whether a character may appear in a name token: a letter, a digit,
   * one of {@code . - _ :}, a combining character or an extender, as defined
   * by the range tables in this class.
   *
   * @param c the character to test
   * @return true if the character is accepted
   */
  public static boolean isNMTokenChar(char c) {
    return isLetter(c) || isDigit(c) || c == '.' || c == '-' || c == '_' || c == ':' || isCombiningChar(c) || isExtender(c);
  }

  /** Range table for digit characters. */
  private static boolean isDigit(char c) {
    return (c >= '\u0030' && c <= '\u0039') || (c >= '\u0660' && c <= '\u0669') || (c >= '\u06F0' && c <= '\u06F9') ||
      (c >= '\u0966' && c <= '\u096F') || (c >= '\u09E6' && c <= '\u09EF') || (c >= '\u0A66' && c <= '\u0A6F') ||
      (c >= '\u0AE6' && c <= '\u0AEF') || (c >= '\u0B66' && c <= '\u0B6F') || (c >= '\u0BE7' && c <= '\u0BEF') ||
      (c >= '\u0C66' && c <= '\u0C6F') || (c >= '\u0CE6' && c <= '\u0CEF') || (c >= '\u0D66' && c <= '\u0D6F') ||
      (c >= '\u0E50' && c <= '\u0E59') || (c >= '\u0ED0' && c <= '\u0ED9') || (c >= '\u0F20' && c <= '\u0F29');
  }

  /** Range table for combining characters. */
  private static boolean isCombiningChar(char c) {
    return (c >= '\u0300' && c <= '\u0345') || (c >= '\u0360' && c <= '\u0361') || (c >= '\u0483' && c <= '\u0486') ||
      (c >= '\u0591' && c <= '\u05A1') || (c >= '\u05A3' && c <= '\u05B9') || (c >= '\u05BB' && c <= '\u05BD') ||
      c == '\u05BF' || (c >= '\u05C1' && c <= '\u05C2') || c == '\u05C4' || (c >= '\u064B' && c <= '\u0652') ||
      c == '\u0670' || (c >= '\u06D6' && c <= '\u06DC') || (c >= '\u06DD' && c <= '\u06DF') || (c >= '\u06E0' && c <= '\u06E4') ||
      (c >= '\u06E7' && c <= '\u06E8') || (c >= '\u06EA' && c <= '\u06ED') || (c >= '\u0901' && c <= '\u0903') || c == '\u093C' ||
      (c >= '\u093E' && c <= '\u094C') || c == '\u094D' || (c >= '\u0951' && c <= '\u0954') || (c >= '\u0962' && c <= '\u0963') ||
      (c >= '\u0981' && c <= '\u0983') || c == '\u09BC' || c == '\u09BE' || c == '\u09BF' || (c >= '\u09C0' && c <= '\u09C4') ||
      (c >= '\u09C7' && c <= '\u09C8') || (c >= '\u09CB' && c <= '\u09CD') || c == '\u09D7' || (c >= '\u09E2' && c <= '\u09E3') ||
      c == '\u0A02' || c == '\u0A3C' || c == '\u0A3E' || c == '\u0A3F' || (c >= '\u0A40' && c <= '\u0A42') ||
      (c >= '\u0A47' && c <= '\u0A48') || (c >= '\u0A4B' && c <= '\u0A4D') || (c >= '\u0A70' && c <= '\u0A71') ||
      (c >= '\u0A81' && c <= '\u0A83') || c == '\u0ABC' || (c >= '\u0ABE' && c <= '\u0AC5') || (c >= '\u0AC7' && c <= '\u0AC9') ||
      (c >= '\u0ACB' && c <= '\u0ACD') || (c >= '\u0B01' && c <= '\u0B03') || c == '\u0B3C' || (c >= '\u0B3E' && c <= '\u0B43') ||
      (c >= '\u0B47' && c <= '\u0B48') || (c >= '\u0B4B' && c <= '\u0B4D') || (c >= '\u0B56' && c <= '\u0B57') ||
      (c >= '\u0B82' && c <= '\u0B83') || (c >= '\u0BBE' && c <= '\u0BC2') || (c >= '\u0BC6' && c <= '\u0BC8') ||
      (c >= '\u0BCA' && c <= '\u0BCD') || c == '\u0BD7' || (c >= '\u0C01' && c <= '\u0C03') || (c >= '\u0C3E' && c <= '\u0C44') ||
      (c >= '\u0C46' && c <= '\u0C48') || (c >= '\u0C4A' && c <= '\u0C4D') || (c >= '\u0C55' && c <= '\u0C56') ||
      (c >= '\u0C82' && c <= '\u0C83') || (c >= '\u0CBE' && c <= '\u0CC4') || (c >= '\u0CC6' && c <= '\u0CC8') ||
      (c >= '\u0CCA' && c <= '\u0CCD') || (c >= '\u0CD5' && c <= '\u0CD6') || (c >= '\u0D02' && c <= '\u0D03') ||
      (c >= '\u0D3E' && c <= '\u0D43') || (c >= '\u0D46' && c <= '\u0D48') || (c >= '\u0D4A' && c <= '\u0D4D') || c == '\u0D57' ||
      c == '\u0E31' || (c >= '\u0E34' && c <= '\u0E3A') || (c >= '\u0E47' && c <= '\u0E4E') || c == '\u0EB1' ||
      (c >= '\u0EB4' && c <= '\u0EB9') || (c >= '\u0EBB' && c <= '\u0EBC') || (c >= '\u0EC8' && c <= '\u0ECD') ||
      (c >= '\u0F18' && c <= '\u0F19') || c == '\u0F35' || c == '\u0F37' || c == '\u0F39' || c == '\u0F3E' || c == '\u0F3F' ||
      (c >= '\u0F71' && c <= '\u0F84') || (c >= '\u0F86' && c <= '\u0F8B') || (c >= '\u0F90' && c <= '\u0F95') || c == '\u0F97' ||
      (c >= '\u0F99' && c <= '\u0FAD') || (c >= '\u0FB1' && c <= '\u0FB7') || c == '\u0FB9' || (c >= '\u20D0' && c <= '\u20DC') ||
      c == '\u20E1' || (c >= '\u302A' && c <= '\u302F') || c == '\u3099' || c == '\u309A';
  }

  /** Range table for extender characters. */
  private static boolean isExtender(char c) {
    return c == '\u00B7' || c == '\u02D0' || c == '\u02D1' || c == '\u0387' || c == '\u0640' || c == '\u0E46' ||
      c == '\u0EC6' || c == '\u3005' || (c >= '\u3031' && c <= '\u3035') || (c >= '\u309D' && c <= '\u309E') ||
      (c >= '\u30FC' && c <= '\u30FE');
  }

  /** A letter is either a base character or an ideographic character. */
  private static boolean isLetter(char c) {
    return isBaseChar(c) || isIdeographic(c);
  }

  /** Range table for base (alphabetic/syllabic) characters. */
  private static boolean isBaseChar(char c) {
    return (c >= '\u0041' && c <= '\u005A') || (c >= '\u0061' && c <= '\u007A') || (c >= '\u00C0' && c <= '\u00D6') ||
      (c >= '\u00D8' && c <= '\u00F6') || (c >= '\u00F8' && c <= '\u00FF') || (c >= '\u0100' && c <= '\u0131') ||
      (c >= '\u0134' && c <= '\u013E') || (c >= '\u0141' && c <= '\u0148') || (c >= '\u014A' && c <= '\u017E') ||
      (c >= '\u0180' && c <= '\u01C3') || (c >= '\u01CD' && c <= '\u01F0') || (c >= '\u01F4' && c <= '\u01F5') ||
      (c >= '\u01FA' && c <= '\u0217') || (c >= '\u0250' && c <= '\u02A8') || (c >= '\u02BB' && c <= '\u02C1') ||
      c == '\u0386' || (c >= '\u0388' && c <= '\u038A') || c == '\u038C' || (c >= '\u038E' && c <= '\u03A1') ||
      (c >= '\u03A3' && c <= '\u03CE') || (c >= '\u03D0' && c <= '\u03D6') || c == '\u03DA' || c == '\u03DC' || c == '\u03DE' ||
      c == '\u03E0' || (c >= '\u03E2' && c <= '\u03F3') || (c >= '\u0401' && c <= '\u040C') || (c >= '\u040E' && c <= '\u044F') ||
      (c >= '\u0451' && c <= '\u045C') || (c >= '\u045E' && c <= '\u0481') || (c >= '\u0490' && c <= '\u04C4') ||
      (c >= '\u04C7' && c <= '\u04C8') || (c >= '\u04CB' && c <= '\u04CC') || (c >= '\u04D0' && c <= '\u04EB') ||
      (c >= '\u04EE' && c <= '\u04F5') || (c >= '\u04F8' && c <= '\u04F9') || (c >= '\u0531' && c <= '\u0556') ||
      c == '\u0559' || (c >= '\u0561' && c <= '\u0586') || (c >= '\u05D0' && c <= '\u05EA') || (c >= '\u05F0' && c <= '\u05F2') ||
      (c >= '\u0621' && c <= '\u063A') || (c >= '\u0641' && c <= '\u064A') || (c >= '\u0671' && c <= '\u06B7') ||
      (c >= '\u06BA' && c <= '\u06BE') || (c >= '\u06C0' && c <= '\u06CE') || (c >= '\u06D0' && c <= '\u06D3') ||
      c == '\u06D5' || (c >= '\u06E5' && c <= '\u06E6') || (c >= '\u0905' && c <= '\u0939') || c == '\u093D' ||
      (c >= '\u0958' && c <= '\u0961') || (c >= '\u0985' && c <= '\u098C') || (c >= '\u098F' && c <= '\u0990') ||
      (c >= '\u0993' && c <= '\u09A8') || (c >= '\u09AA' && c <= '\u09B0') || c == '\u09B2' ||
      (c >= '\u09B6' && c <= '\u09B9') || (c >= '\u09DC' && c <= '\u09DD') || (c >= '\u09DF' && c <= '\u09E1') ||
      (c >= '\u09F0' && c <= '\u09F1') || (c >= '\u0A05' && c <= '\u0A0A') || (c >= '\u0A0F' && c <= '\u0A10') ||
      (c >= '\u0A13' && c <= '\u0A28') || (c >= '\u0A2A' && c <= '\u0A30') || (c >= '\u0A32' && c <= '\u0A33') ||
      (c >= '\u0A35' && c <= '\u0A36') || (c >= '\u0A38' && c <= '\u0A39') || (c >= '\u0A59' && c <= '\u0A5C') ||
      c == '\u0A5E' || (c >= '\u0A72' && c <= '\u0A74') || (c >= '\u0A85' && c <= '\u0A8B') || c == '\u0A8D' ||
      (c >= '\u0A8F' && c <= '\u0A91') || (c >= '\u0A93' && c <= '\u0AA8') || (c >= '\u0AAA' && c <= '\u0AB0') ||
      (c >= '\u0AB2' && c <= '\u0AB3') || (c >= '\u0AB5' && c <= '\u0AB9') || c == '\u0ABD' || c == '\u0AE0' ||
      (c >= '\u0B05' && c <= '\u0B0C') || (c >= '\u0B0F' && c <= '\u0B10') || (c >= '\u0B13' && c <= '\u0B28') ||
      (c >= '\u0B2A' && c <= '\u0B30') || (c >= '\u0B32' && c <= '\u0B33') || (c >= '\u0B36' && c <= '\u0B39') ||
      c == '\u0B3D' || (c >= '\u0B5C' && c <= '\u0B5D') || (c >= '\u0B5F' && c <= '\u0B61') ||
      (c >= '\u0B85' && c <= '\u0B8A') || (c >= '\u0B8E' && c <= '\u0B90') || (c >= '\u0B92' && c <= '\u0B95') ||
      (c >= '\u0B99' && c <= '\u0B9A') || c == '\u0B9C' || (c >= '\u0B9E' && c <= '\u0B9F') ||
      (c >= '\u0BA3' && c <= '\u0BA4') || (c >= '\u0BA8' && c <= '\u0BAA') || (c >= '\u0BAE' && c <= '\u0BB5') ||
      (c >= '\u0BB7' && c <= '\u0BB9') || (c >= '\u0C05' && c <= '\u0C0C') || (c >= '\u0C0E' && c <= '\u0C10') ||
      (c >= '\u0C12' && c <= '\u0C28') || (c >= '\u0C2A' && c <= '\u0C33') || (c >= '\u0C35' && c <= '\u0C39') ||
      (c >= '\u0C60' && c <= '\u0C61') || (c >= '\u0C85' && c <= '\u0C8C') || (c >= '\u0C8E' && c <= '\u0C90') ||
      (c >= '\u0C92' && c <= '\u0CA8') || (c >= '\u0CAA' && c <= '\u0CB3') || (c >= '\u0CB5' && c <= '\u0CB9') ||
      c == '\u0CDE' || (c >= '\u0CE0' && c <= '\u0CE1') || (c >= '\u0D05' && c <= '\u0D0C') ||
      (c >= '\u0D0E' && c <= '\u0D10') || (c >= '\u0D12' && c <= '\u0D28') || (c >= '\u0D2A' && c <= '\u0D39') ||
      (c >= '\u0D60' && c <= '\u0D61') || (c >= '\u0E01' && c <= '\u0E2E') || c == '\u0E30' ||
      (c >= '\u0E32' && c <= '\u0E33') || (c >= '\u0E40' && c <= '\u0E45') || (c >= '\u0E81' && c <= '\u0E82') ||
      c == '\u0E84' || (c >= '\u0E87' && c <= '\u0E88') || c == '\u0E8A' || c == '\u0E8D' || (c >= '\u0E94' && c <= '\u0E97') ||
      (c >= '\u0E99' && c <= '\u0E9F') || (c >= '\u0EA1' && c <= '\u0EA3') || c == '\u0EA5' || c == '\u0EA7' ||
      (c >= '\u0EAA' && c <= '\u0EAB') || (c >= '\u0EAD' && c <= '\u0EAE') || c == '\u0EB0' ||
      (c >= '\u0EB2' && c <= '\u0EB3') || c == '\u0EBD' || (c >= '\u0EC0' && c <= '\u0EC4') ||
      (c >= '\u0F40' && c <= '\u0F47') || (c >= '\u0F49' && c <= '\u0F69') || (c >= '\u10A0' && c <= '\u10C5') ||
      (c >= '\u10D0' && c <= '\u10F6') || c == '\u1100' || (c >= '\u1102' && c <= '\u1103') ||
      (c >= '\u1105' && c <= '\u1107') || c == '\u1109' || (c >= '\u110B' && c <= '\u110C') ||
      (c >= '\u110E' && c <= '\u1112') || c == '\u113C' || c == '\u113E' || c == '\u1140' || c == '\u114C' ||
      c == '\u114E' || c == '\u1150' || (c >= '\u1154' && c <= '\u1155') || c == '\u1159' ||
      (c >= '\u115F' && c <= '\u1161') || c == '\u1163' || c == '\u1165' || c == '\u1167' || c == '\u1169' ||
      (c >= '\u116D' && c <= '\u116E') || (c >= '\u1172' && c <= '\u1173') || c == '\u1175' ||
      c == '\u119E' || c == '\u11A8' || c == '\u11AB' || (c >= '\u11AE' && c <= '\u11AF') ||
      (c >= '\u11B7' && c <= '\u11B8') || c == '\u11BA' || (c >= '\u11BC' && c <= '\u11C2') ||
      c == '\u11EB' || c == '\u11F0' || c == '\u11F9' || (c >= '\u1E00' && c <= '\u1E9B') || (c >= '\u1EA0' && c <= '\u1EF9') ||
      (c >= '\u1F00' && c <= '\u1F15') || (c >= '\u1F18' && c <= '\u1F1D') || (c >= '\u1F20' && c <= '\u1F45') ||
      (c >= '\u1F48' && c <= '\u1F4D') || (c >= '\u1F50' && c <= '\u1F57') || c == '\u1F59' || c == '\u1F5B' || c == '\u1F5D' ||
      (c >= '\u1F5F' && c <= '\u1F7D') || (c >= '\u1F80' && c <= '\u1FB4') || (c >= '\u1FB6' && c <= '\u1FBC') ||
      c == '\u1FBE' || (c >= '\u1FC2' && c <= '\u1FC4') || (c >= '\u1FC6' && c <= '\u1FCC') ||
      (c >= '\u1FD0' && c <= '\u1FD3') || (c >= '\u1FD6' && c <= '\u1FDB') || (c >= '\u1FE0' && c <= '\u1FEC') ||
      (c >= '\u1FF2' && c <= '\u1FF4') || (c >= '\u1FF6' && c <= '\u1FFC') || c == '\u2126' ||
      (c >= '\u212A' && c <= '\u212B') || c == '\u212E' || (c >= '\u2180' && c <= '\u2182') ||
      (c >= '\u3041' && c <= '\u3094') || (c >= '\u30A1' && c <= '\u30FA') || (c >= '\u3105' && c <= '\u312C') ||
      (c >= '\uAC00' && c <= '\uD7A3');
  }

  /** Range table for ideographic characters. */
  private static boolean isIdeographic(char c) {
    return (c >= '\u4E00' && c <= '\u9FA5') || c == '\u3007' || (c >= '\u3021' && c <= '\u3029');
  }

  /**
   * Sniffs the character encoding of an XML stream from its first bytes (byte
   * order mark, UTF-16 byte pattern of {@code <?}, or the {@code encoding}
   * pseudo-attribute of an ASCII-compatible XML declaration).
   *
   * The stream is marked before reading and reset afterwards, so it must
   * support {@code mark}/{@code reset}.
   *
   * @param stream the stream to inspect
   * @return the detected encoding name, or null if none could be determined
   * @throws IOException if reading or resetting the stream fails
   */
  public static String determineEncoding(InputStream stream) throws IOException {
    stream.mark(20000);
    try {
      int b0 = stream.read();
      int b1 = stream.read();
      int b2 = stream.read();
      int b3 = stream.read();

      if (b0 == 0xFE && b1 == 0xFF)
        return "UTF-16BE";
      else if (b0 == 0xFF && b1 == 0xFE)
        return "UTF-16LE";
      else if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF )
        return "UTF-8";
      else if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F)
        return "UTF-16BE";
      else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00)
        return "UTF-16LE";
      else if (b0 == 0x3C && b1 == 0x3F && b2 == 0x78 && b3 == 0x6D) {
//        UTF-8, ISO 646, ASCII, some part of ISO 8859, Shift-JIS, EUC, or any other 7-bit, 8-bit, or mixed-width encoding
//        which ensures that the characters of ASCII have their normal positions, width, and values; the actual encoding
//        declaration must be read to detect which of these applies, but since all of these encodings use the same bit patterns
//        for the relevant ASCII characters, the encoding declaration itself may be read reliably
        InputStreamReader rdr = new InputStreamReader(stream, "US-ASCII");
        String hdr = readFirstLine(rdr);
        return extractEncoding(hdr);
      } else
        return null;
    } finally {
      stream.reset();
    }
  }

  /**
   * Pulls the quoted value of the {@code encoding} pseudo-attribute out of an
   * XML declaration fragment.
   *
   * @param hdr the declaration text (without the closing {@code >})
   * @return the encoding value, or null if there is no well-formed
   *         {@code encoding="..."} / {@code encoding='...'} in {@code hdr}
   */
  private static String extractEncoding(String hdr) {
    int i = hdr.indexOf("encoding");
    if (i == -1)
      return null;
    i += "encoding".length();
    // whitespace is permitted on either side of '='
    while (i < hdr.length() && Character.isWhitespace(hdr.charAt(i)))
      i++;
    if (i >= hdr.length() || hdr.charAt(i) != '=')
      return null;
    i++;
    while (i < hdr.length() && Character.isWhitespace(hdr.charAt(i)))
      i++;
    // a truncated declaration previously caused StringIndexOutOfBoundsException here
    if (i >= hdr.length())
      return null;
    char sep = hdr.charAt(i);
    if (sep != '"' && sep != '\'')
      return null;
    int end = hdr.indexOf(sep, i + 1);
    if (end == -1)
      return null;
    return hdr.substring(i + 1, end);
  }

  /**
   * Reads characters up to (not including) the first {@code >}, or to end of
   * stream if no {@code >} is found.
   */
  private static String readFirstLine(InputStreamReader rdr) throws IOException {
    StringBuilder bldr = new StringBuilder();
    int ch = rdr.read();
    // stop at end of stream as well; ignoring -1 used to loop forever on truncated input
    while (ch != -1 && ch != '>') {
      bldr.append((char) ch);
      ch = rdr.read();
    }
    return bldr.toString();
  }


  /**
   * Reports whether the named charset is one for which {@link #escapeXML}
   * writes characters above {@code '~'} as numeric character references.
   *
   * @param charset the charset name; may be null
   * @return true only for exactly "ISO-8859-1" or "US-ASCII"
   */
  public static boolean charSetImpliesAscii(String charset) {
    return "ISO-8859-1".equals(charset) || "US-ASCII".equals(charset);
  }


  /**
   * Converts the raw characters to XML escape characters.
   *
   * The five XML special characters (apostrophe, ampersand, double quote,
   * less-than, greater-than) are always replaced by entity/character
   * references. When {@code charset} implies ASCII output, every character
   * above {@code '~'} is written as a hexadecimal character reference. When
   * {@code isNoLines} is set, a carriage return is written as {@code &#xA;}
   * and a line feed is dropped.
   * 
   * @param rawContent the text to escape; null yields an empty string
   * @param charset Null when charset is not known, so we assume it's unicode
   * @param isNoLines true to keep the output free of literal line breaks
   * @return escape string
   */
  public static String escapeXML(String rawContent, String charset, boolean isNoLines) {
    if (rawContent == null)
      return "";

    boolean asciiOnly = charset != null && charSetImpliesAscii(charset);
    StringBuilder sb = new StringBuilder(rawContent.length() + 16);

    for (int i = 0; i < rawContent.length(); i++) {
      char ch = rawContent.charAt(i);
      if (ch == '\'')
        sb.append("&#39;");
      else if (ch == '&')
        sb.append("&amp;");
      else if (ch == '"')
        sb.append("&quot;");
      else if (ch == '<')
        sb.append("&lt;");
      else if (ch == '>')
        sb.append("&gt;");
      else if (ch > '~' && asciiOnly) {
        // Escape by code point: a surrogate pair must become ONE reference,
        // since references to individual surrogates are not legal XML.
        int cp = rawContent.codePointAt(i);
        if (Character.isSupplementaryCodePoint(cp))
          i++; // consumed the low surrogate as well
        sb.append("&#x").append(Integer.toHexString(cp).toUpperCase()).append(';');
      } else if (isNoLines) {
        if (ch == '\r')
          sb.append("&#xA;");
        else if (ch != '\n')
          sb.append(ch);
      }
      else
        sb.append(ch);
    }
    return sb.toString();
  }

  /**
   * Returns the first child of {@code e} that is an element.
   *
   * @param e the parent; may be null
   * @return the first element child, or null if {@code e} is null or has none
   */
  public static Element getFirstChild(Element e) {
    if (e == null)
      return null;
    Node n = e.getFirstChild();
    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
      n = n.getNextSibling();
    return (Element) n;
  }

  /** True when {@code name} equals the element's local name or its node name. */
  private static boolean nameMatches(Element c, String name) {
    return name.equals(c.getLocalName()) || name.equals(c.getNodeName());
  }

  /**
   * Finds the first element child whose local name or node name equals {@code name}.
   *
   * @param e the parent; may be null
   * @param name the name to look for; must not be null
   * @return the matching child, or null if there is none
   */
  public static Element getNamedChild(Element e, String name) {
    Element c = getFirstChild(e);
    while (c != null && !nameMatches(c, name))
      c = getNextSibling(c);
    return c;
  }

  /**
   * Finds the first element child that matches {@code name} (local name or
   * node name) and whose attribute {@code nname} has the value {@code nvalue}.
   *
   * @param e the parent; may be null
   * @param name the element name to look for
   * @param nname the attribute name to test
   * @param nvalue the required attribute value
   * @return the matching child, or null if there is none
   */
  public static Element getNamedChildByAttribute(Element e, String name, String nname, String nvalue) {
    Element c = getFirstChild(e);
    while (c != null && !(nameMatches(c, name) && nvalue.equals(c.getAttribute(nname))))
      c = getNextSibling(c);
    return c;
  }

  /**
   * Returns the next sibling of {@code e} that is an element.
   *
   * @param e the starting element; may be null
   * @return the next element sibling, or null if {@code e} is null or has none
   */
  public static Element getNextSibling(Element e) {
    if (e == null)
      return null;
    Node n = e.getNextSibling();
    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
      n = n.getNextSibling();
    return (Element) n;
  }

  /**
   * Appends to {@code set} every element child of {@code e} whose local name
   * or node name equals {@code name}, in document order.
   *
   * @param e the parent; may be null
   * @param name the name to look for
   * @param set the list that receives the matches
   */
  public static void getNamedChildren(Element e, String name, List<Element> set) {
    Element c = getFirstChild(e);
    while (c != null) {
      if (nameMatches(c, name))
        set.add(c);
      c = getNextSibling(c);
    }
  }

  /**
   * Returns every element child of {@code e} whose local name or node name
   * equals {@code name}, in document order.
   *
   * @param e the parent; may be null
   * @param name the name to look for
   * @return a new, possibly empty, list of matches
   */
  public static List<Element> getNamedChildren(Element e, String name) {
    List<Element> res = new ArrayList<Element>();
    getNamedChildren(e, name, res);
    return res;
  }

  /**
   * Flattens the content under {@code r} to plain text. Text nodes are
   * trimmed, runs of whitespace-only text collapse to a single space placed
   * before the next piece of content, and a CR/LF is emitted after each
   * {@code br} or {@code p} child element.
   *
   * @param r the element whose content is flattened
   * @return the plain text
   */
  public static String htmlToXmlEscapedPlainText(Element r) {
    StringBuilder s = new StringBuilder();
    Node n = r.getFirstChild();
    boolean ws = false;
    while (n != null) {
      if (n.getNodeType() == Node.TEXT_NODE) {
        String t = n.getTextContent().trim();
        if (Utilities.noString(t))
          ws = true;
        else {
          if (ws)
            s.append(" ");
          ws = false;
          s.append(t);
        }
      }
      if (n.getNodeType() == Node.ELEMENT_NODE) {
        if (ws)
          s.append(" ");
        ws = false;
        s.append(htmlToXmlEscapedPlainText((Element) n));
        // Test the child just rendered; testing the parent meant <br/> (which
        // has no children) could never produce a line break.
        String childName = n.getNodeName();
        if (childName.equals("br") || childName.equals("p"))
          s.append("\r\n");
      }
      n = n.getNextSibling();
    }
    return s.toString();
  }

  /**
   * Wraps {@code definition} in a {@code div}, parses it as XML and flattens
   * it with {@link #htmlToXmlEscapedPlainText(Element)}.
   *
   * @param definition an XML-well-formed markup fragment
   * @return the plain text
   * @throws ParserConfigurationException if a parser cannot be created
   * @throws SAXException if the wrapped fragment is not well-formed
   * @throws IOException if reading the content fails
   */
  public static String htmlToXmlEscapedPlainText(String definition) throws ParserConfigurationException, SAXException, IOException {
    return htmlToXmlEscapedPlainText(parseToDom("<div>"+definition+"</div>").getDocumentElement());
  }

  /**
   * Serialises an element using the DOM Load/Save serializer of its owner document.
   *
   * @param el the element; may be null
   * @return the serialised text, or an empty string if {@code el} is null
   */
  public static String elementToString(Element el) {
    if (el == null)
      return "";
    Document document = el.getOwnerDocument();
    DOMImplementationLS domImplLS = (DOMImplementationLS) document.getImplementation();
    LSSerializer serializer = domImplLS.createLSSerializer();
    return serializer.writeToString(el);
  }

  /**
   * Returns the {@code value} attribute of the first child named {@code name}.
   *
   * @param element the parent; may be null
   * @param name the child name
   * @return the attribute value, or null if there is no such child
   */
  public static String getNamedChildValue(Element element, String name) {
    Element e = getNamedChild(element, name);
    return e == null ? null : e.getAttribute("value");
  }

  /**
   * Sets the {@code value} attribute of the first child named {@code name}.
   *
   * @param element the parent
   * @param name the child name
   * @param value the new attribute value
   * @throws FHIRException if there is no child named {@code name}
   */
  public static void setNamedChildValue(Element element, String name, String value) throws FHIRException {
    Element e = getNamedChild(element, name);
    if (e == null)
      throw new FHIRException("unable to find element "+name);
    e.setAttribute("value", value);
  }


  /**
   * Collects element children matching {@code name}. If {@code name} ends in
   * {@code [x]}, any child whose name starts with the part before {@code [x]}
   * also matches. A child's name is its local name when available, otherwise
   * its node name.
   *
   * @param focus the parent; may be null
   * @param name the name or {@code [x]} wildcard
   * @param children the list that receives the matches
   */
  public static void getNamedChildrenWithWildcard(Element focus, String name, List<Element> children) {
    Element c = getFirstChild(focus);
    while (c != null) {
      String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
      if (name.equals(n) || (name.endsWith("[x]") && n.startsWith(name.substring(0, name.length()-3))))
        children.add(c);
      c = getNextSibling(c);
    }
  }

  /**
   * Collects element children whose name is {@code name}, or is {@code name}
   * followed by one of {@code typeTails}. A child named {@code responseCode}
   * only matches when it equals {@code name} exactly.
   *
   * @param focus the parent; may be null
   * @param name the base name
   * @param children the list that receives the matches
   * @param typeTails the accepted suffixes
   */
  public static void getNamedChildrenWithTails(Element focus, String name, List<Element> children, Set<String> typeTails) {
    Element c = getFirstChild(focus);
    while (c != null) {
      String n = c.getLocalName() != null ? c.getLocalName() : c.getNodeName(); 
      if (n.equals(name) || (!n.equals("responseCode") && (n.startsWith(name) && typeTails.contains(n.substring(name.length())))))
        children.add(c);
      c = getNextSibling(c);
    }
  }

  /**
   * Reports whether {@code e} has an element child named {@code name}.
   *
   * @param e the parent; may be null
   * @param name the child name
   * @return true if {@link #getNamedChild(Element, String)} finds a match
   */
  public static boolean hasNamedChild(Element e, String name) {
    return getNamedChild(e, name) != null;
  }

  /**
   * Creates a DOM builder with the requested namespace awareness.
   *
   * NOTE(review): the factory is not hardened against DTDs / external
   * entities (XXE). If any caller parses untrusted input, consider disabling
   * them here; not done in this change because it could reject documents
   * that callers currently parse successfully.
   */
  private static DocumentBuilder newDocumentBuilder(boolean ns) throws ParserConfigurationException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(ns);
    return factory.newDocumentBuilder();
  }

  /**
   * Parses XML text into a DOM document, without namespace awareness.
   *
   * @param content the XML text
   * @return the parsed document
   * @throws ParserConfigurationException if a parser cannot be created
   * @throws SAXException if the content is not well-formed
   * @throws IOException if reading the content fails
   */
  public static Document parseToDom(String content) throws ParserConfigurationException, SAXException, IOException  {
    DocumentBuilder builder = newDocumentBuilder(false);
    // Encode explicitly as UTF-8 (the XML default when no encoding is declared)
    // rather than with the platform default charset.
    return builder.parse(new ByteArrayInputStream(content.getBytes("UTF-8")));
  }

  /**
   * Parses XML bytes into a DOM document, without namespace awareness.
   *
   * @param content the XML bytes
   * @return the parsed document
   * @throws ParserConfigurationException if a parser cannot be created
   * @throws SAXException if the content is not well-formed
   * @throws IOException if reading the content fails
   */
  public static Document parseToDom(byte[] content) throws ParserConfigurationException, SAXException, IOException  {
    return parseToDom(content, false);
  }

  /**
   * Parses XML bytes into a DOM document.
   *
   * @param content the XML bytes
   * @param ns whether the parser should be namespace aware
   * @return the parsed document
   * @throws ParserConfigurationException if a parser cannot be created
   * @throws SAXException if the content is not well-formed
   * @throws IOException if reading the content fails
   */
  public static Document parseToDom(byte[] content, boolean ns) throws ParserConfigurationException, SAXException, IOException  {
    DocumentBuilder builder = newDocumentBuilder(ns);
    return builder.parse(new ByteArrayInputStream(content));
  }

  /**
   * Parses an XML file into a DOM document, without namespace awareness.
   *
   * @param filename path of the file to read
   * @return the parsed document
   * @throws ParserConfigurationException if a parser cannot be created
   * @throws SAXException if the content is not well-formed
   * @throws IOException if the file cannot be opened or read
   */
  public static Document parseFileToDom(String filename) throws ParserConfigurationException, SAXException, IOException  {
    DocumentBuilder builder = newDocumentBuilder(false);
    InputStream in = new FileInputStream(filename);
    try {
      return builder.parse(in);
    } finally {
      // the stream was previously never closed
      in.close();
    }
  }

  /**
   * Returns the last child of {@code e} that is an element.
   *
   * @param e the parent; may be null
   * @return the last element child, or null if {@code e} is null or has none
   */
  public static Element getLastChild(Element e) {
    if (e == null)
      return null;
    Node n = e.getLastChild();
    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
      n = n.getPreviousSibling();
    return (Element) n;
  }

  /**
   * Returns the previous sibling of {@code e} that is an element.
   *
   * @param e the starting element; may be null
   * @return the previous element sibling, or null if {@code e} is null or has none
   */
  public static Element getPrevSibling(Element e) {
    if (e == null)
      return null;
    Node n = e.getPreviousSibling();
    while (n != null && n.getNodeType() != Node.ELEMENT_NODE)
      n = n.getPreviousSibling();
    return (Element) n;
  }

  /**
   * Returns attribute {@code aname} of the first child named {@code name}.
   *
   * @param element the parent; may be null
   * @param name the child name
   * @param aname the attribute name
   * @return the attribute value, or null if there is no such child
   */
  public static String getNamedChildAttribute(Element element, String name, String aname) {
    Element e = getNamedChild(element, name);
    return e == null ? null : e.getAttribute(aname);
  }

  /**
   * Writes a document to a file using a default (identity) transformer.
   *
   * @param doc the document to write
   * @param filename path of the file to write
   * @throws TransformerException if the transformation fails
   */
  public static void writeDomToFile(Document doc, String filename) throws TransformerException {
    TransformerFactory transformerFactory = TransformerFactory.newInstance();
    Transformer transformer = transformerFactory.newTransformer();
    DOMSource source = new DOMSource(doc);
    StreamResult streamResult =  new StreamResult(new File(filename));
    transformer.transform(source, streamResult);    
  }

  /**
   * Returns the value of the {@code type} attribute in the XML Schema
   * instance namespace.
   *
   * @param element the element to inspect
   * @return the attribute text, or null if the attribute is absent
   */
  public static String getXsiType(org.w3c.dom.Element element) {
    Attr a = element.getAttributeNodeNS("http://www.w3.org/2001/XMLSchema-instance", "type");
    return (a == null ? null : a.getTextContent());
  }

  /**
   * Concatenates the text of the direct text-node children of {@code node}
   * (descendant text is not included) and trims the result.
   *
   * @param node the element to read
   * @return the trimmed direct text
   */
  public static String getDirectText(org.w3c.dom.Element node) {
    Node n = node.getFirstChild();
    StringBuilder b = new StringBuilder();
    while (n != null) {
      if (n.getNodeType() == Node.TEXT_NODE) 
        b.append(n.getTextContent());
      n = n.getNextSibling();
    }
    return b.toString().trim();
  }

  /**
   * Removes every element child of {@code e} named {@code name}.
   *
   * @param e the parent
   * @param name the child name
   */
  public static void deleteByName(Element e, String name) {
    List<Element> matches = getNamedChildren(e, name);
    for (Element m : matches)
      e.removeChild(m);    
  }

  /**
   * Removes a namespaced attribute if it is present.
   *
   * @param e the element to modify
   * @param namespaceURI the attribute namespace
   * @param localName the attribute local name
   */
  public static void deleteAttr(Element e, String namespaceURI, String localName) {
    if (e.hasAttributeNS(namespaceURI, localName))
      e.removeAttributeNS(namespaceURI, localName);
  }

  /**
   * Copies all child nodes (of every node type) of {@code ed} into an array.
   *
   * @param ed the parent
   * @return the children, in document order
   */
  public static Node[] children(Element ed) {
    Node[] res = new Node[ed.getChildNodes().getLength()];
    for (int i = 0; i < ed.getChildNodes().getLength(); i++)
      res[i] = ed.getChildNodes().item(i);
    return res;
  }

  /**
   * Inserts a new element as the first child of {@code element}, preceded by
   * a text node holding a newline and indentation.
   *
   * @param doc the document used to create nodes
   * @param element the parent
   * @param name the qualified name of the new element
   * @param namespace the namespace of the new element
   * @param indent the width passed to {@code Utilities.padLeft} for the indentation text
   * @return the new element
   */
  public static Element insertChild(Document doc, Element element, String name, String namespace, int indent) {
    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
    Element child = doc.createElementNS(namespace, name);
    element.insertBefore(child, element.getFirstChild());
    element.insertBefore(node, element.getFirstChild());
    return child;
  }

  /**
   * Inserts a new element before {@code before}, or appends it when
   * {@code before} is null. When appending, the newline/indent text node goes
   * in front of the new element; otherwise it goes between the new element
   * and {@code before}.
   *
   * @param doc the document used to create nodes
   * @param element the parent
   * @param name the qualified name of the new element
   * @param namespace the namespace of the new element
   * @param before the existing child to insert in front of, or null to append
   * @param indent the width passed to {@code Utilities.padLeft} for the indentation text
   * @return the new element
   */
  public static Element insertChild(Document doc, Element element, String name, String namespace, Node before, int indent) {
    if (before == null) {
      Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
      element.insertBefore(node, before);
    }
    Element child = doc.createElementNS(namespace, name);
    element.insertBefore(child, before);
    if (before != null) {
      Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
      element.insertBefore(node, before);
    }
    return child;
  }

  /**
   * Appends a newline/indent text node and then a new element whose
   * {@code value} attribute is set to {@code text}.
   *
   * @param doc the document used to create nodes
   * @param element the parent
   * @param name the qualified name of the new element
   * @param namespace the namespace of the new element
   * @param text the value attribute content
   * @param indent the width passed to {@code Utilities.padLeft} for the indentation text
   */
  public static void addTextTag(Document doc, Element element, String name, String namespace, String text, int indent) {
    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
    element.appendChild(node);
    Element child = doc.createElementNS(namespace, name);
    element.appendChild(child);
    child.setAttribute("value", text);    
  }

  /**
   * Serialises {@code root} to a stream using a default (identity) transformer.
   * The stream is not closed by this method.
   *
   * @param root the element to write
   * @param stream the destination
   * @throws TransformerException if the transformation fails
   */
  public static void saveToFile(Element root, OutputStream stream) throws TransformerException {
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    Result output = new StreamResult(stream);
    Source input = new DOMSource(root);

    transformer.transform(input, output);
  }

  /**
   * Appends a text node holding a newline and indentation to {@code element}.
   *
   * @param doc the document used to create the node
   * @param element the parent
   * @param indent the width passed to {@code Utilities.padLeft} for the indentation text
   */
  public static void spacer(Document doc, Element element, int indent) {
    Node node = doc.createTextNode("\n"+Utilities.padLeft("", ' ', indent));
    element.appendChild(node);
  }

  /**
   * Returns the text content of the first child named {@code name}.
   *
   * @param element the parent; may be null
   * @param name the child name
   * @return the text content, or null if there is no such child
   */
  public static String getNamedChildText(Element element, String name) {
    Element e = getNamedChild(element, name);
    return e == null ? null : e.getTextContent();
  }


}