/*
Copyright (c) 2011+, HL7, Inc
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.
 * Neither the name of HL7 nor the names of its contributors may be used to
   endorse or promote products derived from this software without specific
   prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

*/
package org.hl7.fhir.utilities;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.hl7.fhir.exceptions.FHIRException;
import org.hl7.fhir.utilities.xml.XMLUtil;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Loads an XML spreadsheet workbook (root element {@code Workbook} in the
 * {@code urn:schemas-microsoft-com:office:spreadsheet} namespace) into memory.
 *
 * <p>Every {@code Worksheet} child of the root becomes a {@link Sheet}, keyed by
 * the worksheet's {@code Name} attribute. The first {@code Row} of a worksheet
 * supplies the column headers; the remaining rows are the data rows. The whole
 * document is read once, in the constructor.
 */
public class XLSXmlParser {

  private static final String XLS_NS = "urn:schemas-microsoft-com:office:spreadsheet";

  /**
   * One spreadsheet row: the cell values in column order. A cell whose data
   * type is {@code Error} is stored as {@code null}; skipped cells are stored
   * as empty strings.
   */
  public class Row extends ArrayList<String> { private static final long serialVersionUID = 1L; }

  /**
   * One worksheet: its title, its header row and its data rows.
   */
  public class Sheet {
    /** Value of the worksheet's {@code Name} attribute. */
    public String title;
    /** Header row; stays {@code null} when the worksheet contains no rows at all. */
    public Row columns;
    /** Data rows (everything after the header row), trailing empty rows removed. */
    public List<Row> rows = new ArrayList<Row>();

    /**
     * Tests whether a header with the given name exists.
     *
     * @param column header name, compared case-insensitively
     * @return true if at least one header matches
     */
    public boolean hasColumn(String column) {
      return lastColumnIndex(column, false) != -1;
    }

    /**
     * Tests whether the named column has a non-empty value in the given row.
     *
     * @param row zero-based index into {@link #rows}
     * @param column header name, compared case-insensitively
     * @return true if the trimmed cell value is not empty
     */
    public boolean hasColumn(int row, String column) {
      return !getColumn(row, column).isEmpty();
    }

    /**
     * Returns the trimmed value of the named column in the given row.
     *
     * @param row zero-based index into {@link #rows}
     * @param column header name, compared case-insensitively; when several
     *        headers match, the last one is used
     * @return the trimmed value, or "" when the column is unknown, the row is
     *         too short to contain it, or the cell holds no value
     */
    public String getColumn(int row, String column) {
      return cellAt(row, lastColumnIndex(column, false));
    }

    /**
     * Lists the header names that end with the given suffix (case-sensitive).
     *
     * @param suffix suffix to look for
     * @return the matching header names in column order; never null
     */
    public List<String> getColumnNamesBySuffix(String suffix) {
      List<String> names = new ArrayList<String>();
      if (columns == null) {
        return names;
      }
      for (String header : columns) {
        // a header cell can be null (Error cell), so guard before matching
        if (header != null && header.endsWith(suffix)) {
          names.add(header);
        }
      }
      return names;
    }

    /**
     * Returns the trimmed value of the column whose header starts with the
     * given prefix (case-sensitive).
     *
     * @param row zero-based index into {@link #rows}
     * @param column header prefix; when several headers match, the last one is used
     * @return the trimmed value, or "" when no header matches, the row is too
     *         short to contain the column, or the cell holds no value
     */
    public String getByColumnPrefix(int row, String column) {
      return cellAt(row, lastColumnIndex(column, true));
    }

    public List<Row> getRows() {
      return rows;
    }

    /**
     * Reads the named column as an integer.
     *
     * @param row zero-based index into {@link #rows}
     * @param column header name, compared case-insensitively
     * @return the parsed value, or 0 when the cell is empty
     * @throws NumberFormatException if the cell is not empty and not an integer
     */
    public int getIntColumn(int row, String column) {
      String value = getColumn(row, column);
      if (Utilities.noString(value)) {
        return 0;
      }
      return Integer.parseInt(value);
    }

    /**
     * Reads the named column and insists that it has a value.
     *
     * @param row zero-based index into {@link #rows}
     * @param column header name, compared case-insensitively
     * @return the trimmed, non-empty value
     * @throws FHIRException if the cell is empty or the column is unknown
     */
    public String getNonEmptyColumn(int row, String column) throws FHIRException {
      String value = getColumn(row, column);
      if (Utilities.noString(value)) {
        throw new FHIRException("The colummn "+column+" cannot be empty");
      }
      return value;
    }

    /**
     * Tests whether any data row has a value in the given column.
     *
     * @param col header name; unlike the other lookups this one is an exact,
     *        case-sensitive match on the first such header
     * @return true if at least one row holds a value there
     */
    public boolean hasColumnContent(String col) {
      if (columns == null) {
        return false;
      }
      int i = columns.indexOf(col);
      if (i == -1) {
        return false;
      }
      for (Row r : rows) {
        if (r.size() > i && !Utilities.noString(r.get(i))) {
          return true;
        }
      }
      return false;
    }

    /**
     * Finds the index of the last header matching {@code column}, either
     * case-insensitively by full name or case-sensitively by prefix.
     * Returns -1 when nothing matches or the sheet has no header row.
     */
    private int lastColumnIndex(String column, boolean prefixMatch) {
      if (columns == null) {
        return -1;
      }
      // scan backwards so the first hit is the last matching column
      for (int i = columns.size() - 1; i >= 0; i--) {
        String header = columns.get(i);
        if (header == null) {
          continue; // Error cell in the header row
        }
        if (prefixMatch ? header.startsWith(column) : header.equalsIgnoreCase(column)) {
          return i;
        }
      }
      return -1;
    }

    /**
     * Returns the trimmed content of cell {@code c} in row {@code row}, or ""
     * when {@code c} is -1, the row is too short, or the cell is null.
     */
    private String cellAt(int row, int c) {
      if (c == -1) {
        return ""; // unknown column: answered before the row is even looked up
      }
      Row cells = rows.get(row);
      if (cells.size() <= c) {
        return "";
      }
      String value = cells.get(c);
      return value == null ? "" : value.trim();
    }

  }

  private Map<String, Sheet> sheets;
  private Document xml;
  private String name;

  /**
   * Parses the workbook supplied on {@code in}. The stream is not closed here.
   *
   * @param in the XML spreadsheet content
   * @param name label for the source, used only in error messages
   * @throws FHIRException if the content cannot be parsed or is not a valid workbook
   */
  public XLSXmlParser(InputStream in, String name) throws FHIRException {
    this.name = name;
    try {
      xml = parseXml(in);
      sheets = new HashMap<String, Sheet>();
      readXml();
    } catch (Exception e) {
      throw new FHIRException("unable to load "+name+": "+e.getMessage(), e);
    }
  }

  /**
   * Parses the stream into a namespace-aware DOM document.
   *
   * <p>External entities and external DTD loading are switched off so that a
   * supplied file cannot make the parser read local files or remote URLs
   * (XXE). A parser that does not support these features fails here rather
   * than parsing insecurely.
   */
  private Document parseXml(InputStream in) throws FHIRException {
    try {
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      factory.setNamespaceAware(true);
      factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
      factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
      factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
      DocumentBuilder builder = factory.newDocumentBuilder();
      return builder.parse(in);
    } catch (Exception e) {
      throw new FHIRException("Error processing "+name+": "+e.getMessage(), e);
    }
  }

  /**
   * Validates the root element and processes every {@code Worksheet} child.
   */
  private void readXml() throws FHIRException {
    Element root = xml.getDocumentElement();
    // constant-first equals: the namespace URI is null for an unqualified root
    check(XLS_NS.equals(root.getNamespaceURI()), "Spreadsheet namespace incorrect");
    check(root.getNodeName().equals("Workbook"), "Spreadsheet element name incorrect");
    for (Node node = root.getFirstChild(); node != null; node = node.getNextSibling()) {
      if (node.getNodeName().equals("Worksheet")) {
        processWorksheet((Element) node);
      }
    }
  }

  /** 1-based number of the row being read, or null when no row is being read. */
  private Integer rowIndex;

  /**
   * Builds a {@link Sheet} from one {@code Worksheet} element and registers it
   * under the worksheet's name. Trailing empty data rows are dropped.
   */
  private void processWorksheet(Element node) throws FHIRException {
    // forget the position reached in the previous worksheet so that errors
    // raised before the first row do not report a stale row number
    rowIndex = null;
    Sheet sheet = new Sheet();
    sheet.title = node.getAttributeNS(XLS_NS, "Name");
    sheets.put(sheet.title, sheet);
    NodeList table = node.getElementsByTagNameNS(XLS_NS, "Table");
    // note: this also fires when the worksheet has no Table element at all
    check(table.getLength() == 1, "multiple table elements");
    NodeList rows = ((Element) table.item(0)).getElementsByTagNameNS(XLS_NS, "Row");
    int rowCount = rows.getLength();
    if (rowCount == 0) {
      return;
    }
    rowIndex = 1;
    sheet.columns = readRow((Element) rows.item(0));
    for (int i = 1; i < rowCount; i++) {
      rowIndex++;
      sheet.rows.add(readRow((Element) rows.item(i)));
    }

    // Remove empty rows at the end of the sheet
    while (!sheet.rows.isEmpty() && isEmptyRow(sheet.rows.get(sheet.rows.size() - 1))) {
      sheet.rows.remove(sheet.rows.size() - 1);
    }
  }

  /**
   * A row is empty when every cell is null (Error cell) or blank after trimming.
   */
  private boolean isEmptyRow(Row w) {
    for (String cell : w) {
      if (cell != null && !cell.trim().isEmpty()) {
        return false;
      }
    }
    return true;
  }

  /**
   * Reads the cells of one {@code Row} element. A cell carrying an
   * {@code Index} attribute (1-based column position) is preceded by as many
   * empty strings as needed to place it in that column.
   */
  private Row readRow(Element row) throws DOMException, FHIRException {
    Row res = new Row();
    int ndx = 1; // 1-based column the next cell will occupy
    NodeList cells = row.getElementsByTagNameNS(XLS_NS, "Cell");
    for (int i = 0; i < cells.getLength(); i++) {
      Element cell = (Element) cells.item(i);
      if (cell.hasAttributeNS(XLS_NS, "Index")) {
        int index = Integer.parseInt(cell.getAttributeNS(XLS_NS, "Index"));
        while (ndx < index) {
          res.add("");
          ndx++;
        }
      }
      // the row's first cell value is passed along purely for error reporting
      res.add(readData(cell, ndx, res.size() > 0 ? res.get(0) : "?"));
      ndx++;
    }
    return res;
  }

  /**
   * Extracts the value of one cell from its {@code Data} child.
   *
   * @param cell the {@code Cell} element
   * @param col 1-based column number, used in error messages
   * @param s value of the first cell of the row, used in error messages
   * @return "" when the cell has no {@code Data}; "True"/"False" for Boolean
   *         data; the text content for String, Number and DateTime data;
   *         {@code null} for Error data
   * @throws FHIRException if the cell has several {@code Data} children or an
   *         unknown data type
   */
  private String readData(Element cell, int col, String s) throws DOMException, FHIRException {
    List<Element> data = new ArrayList<Element>();
    // collects the "Data" children of the cell (exact matching rules are XMLUtil's)
    XMLUtil.getNamedChildren(cell, "Data", data);
    if (data.isEmpty()) {
      return "";
    }
    if (data.size() != 1) {
      // message built only on failure; it walks the cell's whole text content
      check(false, "Multiple Data encountered ("+Integer.toString(data.size())+" @ col "+Integer.toString(col)+" - "+cell.getTextContent()+" ("+s+"))");
    }
    Element d = data.get(0);
    String type = d.getAttributeNS(XLS_NS, "Type");
    if ("Boolean".equals(type)) {
      return "1".equals(d.getTextContent()) ? "True" : "False";
    }
    if ("String".equals(type) || "Number".equals(type) || "DateTime".equals(type)) {
      return d.getTextContent();
    }
    if ("Error".equals(type)) {
      return null;
    }
    throw new FHIRException("Cell Type is not known ("+d.getAttributeNodeNS(XLS_NS, "Type")+") in "+getLocation());
  }

  /**
   * Throws a {@link FHIRException} carrying {@code message} and the current
   * location when {@code test} is false.
   */
  private void check(boolean test, String message) throws FHIRException {
    if (!test) {
      throw new FHIRException(message+" in "+getLocation());
    }
  }

  /**
   * Describes where parsing currently is: the source name, plus the row number
   * once a row is being read.
   */
  private String getLocation() {
    if (rowIndex == null) {
      return name; // no row reached yet, e.g. while validating the root element
    }
    return name+", row "+rowIndex.toString();
  }

  /**
   * @return the parsed worksheets, keyed by worksheet name
   */
  public Map<String, Sheet> getSheets() {
    return sheets;
  }

}