001/* 002Copyright (c) 2011+, HL7, Inc 003All rights reserved. 004 005Redistribution and use in source and binary forms, with or without modification, 006are permitted provided that the following conditions are met: 007 008 * Redistributions of source code must retain the above copyright notice, this 009 list of conditions and the following disclaimer. 010 * Redistributions in binary form must reproduce the above copyright notice, 011 this list of conditions and the following disclaimer in the documentation 012 and/or other materials provided with the distribution. 013 * Neither the name of HL7 nor the names of its contributors may be used to 014 endorse or promote products derived from this software without specific 015 prior written permission. 016 017THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 018ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 019WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 020IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 021INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 022NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 023PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 024WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 025ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 026POSSIBILITY OF SUCH DAMAGE. 027 028*/ 029package org.hl7.fhir.utilities.xls; 030 031import java.io.InputStream; 032import java.util.ArrayList; 033import java.util.HashMap; 034import java.util.List; 035import java.util.Map; 036 037import javax.xml.parsers.DocumentBuilder; 038import javax.xml.parsers.DocumentBuilderFactory; 039 040import org.hl7.fhir.exceptions.FHIRException; 041import org.hl7.fhir.utilities.Utilities; 042import org.hl7.fhir.utilities.xml.XMLUtil; 043import org.w3c.dom.DOMException; 044import org.w3c.dom.Document; 045import org.w3c.dom.Element; 046import org.w3c.dom.Node; 047import org.w3c.dom.NodeList; 048 049public class XLSXmlParser { 050 051 private static final String XLS_NS = "urn:schemas-microsoft-com:office:spreadsheet"; 052 053 public class Row extends ArrayList<String> { private static final long serialVersionUID = 1L; } 054 055 public class Sheet { 056 public String title; 057 public Row columns; 058 public List<Row> rows = new ArrayList<Row>(); 059 060 public boolean hasColumn(String column) { 061 for (int i = 0; i < columns.size(); i++) { 062 if (columns.get(i).equalsIgnoreCase(column)) 063 return true; 064 } 065 return false; 066 } 067 068 public boolean hasColumn(int row, String column) { 069 String s = getColumn(row, column); 070 return s != null && !s.equals(""); 071 } 072 073 public String getColumn(int row, String column) { 074 int c = -1; 075 String s = ""; 076 for (int i = 0; i < columns.size(); i++) { 077 s = s + ","+columns.get(i); 078 if (columns.get(i).equalsIgnoreCase(column)) 079 c = i; 080 } 081 if (c == -1) 082 return ""; // throw new FHIRException("unable to find column "+column+" in "+s.substring(1)); 083 else if (rows.get(row).size() <= c) 084 return ""; 085 else { 086 s = rows.get(row).get(c); 087 return s == null ? "" : s.trim().replace("\t", " ").replace("\u00A0", " "); 088 } 089 } 090 091 public List<String> getColumnNamesBySuffix(String suffix) { 092 List<String> names = new ArrayList<String>(); 093 for (int i = 0; i < columns.size(); i++) { 094 if (columns.get(i).endsWith(suffix)) 095 names.add(columns.get(i)); 096 } 097 return names; 098 } 099 100 public String getByColumnPrefix(int row, String column) { 101 int c = -1; 102 String s = ""; 103 for (int i = 0; i < columns.size(); i++) { 104 s = s + ","+columns.get(i); 105 if (columns.get(i).startsWith(column)) 106 c = i; 107 } 108 if (c == -1) 109 return ""; // throw new FHIRException("unable to find column "+column+" in "+s.substring(1)); 110 else if (rows.get(row).size() <= c) 111 return ""; 112 else 113 return rows.get(row).get(c).trim(); 114 } 115 116 public List<Row> getRows() { 117 return rows; 118 } 119 120 public int getIntColumn(int row, String column) { 121 String value = getColumn(row, column); 122 if (Utilities.noString(value)) 123 return 0; 124 else 125 return Integer.parseInt(value); 126 } 127 128 public String getNonEmptyColumn(int row, String column) throws FHIRException { 129 String value = getColumn(row, column); 130 if (Utilities.noString(value)) 131 throw new FHIRException("The colummn "+column+" cannot be empty"); 132 return value; 133 } 134 135 public boolean hasColumnContent(String col) { 136 int i = columns.indexOf(col); 137 if (i == -1) 138 return false; 139 for (Row r : rows) { 140 if (r.size() > i && !Utilities.noString(r.get(i))) 141 return true; 142 } 143 return false; 144 } 145 146 147 } 148 149 private Map<String, Sheet> sheets; 150 private Document xml; 151 private String name; 152 153 public XLSXmlParser(InputStream in, String name) throws FHIRException { 154 this.name = name; 155 try { 156 xml = parseXml(in); 157 sheets = new HashMap<String, Sheet>(); 158 readXml(); 159 } catch (Exception e) { 160 throw new FHIRException("unable to load "+name+": "+e.getMessage(), e); 161 } 162 } 163 164 private Document parseXml(InputStream in) throws FHIRException { 165 try { 166 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 167 factory.setNamespaceAware(true); 168 DocumentBuilder builder = factory.newDocumentBuilder(); 169 return builder.parse(in); 170 } catch (Exception e) { 171 throw new FHIRException("Error processing "+name+": "+e.getMessage(), e); 172 } 173 } 174 175 private void readXml() throws FHIRException { 176 Element root = xml.getDocumentElement(); 177 check(root.getNamespaceURI().equals(XLS_NS), "Spreadsheet namespace incorrect"); 178 check(root.getNodeName().equals("Workbook"), "Spreadsheet element name incorrect"); 179 Node node = root.getFirstChild(); 180 while (node != null) { 181 if (node.getNodeName().equals("Worksheet")) 182 processWorksheet((Element)node); 183 node = node.getNextSibling(); 184 } 185 } 186 187 private Integer rowIndex; 188 private void processWorksheet(Element node) throws FHIRException { 189 Sheet sheet = new Sheet(); 190 sheet.title = node.getAttributeNS(XLS_NS, "Name"); 191 sheets.put(node.getAttributeNS(XLS_NS, "Name"), sheet); 192 NodeList table = node.getElementsByTagNameNS(XLS_NS, "Table"); 193 check(table.getLength() == 1, "multiple table elements"); 194 NodeList rows = ((Element)table.item(0)).getElementsByTagNameNS(XLS_NS, "Row"); 195 if (rows.getLength() == 0) 196 return; 197 rowIndex = 1; 198 sheet.columns = readRow((Element) rows.item(0)); 199 for (int i = 1; i < rows.getLength(); i++) { 200 rowIndex++; 201 sheet.rows.add(readRow((Element) rows.item(i))); 202 } 203 204 //Remove empty rows at the end of the sheet 205 while( sheet.rows.size() != 0 && isEmptyRow(sheet.rows.get(sheet.rows.size()-1) ) ) 206 sheet.rows.remove(sheet.rows.size()-1); 207 } 208 209 210 private boolean isEmptyRow(Row w) 211 { 212 for( int col=0; col<w.size(); col++ ) 213 if( !w.get(col).trim().isEmpty() ) return false; 214 215 return true; 216 } 217 218 private Row readRow(Element row) throws DOMException, FHIRException { 219 Row res = new Row(); 220 int ndx = 1; 221 NodeList cells = row.getElementsByTagNameNS(XLS_NS, "Cell"); 222 for (int i = 0; i < cells.getLength(); i++) { 223 Element cell = (Element) cells.item(i); 224 if (cell.hasAttributeNS(XLS_NS, "Index")) { 225 int index = Integer.parseInt(cell.getAttributeNS(XLS_NS, "Index")); 226 while (ndx < index) { 227 res.add(""); 228 ndx++; 229 } 230 } 231 res.add(readData(cell, ndx, res.size() > 0 ? res.get(0) : "?")); 232 ndx++; 233 } 234 return res; 235 } 236 237 private String readData(Element cell, int col, String s) throws DOMException, FHIRException { 238 List<Element> data = new ArrayList<Element>(); 239 XMLUtil.getNamedChildren(cell, "Data", data); // cell.getElementsByTagNameNS(XLS_NS, "Data"); 240 if (data.size() == 0) 241 return ""; 242 check(data.size() == 1, "Multiple Data encountered ("+Integer.toString(data.size())+" @ col "+Integer.toString(col)+" - "+cell.getTextContent()+" ("+s+"))"); 243 Element d = data.get(0); 244 String type = d.getAttributeNS(XLS_NS, "Type"); 245 if ("Boolean".equals(type)) { 246 if (d.getTextContent().equals("1")) 247 return "True"; 248 else 249 return "False"; 250 } else if ("String".equals(type)) { 251 return d.getTextContent(); 252 } else if ("Number".equals(type)) { 253 return d.getTextContent(); 254 } else if ("DateTime".equals(type)) { 255 return d.getTextContent(); 256 } else if ("Error".equals(type)) { 257 return null; 258 } else 259 throw new FHIRException("Cell Type is not known ("+d.getAttributeNodeNS(XLS_NS, "Type")+") in "+getLocation()); 260 } 261 262 private void check(boolean test, String message) throws FHIRException { 263 if (!test) 264 throw new FHIRException(message+" in "+getLocation()); 265 } 266 267 private String getLocation() { 268 return name+", row "+rowIndex.toString(); 269 } 270 271 public Map<String, Sheet> getSheets() { 272 return sheets; 273 } 274 275 276}