001/*
002Copyright (c) 2011+, HL7, Inc
003All rights reserved.
004
005Redistribution and use in source and binary forms, with or without modification, 
006are permitted provided that the following conditions are met:
007
008 * Redistributions of source code must retain the above copyright notice, this 
009   list of conditions and the following disclaimer.
010 * Redistributions in binary form must reproduce the above copyright notice, 
011   this list of conditions and the following disclaimer in the documentation 
012   and/or other materials provided with the distribution.
013 * Neither the name of HL7 nor the names of its contributors may be used to 
014   endorse or promote products derived from this software without specific 
015   prior written permission.
016
017THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
018ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
019WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
020IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
021INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
022NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
023PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
024WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
025ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
026POSSIBILITY OF SUCH DAMAGE.
027
028*/
029package org.hl7.fhir.utilities.xls;
030
031import java.io.InputStream;
032import java.util.ArrayList;
033import java.util.HashMap;
034import java.util.List;
035import java.util.Map;
036
037import javax.xml.parsers.DocumentBuilder;
038import javax.xml.parsers.DocumentBuilderFactory;
039
040import org.hl7.fhir.exceptions.FHIRException;
041import org.hl7.fhir.utilities.Utilities;
042import org.hl7.fhir.utilities.xml.XMLUtil;
043import org.w3c.dom.DOMException;
044import org.w3c.dom.Document;
045import org.w3c.dom.Element;
046import org.w3c.dom.Node;
047import org.w3c.dom.NodeList;
048
049public class XLSXmlParser {
050
051  private static final String XLS_NS = "urn:schemas-microsoft-com:office:spreadsheet";
052
053  public class Row extends ArrayList<String> {  private static final long serialVersionUID = 1L; }
054  
055  public class Sheet {
056    public String title;
057    public Row columns;
058    public List<Row> rows = new ArrayList<Row>();
059
060    public boolean hasColumn(String column)  {
061      for (int i = 0; i < columns.size(); i++) {
062        if (columns.get(i).equalsIgnoreCase(column))
063          return true;
064      }
065      return false;
066    }
067    
068    public boolean hasColumn(int row, String column)  {
069      String s = getColumn(row, column);
070      return s != null && !s.equals("");     
071    }
072    
073    public String getColumn(int row, String column)  {
074      int c = -1;
075      String s = "";
076      for (int i = 0; i < columns.size(); i++) {
077        s = s + ","+columns.get(i);
078        if (columns.get(i).equalsIgnoreCase(column))
079          c = i;
080      }
081      if (c == -1)
082        return ""; // throw new FHIRException("unable to find column "+column+" in "+s.substring(1));
083      else if (rows.get(row).size() <= c)
084        return "";
085      else {
086        s = rows.get(row).get(c); 
087        return s == null ? "" : s.trim().replace("\t",  "  ").replace("\u00A0", " ");
088      }
089    }
090
091    public List<String> getColumnNamesBySuffix(String suffix)  {
092      List<String> names = new ArrayList<String>();
093      for (int i = 0; i < columns.size(); i++) {
094        if (columns.get(i).endsWith(suffix))
095          names.add(columns.get(i));
096      }
097      return names;
098    }
099
100    public String getByColumnPrefix(int row, String column)  {
101      int c = -1;
102      String s = "";
103      for (int i = 0; i < columns.size(); i++) {
104        s = s + ","+columns.get(i);
105        if (columns.get(i).startsWith(column))
106          c = i;
107      }
108      if (c == -1)
109        return ""; // throw new FHIRException("unable to find column "+column+" in "+s.substring(1));
110      else if (rows.get(row).size() <= c)
111        return "";
112      else
113        return rows.get(row).get(c).trim();
114    }
115
116    public List<Row> getRows() {
117      return rows;
118    }
119
120    public int getIntColumn(int row, String column)  {
121      String value = getColumn(row, column);
122      if (Utilities.noString(value))
123        return 0;
124      else
125        return Integer.parseInt(value);
126    }
127
128    public String getNonEmptyColumn(int row, String column) throws FHIRException  {
129     String value = getColumn(row, column);
130     if (Utilities.noString(value))
131       throw new FHIRException("The colummn "+column+" cannot be empty");
132     return value;
133    }
134
135    public boolean hasColumnContent(String col) {
136      int i = columns.indexOf(col);
137      if (i == -1)
138        return false;
139      for (Row r : rows) {
140        if (r.size() > i && !Utilities.noString(r.get(i)))
141          return true;
142      }
143      return false;
144    }
145    
146    
147  }
148  
149  private Map<String, Sheet> sheets;
150  private Document xml;
151  private String name;
152  
153  public XLSXmlParser(InputStream in, String name) throws FHIRException  {
154    this.name = name;
155    try {
156      xml = parseXml(in);
157      sheets = new HashMap<String, Sheet>();
158      readXml();
159    } catch (Exception e) {
160      throw new FHIRException("unable to load "+name+": "+e.getMessage(), e);
161    }
162  }
163
164  private Document parseXml(InputStream in) throws FHIRException  {
165    try {
166      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
167      factory.setNamespaceAware(true);
168      DocumentBuilder builder = factory.newDocumentBuilder();
169      return builder.parse(in);
170    } catch (Exception e) {
171      throw new FHIRException("Error processing "+name+": "+e.getMessage(), e);
172    }
173  }
174
175  private void readXml() throws FHIRException  {
176    Element root = xml.getDocumentElement();
177    check(root.getNamespaceURI().equals(XLS_NS), "Spreadsheet namespace incorrect");
178    check(root.getNodeName().equals("Workbook"), "Spreadsheet element name incorrect");
179    Node node = root.getFirstChild();
180    while (node != null) {
181      if (node.getNodeName().equals("Worksheet"))
182        processWorksheet((Element)node);
183      node = node.getNextSibling();
184    }
185  }
186  
187  private Integer rowIndex;
188  private void processWorksheet(Element node) throws FHIRException  {
189    Sheet sheet = new Sheet();
190    sheet.title = node.getAttributeNS(XLS_NS, "Name");
191    sheets.put(node.getAttributeNS(XLS_NS, "Name"), sheet);
192    NodeList table = node.getElementsByTagNameNS(XLS_NS, "Table");
193    check(table.getLength() == 1, "multiple table elements");
194    NodeList rows = ((Element)table.item(0)).getElementsByTagNameNS(XLS_NS, "Row");
195    if (rows.getLength() == 0) 
196      return;
197    rowIndex = 1;
198    sheet.columns = readRow((Element) rows.item(0));
199    for (int i = 1; i < rows.getLength(); i++) {
200      rowIndex++;
201      sheet.rows.add(readRow((Element) rows.item(i)));
202    }
203       
204    //Remove empty rows at the end of the sheet
205    while( sheet.rows.size() != 0 && isEmptyRow(sheet.rows.get(sheet.rows.size()-1) ) )
206        sheet.rows.remove(sheet.rows.size()-1);
207  }
208
209  
210  private boolean isEmptyRow(Row w)
211  { 
212          for( int col=0; col<w.size(); col++ )
213                  if( !w.get(col).trim().isEmpty() ) return false;
214          
215          return true;
216  }
217  
218  private Row readRow(Element row) throws DOMException, FHIRException  {
219    Row res = new Row();
220    int ndx = 1;    
221    NodeList cells = row.getElementsByTagNameNS(XLS_NS, "Cell");
222    for (int i = 0; i < cells.getLength(); i++) {
223      Element cell = (Element) cells.item(i);
224      if (cell.hasAttributeNS(XLS_NS, "Index")) {
225        int index = Integer.parseInt(cell.getAttributeNS(XLS_NS, "Index"));
226        while (ndx < index) {
227          res.add("");
228          ndx++;
229        }
230      }
231      res.add(readData(cell, ndx, res.size() > 0 ? res.get(0) : "?"));
232      ndx++;      
233    }
234    return res;
235  }
236
237  private String readData(Element cell, int col, String s) throws DOMException, FHIRException  {
238    List<Element> data = new ArrayList<Element>(); 
239    XMLUtil.getNamedChildren(cell, "Data", data); // cell.getElementsByTagNameNS(XLS_NS, "Data");
240    if (data.size() == 0)
241      return "";
242    check(data.size() == 1, "Multiple Data encountered ("+Integer.toString(data.size())+" @ col "+Integer.toString(col)+" - "+cell.getTextContent()+" ("+s+"))");
243    Element d = data.get(0);
244    String type = d.getAttributeNS(XLS_NS, "Type");
245    if ("Boolean".equals(type)) {
246      if (d.getTextContent().equals("1"))
247        return "True";
248      else
249        return "False";
250    } else if ("String".equals(type)) {
251      return d.getTextContent();
252    } else if ("Number".equals(type)) {
253      return d.getTextContent();
254    } else if ("DateTime".equals(type)) {
255      return d.getTextContent();
256    } else if ("Error".equals(type)) {
257      return null;
258    } else 
259      throw new FHIRException("Cell Type is not known ("+d.getAttributeNodeNS(XLS_NS, "Type")+") in "+getLocation());
260  }
261
262  private void check(boolean test, String message) throws FHIRException  {
263    if (!test)
264      throw new FHIRException(message+" in "+getLocation());
265  }
266  
267  private String getLocation() {
268    return name+", row "+rowIndex.toString();
269  }
270
271  public Map<String, Sheet> getSheets() {
272    return sheets;
273  }
274
275  
276}