001/*
002Copyright (c) 2011+, HL7, Inc
003All rights reserved.
004
005Redistribution and use in source and binary forms, with or without modification, 
006are permitted provided that the following conditions are met:
007
008 * Redistributions of source code must retain the above copyright notice, this 
009   list of conditions and the following disclaimer.
010 * Redistributions in binary form must reproduce the above copyright notice, 
011   this list of conditions and the following disclaimer in the documentation 
012   and/or other materials provided with the distribution.
013 * Neither the name of HL7 nor the names of its contributors may be used to 
014   endorse or promote products derived from this software without specific 
015   prior written permission.
016
017THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
018ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
019WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
020IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
021INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
022NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
023PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
024WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
025ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
026POSSIBILITY OF SUCH DAMAGE.
027
028*/
029package org.hl7.fhir.utilities;
030
031import java.io.InputStream;
032import java.util.ArrayList;
033import java.util.HashMap;
034import java.util.List;
035import java.util.Map;
036
037import javax.xml.parsers.DocumentBuilder;
038import javax.xml.parsers.DocumentBuilderFactory;
039
040import org.hl7.fhir.exceptions.FHIRException;
041import org.hl7.fhir.utilities.xml.XMLUtil;
042import org.w3c.dom.DOMException;
043import org.w3c.dom.Document;
044import org.w3c.dom.Element;
045import org.w3c.dom.Node;
046import org.w3c.dom.NodeList;
047
048public class XLSXmlParser {
049
050  private static final String XLS_NS = "urn:schemas-microsoft-com:office:spreadsheet";
051
052  public class Row extends ArrayList<String> {  private static final long serialVersionUID = 1L; }
053  
054  public class Sheet {
055    public String title;
056    public Row columns;
057    public List<Row> rows = new ArrayList<Row>();
058
059    public boolean hasColumn(String column)  {
060      for (int i = 0; i < columns.size(); i++) {
061        if (columns.get(i).equalsIgnoreCase(column))
062          return true;
063      }
064      return false;
065    }
066    
067    public boolean hasColumn(int row, String column)  {
068      String s = getColumn(row, column);
069      return s != null && !s.equals("");     
070    }
071    
072    public String getColumn(int row, String column)  {
073      int c = -1;
074      String s = "";
075      for (int i = 0; i < columns.size(); i++) {
076        s = s + ","+columns.get(i);
077        if (columns.get(i).equalsIgnoreCase(column))
078          c = i;
079      }
080      if (c == -1)
081        return ""; // throw new FHIRException("unable to find column "+column+" in "+s.substring(1));
082      else if (rows.get(row).size() <= c)
083        return "";
084      else {
085        s = rows.get(row).get(c); 
086        return s == null ? "" : s.trim();
087      }
088    }
089
090    public List<String> getColumnNamesBySuffix(String suffix)  {
091      List<String> names = new ArrayList<String>();
092      for (int i = 0; i < columns.size(); i++) {
093        if (columns.get(i).endsWith(suffix))
094          names.add(columns.get(i));
095      }
096      return names;
097    }
098
099    public String getByColumnPrefix(int row, String column)  {
100      int c = -1;
101      String s = "";
102      for (int i = 0; i < columns.size(); i++) {
103        s = s + ","+columns.get(i);
104        if (columns.get(i).startsWith(column))
105          c = i;
106      }
107      if (c == -1)
108        return ""; // throw new FHIRException("unable to find column "+column+" in "+s.substring(1));
109      else if (rows.get(row).size() <= c)
110        return "";
111      else
112        return rows.get(row).get(c).trim();
113    }
114
115    public List<Row> getRows() {
116      return rows;
117    }
118
119    public int getIntColumn(int row, String column)  {
120      String value = getColumn(row, column);
121      if (Utilities.noString(value))
122        return 0;
123      else
124        return Integer.parseInt(value);
125    }
126
127    public String getNonEmptyColumn(int row, String column) throws FHIRException  {
128     String value = getColumn(row, column);
129     if (Utilities.noString(value))
130       throw new FHIRException("The colummn "+column+" cannot be empty");
131     return value;
132    }
133
134    public boolean hasColumnContent(String col) {
135      int i = columns.indexOf(col);
136      if (i == -1)
137        return false;
138      for (Row r : rows) {
139        if (r.size() > i && !Utilities.noString(r.get(i)))
140          return true;
141      }
142      return false;
143    }
144    
145    
146  }
147  
148  private Map<String, Sheet> sheets;
149  private Document xml;
150  private String name;
151  
152  public XLSXmlParser(InputStream in, String name) throws FHIRException  {
153    this.name = name;
154    try {
155      xml = parseXml(in);
156      sheets = new HashMap<String, Sheet>();
157      readXml();
158    } catch (Exception e) {
159      throw new FHIRException("unable to load "+name+": "+e.getMessage(), e);
160    }
161  }
162
163  private Document parseXml(InputStream in) throws FHIRException  {
164    try {
165      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
166      factory.setNamespaceAware(true);
167      DocumentBuilder builder = factory.newDocumentBuilder();
168      return builder.parse(in);
169    } catch (Exception e) {
170      throw new FHIRException("Error processing "+name+": "+e.getMessage(), e);
171    }
172  }
173
174  private void readXml() throws FHIRException  {
175    Element root = xml.getDocumentElement();
176    check(root.getNamespaceURI().equals(XLS_NS), "Spreadsheet namespace incorrect");
177    check(root.getNodeName().equals("Workbook"), "Spreadsheet element name incorrect");
178    Node node = root.getFirstChild();
179    while (node != null) {
180      if (node.getNodeName().equals("Worksheet"))
181        processWorksheet((Element)node);
182      node = node.getNextSibling();
183    }
184  }
185  
186  private Integer rowIndex;
187  private void processWorksheet(Element node) throws FHIRException  {
188    Sheet sheet = new Sheet();
189    sheet.title = node.getAttributeNS(XLS_NS, "Name");
190    sheets.put(node.getAttributeNS(XLS_NS, "Name"), sheet);
191    NodeList table = node.getElementsByTagNameNS(XLS_NS, "Table");
192    check(table.getLength() == 1, "multiple table elements");
193    NodeList rows = ((Element)table.item(0)).getElementsByTagNameNS(XLS_NS, "Row");
194    if (rows.getLength() == 0) 
195      return;
196    rowIndex = 1;
197    sheet.columns = readRow((Element) rows.item(0));
198    for (int i = 1; i < rows.getLength(); i++) {
199      rowIndex++;
200      sheet.rows.add(readRow((Element) rows.item(i)));
201    }
202       
203    //Remove empty rows at the end of the sheet
204    while( sheet.rows.size() != 0 && isEmptyRow(sheet.rows.get(sheet.rows.size()-1) ) )
205        sheet.rows.remove(sheet.rows.size()-1);
206  }
207
208  
209  private boolean isEmptyRow(Row w)
210  { 
211          for( int col=0; col<w.size(); col++ )
212                  if( !w.get(col).trim().isEmpty() ) return false;
213          
214          return true;
215  }
216  
217  private Row readRow(Element row) throws DOMException, FHIRException  {
218    Row res = new Row();
219    int ndx = 1;    
220    NodeList cells = row.getElementsByTagNameNS(XLS_NS, "Cell");
221    for (int i = 0; i < cells.getLength(); i++) {
222      Element cell = (Element) cells.item(i);
223      if (cell.hasAttributeNS(XLS_NS, "Index")) {
224        int index = Integer.parseInt(cell.getAttributeNS(XLS_NS, "Index"));
225        while (ndx < index) {
226          res.add("");
227          ndx++;
228        }
229      }
230      res.add(readData(cell, ndx, res.size() > 0 ? res.get(0) : "?"));
231      ndx++;      
232    }
233    return res;
234  }
235
236  private String readData(Element cell, int col, String s) throws DOMException, FHIRException  {
237    List<Element> data = new ArrayList<Element>(); 
238    XMLUtil.getNamedChildren(cell, "Data", data); // cell.getElementsByTagNameNS(XLS_NS, "Data");
239    if (data.size() == 0)
240      return "";
241    check(data.size() == 1, "Multiple Data encountered ("+Integer.toString(data.size())+" @ col "+Integer.toString(col)+" - "+cell.getTextContent()+" ("+s+"))");
242    Element d = data.get(0);
243    String type = d.getAttributeNS(XLS_NS, "Type");
244    if ("Boolean".equals(type)) {
245      if (d.getTextContent().equals("1"))
246        return "True";
247      else
248        return "False";
249    } else if ("String".equals(type)) {
250      return d.getTextContent();
251    } else if ("Number".equals(type)) {
252      return d.getTextContent();
253    } else if ("DateTime".equals(type)) {
254      return d.getTextContent();
255    } else if ("Error".equals(type)) {
256      return null;
257    } else 
258      throw new FHIRException("Cell Type is not known ("+d.getAttributeNodeNS(XLS_NS, "Type")+") in "+getLocation());
259  }
260
261  private void check(boolean test, String message) throws FHIRException  {
262    if (!test)
263      throw new FHIRException(message+" in "+getLocation());
264  }
265  
266  private String getLocation() {
267    return name+", row "+rowIndex.toString();
268  }
269
270  public Map<String, Sheet> getSheets() {
271    return sheets;
272  }
273
274  
275}