001package org.hl7.fhir.utilities;
002
003/*
004  Copyright (c) 2011+, HL7, Inc.
005  All rights reserved.
006  
007  Redistribution and use in source and binary forms, with or without modification, 
008  are permitted provided that the following conditions are met:
009    
010   * Redistributions of source code must retain the above copyright notice, this 
011     list of conditions and the following disclaimer.
012   * Redistributions in binary form must reproduce the above copyright notice, 
013     this list of conditions and the following disclaimer in the documentation 
014     and/or other materials provided with the distribution.
015   * Neither the name of HL7 nor the names of its contributors may be used to 
016     endorse or promote products derived from this software without specific 
017     prior written permission.
018  
019  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
020  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
021  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
022  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
023  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 
024  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
025  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
026  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
027  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
028  POSSIBILITY OF SUCH DAMAGE.
029  
030 */
031
032
033
034import java.io.IOException;
035import java.io.InputStream;
036import java.io.InputStreamReader;
037import java.io.UnsupportedEncodingException;
038import java.util.ArrayList;
039import java.util.List;
040
041import org.hl7.fhir.exceptions.FHIRException;
042
043/**
044 * Baseclass for readers that read data from files in comma separated file format
045 * @author Ewout
046 *
047 */
048public class CSVReader extends InputStreamReader {
049        
050        public CSVReader(InputStream in) throws FHIRException, IOException {
051                super(in, "UTF-8");
052                checkBOM();
053        }
054
055        private void checkBOM() throws FHIRException, IOException {
056    if (peek() == '\uFEFF')
057      next();
058    
059  }
060
061  private String[] cols;
062  private String[] cells;
063  private char delimiter = ',';
064  private boolean multiline;
065  
066        public void readHeaders() throws IOException, FHIRException {
067    cols = parseLine();  
068        }
069
070  public boolean line() throws IOException, FHIRException {
071    if (ready()) {
072      cells = parseLine();
073      return true;
074    }  else
075      return false;
076  }
077
078  public boolean has(String name) {
079    for (int i = 0; i < cols.length; i++) {
080      if (name.equals(cols[i].trim()))
081        return cell(name) != null;
082    }
083    return false;
084  }
085  
086  public String cell(String name) {
087    int index = -1;
088    for (int i = 0; i < cols.length; i++) {
089      if (name.equals(cols[i].trim()))
090        index = i;
091    }
092    if (index == -1)
093      throw new FHIRException("no cell "+name);
094    String s = cells.length > index ? cells[index] : null;
095    if (Utilities.noString(s))
096      return null;
097    if (s.startsWith("\"") && s.endsWith("\"")) {
098      return s.substring(1, s.length()-2);     
099    } else {
100      return s;
101    }
102  }
103    
104        protected boolean parseBoolean(String column) {
105                if (column == null)
106                        return false;
107                else if (column.equalsIgnoreCase("y") || column.equalsIgnoreCase("yes") || column.equalsIgnoreCase("true") || column.equalsIgnoreCase("1"))
108                        return true;
109                else
110                        return false;
111        }
112
113        protected static String getColumn(String[] titles, String[] values, String column)  {
114                int c = -1;
115        //      String s = "";
116                for (int i = 0; i < titles.length; i++) {
117                //      s = s + ","+titles[i];
118                        if (titles[i].equalsIgnoreCase(column))
119                                c = i;
120                }
121                if (c == -1)
122                        return ""; // throw new Exception("unable to find column "+column+" in "+s.substring(1));
123                else if (values.length <= c)
124                        return "";
125                else
126                        return values[c];
127        }
128
129        
130        /**
131         * Split one line in a CSV file into its cells. Comma's appearing in double quoted strings will
132         * not be seen as a separator.
133         * @return
134         * @throws IOException 
135         * @throws FHIRException 
136         * @
137         */
138        public String[] parseLine() throws IOException, FHIRException  {
139                List<String> res = new ArrayList<String>();
140                StringBuilder b = new StringBuilder();
141                boolean inQuote = false;
142
143                while (more() && !finished(inQuote, res.size())) {
144                        char c = peek();
145                        next();
146                        if (c == '"') {
147                                if (ready() && peek() == '"') {
148                b.append(c);
149          next();
150                                } else {
151                            inQuote = !inQuote;
152                                }
153                        }
154                        else if (!inQuote && c == delimiter ) {
155                                res.add(b.toString().trim());
156                                b = new StringBuilder();
157                        }
158                        else 
159                                b.append(c);
160                }
161                res.add(b.toString().trim());
162                while (ready() && (peek() == '\r' || peek() == '\n')) {
163                        next();
164                }
165                
166                String[] r = new String[] {};
167                r = res.toArray(r);
168                return r;
169        }
170
171        private boolean more() throws IOException {
172    return state == 1 || ready();
173  }
174
175  private boolean finished(boolean inQuote, int size) throws FHIRException, IOException {
176          if (multiline && cols != null) {
177            return size == cols.length || (size == cols.length - 1 && !(inQuote || (peek() != '\r' && peek() != '\n')));
178          } else {
179            return !(inQuote || (peek() != '\r' && peek() != '\n'));
180          }
181  }
182
183  private int state = 0;
184        private char pc;
185        
186        private char peek() throws FHIRException, IOException 
187        {
188          if (state == 0)
189                  next();
190          if (state == 1)
191                  return pc;
192          else
193                  throw new FHIRException("read past end of source");
194        }
195        
196        private void next() throws FHIRException, IOException 
197        {
198                  if (state == 2)
199                          throw new FHIRException("read past end of source");
200          state = 1;
201                  int i = read();
202                  if (i == -1)
203                          state = 2;
204                  else 
205                          pc = (char) i;
206        }
207
208
209  public void checkColumn(int i, String name, String desc) throws FHIRException {
210    if (cols.length < i)
211      throw new FHIRException("Error parsing "+desc+": expected column "+name+" at col "+i+" but only found "+cols.length+" cols");
212    if (!cols[i-1].equals(name))
213      throw new FHIRException("Error parsing "+desc+": expected column "+name+" at col "+i+" but found '"+cols[i-1]+"'");
214  }
215
216
217  public String value(int i) {
218    if (i > cells.length)
219      return null;
220    if (Utilities.noString(cells[i-1]))
221      return null;
222    return cells[i-1];
223  }
224
225  public char getDelimiter() {
226    return delimiter;
227  }
228
229  public void setDelimiter(char delimiter) {
230    this.delimiter = delimiter;
231  }
232
233  public boolean isMultiline() {
234    return multiline;
235  }
236
237  public void setMultiline(boolean multiline) {
238    this.multiline = multiline;
239  }
240
241
242}