001package org.hl7.fhir.utilities; 002 003/* 004 Copyright (c) 2011+, HL7, Inc. 005 All rights reserved. 006 007 Redistribution and use in source and binary forms, with or without modification, 008 are permitted provided that the following conditions are met: 009 010 * Redistributions of source code must retain the above copyright notice, this 011 list of conditions and the following disclaimer. 012 * Redistributions in binary form must reproduce the above copyright notice, 013 this list of conditions and the following disclaimer in the documentation 014 and/or other materials provided with the distribution. 015 * Neither the name of HL7 nor the names of its contributors may be used to 016 endorse or promote products derived from this software without specific 017 prior written permission. 018 019 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 022 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 023 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 024 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 025 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 026 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 027 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 028 POSSIBILITY OF SUCH DAMAGE. 029 030 */ 031 032 033 034import java.io.IOException; 035import java.io.InputStream; 036import java.io.InputStreamReader; 037import java.io.UnsupportedEncodingException; 038import java.util.ArrayList; 039import java.util.List; 040 041import org.hl7.fhir.exceptions.FHIRException; 042 043/** 044 * Baseclass for readers that read data from files in comma separated file format 045 * @author Ewout 046 * 047 */ 048public class CSVReader extends InputStreamReader { 049 050 public CSVReader(InputStream in) throws FHIRException, IOException { 051 super(in, "UTF-8"); 052 checkBOM(); 053 } 054 055 private void checkBOM() throws FHIRException, IOException { 056 if (peek() == '\uFEFF') 057 next(); 058 059 } 060 061 private String[] cols; 062 private String[] cells; 063 private char delimiter = ','; 064 private boolean multiline; 065 066 public void readHeaders() throws IOException, FHIRException { 067 cols = parseLine(); 068 } 069 070 public boolean line() throws IOException, FHIRException { 071 if (ready()) { 072 cells = parseLine(); 073 return true; 074 } else 075 return false; 076 } 077 078 public boolean has(String name) { 079 for (int i = 0; i < cols.length; i++) { 080 if (name.equals(cols[i].trim())) 081 return cell(name) != null; 082 } 083 return false; 084 } 085 086 public String cell(String name) { 087 int index = -1; 088 for (int i = 0; i < cols.length; i++) { 089 if (name.equals(cols[i].trim())) 090 index = i; 091 } 092 if (index == -1) 093 throw new FHIRException("no cell "+name); 094 String s = cells.length > index ? cells[index] : null; 095 if (Utilities.noString(s)) 096 return null; 097 if (s.startsWith("\"") && s.endsWith("\"")) { 098 return s.substring(1, s.length()-2); 099 } else { 100 return s; 101 } 102 } 103 104 protected boolean parseBoolean(String column) { 105 if (column == null) 106 return false; 107 else if (column.equalsIgnoreCase("y") || column.equalsIgnoreCase("yes") || column.equalsIgnoreCase("true") || column.equalsIgnoreCase("1")) 108 return true; 109 else 110 return false; 111 } 112 113 protected static String getColumn(String[] titles, String[] values, String column) { 114 int c = -1; 115 // String s = ""; 116 for (int i = 0; i < titles.length; i++) { 117 // s = s + ","+titles[i]; 118 if (titles[i].equalsIgnoreCase(column)) 119 c = i; 120 } 121 if (c == -1) 122 return ""; // throw new Exception("unable to find column "+column+" in "+s.substring(1)); 123 else if (values.length <= c) 124 return ""; 125 else 126 return values[c]; 127 } 128 129 130 /** 131 * Split one line in a CSV file into its cells. Comma's appearing in double quoted strings will 132 * not be seen as a separator. 133 * @return 134 * @throws IOException 135 * @throws FHIRException 136 * @ 137 */ 138 public String[] parseLine() throws IOException, FHIRException { 139 List<String> res = new ArrayList<String>(); 140 StringBuilder b = new StringBuilder(); 141 boolean inQuote = false; 142 143 while (more() && !finished(inQuote, res.size())) { 144 char c = peek(); 145 next(); 146 if (c == '"') { 147 if (ready() && peek() == '"') { 148 b.append(c); 149 next(); 150 } else { 151 inQuote = !inQuote; 152 } 153 } 154 else if (!inQuote && c == delimiter ) { 155 res.add(b.toString().trim()); 156 b = new StringBuilder(); 157 } 158 else 159 b.append(c); 160 } 161 res.add(b.toString().trim()); 162 while (ready() && (peek() == '\r' || peek() == '\n')) { 163 next(); 164 } 165 166 String[] r = new String[] {}; 167 r = res.toArray(r); 168 return r; 169 } 170 171 private boolean more() throws IOException { 172 return state == 1 || ready(); 173 } 174 175 private boolean finished(boolean inQuote, int size) throws FHIRException, IOException { 176 if (multiline && cols != null) { 177 return size == cols.length || (size == cols.length - 1 && !(inQuote || (peek() != '\r' && peek() != '\n'))); 178 } else { 179 return !(inQuote || (peek() != '\r' && peek() != '\n')); 180 } 181 } 182 183 private int state = 0; 184 private char pc; 185 186 private char peek() throws FHIRException, IOException 187 { 188 if (state == 0) 189 next(); 190 if (state == 1) 191 return pc; 192 else 193 throw new FHIRException("read past end of source"); 194 } 195 196 private void next() throws FHIRException, IOException 197 { 198 if (state == 2) 199 throw new FHIRException("read past end of source"); 200 state = 1; 201 int i = read(); 202 if (i == -1) 203 state = 2; 204 else 205 pc = (char) i; 206 } 207 208 209 public void checkColumn(int i, String name, String desc) throws FHIRException { 210 if (cols.length < i) 211 throw new FHIRException("Error parsing "+desc+": expected column "+name+" at col "+i+" but only found "+cols.length+" cols"); 212 if (!cols[i-1].equals(name)) 213 throw new FHIRException("Error parsing "+desc+": expected column "+name+" at col "+i+" but found '"+cols[i-1]+"'"); 214 } 215 216 217 public String value(int i) { 218 if (i > cells.length) 219 return null; 220 if (Utilities.noString(cells[i-1])) 221 return null; 222 return cells[i-1]; 223 } 224 225 public char getDelimiter() { 226 return delimiter; 227 } 228 229 public void setDelimiter(char delimiter) { 230 this.delimiter = delimiter; 231 } 232 233 public boolean isMultiline() { 234 return multiline; 235 } 236 237 public void setMultiline(boolean multiline) { 238 this.multiline = multiline; 239 } 240 241 242}