001/******************************************************************************* 002 * Crown Copyright (c) 2006 - 2014, Copyright (c) 2006 - 2014 Kestral Computing P/L. 003 * All rights reserved. This program and the accompanying materials 004 * are made available under the terms of the Eclipse Public License v1.0 005 * which accompanies this distribution, and is available at 006 * http://www.eclipse.org/legal/epl-v10.html 007 * 008 * Contributors: 009 * Kestral Computing P/L - initial implementation 010 *******************************************************************************/ 011 012package org.hl7.fhir.utilities.ucum; 013 014import org.hl7.fhir.exceptions.UcumException; 015import org.hl7.fhir.utilities.Utilities; 016 017public class Lexer { 018 019 private static final char NO_CHAR = Character.UNASSIGNED; 020 private String source; 021 private int index; 022 023 private String token; 024 private TokenType type; 025 private int start; 026 027 /** 028 * @param source 029 * @throws UcumException 030 * @ 031 */ 032 public Lexer(String source) throws UcumException { 033 super(); 034 this.source = source; 035 if (source == null) 036 source = ""; 037 index = 0; 038 consume(); 039 } 040 041 public void consume() throws UcumException { 042 token = null; 043 type = TokenType.NONE; 044 start = index; 045 if (index < source.length()) { 046 char ch = nextChar(); 047 if (!(checkSingle(ch, '/', TokenType.SOLIDUS) || 048 checkSingle(ch, '.', TokenType.PERIOD) || 049 checkSingle(ch, '(', TokenType.OPEN) || 050 checkSingle(ch, ')', TokenType.CLOSE) || 051 checkAnnotation(ch) || 052 checkNumber(ch) || 053 checkNumberOrSymbol(ch))) 054 throw new UcumException("Error processing unit '"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start)); 055 } 056 } 057 058 private boolean checkNumber(char ch) throws UcumException { 059 if (ch == '+' || ch == '-') { 060 token = String.valueOf(ch); 061 ch = peekChar(); 062 while ((ch >= '0' && ch <= '9')) { 063 token = token + ch; 064 index++; 065 ch = peekChar(); 066 } 067 if (token.length() == 1) { 068 throw new UcumException("Error processing unit'"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start)+": a + or - must be followed by at least one digit"); 069 } 070 type = TokenType.NUMBER; 071 return true; 072 } else 073 return false; 074 } 075 076 private boolean checkNumberOrSymbol(char ch) throws UcumException { 077 boolean isSymbol = false; 078 boolean inBrackets = false; 079 if (isValidSymbolChar(ch, true)) { 080 token = String.valueOf(ch); 081 isSymbol = isSymbol || !((ch >= '0' && ch <= '9')); 082 inBrackets = checkBrackets(ch, inBrackets); 083 ch = peekChar(); 084 inBrackets = checkBrackets(ch, inBrackets); 085 while (isValidSymbolChar(ch, !isSymbol || inBrackets)) { 086 token = token + ch; 087 isSymbol = isSymbol || ((ch != NO_CHAR) && !((ch >= '0' && ch <= '9'))); 088 index++; 089 ch = peekChar(); 090 inBrackets = checkBrackets(ch, inBrackets); 091 } 092 if (isSymbol) 093 type = TokenType.SYMBOL; 094 else 095 type = TokenType.NUMBER; 096 return true; 097 } else 098 return false; 099 } 100 101 102 private boolean checkBrackets(char ch, boolean inBrackets) throws UcumException { 103 if (ch == '[') 104 if (inBrackets) 105 error("Nested ["); 106 else 107 return true; 108 if (ch == ']') 109 if (!inBrackets) 110 error("] without ["); 111 else 112 return false; 113 return inBrackets; 114 } 115 116 private boolean isValidSymbolChar(char ch, boolean allowDigits) { 117 return (allowDigits && ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || 118 ch == '[' || ch == ']' || ch == '%' || ch == '*' || ch == '^' || ch == '\'' || 119 ch == '"' || ch == '_'; 120 } 121 122 private boolean checkAnnotation(char ch) throws UcumException { 123 if (ch == '{') { 124 StringBuilder b = new StringBuilder(); 125 while (ch != '}') { 126 ch = nextChar(); 127 if (!Utilities.isAsciiChar(ch)) 128 throw new UcumException("Error processing unit'"+source+"': Annotation contains non-ascii characters"); 129 if (ch == 0) 130 throw new UcumException("Error processing unit'"+source+"': unterminated annotation"); 131 b.append(ch); 132 } 133 // got to the end of the annotation - need to do it again 134 token = b.toString(); 135 type = TokenType.ANNOTATION; 136 return true; 137 } else 138 return false; 139 } 140 141 private boolean checkSingle(char ch, char test, TokenType type) { 142 if (ch == test) { 143 token = String.valueOf(ch); 144 this.type = type; 145 return true; 146 } 147 return false; 148 } 149 150 private char nextChar() { 151 char res = index < source.length() ? source.charAt(index) : NO_CHAR; 152 index++; 153 return res; 154 } 155 156 private char peekChar() { 157 return index < source.length() ? source.charAt(index) : NO_CHAR; 158 } 159 160 /** 161 * @return the token 162 */ 163 public String getToken() { 164 return token; 165 } 166 167 /** 168 * @return the type 169 */ 170 public TokenType getType() { 171 return type; 172 } 173 174 public void error(String errMsg) throws UcumException { 175 throw new UcumException("Error processing unit '"+source+"': "+ errMsg +"' at position "+Integer.toString(start)); 176 177 } 178 179 public int getTokenAsInt() { 180 return token.charAt(0) == '+' ? Integer.parseInt(token.substring(1)) : Integer.parseInt(token); 181 } 182 183 public boolean finished() { 184 return index == source.length(); 185 } 186 187 188}