001/******************************************************************************* 002 * Crown Copyright (c) 2006 - 2014, Copyright (c) 2006 - 2014 Kestral Computing P/L. 003 * All rights reserved. This program and the accompanying materials 004 * are made available under the terms of the Eclipse Public License v1.0 005 * which accompanies this distribution, and is available at 006 * http://www.eclipse.org/legal/epl-v10.html 007 * 008 * Contributors: 009 * Kestral Computing P/L - initial implementation 010 *******************************************************************************/ 011 012package org.fhir.ucum; 013 014 015public class Lexer { 016 017 private static final char NO_CHAR = Character.UNASSIGNED; 018 private String source; 019 private int index; 020 021 private String token; 022 private TokenType type; 023 private int start; 024 025 /** 026 * @param source 027 * @throws UcumException 028 * @ 029 */ 030 public Lexer(String source) throws UcumException { 031 super(); 032 this.source = source; 033 if (source == null) 034 source = ""; 035 index = 0; 036 consume(); 037 } 038 039 public void consume() throws UcumException { 040 token = null; 041 type = TokenType.NONE; 042 start = index; 043 if (index < source.length()) { 044 char ch = nextChar(); 045 if (!(checkSingle(ch, '/', TokenType.SOLIDUS) || 046 checkSingle(ch, '.', TokenType.PERIOD) || 047 checkSingle(ch, '(', TokenType.OPEN) || 048 checkSingle(ch, ')', TokenType.CLOSE) || 049 checkAnnotation(ch) || 050 checkNumber(ch) || 051 checkNumberOrSymbol(ch))) 052 throw new UcumException("Error processing unit '"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start)); 053 } 054 } 055 056 private boolean checkNumber(char ch) throws UcumException { 057 if (ch == '+' || ch == '-') { 058 token = String.valueOf(ch); 059 ch = peekChar(); 060 while ((ch >= '0' && ch <= '9')) { 061 token = token + ch; 062 index++; 063 ch = peekChar(); 064 } 065 if (token.length() == 1) { 066 throw new UcumException("Error processing unit'"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start)+": a + or - must be followed by at least one digit"); 067 } 068 type = TokenType.NUMBER; 069 return true; 070 } else 071 return false; 072 } 073 074 private boolean checkNumberOrSymbol(char ch) throws UcumException { 075 boolean isSymbol = false; 076 boolean inBrackets = false; 077 if (isValidSymbolChar(ch, true, false)) { 078 token = String.valueOf(ch); 079 isSymbol = isSymbol || !((ch >= '0' && ch <= '9')); 080 inBrackets = checkBrackets(ch, inBrackets); 081 ch = peekChar(); 082 inBrackets = checkBrackets(ch, inBrackets); 083 while (isValidSymbolChar(ch, !isSymbol || inBrackets, inBrackets)) { 084 token = token + ch; 085 isSymbol = isSymbol || ((ch != NO_CHAR) && !((ch >= '0' && ch <= '9'))); 086 index++; 087 ch = peekChar(); 088 inBrackets = checkBrackets(ch, inBrackets); 089 } 090 if (isSymbol) 091 type = TokenType.SYMBOL; 092 else 093 type = TokenType.NUMBER; 094 return true; 095 } else 096 return false; 097 } 098 099 100 private boolean checkBrackets(char ch, boolean inBrackets) throws UcumException { 101 if (ch == '[') 102 if (inBrackets) 103 error("Nested ["); 104 else 105 return true; 106 if (ch == ']') 107 if (!inBrackets) 108 error("] without ["); 109 else 110 return false; 111 return inBrackets; 112 } 113 114 private boolean isValidSymbolChar(char ch, boolean allowDigits, boolean inBrackets) { 115 return (allowDigits && ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || 116 ch == '[' || ch == ']' || ch == '%' || ch == '*' || ch == '^' || ch == '\'' || 117 ch == '"' || ch == '_' || (inBrackets && ch == '.'); 118 } 119 120 private boolean checkAnnotation(char ch) throws UcumException { 121 if (ch == '{') { 122 StringBuilder b = new StringBuilder(); 123 while (ch != '}') { 124 ch = nextChar(); 125 if (!Utilities.isAsciiChar(ch)) 126 throw new UcumException("Error processing unit'"+source+"': Annotation contains non-ascii characters"); 127 if (ch == 0) 128 throw new UcumException("Error processing unit'"+source+"': unterminated annotation"); 129 b.append(ch); 130 } 131 // got to the end of the annotation - need to do it again 132 token = b.toString(); 133 type = TokenType.ANNOTATION; 134 return true; 135 } else 136 return false; 137 } 138 139 private boolean checkSingle(char ch, char test, TokenType type) { 140 if (ch == test) { 141 token = String.valueOf(ch); 142 this.type = type; 143 return true; 144 } 145 return false; 146 } 147 148 private char nextChar() { 149 char res = index < source.length() ? source.charAt(index) : NO_CHAR; 150 index++; 151 return res; 152 } 153 154 private char peekChar() { 155 return index < source.length() ? source.charAt(index) : NO_CHAR; 156 } 157 158 /** 159 * @return the token 160 */ 161 public String getToken() { 162 return token; 163 } 164 165 /** 166 * @return the type 167 */ 168 public TokenType getType() { 169 return type; 170 } 171 172 public void error(String errMsg) throws UcumException { 173 throw new UcumException("Error processing unit '"+source+"': "+ errMsg +"' at position "+Integer.toString(start)); 174 175 } 176 177 public int getTokenAsInt() { 178 return token.charAt(0) == '+' ? Integer.parseInt(token.substring(1)) : Integer.parseInt(token); 179 } 180 181 public boolean finished() { 182 return index == source.length(); 183 } 184 185 186}