001/*******************************************************************************
002 * Crown Copyright (c) 2006 - 2014, Copyright (c) 2006 - 2014 Kestral Computing P/L.
003 * All rights reserved. This program and the accompanying materials
004 * are made available under the terms of the Eclipse Public License v1.0
005 * which accompanies this distribution, and is available at
006 * http://www.eclipse.org/legal/epl-v10.html
007 * 
008 * Contributors:
009 *    Kestral Computing P/L - initial implementation
010 *******************************************************************************/
011
012package org.fhir.ucum;
013
014
015public class Lexer {
016
017        private static final char NO_CHAR = Character.UNASSIGNED;
018        private String source;
019        private int index;
020        
021        private String token;
022        private TokenType type;
023        private int start;
024        
025        /**
026         * @param source
027         * @throws UcumException 
028         * @ 
029         */
030        public Lexer(String source) throws UcumException  {
031                super();
032                this.source = source;
033                if (source == null)
034                        source = "";
035                index = 0;
036                consume();
037        }
038        
039        public void consume() throws UcumException  {
040                token = null;
041                type = TokenType.NONE;
042                start = index;
043                if (index < source.length()) {
044                        char ch = nextChar();
045                        if (!(checkSingle(ch, '/', TokenType.SOLIDUS) ||
046                                        checkSingle(ch, '.', TokenType.PERIOD) || 
047                                        checkSingle(ch, '(', TokenType.OPEN) || 
048                                        checkSingle(ch, ')', TokenType.CLOSE) || 
049                                        checkAnnotation(ch) ||
050                                        checkNumber(ch) ||
051                                        checkNumberOrSymbol(ch)))
052                                throw new UcumException("Error processing unit '"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start));                      
053                }               
054        }
055
056        private boolean checkNumber(char ch) throws UcumException  {
057                if (ch == '+' || ch == '-') {
058                        token = String.valueOf(ch);
059                        ch = peekChar();
060                        while ((ch >= '0' && ch <= '9')) {
061                                token = token + ch;
062                                index++;
063                                ch = peekChar();
064                        }
065                        if (token.length() == 1) {
066                                throw new UcumException("Error processing unit'"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start)+": a + or - must be followed by at least one digit");                   
067                                }
068                        type = TokenType.NUMBER;
069                        return true;
070                } else
071                        return false;
072        }
073
074        private boolean checkNumberOrSymbol(char ch) throws UcumException  {
075                boolean isSymbol = false;
076                boolean inBrackets = false;
077                if (isValidSymbolChar(ch, true, false)) {
078                        token = String.valueOf(ch);
079                        isSymbol = isSymbol || !((ch >= '0' && ch <= '9'));
080                        inBrackets = checkBrackets(ch, inBrackets);
081                        ch = peekChar();
082                        inBrackets = checkBrackets(ch, inBrackets);
083                        while (isValidSymbolChar(ch, !isSymbol || inBrackets, inBrackets)) {
084                                token = token + ch;
085                                isSymbol = isSymbol || ((ch != NO_CHAR) && !((ch >= '0' && ch <= '9')));
086                                index++;
087                                ch = peekChar();
088                                inBrackets = checkBrackets(ch, inBrackets);
089                        }
090                        if (isSymbol)
091                                type = TokenType.SYMBOL;
092                        else
093                                type = TokenType.NUMBER;
094                        return true;
095                } else
096                        return false;
097        }
098
099        
100        private boolean checkBrackets(char ch, boolean inBrackets) throws UcumException  {
101                if (ch == '[')
102                        if (inBrackets)
103                                error("Nested [");
104                        else 
105                                return true;
106                if (ch == ']')
107                        if (!inBrackets)
108                                error("] without [");
109                        else 
110                                return false;
111                return inBrackets;
112        }
113
114        private boolean isValidSymbolChar(char ch, boolean allowDigits, boolean inBrackets) {
115                return (allowDigits && ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
116                     ch == '[' || ch == ']' || ch == '%' || ch == '*' || ch == '^' || ch == '\'' || 
117                     ch == '"' || ch == '_' || (inBrackets && ch == '.');
118        }
119
120        private boolean checkAnnotation(char ch) throws UcumException  {
121                if (ch == '{') {
122                        StringBuilder b = new StringBuilder();
123                        while (ch != '}') {
124                                ch = nextChar();
125                                if (!Utilities.isAsciiChar(ch))
126                                        throw new UcumException("Error processing unit'"+source+"': Annotation contains non-ascii characters");
127                                if (ch == 0) 
128                                        throw new UcumException("Error processing unit'"+source+"': unterminated annotation");
129                                b.append(ch);
130                        }
131                        // got to the end of the annotation - need to do it again
132                        token = b.toString();
133                        type = TokenType.ANNOTATION;
134                        return true;
135                } else
136                        return false;
137        }
138
139        private boolean checkSingle(char ch, char test, TokenType type) {
140                if (ch == test) {
141                        token = String.valueOf(ch);
142                        this.type = type;                       
143                        return true;
144                }
145                return false;
146        }
147
148        private char nextChar() {
149                char res = index < source.length() ? source.charAt(index) : NO_CHAR;
150                index++;
151                return res;
152        }
153
154        private char peekChar() {
155                return index < source.length() ? source.charAt(index) : NO_CHAR;
156        }
157
158        /**
159         * @return the token
160         */
161        public String getToken() {
162                return token;
163        }
164
165        /**
166         * @return the type
167         */
168        public TokenType getType() {
169                return type;
170        }
171
172        public void error(String errMsg) throws UcumException  {
173                throw new UcumException("Error processing unit '"+source+"': "+ errMsg +"' at position "+Integer.toString(start));                      
174                
175        }
176
177        public int getTokenAsInt() {
178                return token.charAt(0) == '+' ? Integer.parseInt(token.substring(1)) : Integer.parseInt(token);
179        }
180
181        public boolean finished() {
182                return index == source.length();
183        }
184
185
186}