001/*******************************************************************************
002 * Crown Copyright (c) 2006 - 2014, Copyright (c) 2006 - 2014 Kestral Computing P/L.
003 * All rights reserved. This program and the accompanying materials
004 * are made available under the terms of the Eclipse Public License v1.0
005 * which accompanies this distribution, and is available at
006 * http://www.eclipse.org/legal/epl-v10.html
007 * 
008 * Contributors:
009 *    Kestral Computing P/L - initial implementation
010 *******************************************************************************/
011
012package org.hl7.fhir.utilities.ucum;
013
014import org.hl7.fhir.exceptions.UcumException;
015import org.hl7.fhir.utilities.Utilities;
016
017public class Lexer {
018
019        private static final char NO_CHAR = Character.UNASSIGNED;
020        private String source;
021        private int index;
022        
023        private String token;
024        private TokenType type;
025        private int start;
026        
027        /**
028         * @param source
029         * @throws UcumException 
030         * @ 
031         */
032        public Lexer(String source) throws UcumException  {
033                super();
034                this.source = source;
035                if (source == null)
036                        source = "";
037                index = 0;
038                consume();
039        }
040        
041        public void consume() throws UcumException  {
042                token = null;
043                type = TokenType.NONE;
044                start = index;
045                if (index < source.length()) {
046                        char ch = nextChar();
047                        if (!(checkSingle(ch, '/', TokenType.SOLIDUS) ||
048                                        checkSingle(ch, '.', TokenType.PERIOD) || 
049                                        checkSingle(ch, '(', TokenType.OPEN) || 
050                                        checkSingle(ch, ')', TokenType.CLOSE) || 
051                                        checkAnnotation(ch) ||
052                                        checkNumber(ch) ||
053                                        checkNumberOrSymbol(ch)))
054                                throw new UcumException("Error processing unit '"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start));                      
055                }               
056        }
057
058        private boolean checkNumber(char ch) throws UcumException  {
059                if (ch == '+' || ch == '-') {
060                        token = String.valueOf(ch);
061                        ch = peekChar();
062                        while ((ch >= '0' && ch <= '9')) {
063                                token = token + ch;
064                                index++;
065                                ch = peekChar();
066                        }
067                        if (token.length() == 1) {
068                                throw new UcumException("Error processing unit'"+source+"': unexpected character '"+ch+"' at position "+Integer.toString(start)+": a + or - must be followed by at least one digit");                   
069                                }
070                        type = TokenType.NUMBER;
071                        return true;
072                } else
073                        return false;
074        }
075
076        private boolean checkNumberOrSymbol(char ch) throws UcumException  {
077                boolean isSymbol = false;
078                boolean inBrackets = false;
079                if (isValidSymbolChar(ch, true)) {
080                        token = String.valueOf(ch);
081                        isSymbol = isSymbol || !((ch >= '0' && ch <= '9'));
082                        inBrackets = checkBrackets(ch, inBrackets);
083                        ch = peekChar();
084                        inBrackets = checkBrackets(ch, inBrackets);
085                        while (isValidSymbolChar(ch, !isSymbol || inBrackets)) {
086                                token = token + ch;
087                                isSymbol = isSymbol || ((ch != NO_CHAR) && !((ch >= '0' && ch <= '9')));
088                                index++;
089                                ch = peekChar();
090                                inBrackets = checkBrackets(ch, inBrackets);
091                        }
092                        if (isSymbol)
093                                type = TokenType.SYMBOL;
094                        else
095                                type = TokenType.NUMBER;
096                        return true;
097                } else
098                        return false;
099        }
100
101        
102        private boolean checkBrackets(char ch, boolean inBrackets) throws UcumException  {
103                if (ch == '[')
104                        if (inBrackets)
105                                error("Nested [");
106                        else 
107                                return true;
108                if (ch == ']')
109                        if (!inBrackets)
110                                error("] without [");
111                        else 
112                                return false;
113                return inBrackets;
114        }
115
116        private boolean isValidSymbolChar(char ch, boolean allowDigits) {
117                return (allowDigits && ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
118                     ch == '[' || ch == ']' || ch == '%' || ch == '*' || ch == '^' || ch == '\'' || 
119                     ch == '"' || ch == '_';
120        }
121
122        private boolean checkAnnotation(char ch) throws UcumException  {
123                if (ch == '{') {
124                        StringBuilder b = new StringBuilder();
125                        while (ch != '}') {
126                                ch = nextChar();
127                                if (!Utilities.isAsciiChar(ch))
128                                        throw new UcumException("Error processing unit'"+source+"': Annotation contains non-ascii characters");
129                                if (ch == 0) 
130                                        throw new UcumException("Error processing unit'"+source+"': unterminated annotation");
131                                b.append(ch);
132                        }
133                        // got to the end of the annotation - need to do it again
134                        token = b.toString();
135                        type = TokenType.ANNOTATION;
136                        return true;
137                } else
138                        return false;
139        }
140
141        private boolean checkSingle(char ch, char test, TokenType type) {
142                if (ch == test) {
143                        token = String.valueOf(ch);
144                        this.type = type;                       
145                        return true;
146                }
147                return false;
148        }
149
150        private char nextChar() {
151                char res = index < source.length() ? source.charAt(index) : NO_CHAR;
152                index++;
153                return res;
154        }
155
156        private char peekChar() {
157                return index < source.length() ? source.charAt(index) : NO_CHAR;
158        }
159
160        /**
161         * @return the token
162         */
163        public String getToken() {
164                return token;
165        }
166
167        /**
168         * @return the type
169         */
170        public TokenType getType() {
171                return type;
172        }
173
174        public void error(String errMsg) throws UcumException  {
175                throw new UcumException("Error processing unit '"+source+"': "+ errMsg +"' at position "+Integer.toString(start));                      
176                
177        }
178
179        public int getTokenAsInt() {
180                return token.charAt(0) == '+' ? Integer.parseInt(token.substring(1)) : Integer.parseInt(token);
181        }
182
183        public boolean finished() {
184                return index == source.length();
185        }
186
187
188}