001package org.hl7.fhir.r4.formats; 002 003public class TurtleLexer { 004 005 public enum TurtleTokenType { 006 NULL, 007 TOKEN, SPECIAL, LITERAL 008 } 009 010 private String source; 011 private int cursor; 012 private String token; 013 private TurtleTokenType type; 014 015 public TurtleLexer(String source) throws Exception { 016 this.source = source; 017 cursor = 0; 018 readNext(); 019 } 020 021 private void readNext() throws Exception { 022 if (cursor >= source.length()) { 023 token = null; 024 type = TurtleTokenType.NULL; 025 } else if (source.charAt(cursor) == '"') 026 readLiteral(); 027 else if (source.charAt(cursor) == '[' || source.charAt(cursor) == ']') 028 readDelimiter(); 029 else if (source.charAt(cursor) == '(') 030 throw new Exception("not supported yet"); 031 else if (source.charAt(cursor) == ';' || source.charAt(cursor) == '.' || source.charAt(cursor) == ',') 032 readDelimiter(); 033 else if (Character.isLetter(source.charAt(cursor))) 034 readToken(); 035 036 } 037 038 private void readLiteral() { 039 StringBuilder b = new StringBuilder(); 040 cursor++; // skip " 041 while (cursor < source.length() && source.charAt(cursor) != '"') { 042 if (source.charAt(cursor) == '\\') { 043 b.append(source.charAt(cursor)); 044 cursor++; 045 } 046 b.append(source.charAt(cursor)); 047 cursor++; 048 } 049 token = "\""+b.toString()+"\""; 050 type = TurtleTokenType.LITERAL; 051 cursor++; // skip " 052 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) 053 cursor++; 054 } 055 056 private void readDelimiter() { 057 StringBuilder b = new StringBuilder(); 058 b.append(source.charAt(cursor)); 059 cursor++; 060 token = b.toString(); 061 type = TurtleTokenType.SPECIAL; 062 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) 063 cursor++; 064 } 065 066 private void readToken() { 067 StringBuilder b = new StringBuilder(); 068 while (cursor < source.length() && isValidTokenChar(source.charAt(cursor))) { 069 if (source.charAt(cursor) == '\\') { 070 b.append(source.charAt(cursor)); 071 cursor++; 072 } 073 b.append(source.charAt(cursor)); 074 cursor++; 075 } 076 token = b.toString(); 077 type = TurtleTokenType.TOKEN; 078 if (token.endsWith(".")) { 079 cursor--; 080 token = token.substring(0, token.length()-1); 081 } 082 while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) 083 cursor++; 084 } 085 086 private boolean isValidTokenChar(char c) { 087 return Character.isLetter(c) || Character.isDigit(c) || c == ':' || c == '\\' || c == '.'; 088 } 089 090 public boolean done() { 091 return type == TurtleTokenType.NULL; 092 } 093 094 public String next() throws Exception { 095 String res = token; 096 readNext(); 097 return res; 098 } 099 100 public String peek() throws Exception { 101 return token; 102 } 103 104 public TurtleTokenType peekType() { 105 return type; 106 } 107 108 109}