001package org.hl7.fhir.r4.formats;
002
/**
 * Minimal one-token-lookahead lexer for a subset of Turtle.
 *
 * <p>Recognised input:
 * <ul>
 *   <li>double-quoted literals (backslash escapes are kept verbatim),</li>
 *   <li>the single-character delimiters {@code [ ] ; . ,},</li>
 *   <li>tokens that start with a letter and continue with letters, digits,
 *       {@code ':'}, {@code '.'} or backslash escapes.</li>
 * </ul>
 * Whitespace between tokens is ignored. Any other character is rejected with an
 * exception rather than silently stalling the lexer.
 */
public class TurtleLexer {

  /** Classification of the token currently held by the lexer. */
  public enum TurtleTokenType {
    /** End of input; the current token is {@code null}. */
    NULL, 
    /** A name-like token starting with a letter. */
    TOKEN,
    /** One of the single-character delimiters {@code [ ] ; . ,}. */
    SPECIAL,
    /** A double-quoted literal, stored including its surrounding quotes. */
    LITERAL
  }

  private final String source;
  private int cursor; 
  private String token;
  private TurtleTokenType type;
  
  /**
   * Creates a lexer over {@code source} and reads the first token.
   *
   * @param source the text to tokenise
   * @throws Exception if the first token starts with an unsupported character
   */
  public TurtleLexer(String source) throws Exception {
    this.source = source;
    cursor = 0;
    readNext();
  }

  /**
   * Advances to the next token, updating {@code token} and {@code type}.
   *
   * @throws Exception if the next non-whitespace character cannot start a supported token
   */
  private void readNext() throws Exception {
    // Skipping here (rather than after each token) also covers whitespace at the
    // very start of the source.
    skipWhitespace();
    if (cursor >= source.length()) {
      token = null;
      type = TurtleTokenType.NULL;
      return;
    }
    char c = source.charAt(cursor);
    if (c == '"') {
      readLiteral();
    } else if (c == '[' || c == ']' || c == ';' || c == '.' || c == ',') {
      readDelimiter();
    } else if (c == '(') {
      throw new Exception("not supported yet");
    } else if (Character.isLetter(c)) {
      readToken();
    } else {
      // Without this branch the cursor would never advance and callers looping on
      // done()/next() would spin forever on the same token.
      throw new Exception("Unexpected character '" + c + "' at offset " + cursor);
    }
  }

  /** Moves the cursor past any run of whitespace. */
  private void skipWhitespace() {
    while (cursor < source.length() && Character.isWhitespace(source.charAt(cursor))) {
      cursor++;
    }
  }

  /**
   * Reads a double-quoted literal starting at the cursor. The stored token keeps the
   * surrounding quotes and any backslash escapes exactly as written. A literal that
   * runs to the end of the source without a closing quote is accepted as-is.
   */
  private void readLiteral() {
    StringBuilder b = new StringBuilder();
    cursor++; // skip opening "
    while (cursor < source.length() && source.charAt(cursor) != '"') {
      if (source.charAt(cursor) == '\\') {
        b.append('\\');
        cursor++;
        if (cursor >= source.length()) {
          break; // dangling backslash at end of input: nothing left to escape
        }
      }
      b.append(source.charAt(cursor));
      cursor++;
    }
    token = "\"" + b + "\"";
    type = TurtleTokenType.LITERAL;
    if (cursor < source.length()) {
      cursor++; // skip closing "
    }
  }

  /** Reads the single delimiter character at the cursor as a SPECIAL token. */
  private void readDelimiter() {
    token = String.valueOf(source.charAt(cursor));
    type = TurtleTokenType.SPECIAL;
    cursor++;
  }

  /**
   * Reads a name-like token starting at the cursor. A backslash takes the following
   * character with it regardless of what that character is. If the token ends with
   * {@code '.'}, that dot is given back to the input so it is lexed as a delimiter.
   */
  private void readToken() {
    StringBuilder b = new StringBuilder();
    while (cursor < source.length() && isValidTokenChar(source.charAt(cursor))) {
      if (source.charAt(cursor) == '\\') {
        b.append('\\');
        cursor++;
        if (cursor >= source.length()) {
          break; // dangling backslash at end of input: nothing left to escape
        }
      }
      b.append(source.charAt(cursor));
      cursor++;
    }
    token = b.toString();
    type = TurtleTokenType.TOKEN;
    if (token.endsWith(".")) {
      // Trailing '.' is the statement terminator, not part of the name.
      cursor--;
      token = token.substring(0, token.length() - 1);
    }
  }

  /** Returns whether {@code c} may appear inside a token. */
  private boolean isValidTokenChar(char c) {
    return Character.isLetterOrDigit(c) || c == ':' || c == '\\' || c == '.';
  }

  /**
   * @return {@code true} once the end of the input has been reached
   */
  public boolean done() {
    return type == TurtleTokenType.NULL;
  }

  /**
   * Returns the current token and advances to the following one.
   *
   * @return the current token, or {@code null} at end of input
   * @throws Exception if the following token starts with an unsupported character
   */
  public String next() throws Exception {
    String res = token;
    readNext();
    return res;
  }

  /**
   * Returns the current token without consuming it.
   *
   * @return the current token, or {@code null} at end of input
   * @throws Exception declared for interface compatibility; this method does not advance the lexer
   */
  public String peek() throws Exception {
    return token;
  }

  /**
   * @return the type of the current token
   */
  public TurtleTokenType peekType() {
    return type;
  }
  
  
}