001/*
002 * Copyright 2011-2016 UnboundID Corp.
003 *
004 * This program is free software; you can redistribute it and/or modify
005 * it under the terms of the GNU General Public License (GPLv2 only)
006 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
007 * as published by the Free Software Foundation.
008 *
009 * This program is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
012 * GNU General Public License for more details.
013 *
014 * You should have received a copy of the GNU General Public License
015 * along with this program; if not, see <http://www.gnu.org/licenses>.
016 */
017
018package com.unboundid.scim.sdk;
019
020import org.json.JSONObject;
021
022import java.text.MessageFormat;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.List;
026import java.util.Stack;
027
028
029
030/**
031 * A parser for SCIM filter expressions.
032 */
033public class FilterParser
034{
035  /**
036   * The filter to be parsed.
037   */
038  private final String filterString;
039
040  /**
041   * The default schema that should be assumed when parsing attributes with
042   * no schema explicitly defined in the URN.
043   */
044  private final String defaultSchema;
045
046  /**
047   * The position one higher than the last character.
048   */
049  private int endPos;
050
051  /**
052   * The current character position.
053   */
054  private int currentPos;
055
056  /**
057   * The position marking the first character of the previous word or value.
058   */
059  private int markPos;
060
061
062
063  /**
064   * Base class for expression stack nodes. The expression stack is needed to
065   * employ the shunting-yard algorithm to parse the filter expression.
066   */
067  class Node
068  {
069    private final int pos;
070
071
072
073    /**
074     * Create a new node.
075     *
076     * @param pos  The position of the node in the filter string.
077     */
078    public Node(final int pos)
079    {
080      this.pos = pos;
081    }
082
083
084
085    /**
086     * Retrieve the position of the node in the filter string.
087     * @return  The position of the node in the filter string.
088     */
089    public int getPos()
090    {
091      return pos;
092    }
093  }
094
095
096
097  /**
098   * A node representing a filter component.
099   */
100  class FilterNode extends Node
101  {
102    private final SCIMFilter filterComponent;
103
104
105
106    /**
107     * Create a new filter component node.
108     *
109     * @param filterComponent  The filter component.
110     * @param pos              The position of the node in the filter string.
111     */
112    public FilterNode(final SCIMFilter filterComponent,
113                      final int pos)
114    {
115      super(pos);
116      this.filterComponent = filterComponent;
117    }
118
119
120
121    /**
122     * Retrieve the filter component.
123     *
124     * @return  The filter component.
125     */
126    public SCIMFilter getFilterComponent()
127    {
128      return filterComponent;
129    }
130
131
132
133    @Override
134    public String toString()
135    {
136      return "FilterNode{" +
137             "filterComponent=" + filterComponent +
138             "} " + super.toString();
139    }
140  }
141
142
143
144  /**
145   * A node representing a logical operator.
146   */
147  class OperatorNode extends Node
148  {
149    private final SCIMFilterType filterType;
150
151    /**
152     * Create a new logical operator node.
153     *
154     * @param filterType   The type of operator, either SCIMFilterType.AND or
155     *                     SCIMFilterType.OR.
156     * @param pos          The position of the node in the filter string.
157     */
158    public OperatorNode(final SCIMFilterType filterType,
159                        final int pos)
160    {
161      super(pos);
162      this.filterType = filterType;
163    }
164
165
166
167    /**
168     * Retrieve the type of operator.
169     *
170     * @return  The type of operator, either SCIMFilterType.AND or
171     *          SCIMFilterType.OR.
172     */
173    public SCIMFilterType getFilterType()
174    {
175      return filterType;
176    }
177
178
179
180    /**
181     * Retrieve the precedence of the operator.
182     *
183     * @return  The precedence of the operator.
184     */
185    public int getPrecedence()
186    {
187      switch (filterType)
188      {
189        case AND:
190          return 2;
191
192        case OR:
193        default:
194          return 1;
195      }
196    }
197
198
199
200    @Override
201    public String toString()
202    {
203      return "OperatorNode{" +
204             "filterType=" + filterType +
205             "} " + super.toString();
206    }
207  }
208
209
210
211  /**
212   * A node representing an opening parenthesis.
213   */
214  class LeftParenthesisNode extends Node
215  {
216    /**
217     * Create a new opening parenthesis node.
218     *
219     * @param pos  The position of the parenthesis in the filter string.
220     */
221    public LeftParenthesisNode(final int pos)
222    {
223      super(pos);
224    }
225  }
226
227
228
229  /**
230   * Create a new instance of a filter parser.
231   *
232   * @param filterString  The filter to be parsed.
233   * @param defaultSchema The default schema that should be assumed when parsing
234   *                      attributes without the schema explicitly defined in
235   *                      the URN.
236   */
237  public FilterParser(final String filterString, final String defaultSchema)
238  {
239    this.filterString = filterString;
240    this.endPos = filterString.length();
241    this.currentPos = 0;
242    this.markPos = 0;
243    this.defaultSchema = defaultSchema;
244  }
245
246
247
248  /**
249   * Parse the filter provided in the constructor.
250   *
251   * @return  A parsed SCIM filter.
252   *
253   * @throws  SCIMException  If the filter string could not be parsed.
254   */
255  public SCIMFilter parse()
256      throws SCIMException
257  {
258    try
259    {
260      return readFilter();
261    }
262    catch (Exception e)
263    {
264      Debug.debugException(e);
265      throw SCIMException.createException(
266          400, MessageFormat.format("Invalid filter ''{0}'': {1}",
267                                    filterString, e.getMessage()));
268    }
269  }
270
271
272
273  /**
274   * Read a filter component at the current position. A filter component is
275   * <pre>
276   * attribute attribute-operator [value]
277   * </pre>
278   * Most attribute operators require a value but 'pr' (presence) requires
279   * no value.
280   *
281   * @return  The parsed filter component.
282   */
283  private SCIMFilter readFilterComponent()
284  {
285    String word = readWord();
286    if (word == null)
287    {
288      final String msg = String.format(
289          "End of input at position %d but expected a filter expression",
290          markPos);
291      throw new IllegalArgumentException(msg);
292    }
293
294    final AttributePath filterAttribute;
295    try
296    {
297      filterAttribute = AttributePath.parse(word, defaultSchema);
298    }
299    catch (final Exception e)
300    {
301      Debug.debugException(e);
302      final String msg = String.format(
303          "Expected an attribute reference at position %d: %s",
304          markPos, e.getMessage());
305      throw new IllegalArgumentException(msg);
306    }
307
308    final String operator = readWord();
309    if (operator == null)
310    {
311      final String msg = String.format(
312          "End of input at position %d but expected an attribute operator",
313          markPos);
314      throw new IllegalArgumentException(msg);
315    }
316
317    final SCIMFilterType filterType;
318    if (operator.equalsIgnoreCase("eq"))
319    {
320      filterType = SCIMFilterType.EQUALITY;
321    }
322    else if (operator.equalsIgnoreCase("co"))
323    {
324      filterType = SCIMFilterType.CONTAINS;
325    }
326    else if (operator.equalsIgnoreCase("sw"))
327    {
328      filterType = SCIMFilterType.STARTS_WITH;
329    }
330    else if (operator.equalsIgnoreCase("pr"))
331    {
332      filterType = SCIMFilterType.PRESENCE;
333    }
334    else if (operator.equalsIgnoreCase("gt"))
335    {
336      filterType = SCIMFilterType.GREATER_THAN;
337    }
338    else if (operator.equalsIgnoreCase("ge"))
339    {
340      filterType = SCIMFilterType.GREATER_OR_EQUAL;
341    }
342    else if (operator.equalsIgnoreCase("lt"))
343    {
344      filterType = SCIMFilterType.LESS_THAN;
345    }
346    else if (operator.equalsIgnoreCase("le"))
347    {
348      filterType = SCIMFilterType.LESS_OR_EQUAL;
349    }
350    else
351    {
352      final String msg = String.format(
353          "Unrecognized attribute operator '%s' at position %d. " +
354          "Expected: eq,co,sw,pr,gt,ge,lt,le", operator, markPos);
355      throw new IllegalArgumentException(msg);
356    }
357
358    final String filterValueString;
359    if (!filterType.equals(SCIMFilterType.PRESENCE))
360    {
361      filterValueString = readValue();
362      if (filterValueString == null)
363      {
364        final String msg = String.format(
365            "End of input at position %d while expecting a value for " +
366            "operator %s", markPos, operator);
367        throw new IllegalArgumentException(msg);
368      }
369    }
370    else
371    {
372      filterValueString = null;
373    }
374
375    return new SCIMFilter(
376        filterType, filterAttribute, filterValueString,
377        (filterValueString != null), null);
378  }
379
380
381
382  /**
383   * Read a filter expression.
384   *
385   * @return  The SCIM filter.
386   */
387  private SCIMFilter readFilter()
388  {
389    final Stack<Node> expressionStack = new Stack<Node>();
390
391    // Employ the shunting-yard algorithm to parse into reverse polish notation,
392    // where the operands are filter components and the operators are the
393    // logical AND and OR operators. This algorithm ensures that operator
394    // precedence and parentheses are respected.
395    final List<Node> reversePolish = new ArrayList<Node>();
396    for (String word = readWord(); word != null; word = readWord())
397    {
398      if (word.equalsIgnoreCase("and") || word.equalsIgnoreCase("or"))
399      {
400        final OperatorNode currentOperator;
401        if (word.equalsIgnoreCase("and"))
402        {
403          currentOperator = new OperatorNode(SCIMFilterType.AND, markPos);
404        }
405        else
406        {
407          currentOperator = new OperatorNode(SCIMFilterType.OR, markPos);
408        }
409        while (!expressionStack.empty() &&
410               (expressionStack.peek() instanceof OperatorNode))
411        {
412          final OperatorNode previousOperator =
413              (OperatorNode)expressionStack.peek();
414          if (previousOperator.getPrecedence() <
415              currentOperator.getPrecedence())
416          {
417            break;
418          }
419          reversePolish.add(expressionStack.pop());
420        }
421        expressionStack.push(currentOperator);
422      }
423      else if (word.equals("("))
424      {
425        expressionStack.push(new LeftParenthesisNode(markPos));
426      }
427      else if (word.equals(")"))
428      {
429        while (!expressionStack.empty() &&
430               !(expressionStack.peek() instanceof LeftParenthesisNode))
431        {
432          reversePolish.add(expressionStack.pop());
433        }
434        if (expressionStack.empty())
435        {
436          final String msg =
437              String.format("No opening parenthesis matching closing " +
438                            "parenthesis at position %d", markPos);
439          throw new IllegalArgumentException(msg);
440        }
441        expressionStack.pop();
442      }
443      else
444      {
445        rewind();
446        final int pos = currentPos;
447        final SCIMFilter filterComponent = readFilterComponent();
448        reversePolish.add(new FilterNode(filterComponent, pos));
449      }
450    }
451
452    while  (!expressionStack.empty())
453    {
454      final Node node = expressionStack.pop();
455      if (node instanceof LeftParenthesisNode)
456      {
457        final String msg =
458            String.format("No closing parenthesis matching opening " +
459                          "parenthesis at position %d", node.getPos());
460        throw new IllegalArgumentException(msg);
461      }
462      reversePolish.add(node);
463    }
464
465    // Evaluate the reverse polish notation to create a single complex filter.
466    final Stack<FilterNode> filterStack = new Stack<FilterNode>();
467    for (final Node node : reversePolish)
468    {
469      if (node instanceof OperatorNode)
470      {
471        final FilterNode rightOperand = filterStack.pop();
472        final FilterNode leftOperand = filterStack.pop();
473
474        final OperatorNode operatorNode = (OperatorNode)node;
475        if (operatorNode.getFilterType().equals(SCIMFilterType.AND))
476        {
477          final SCIMFilter filter = SCIMFilter.createAndFilter(
478              Arrays.asList(leftOperand.getFilterComponent(),
479                            rightOperand.getFilterComponent()));
480          filterStack.push(new FilterNode(filter, leftOperand.getPos()));
481        }
482        else
483        {
484          final SCIMFilter filter = SCIMFilter.createOrFilter(
485              Arrays.asList(leftOperand.getFilterComponent(),
486                            rightOperand.getFilterComponent()));
487          filterStack.push(new FilterNode(filter, leftOperand.getPos()));
488        }
489      }
490      else
491      {
492        filterStack.push((FilterNode)node);
493      }
494    }
495
496    if (filterStack.size() == 0)
497    {
498      final String msg = String.format("Empty filter expression");
499      throw new IllegalArgumentException(msg);
500    }
501    else if (filterStack.size() > 1)
502    {
503      final String msg = String.format(
504          "Unexpected characters at position %d", expressionStack.get(1).pos);
505      throw new IllegalArgumentException(msg);
506    }
507
508    return filterStack.get(0).filterComponent;
509  }
510
511
512
513  /**
514   * Read a word at the current position. A word is a consecutive sequence of
515   * characters terminated by whitespace or a parenthesis, or a single opening
516   * or closing parenthesis. Whitespace before and after the word is consumed.
517   * The start of the word is saved in {@code markPos}.
518   *
519   * @return The word at the current position, or {@code null} if the end of
520   *         the input has been reached.
521   */
522  private String readWord()
523  {
524    skipWhitespace();
525    markPos = currentPos;
526
527    loop:
528    while (currentPos < endPos)
529    {
530      final char c = filterString.charAt(currentPos);
531      switch (c)
532      {
533        case '(':
534        case ')':
535          if (currentPos == markPos)
536          {
537            currentPos++;
538          }
539          break loop;
540
541        case ' ':
542          break loop;
543
544        default:
545          currentPos++;
546          break;
547      }
548    }
549
550    if (currentPos - markPos == 0)
551    {
552      return null;
553    }
554
555    final String word = filterString.substring(markPos, currentPos);
556
557    skipWhitespace();
558    return word;
559  }
560
561
562
563  /**
564   * Rewind the current position to the start of the previous word or value.
565   */
566  private void rewind()
567  {
568    currentPos = markPos;
569  }
570
571
572
573  /**
574   * Read a value at the current position. A value can be a number, a datetime
575   * or a boolean value (the words true or false), or a string value in double
576   * quotes, using the same syntax as for JSON values. Whitespace before and
577   * after the value is consumed. The start of the value is saved in
578   * {@code markPos}.
579   *
580   * @return A String representing the value at the current position, or
581   *         {@code null} if the end of the input has already been reached.
582   */
583  public String readValue()
584  {
585    skipWhitespace();
586    markPos = currentPos;
587
588    if (currentPos == endPos)
589    {
590      return null;
591    }
592
593    if (filterString.charAt(currentPos) == '"')
594    {
595      currentPos++;
596
597      final StringBuilder builder = new StringBuilder();
598      while (currentPos < endPos)
599      {
600        final char c = filterString.charAt(currentPos);
601        switch (c)
602        {
603          case '\\':
604            currentPos++;
605            if (endOfInput())
606            {
607              final String msg = String.format(
608                  "End of input in a string value that began at " +
609                  "position %d", markPos);
610              throw new IllegalArgumentException(msg);
611            }
612            final char escapeChar = filterString.charAt(currentPos);
613            currentPos++;
614            switch (escapeChar)
615            {
616              case '"':
617              case '/':
618              case '\'':
619              case '\\':
620                builder.append(escapeChar);
621                break;
622              case 'b':
623                builder.append('\b');
624                break;
625              case 'f':
626                builder.append('\f');
627                break;
628              case 'n':
629                builder.append('\n');
630                break;
631              case 'r':
632                builder.append('\r');
633                break;
634              case 't':
635                builder.append('\t');
636                break;
637              case 'u':
638                if (currentPos + 4 > endPos)
639                {
640                  final String msg = String.format(
641                      "End of input in a string value that began at " +
642                      "position %d", markPos);
643                  throw new IllegalArgumentException(msg);
644                }
645                final String hexChars =
646                    filterString.substring(currentPos, currentPos + 4);
647                builder.append((char)Integer.parseInt(hexChars, 16));
648                currentPos += 4;
649                break;
650              default:
651                final String msg = String.format(
652                    "Unrecognized escape sequence '\\%c' in a string value " +
653                    "at position %d", escapeChar, currentPos - 2);
654                throw new IllegalArgumentException(msg);
655            }
656            break;
657
658          case '"':
659            currentPos++;
660            skipWhitespace();
661            return builder.toString();
662
663          default:
664            builder.append(c);
665            currentPos++;
666            break;
667        }
668      }
669
670      final String msg = String.format(
671          "End of input in a string value that began at " +
672          "position %d", markPos);
673      throw new IllegalArgumentException(msg);
674    }
675    else
676    {
677      loop:
678      while (currentPos < endPos)
679      {
680        final char c = filterString.charAt(currentPos);
681        switch (c)
682        {
683          case ' ':
684          case '(':
685          case ')':
686            break loop;
687
688          case '+':
689          case '-':
690          case '.':
691          case '0':
692          case '1':
693          case '2':
694          case '3':
695          case '4':
696          case '5':
697          case '6':
698          case '7':
699          case '8':
700          case '9':
701          case 'A':
702          case 'B':
703          case 'C':
704          case 'D':
705          case 'E':
706          case 'F':
707          case 'G':
708          case 'H':
709          case 'I':
710          case 'J':
711          case 'K':
712          case 'L':
713          case 'M':
714          case 'N':
715          case 'O':
716          case 'P':
717          case 'Q':
718          case 'R':
719          case 'S':
720          case 'T':
721          case 'U':
722          case 'V':
723          case 'W':
724          case 'X':
725          case 'Y':
726          case 'Z':
727          case 'a':
728          case 'b':
729          case 'c':
730          case 'd':
731          case 'e':
732          case 'f':
733          case 'g':
734          case 'h':
735          case 'i':
736          case 'j':
737          case 'k':
738          case 'l':
739          case 'm':
740          case 'n':
741          case 'o':
742          case 'p':
743          case 'q':
744          case 'r':
745          case 's':
746          case 't':
747          case 'u':
748          case 'v':
749          case 'w':
750          case 'x':
751          case 'y':
752          case 'z':
753            // These are all OK.
754            currentPos++;
755            break;
756
757          case '/':
758          case ':':
759          case ';':
760          case '<':
761          case '=':
762          case '>':
763          case '?':
764          case '@':
765          case '[':
766          case '\\':
767          case ']':
768          case '^':
769          case '_':
770          case '`':
771            // These are not allowed, but they are explicitly called out because
772            // they are included in the range of values between '-' and 'z', and
773            // making sure all possible characters are included can help make
774            // the switch statement more efficient.  We'll fall through to the
775            // default clause to reject them.
776          default:
777            final String msg = String.format(
778                "Invalid character '%c' in a number or boolean value at " +
779                "position %d",
780                c, currentPos);
781            throw new IllegalArgumentException(msg);
782        }
783      }
784
785      final String s = filterString.substring(markPos, currentPos);
786      skipWhitespace();
787      final Object value = JSONObject.stringToValue(s);
788      if (value.equals(JSONObject.NULL) || value instanceof String)
789      {
790        final String msg = String.format(
791            "Invalid filter value beginning at position %d", markPos);
792        throw new IllegalArgumentException(msg);
793      }
794
795      return s;
796    }
797  }
798
799
800
801  /**
802   * Determine if the end of the input has been reached.
803   *
804   * @return  {@code true} if the end of the input has been reached.
805   */
806  private boolean endOfInput()
807  {
808    return currentPos == endPos;
809  }
810
811
812
813  /**
814   * Skip over any whitespace at the current position.
815   */
816  private void skipWhitespace()
817  {
818    while (currentPos < endPos && filterString.charAt(currentPos) == ' ')
819    {
820      currentPos++;
821    }
822  }
823}