/*
 * Copyright 2011-2013 UnboundID Corp.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (GPLv2 only)
 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses>.
 */
017
018package com.unboundid.scim.sdk;
019
020import org.json.JSONObject;
021
022import java.text.MessageFormat;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.List;
026import java.util.Stack;
027
028
029
030/**
031 * A parser for SCIM filter expressions.
032 */
033public class FilterParser
034{
035  /**
036   * The filter to be parsed.
037   */
038  private final String filterString;
039
040  /**
041   * The default schema that should be assumed when parsing attributes with
042   * no schema explicitly defined in the URN.
043   */
044  private final String defaultSchema;
045
046  /**
047   * The position one higher than the last character.
048   */
049  private int endPos;
050
051  /**
052   * The current character position.
053   */
054  private int currentPos;
055
056  /**
057   * The position marking the first character of the previous word or value.
058   */
059  private int markPos;
060
061
062
063  /**
064   * Base class for expression stack nodes. The expression stack is needed to
065   * employ the shunting-yard algorithm to parse the filter expression.
066   */
067  class Node
068  {
069    private final int pos;
070
071
072
073    /**
074     * Create a new node.
075     *
076     * @param pos  The position of the node in the filter string.
077     */
078    public Node(final int pos)
079    {
080      this.pos = pos;
081    }
082
083
084
085    /**
086     * Retrieve the position of the node in the filter string.
087     * @return  The position of the node in the filter string.
088     */
089    public int getPos()
090    {
091      return pos;
092    }
093  }
094
095
096
097  /**
098   * A node representing a filter component.
099   */
100  class FilterNode extends Node
101  {
102    private final SCIMFilter filterComponent;
103
104
105
106    /**
107     * Create a new filter component node.
108     *
109     * @param filterComponent  The filter component.
110     * @param pos              The position of the node in the filter string.
111     */
112    public FilterNode(final SCIMFilter filterComponent,
113                      final int pos)
114    {
115      super(pos);
116      this.filterComponent = filterComponent;
117    }
118
119
120
121    /**
122     * Retrieve the filter component.
123     *
124     * @return  The filter component.
125     */
126    public SCIMFilter getFilterComponent()
127    {
128      return filterComponent;
129    }
130
131
132
133    @Override
134    public String toString()
135    {
136      return "FilterNode{" +
137             "filterComponent=" + filterComponent +
138             "} " + super.toString();
139    }
140  }
141
142
143
144  /**
145   * A node representing a logical operator.
146   */
147  class OperatorNode extends Node
148  {
149    private final SCIMFilterType filterType;
150
151    /**
152     * Create a new logical operator node.
153     *
154     * @param filterType   The type of operator, either SCIMFilterType.AND or
155     *                     SCIMFilterType.OR.
156     * @param pos          The position of the node in the filter string.
157     */
158    public OperatorNode(final SCIMFilterType filterType,
159                        final int pos)
160    {
161      super(pos);
162      this.filterType = filterType;
163    }
164
165
166
167    /**
168     * Retrieve the type of operator.
169     *
170     * @return  The type of operator, either SCIMFilterType.AND or
171     *          SCIMFilterType.OR.
172     */
173    public SCIMFilterType getFilterType()
174    {
175      return filterType;
176    }
177
178
179
180    /**
181     * Retrieve the precedence of the operator.
182     *
183     * @return  The precedence of the operator.
184     */
185    public int getPrecedence()
186    {
187      switch (filterType)
188      {
189        case AND:
190          return 2;
191
192        case OR:
193        default:
194          return 1;
195      }
196    }
197
198
199
200    @Override
201    public String toString()
202    {
203      return "OperatorNode{" +
204             "filterType=" + filterType +
205             "} " + super.toString();
206    }
207  }
208
209
210
211  /**
212   * A node representing an opening parenthesis.
213   */
214  class LeftParenthesisNode extends Node
215  {
216    /**
217     * Create a new opening parenthesis node.
218     *
219     * @param pos  The position of the parenthesis in the filter string.
220     */
221    public LeftParenthesisNode(final int pos)
222    {
223      super(pos);
224    }
225  }
226
227
228
229  /**
230   * Create a new instance of a filter parser.
231   *
232   * @param filterString  The filter to be parsed.
233   * @param defaultSchema The default schema that should be assumed when parsing
234   *                      attributes without the schema explicitly defined in
235   *                      the URN.
236   */
237  public FilterParser(final String filterString, final String defaultSchema)
238  {
239    this.filterString = filterString;
240    this.endPos = filterString.length();
241    this.currentPos = 0;
242    this.markPos = 0;
243    this.defaultSchema = defaultSchema;
244  }
245
246
247
248  /**
249   * Parse the filter provided in the constructor.
250   *
251   * @return  A parsed SCIM filter.
252   *
253   * @throws  SCIMException  If the filter string could not be parsed.
254   */
255  public SCIMFilter parse()
256      throws SCIMException
257  {
258    try
259    {
260      return readFilter();
261    }
262    catch (Exception e)
263    {
264      Debug.debugException(e);
265      throw SCIMException.createException(
266          400, MessageFormat.format("Invalid filter ''{0}'': {1}",
267                                    filterString, e.getMessage()));
268    }
269  }
270
271
272
273  /**
274   * Read a filter component at the current position. A filter component is
275   * <pre>
276   * attribute attribute-operator [value]
277   * </pre>
278   * Most attribute operators require a value but 'pr' (presence) requires
279   * no value.
280   *
281   * @return  The parsed filter component.
282   */
283  private SCIMFilter readFilterComponent()
284  {
285    String word = readWord();
286    if (word == null)
287    {
288      final String msg = String.format(
289          "End of input at position %d but expected a filter expression",
290          markPos);
291      throw new IllegalArgumentException(msg);
292    }
293
294    final AttributePath filterAttribute;
295    try
296    {
297      filterAttribute = AttributePath.parse(word, defaultSchema);
298    }
299    catch (final Exception e)
300    {
301      Debug.debugException(e);
302      final String msg = String.format(
303          "Expected an attribute reference at position %d: %s",
304          markPos, e.getMessage());
305      throw new IllegalArgumentException(msg);
306    }
307
308    final String operator = readWord();
309    if (operator == null)
310    {
311      final String msg = String.format(
312          "End of input at position %d but expected an attribute operator",
313          markPos);
314      throw new IllegalArgumentException(msg);
315    }
316
317    final SCIMFilterType filterType;
318    if (operator.equalsIgnoreCase("eq"))
319    {
320      filterType = SCIMFilterType.EQUALITY;
321    }
322    else if (operator.equalsIgnoreCase("co"))
323    {
324      filterType = SCIMFilterType.CONTAINS;
325    }
326    else if (operator.equalsIgnoreCase("sw"))
327    {
328      filterType = SCIMFilterType.STARTS_WITH;
329    }
330    else if (operator.equalsIgnoreCase("pr"))
331    {
332      filterType = SCIMFilterType.PRESENCE;
333    }
334    else if (operator.equalsIgnoreCase("gt"))
335    {
336      filterType = SCIMFilterType.GREATER_THAN;
337    }
338    else if (operator.equalsIgnoreCase("ge"))
339    {
340      filterType = SCIMFilterType.GREATER_OR_EQUAL;
341    }
342    else if (operator.equalsIgnoreCase("lt"))
343    {
344      filterType = SCIMFilterType.LESS_THAN;
345    }
346    else if (operator.equalsIgnoreCase("le"))
347    {
348      filterType = SCIMFilterType.LESS_OR_EQUAL;
349    }
350    else
351    {
352      final String msg = String.format(
353          "Unrecognized attribute operator '%s' at position %d. " +
354          "Expected: eq,co,sw,pr,gt,ge,lt,le", operator, markPos);
355      throw new IllegalArgumentException(msg);
356    }
357
358    final Object filterValue;
359    if (!filterType.equals(SCIMFilterType.PRESENCE))
360    {
361      filterValue = readValue();
362      if (filterValue == null)
363      {
364        final String msg = String.format(
365            "End of input at position %d while expecting a value for " +
366            "operator %s", markPos, operator);
367        throw new IllegalArgumentException(msg);
368      }
369    }
370    else
371    {
372      filterValue = null;
373    }
374
375    return new SCIMFilter(
376        filterType, filterAttribute,
377        filterValue != null ? filterValue.toString() : null,
378        (filterValue != null) && (filterValue instanceof String),
379        null);
380  }
381
382
383
384  /**
385   * Read a filter expression.
386   *
387   * @return  The SCIM filter.
388   */
389  private SCIMFilter readFilter()
390  {
391    final Stack<Node> expressionStack = new Stack<Node>();
392
393    // Employ the shunting-yard algorithm to parse into reverse polish notation,
394    // where the operands are filter components and the operators are the
395    // logical AND and OR operators. This algorithm ensures that operator
396    // precedence and parentheses are respected.
397    final List<Node> reversePolish = new ArrayList<Node>();
398    for (String word = readWord(); word != null; word = readWord())
399    {
400      if (word.equalsIgnoreCase("and") || word.equalsIgnoreCase("or"))
401      {
402        final OperatorNode currentOperator;
403        if (word.equalsIgnoreCase("and"))
404        {
405          currentOperator = new OperatorNode(SCIMFilterType.AND, markPos);
406        }
407        else
408        {
409          currentOperator = new OperatorNode(SCIMFilterType.OR, markPos);
410        }
411        while (!expressionStack.empty() &&
412               (expressionStack.peek() instanceof OperatorNode))
413        {
414          final OperatorNode previousOperator =
415              (OperatorNode)expressionStack.peek();
416          if (previousOperator.getPrecedence() <
417              currentOperator.getPrecedence())
418          {
419            break;
420          }
421          reversePolish.add(expressionStack.pop());
422        }
423        expressionStack.push(currentOperator);
424      }
425      else if (word.equals("("))
426      {
427        expressionStack.push(new LeftParenthesisNode(markPos));
428      }
429      else if (word.equals(")"))
430      {
431        while (!expressionStack.empty() &&
432               !(expressionStack.peek() instanceof LeftParenthesisNode))
433        {
434          reversePolish.add(expressionStack.pop());
435        }
436        if (expressionStack.empty())
437        {
438          final String msg =
439              String.format("No opening parenthesis matching closing " +
440                            "parenthesis at position %d", markPos);
441          throw new IllegalArgumentException(msg);
442        }
443        expressionStack.pop();
444      }
445      else
446      {
447        rewind();
448        final int pos = currentPos;
449        final SCIMFilter filterComponent = readFilterComponent();
450        reversePolish.add(new FilterNode(filterComponent, pos));
451      }
452    }
453
454    while  (!expressionStack.empty())
455    {
456      final Node node = expressionStack.pop();
457      if (node instanceof LeftParenthesisNode)
458      {
459        final String msg =
460            String.format("No closing parenthesis matching opening " +
461                          "parenthesis at position %d", node.getPos());
462        throw new IllegalArgumentException(msg);
463      }
464      reversePolish.add(node);
465    }
466
467    // Evaluate the reverse polish notation to create a single complex filter.
468    final Stack<FilterNode> filterStack = new Stack<FilterNode>();
469    for (final Node node : reversePolish)
470    {
471      if (node instanceof OperatorNode)
472      {
473        final FilterNode rightOperand = filterStack.pop();
474        final FilterNode leftOperand = filterStack.pop();
475
476        final OperatorNode operatorNode = (OperatorNode)node;
477        if (operatorNode.getFilterType().equals(SCIMFilterType.AND))
478        {
479          final SCIMFilter filter = SCIMFilter.createAndFilter(
480              Arrays.asList(leftOperand.getFilterComponent(),
481                            rightOperand.getFilterComponent()));
482          filterStack.push(new FilterNode(filter, leftOperand.getPos()));
483        }
484        else
485        {
486          final SCIMFilter filter = SCIMFilter.createOrFilter(
487              Arrays.asList(leftOperand.getFilterComponent(),
488                            rightOperand.getFilterComponent()));
489          filterStack.push(new FilterNode(filter, leftOperand.getPos()));
490        }
491      }
492      else
493      {
494        filterStack.push((FilterNode)node);
495      }
496    }
497
498    if (filterStack.size() == 0)
499    {
500      final String msg = String.format("Empty filter expression");
501      throw new IllegalArgumentException(msg);
502    }
503    else if (filterStack.size() > 1)
504    {
505      final String msg = String.format(
506          "Unexpected characters at position %d", expressionStack.get(1).pos);
507      throw new IllegalArgumentException(msg);
508    }
509
510    return filterStack.get(0).filterComponent;
511  }
512
513
514
515  /**
516   * Read a word at the current position. A word is a consecutive sequence of
517   * characters terminated by whitespace or a parenthesis, or a single opening
518   * or closing parenthesis. Whitespace before and after the word is consumed.
519   * The start of the word is saved in {@code markPos}.
520   *
521   * @return The word at the current position, or {@code null} if the end of
522   *         the input has been reached.
523   */
524  private String readWord()
525  {
526    skipWhitespace();
527    markPos = currentPos;
528
529    loop:
530    while (currentPos < endPos)
531    {
532      final char c = filterString.charAt(currentPos);
533      switch (c)
534      {
535        case '(':
536        case ')':
537          if (currentPos == markPos)
538          {
539            currentPos++;
540          }
541          break loop;
542
543        case ' ':
544          break loop;
545
546        default:
547          currentPos++;
548          break;
549      }
550    }
551
552    if (currentPos - markPos == 0)
553    {
554      return null;
555    }
556
557    final String word = filterString.substring(markPos, currentPos);
558
559    skipWhitespace();
560    return word;
561  }
562
563
564
565  /**
566   * Rewind the current position to the start of the previous word or value.
567   */
568  private void rewind()
569  {
570    currentPos = markPos;
571  }
572
573
574
575  /**
576   * Read a value at the current position. A value can be a number, or a
577   * boolean value (the words true or false), or a string value in double
578   * quotes, using the same syntax as for JSON values. Whitespace before and
579   * after the value is consumed. The start of the value is saved in
580   * {@code markPos}.
581   *
582   * @return A Boolean, Double, Integer, Long or String representing the value
583   *         at the current position, or {@code null} if the end of the input
584   *         has already been reached.
585   */
586  public Object readValue()
587  {
588    skipWhitespace();
589    markPos = currentPos;
590
591    if (currentPos == endPos)
592    {
593      return null;
594    }
595
596    if (filterString.charAt(currentPos) == '"')
597    {
598      currentPos++;
599
600      final StringBuilder builder = new StringBuilder();
601      while (currentPos < endPos)
602      {
603        final char c = filterString.charAt(currentPos);
604        switch (c)
605        {
606          case '\\':
607            currentPos++;
608            if (endOfInput())
609            {
610              final String msg = String.format(
611                  "End of input in a string value that began at " +
612                  "position %d", markPos);
613              throw new IllegalArgumentException(msg);
614            }
615            final char escapeChar = filterString.charAt(currentPos);
616            currentPos++;
617            switch (escapeChar)
618            {
619              case '"':
620              case '/':
621              case '\'':
622              case '\\':
623                builder.append(escapeChar);
624                break;
625              case 'b':
626                builder.append('\b');
627                break;
628              case 'f':
629                builder.append('\f');
630                break;
631              case 'n':
632                builder.append('\n');
633                break;
634              case 'r':
635                builder.append('\r');
636                break;
637              case 't':
638                builder.append('\t');
639                break;
640              case 'u':
641                if (currentPos + 4 > endPos)
642                {
643                  final String msg = String.format(
644                      "End of input in a string value that began at " +
645                      "position %d", markPos);
646                  throw new IllegalArgumentException(msg);
647                }
648                final String hexChars =
649                    filterString.substring(currentPos, currentPos + 4);
650                builder.append((char)Integer.parseInt(hexChars, 16));
651                currentPos += 4;
652                break;
653              default:
654                final String msg = String.format(
655                    "Unrecognized escape sequence '\\%c' in a string value " +
656                    "at position %d", escapeChar, currentPos - 2);
657                throw new IllegalArgumentException(msg);
658            }
659            break;
660
661          case '"':
662            currentPos++;
663            skipWhitespace();
664            return builder.toString();
665
666          default:
667            builder.append(c);
668            currentPos++;
669            break;
670        }
671      }
672
673      final String msg = String.format(
674          "End of input in a string value that began at " +
675          "position %d", markPos);
676      throw new IllegalArgumentException(msg);
677    }
678    else
679    {
680      loop:
681      while (currentPos < endPos)
682      {
683        final char c = filterString.charAt(currentPos);
684        switch (c)
685        {
686          case ' ':
687          case '(':
688          case ')':
689            break loop;
690
691          case '+':
692          case '-':
693          case '.':
694          case '0':
695          case '1':
696          case '2':
697          case '3':
698          case '4':
699          case '5':
700          case '6':
701          case '7':
702          case '8':
703          case '9':
704          case 'A':
705          case 'B':
706          case 'C':
707          case 'D':
708          case 'E':
709          case 'F':
710          case 'G':
711          case 'H':
712          case 'I':
713          case 'J':
714          case 'K':
715          case 'L':
716          case 'M':
717          case 'N':
718          case 'O':
719          case 'P':
720          case 'Q':
721          case 'R':
722          case 'S':
723          case 'T':
724          case 'U':
725          case 'V':
726          case 'W':
727          case 'X':
728          case 'Y':
729          case 'Z':
730          case 'a':
731          case 'b':
732          case 'c':
733          case 'd':
734          case 'e':
735          case 'f':
736          case 'g':
737          case 'h':
738          case 'i':
739          case 'j':
740          case 'k':
741          case 'l':
742          case 'm':
743          case 'n':
744          case 'o':
745          case 'p':
746          case 'q':
747          case 'r':
748          case 's':
749          case 't':
750          case 'u':
751          case 'v':
752          case 'w':
753          case 'x':
754          case 'y':
755          case 'z':
756            // These are all OK.
757            currentPos++;
758            break;
759
760          case '/':
761          case ':':
762          case ';':
763          case '<':
764          case '=':
765          case '>':
766          case '?':
767          case '@':
768          case '[':
769          case '\\':
770          case ']':
771          case '^':
772          case '_':
773          case '`':
774            // These are not allowed, but they are explicitly called out because
775            // they are included in the range of values between '-' and 'z', and
776            // making sure all possible characters are included can help make
777            // the switch statement more efficient.  We'll fall through to the
778            // default clause to reject them.
779          default:
780            final String msg = String.format(
781                "Invalid character '%c' in a number or boolean value at " +
782                "position %d",
783                c, currentPos);
784            throw new IllegalArgumentException(msg);
785        }
786      }
787
788      final String s = filterString.substring(markPos, currentPos);
789      skipWhitespace();
790      final Object value = JSONObject.stringToValue(s);
791
792      if (value.equals(JSONObject.NULL) || value instanceof String)
793      {
794        final String msg = String.format(
795            "Invalid filter value beginning at position %d", markPos);
796        throw new IllegalArgumentException(msg);
797      }
798
799      return value;
800    }
801  }
802
803
804
805  /**
806   * Determine if the end of the input has been reached.
807   *
808   * @return  {@code true} if the end of the input has been reached.
809   */
810  private boolean endOfInput()
811  {
812    return currentPos == endPos;
813  }
814
815
816
817  /**
818   * Skip over any whitespace at the current position.
819   */
820  private void skipWhitespace()
821  {
822    while (currentPos < endPos && filterString.charAt(currentPos) == ' ')
823    {
824      currentPos++;
825    }
826  }
827}