001    /*
002     * Copyright 2011-2012 UnboundID Corp.
003     *
004     * This program is free software; you can redistribute it and/or modify
005     * it under the terms of the GNU General Public License (GPLv2 only)
006     * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
007     * as published by the Free Software Foundation.
008     *
009     * This program is distributed in the hope that it will be useful,
010     * but WITHOUT ANY WARRANTY; without even the implied warranty of
011     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
012     * GNU General Public License for more details.
013     *
014     * You should have received a copy of the GNU General Public License
015     * along with this program; if not, see <http://www.gnu.org/licenses>.
016     */
017    
018    package com.unboundid.scim.sdk;
019    
020    import org.json.JSONObject;
021    
022    import java.text.MessageFormat;
023    import java.util.ArrayList;
024    import java.util.Arrays;
025    import java.util.List;
026    import java.util.Stack;
027    
028    
029    
030    /**
031     * A parser for SCIM filter expressions.
032     */
033    public class FilterParser
034    {
035      /**
036       * The filter to be parsed.
037       */
038      private final String filterString;
039    
040      /**
041       * The position one higher than the last character.
042       */
043      private int endPos;
044    
045      /**
046       * The current character position.
047       */
048      private int currentPos;
049    
050      /**
051       * The position marking the first character of the previous word or value.
052       */
053      private int markPos;
054    
055    
056    
057      /**
058       * Base class for expression stack nodes. The expression stack is needed to
059       * employ the shunting-yard algorithm to parse the filter expression.
060       */
061      class Node
062      {
063        private final int pos;
064    
065    
066    
067        /**
068         * Create a new node.
069         *
070         * @param pos  The position of the node in the filter string.
071         */
072        public Node(final int pos)
073        {
074          this.pos = pos;
075        }
076    
077    
078    
079        /**
080         * Retrieve the position of the node in the filter string.
081         * @return  The position of the node in the filter string.
082         */
083        public int getPos()
084        {
085          return pos;
086        }
087      }
088    
089    
090    
091      /**
092       * A node representing a filter component.
093       */
094      class FilterNode extends Node
095      {
096        private final SCIMFilter filterComponent;
097    
098    
099    
100        /**
101         * Create a new filter component node.
102         *
103         * @param filterComponent  The filter component.
104         * @param pos              The position of the node in the filter string.
105         */
106        public FilterNode(final SCIMFilter filterComponent,
107                          final int pos)
108        {
109          super(pos);
110          this.filterComponent = filterComponent;
111        }
112    
113    
114    
115        /**
116         * Retrieve the filter component.
117         *
118         * @return  The filter component.
119         */
120        public SCIMFilter getFilterComponent()
121        {
122          return filterComponent;
123        }
124    
125    
126    
127        @Override
128        public String toString()
129        {
130          return "FilterNode{" +
131                 "filterComponent=" + filterComponent +
132                 "} " + super.toString();
133        }
134      }
135    
136    
137    
138      /**
139       * A node representing a logical operator.
140       */
141      class OperatorNode extends Node
142      {
143        private final SCIMFilterType filterType;
144    
145        /**
146         * Create a new logical operator node.
147         *
148         * @param filterType   The type of operator, either SCIMFilterType.AND or
149         *                     SCIMFilterType.OR.
150         * @param pos          The position of the node in the filter string.
151         */
152        public OperatorNode(final SCIMFilterType filterType,
153                            final int pos)
154        {
155          super(pos);
156          this.filterType = filterType;
157        }
158    
159    
160    
161        /**
162         * Retrieve the type of operator.
163         *
164         * @return  The type of operator, either SCIMFilterType.AND or
165         *          SCIMFilterType.OR.
166         */
167        public SCIMFilterType getFilterType()
168        {
169          return filterType;
170        }
171    
172    
173    
174        /**
175         * Retrieve the precedence of the operator.
176         *
177         * @return  The precedence of the operator.
178         */
179        public int getPrecedence()
180        {
181          switch (filterType)
182          {
183            case AND:
184              return 2;
185    
186            case OR:
187            default:
188              return 1;
189          }
190        }
191    
192    
193    
194        @Override
195        public String toString()
196        {
197          return "OperatorNode{" +
198                 "filterType=" + filterType +
199                 "} " + super.toString();
200        }
201      }
202    
203    
204    
205      /**
206       * A node representing an opening parenthesis.
207       */
208      class LeftParenthesisNode extends Node
209      {
210        /**
211         * Create a new opening parenthesis node.
212         *
213         * @param pos  The position of the parenthesis in the filter string.
214         */
215        public LeftParenthesisNode(final int pos)
216        {
217          super(pos);
218        }
219      }
220    
221    
222    
223      /**
224       * Create a new instance of a filter parser.
225       *
226       * @param filterString  The filter to be parsed.
227       */
228      public FilterParser(final String filterString)
229      {
230        this.filterString = filterString;
231        this.endPos = filterString.length();
232        this.currentPos = 0;
233        this.markPos = 0;
234      }
235    
236    
237    
238      /**
239       * Parse the filter provided in the constructor.
240       *
241       * @return  A parsed SCIM filter.
242       *
243       * @throws  SCIMException  If the filter string could not be parsed.
244       */
245      public SCIMFilter parse()
246          throws SCIMException
247      {
248        try
249        {
250          return readFilter();
251        }
252        catch (Exception e)
253        {
254          Debug.debugException(e);
255          throw SCIMException.createException(
256              400, MessageFormat.format("Invalid filter ''{0}'': {1}",
257                                        filterString, e.getMessage()));
258        }
259      }
260    
261    
262    
263      /**
264       * Read a filter component at the current position. A filter component is
265       * <pre>
266       * attribute attribute-operator [value]
267       * </pre>
268       * Most attribute operators require a value but 'pr' (presence) requires
269       * no value.
270       *
271       * @return  The parsed filter component.
272       */
273      private SCIMFilter readFilterComponent()
274      {
275        String word = readWord();
276        if (word == null)
277        {
278          final String msg = String.format(
279              "End of input at position %d but expected a filter expression",
280              markPos);
281          throw new IllegalArgumentException(msg);
282        }
283    
284        final AttributePath filterAttribute;
285        try
286        {
287          filterAttribute = AttributePath.parse(word);
288        }
289        catch (final Exception e)
290        {
291          Debug.debugException(e);
292          final String msg = String.format(
293              "Expected an attribute reference at position %d: %s",
294              markPos, e.getMessage());
295          throw new IllegalArgumentException(msg);
296        }
297    
298        final String operator = readWord();
299        if (operator == null)
300        {
301          final String msg = String.format(
302              "End of input at position %d but expected an attribute operator",
303              markPos);
304          throw new IllegalArgumentException(msg);
305        }
306    
307        final SCIMFilterType filterType;
308        if (operator.equalsIgnoreCase("eq"))
309        {
310          filterType = SCIMFilterType.EQUALITY;
311        }
312        else if (operator.equalsIgnoreCase("co"))
313        {
314          filterType = SCIMFilterType.CONTAINS;
315        }
316        else if (operator.equalsIgnoreCase("sw"))
317        {
318          filterType = SCIMFilterType.STARTS_WITH;
319        }
320        else if (operator.equalsIgnoreCase("pr"))
321        {
322          filterType = SCIMFilterType.PRESENCE;
323        }
324        else if (operator.equalsIgnoreCase("gt"))
325        {
326          filterType = SCIMFilterType.GREATER_THAN;
327        }
328        else if (operator.equalsIgnoreCase("ge"))
329        {
330          filterType = SCIMFilterType.GREATER_OR_EQUAL;
331        }
332        else if (operator.equalsIgnoreCase("lt"))
333        {
334          filterType = SCIMFilterType.LESS_THAN;
335        }
336        else if (operator.equalsIgnoreCase("le"))
337        {
338          filterType = SCIMFilterType.LESS_OR_EQUAL;
339        }
340        else
341        {
342          final String msg = String.format(
343              "Unrecognized attribute operator '%s' at position %d. " +
344              "Expected: eq,co,sw,pr,gt,ge,lt,le", operator, markPos);
345          throw new IllegalArgumentException(msg);
346        }
347    
348        final Object filterValue;
349        if (!filterType.equals(SCIMFilterType.PRESENCE))
350        {
351          filterValue = readValue();
352          if (filterValue == null)
353          {
354            final String msg = String.format(
355                "End of input at position %d while expecting a value for " +
356                "operator %s", markPos, operator);
357            throw new IllegalArgumentException(msg);
358          }
359        }
360        else
361        {
362          filterValue = null;
363        }
364    
365        return new SCIMFilter(
366            filterType, filterAttribute,
367            filterValue != null ? filterValue.toString() : null,
368            (filterValue != null) && (filterValue instanceof String),
369            null);
370      }
371    
372    
373    
374      /**
375       * Read a filter expression.
376       *
377       * @return  The SCIM filter.
378       */
379      private SCIMFilter readFilter()
380      {
381        final Stack<Node> expressionStack = new Stack<Node>();
382    
383        // Employ the shunting-yard algorithm to parse into reverse polish notation,
384        // where the operands are filter components and the operators are the
385        // logical AND and OR operators. This algorithm ensures that operator
386        // precedence and parentheses are respected.
387        final List<Node> reversePolish = new ArrayList<Node>();
388        for (String word = readWord(); word != null; word = readWord())
389        {
390          if (word.equalsIgnoreCase("and") || word.equalsIgnoreCase("or"))
391          {
392            final OperatorNode currentOperator;
393            if (word.equalsIgnoreCase("and"))
394            {
395              currentOperator = new OperatorNode(SCIMFilterType.AND, markPos);
396            }
397            else
398            {
399              currentOperator = new OperatorNode(SCIMFilterType.OR, markPos);
400            }
401            while (!expressionStack.empty() &&
402                   (expressionStack.peek() instanceof OperatorNode))
403            {
404              final OperatorNode previousOperator =
405                  (OperatorNode)expressionStack.peek();
406              if (previousOperator.getPrecedence() <
407                  currentOperator.getPrecedence())
408              {
409                break;
410              }
411              reversePolish.add(expressionStack.pop());
412            }
413            expressionStack.push(currentOperator);
414          }
415          else if (word.equals("("))
416          {
417            expressionStack.push(new LeftParenthesisNode(markPos));
418          }
419          else if (word.equals(")"))
420          {
421            while (!expressionStack.empty() &&
422                   !(expressionStack.peek() instanceof LeftParenthesisNode))
423            {
424              reversePolish.add(expressionStack.pop());
425            }
426            if (expressionStack.empty())
427            {
428              final String msg =
429                  String.format("No opening parenthesis matching closing " +
430                                "parenthesis at position %d", markPos);
431              throw new IllegalArgumentException(msg);
432            }
433            expressionStack.pop();
434          }
435          else
436          {
437            rewind();
438            final int pos = currentPos;
439            final SCIMFilter filterComponent = readFilterComponent();
440            reversePolish.add(new FilterNode(filterComponent, pos));
441          }
442        }
443    
444        while  (!expressionStack.empty())
445        {
446          final Node node = expressionStack.pop();
447          if (node instanceof LeftParenthesisNode)
448          {
449            final String msg =
450                String.format("No closing parenthesis matching opening " +
451                              "parenthesis at position %d", node.getPos());
452            throw new IllegalArgumentException(msg);
453          }
454          reversePolish.add(node);
455        }
456    
457        // Evaluate the reverse polish notation to create a single complex filter.
458        final Stack<FilterNode> filterStack = new Stack<FilterNode>();
459        for (final Node node : reversePolish)
460        {
461          if (node instanceof OperatorNode)
462          {
463            final FilterNode rightOperand = filterStack.pop();
464            final FilterNode leftOperand = filterStack.pop();
465    
466            final OperatorNode operatorNode = (OperatorNode)node;
467            if (operatorNode.getFilterType().equals(SCIMFilterType.AND))
468            {
469              final SCIMFilter filter = SCIMFilter.createAndFilter(
470                  Arrays.asList(leftOperand.getFilterComponent(),
471                                rightOperand.getFilterComponent()));
472              filterStack.push(new FilterNode(filter, leftOperand.getPos()));
473            }
474            else
475            {
476              final SCIMFilter filter = SCIMFilter.createOrFilter(
477                  Arrays.asList(leftOperand.getFilterComponent(),
478                                rightOperand.getFilterComponent()));
479              filterStack.push(new FilterNode(filter, leftOperand.getPos()));
480            }
481          }
482          else
483          {
484            filterStack.push((FilterNode)node);
485          }
486        }
487    
488        if (filterStack.size() == 0)
489        {
490          final String msg = String.format("Empty filter expression");
491          throw new IllegalArgumentException(msg);
492        }
493        else if (filterStack.size() > 1)
494        {
495          final String msg = String.format(
496              "Unexpected characters at position %d", expressionStack.get(1).pos);
497          throw new IllegalArgumentException(msg);
498        }
499    
500        return filterStack.get(0).filterComponent;
501      }
502    
503    
504    
505      /**
506       * Read a word at the current position. A word is a consecutive sequence of
507       * characters terminated by whitespace or a parenthesis, or a single opening
508       * or closing parenthesis. Whitespace before and after the word is consumed.
509       * The start of the word is saved in {@code markPos}.
510       *
511       * @return The word at the current position, or {@code null} if the end of
512       *         the input has been reached.
513       */
514      private String readWord()
515      {
516        skipWhitespace();
517        markPos = currentPos;
518    
519        loop:
520        while (currentPos < endPos)
521        {
522          final char c = filterString.charAt(currentPos);
523          switch (c)
524          {
525            case '(':
526            case ')':
527              if (currentPos == markPos)
528              {
529                currentPos++;
530              }
531              break loop;
532    
533            case ' ':
534              break loop;
535    
536            default:
537              currentPos++;
538              break;
539          }
540        }
541    
542        if (currentPos - markPos == 0)
543        {
544          return null;
545        }
546    
547        final String word = filterString.substring(markPos, currentPos);
548    
549        skipWhitespace();
550        return word;
551      }
552    
553    
554    
555      /**
556       * Rewind the current position to the start of the previous word or value.
557       */
558      private void rewind()
559      {
560        currentPos = markPos;
561      }
562    
563    
564    
565      /**
566       * Read a value at the current position. A value can be a number, or a
567       * boolean value (the words true or false), or a string value in double
568       * quotes, using the same syntax as for JSON values. Whitespace before and
569       * after the value is consumed. The start of the value is saved in
570       * {@code markPos}.
571       *
572       * @return A Boolean, Double, Integer, Long or String representing the value
573       *         at the current position, or {@code null} if the end of the input
574       *         has already been reached.
575       */
576      public Object readValue()
577      {
578        skipWhitespace();
579        markPos = currentPos;
580    
581        if (currentPos == endPos)
582        {
583          return null;
584        }
585    
586        if (filterString.charAt(currentPos) == '"')
587        {
588          currentPos++;
589    
590          final StringBuilder builder = new StringBuilder();
591          while (currentPos < endPos)
592          {
593            final char c = filterString.charAt(currentPos);
594            switch (c)
595            {
596              case '\\':
597                currentPos++;
598                if (endOfInput())
599                {
600                  final String msg = String.format(
601                      "End of input in a string value that began at " +
602                      "position %d", markPos);
603                  throw new IllegalArgumentException(msg);
604                }
605                final char escapeChar = filterString.charAt(currentPos);
606                currentPos++;
607                switch (escapeChar)
608                {
609                  case '"':
610                  case '/':
611                  case '\'':
612                  case '\\':
613                    builder.append(escapeChar);
614                    break;
615                  case 'b':
616                    builder.append('\b');
617                    break;
618                  case 'f':
619                    builder.append('\f');
620                    break;
621                  case 'n':
622                    builder.append('\n');
623                    break;
624                  case 'r':
625                    builder.append('\r');
626                    break;
627                  case 't':
628                    builder.append('\t');
629                    break;
630                  case 'u':
631                    if (currentPos + 4 > endPos)
632                    {
633                      final String msg = String.format(
634                          "End of input in a string value that began at " +
635                          "position %d", markPos);
636                      throw new IllegalArgumentException(msg);
637                    }
638                    final String hexChars =
639                        filterString.substring(currentPos, currentPos + 4);
640                    builder.append((char)Integer.parseInt(hexChars, 16));
641                    currentPos += 4;
642                    break;
643                  default:
644                    final String msg = String.format(
645                        "Unrecognized escape sequence '\\%c' in a string value " +
646                        "at position %d", escapeChar, currentPos - 2);
647                    throw new IllegalArgumentException(msg);
648                }
649                break;
650    
651              case '"':
652                currentPos++;
653                skipWhitespace();
654                return builder.toString();
655    
656              default:
657                builder.append(c);
658                currentPos++;
659                break;
660            }
661          }
662    
663          final String msg = String.format(
664              "End of input in a string value that began at " +
665              "position %d", markPos);
666          throw new IllegalArgumentException(msg);
667        }
668        else
669        {
670          loop:
671          while (currentPos < endPos)
672          {
673            final char c = filterString.charAt(currentPos);
674            switch (c)
675            {
676              case ' ':
677              case '(':
678              case ')':
679                break loop;
680    
681              case '+':
682              case '-':
683              case '.':
684              case '0':
685              case '1':
686              case '2':
687              case '3':
688              case '4':
689              case '5':
690              case '6':
691              case '7':
692              case '8':
693              case '9':
694              case 'A':
695              case 'B':
696              case 'C':
697              case 'D':
698              case 'E':
699              case 'F':
700              case 'G':
701              case 'H':
702              case 'I':
703              case 'J':
704              case 'K':
705              case 'L':
706              case 'M':
707              case 'N':
708              case 'O':
709              case 'P':
710              case 'Q':
711              case 'R':
712              case 'S':
713              case 'T':
714              case 'U':
715              case 'V':
716              case 'W':
717              case 'X':
718              case 'Y':
719              case 'Z':
720              case 'a':
721              case 'b':
722              case 'c':
723              case 'd':
724              case 'e':
725              case 'f':
726              case 'g':
727              case 'h':
728              case 'i':
729              case 'j':
730              case 'k':
731              case 'l':
732              case 'm':
733              case 'n':
734              case 'o':
735              case 'p':
736              case 'q':
737              case 'r':
738              case 's':
739              case 't':
740              case 'u':
741              case 'v':
742              case 'w':
743              case 'x':
744              case 'y':
745              case 'z':
746                // These are all OK.
747                currentPos++;
748                break;
749    
750              case '/':
751              case ':':
752              case ';':
753              case '<':
754              case '=':
755              case '>':
756              case '?':
757              case '@':
758              case '[':
759              case '\\':
760              case ']':
761              case '^':
762              case '_':
763              case '`':
764                // These are not allowed, but they are explicitly called out because
765                // they are included in the range of values between '-' and 'z', and
766                // making sure all possible characters are included can help make
767                // the switch statement more efficient.  We'll fall through to the
768                // default clause to reject them.
769              default:
770                final String msg = String.format(
771                    "Invalid character '%c' in a number or boolean value at " +
772                    "position %d",
773                    c, currentPos);
774                throw new IllegalArgumentException(msg);
775            }
776          }
777    
778          final String s = filterString.substring(markPos, currentPos);
779          skipWhitespace();
780          final Object value = JSONObject.stringToValue(s);
781    
782          if (value.equals(JSONObject.NULL) || value instanceof String)
783          {
784            final String msg = String.format(
785                "Invalid filter value beginning at position %d", markPos);
786            throw new IllegalArgumentException(msg);
787          }
788    
789          return value;
790        }
791      }
792    
793    
794    
795      /**
796       * Determine if the end of the input has been reached.
797       *
798       * @return  {@code true} if the end of the input has been reached.
799       */
800      private boolean endOfInput()
801      {
802        return currentPos == endPos;
803      }
804    
805    
806    
807      /**
808       * Skip over any whitespace at the current position.
809       */
810      private void skipWhitespace()
811      {
812        while (currentPos < endPos && filterString.charAt(currentPos) == ' ')
813        {
814          currentPos++;
815        }
816      }
817    }