001 /*
002 * Copyright 2011-2012 UnboundID Corp.
003 *
004 * This program is free software; you can redistribute it and/or modify
005 * it under the terms of the GNU General Public License (GPLv2 only)
006 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
007 * as published by the Free Software Foundation.
008 *
009 * This program is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
012 * GNU General Public License for more details.
013 *
014 * You should have received a copy of the GNU General Public License
015 * along with this program; if not, see <http://www.gnu.org/licenses>.
016 */
017
018 package com.unboundid.scim.sdk;
019
020 import org.json.JSONObject;
021
022 import java.text.MessageFormat;
023 import java.util.ArrayList;
024 import java.util.Arrays;
025 import java.util.List;
026 import java.util.Stack;
027
028
029
030 /**
031 * A parser for SCIM filter expressions.
032 */
033 public class FilterParser
034 {
035 /**
036 * The filter to be parsed.
037 */
038 private final String filterString;
039
040 /**
041 * The position one higher than the last character.
042 */
043 private int endPos;
044
045 /**
046 * The current character position.
047 */
048 private int currentPos;
049
050 /**
051 * The position marking the first character of the previous word or value.
052 */
053 private int markPos;
054
055
056
/**
 * Common superclass of the nodes kept on the expression stack. The stack is
 * required by the shunting-yard algorithm that parses the filter expression.
 * Every node records where in the filter string it was found.
 */
class Node
{
  private final int pos;



  /**
   * Construct a node located at the given position.
   *
   * @param pos The position of the node in the filter string.
   */
  public Node(final int pos)
  {
    this.pos = pos;
  }



  /**
   * Get the position at which this node occurs in the filter string.
   *
   * @return The position of the node in the filter string.
   */
  public int getPos()
  {
    return this.pos;
  }
}
088
089
090
091 /**
092 * A node representing a filter component.
093 */
094 class FilterNode extends Node
095 {
096 private final SCIMFilter filterComponent;
097
098
099
100 /**
101 * Create a new filter component node.
102 *
103 * @param filterComponent The filter component.
104 * @param pos The position of the node in the filter string.
105 */
106 public FilterNode(final SCIMFilter filterComponent,
107 final int pos)
108 {
109 super(pos);
110 this.filterComponent = filterComponent;
111 }
112
113
114
115 /**
116 * Retrieve the filter component.
117 *
118 * @return The filter component.
119 */
120 public SCIMFilter getFilterComponent()
121 {
122 return filterComponent;
123 }
124
125
126
127 @Override
128 public String toString()
129 {
130 return "FilterNode{" +
131 "filterComponent=" + filterComponent +
132 "} " + super.toString();
133 }
134 }
135
136
137
138 /**
139 * A node representing a logical operator.
140 */
141 class OperatorNode extends Node
142 {
143 private final SCIMFilterType filterType;
144
145 /**
146 * Create a new logical operator node.
147 *
148 * @param filterType The type of operator, either SCIMFilterType.AND or
149 * SCIMFilterType.OR.
150 * @param pos The position of the node in the filter string.
151 */
152 public OperatorNode(final SCIMFilterType filterType,
153 final int pos)
154 {
155 super(pos);
156 this.filterType = filterType;
157 }
158
159
160
161 /**
162 * Retrieve the type of operator.
163 *
164 * @return The type of operator, either SCIMFilterType.AND or
165 * SCIMFilterType.OR.
166 */
167 public SCIMFilterType getFilterType()
168 {
169 return filterType;
170 }
171
172
173
174 /**
175 * Retrieve the precedence of the operator.
176 *
177 * @return The precedence of the operator.
178 */
179 public int getPrecedence()
180 {
181 switch (filterType)
182 {
183 case AND:
184 return 2;
185
186 case OR:
187 default:
188 return 1;
189 }
190 }
191
192
193
194 @Override
195 public String toString()
196 {
197 return "OperatorNode{" +
198 "filterType=" + filterType +
199 "} " + super.toString();
200 }
201 }
202
203
204
205 /**
206 * A node representing an opening parenthesis.
207 */
208 class LeftParenthesisNode extends Node
209 {
210 /**
211 * Create a new opening parenthesis node.
212 *
213 * @param pos The position of the parenthesis in the filter string.
214 */
215 public LeftParenthesisNode(final int pos)
216 {
217 super(pos);
218 }
219 }
220
221
222
/**
 * Construct a parser for the given filter string. Parsing starts at the
 * first character and extends to the end of the string.
 *
 * @param filterString The filter to be parsed.
 */
public FilterParser(final String filterString)
{
  // Both cursors start at the beginning of the input.
  currentPos = 0;
  markPos = 0;

  this.filterString = filterString;
  endPos = filterString.length();
}
235
236
237
238 /**
239 * Parse the filter provided in the constructor.
240 *
241 * @return A parsed SCIM filter.
242 *
243 * @throws SCIMException If the filter string could not be parsed.
244 */
245 public SCIMFilter parse()
246 throws SCIMException
247 {
248 try
249 {
250 return readFilter();
251 }
252 catch (Exception e)
253 {
254 Debug.debugException(e);
255 throw SCIMException.createException(
256 400, MessageFormat.format("Invalid filter ''{0}'': {1}",
257 filterString, e.getMessage()));
258 }
259 }
260
261
262
263 /**
264 * Read a filter component at the current position. A filter component is
265 * <pre>
266 * attribute attribute-operator [value]
267 * </pre>
268 * Most attribute operators require a value but 'pr' (presence) requires
269 * no value.
270 *
271 * @return The parsed filter component.
272 */
273 private SCIMFilter readFilterComponent()
274 {
275 String word = readWord();
276 if (word == null)
277 {
278 final String msg = String.format(
279 "End of input at position %d but expected a filter expression",
280 markPos);
281 throw new IllegalArgumentException(msg);
282 }
283
284 final AttributePath filterAttribute;
285 try
286 {
287 filterAttribute = AttributePath.parse(word);
288 }
289 catch (final Exception e)
290 {
291 Debug.debugException(e);
292 final String msg = String.format(
293 "Expected an attribute reference at position %d: %s",
294 markPos, e.getMessage());
295 throw new IllegalArgumentException(msg);
296 }
297
298 final String operator = readWord();
299 if (operator == null)
300 {
301 final String msg = String.format(
302 "End of input at position %d but expected an attribute operator",
303 markPos);
304 throw new IllegalArgumentException(msg);
305 }
306
307 final SCIMFilterType filterType;
308 if (operator.equalsIgnoreCase("eq"))
309 {
310 filterType = SCIMFilterType.EQUALITY;
311 }
312 else if (operator.equalsIgnoreCase("co"))
313 {
314 filterType = SCIMFilterType.CONTAINS;
315 }
316 else if (operator.equalsIgnoreCase("sw"))
317 {
318 filterType = SCIMFilterType.STARTS_WITH;
319 }
320 else if (operator.equalsIgnoreCase("pr"))
321 {
322 filterType = SCIMFilterType.PRESENCE;
323 }
324 else if (operator.equalsIgnoreCase("gt"))
325 {
326 filterType = SCIMFilterType.GREATER_THAN;
327 }
328 else if (operator.equalsIgnoreCase("ge"))
329 {
330 filterType = SCIMFilterType.GREATER_OR_EQUAL;
331 }
332 else if (operator.equalsIgnoreCase("lt"))
333 {
334 filterType = SCIMFilterType.LESS_THAN;
335 }
336 else if (operator.equalsIgnoreCase("le"))
337 {
338 filterType = SCIMFilterType.LESS_OR_EQUAL;
339 }
340 else
341 {
342 final String msg = String.format(
343 "Unrecognized attribute operator '%s' at position %d. " +
344 "Expected: eq,co,sw,pr,gt,ge,lt,le", operator, markPos);
345 throw new IllegalArgumentException(msg);
346 }
347
348 final Object filterValue;
349 if (!filterType.equals(SCIMFilterType.PRESENCE))
350 {
351 filterValue = readValue();
352 if (filterValue == null)
353 {
354 final String msg = String.format(
355 "End of input at position %d while expecting a value for " +
356 "operator %s", markPos, operator);
357 throw new IllegalArgumentException(msg);
358 }
359 }
360 else
361 {
362 filterValue = null;
363 }
364
365 return new SCIMFilter(
366 filterType, filterAttribute,
367 filterValue != null ? filterValue.toString() : null,
368 (filterValue != null) && (filterValue instanceof String),
369 null);
370 }
371
372
373
374 /**
375 * Read a filter expression.
376 *
377 * @return The SCIM filter.
378 */
379 private SCIMFilter readFilter()
380 {
381 final Stack<Node> expressionStack = new Stack<Node>();
382
383 // Employ the shunting-yard algorithm to parse into reverse polish notation,
384 // where the operands are filter components and the operators are the
385 // logical AND and OR operators. This algorithm ensures that operator
386 // precedence and parentheses are respected.
387 final List<Node> reversePolish = new ArrayList<Node>();
388 for (String word = readWord(); word != null; word = readWord())
389 {
390 if (word.equalsIgnoreCase("and") || word.equalsIgnoreCase("or"))
391 {
392 final OperatorNode currentOperator;
393 if (word.equalsIgnoreCase("and"))
394 {
395 currentOperator = new OperatorNode(SCIMFilterType.AND, markPos);
396 }
397 else
398 {
399 currentOperator = new OperatorNode(SCIMFilterType.OR, markPos);
400 }
401 while (!expressionStack.empty() &&
402 (expressionStack.peek() instanceof OperatorNode))
403 {
404 final OperatorNode previousOperator =
405 (OperatorNode)expressionStack.peek();
406 if (previousOperator.getPrecedence() <
407 currentOperator.getPrecedence())
408 {
409 break;
410 }
411 reversePolish.add(expressionStack.pop());
412 }
413 expressionStack.push(currentOperator);
414 }
415 else if (word.equals("("))
416 {
417 expressionStack.push(new LeftParenthesisNode(markPos));
418 }
419 else if (word.equals(")"))
420 {
421 while (!expressionStack.empty() &&
422 !(expressionStack.peek() instanceof LeftParenthesisNode))
423 {
424 reversePolish.add(expressionStack.pop());
425 }
426 if (expressionStack.empty())
427 {
428 final String msg =
429 String.format("No opening parenthesis matching closing " +
430 "parenthesis at position %d", markPos);
431 throw new IllegalArgumentException(msg);
432 }
433 expressionStack.pop();
434 }
435 else
436 {
437 rewind();
438 final int pos = currentPos;
439 final SCIMFilter filterComponent = readFilterComponent();
440 reversePolish.add(new FilterNode(filterComponent, pos));
441 }
442 }
443
444 while (!expressionStack.empty())
445 {
446 final Node node = expressionStack.pop();
447 if (node instanceof LeftParenthesisNode)
448 {
449 final String msg =
450 String.format("No closing parenthesis matching opening " +
451 "parenthesis at position %d", node.getPos());
452 throw new IllegalArgumentException(msg);
453 }
454 reversePolish.add(node);
455 }
456
457 // Evaluate the reverse polish notation to create a single complex filter.
458 final Stack<FilterNode> filterStack = new Stack<FilterNode>();
459 for (final Node node : reversePolish)
460 {
461 if (node instanceof OperatorNode)
462 {
463 final FilterNode rightOperand = filterStack.pop();
464 final FilterNode leftOperand = filterStack.pop();
465
466 final OperatorNode operatorNode = (OperatorNode)node;
467 if (operatorNode.getFilterType().equals(SCIMFilterType.AND))
468 {
469 final SCIMFilter filter = SCIMFilter.createAndFilter(
470 Arrays.asList(leftOperand.getFilterComponent(),
471 rightOperand.getFilterComponent()));
472 filterStack.push(new FilterNode(filter, leftOperand.getPos()));
473 }
474 else
475 {
476 final SCIMFilter filter = SCIMFilter.createOrFilter(
477 Arrays.asList(leftOperand.getFilterComponent(),
478 rightOperand.getFilterComponent()));
479 filterStack.push(new FilterNode(filter, leftOperand.getPos()));
480 }
481 }
482 else
483 {
484 filterStack.push((FilterNode)node);
485 }
486 }
487
488 if (filterStack.size() == 0)
489 {
490 final String msg = String.format("Empty filter expression");
491 throw new IllegalArgumentException(msg);
492 }
493 else if (filterStack.size() > 1)
494 {
495 final String msg = String.format(
496 "Unexpected characters at position %d", expressionStack.get(1).pos);
497 throw new IllegalArgumentException(msg);
498 }
499
500 return filterStack.get(0).filterComponent;
501 }
502
503
504
505 /**
506 * Read a word at the current position. A word is a consecutive sequence of
507 * characters terminated by whitespace or a parenthesis, or a single opening
508 * or closing parenthesis. Whitespace before and after the word is consumed.
509 * The start of the word is saved in {@code markPos}.
510 *
511 * @return The word at the current position, or {@code null} if the end of
512 * the input has been reached.
513 */
514 private String readWord()
515 {
516 skipWhitespace();
517 markPos = currentPos;
518
519 loop:
520 while (currentPos < endPos)
521 {
522 final char c = filterString.charAt(currentPos);
523 switch (c)
524 {
525 case '(':
526 case ')':
527 if (currentPos == markPos)
528 {
529 currentPos++;
530 }
531 break loop;
532
533 case ' ':
534 break loop;
535
536 default:
537 currentPos++;
538 break;
539 }
540 }
541
542 if (currentPos - markPos == 0)
543 {
544 return null;
545 }
546
547 final String word = filterString.substring(markPos, currentPos);
548
549 skipWhitespace();
550 return word;
551 }
552
553
554
555 /**
556 * Rewind the current position to the start of the previous word or value.
557 */
558 private void rewind()
559 {
560 currentPos = markPos;
561 }
562
563
564
565 /**
566 * Read a value at the current position. A value can be a number, or a
567 * boolean value (the words true or false), or a string value in double
568 * quotes, using the same syntax as for JSON values. Whitespace before and
569 * after the value is consumed. The start of the value is saved in
570 * {@code markPos}.
571 *
572 * @return A Boolean, Double, Integer, Long or String representing the value
573 * at the current position, or {@code null} if the end of the input
574 * has already been reached.
575 */
576 public Object readValue()
577 {
578 skipWhitespace();
579 markPos = currentPos;
580
581 if (currentPos == endPos)
582 {
583 return null;
584 }
585
586 if (filterString.charAt(currentPos) == '"')
587 {
588 currentPos++;
589
590 final StringBuilder builder = new StringBuilder();
591 while (currentPos < endPos)
592 {
593 final char c = filterString.charAt(currentPos);
594 switch (c)
595 {
596 case '\\':
597 currentPos++;
598 if (endOfInput())
599 {
600 final String msg = String.format(
601 "End of input in a string value that began at " +
602 "position %d", markPos);
603 throw new IllegalArgumentException(msg);
604 }
605 final char escapeChar = filterString.charAt(currentPos);
606 currentPos++;
607 switch (escapeChar)
608 {
609 case '"':
610 case '/':
611 case '\'':
612 case '\\':
613 builder.append(escapeChar);
614 break;
615 case 'b':
616 builder.append('\b');
617 break;
618 case 'f':
619 builder.append('\f');
620 break;
621 case 'n':
622 builder.append('\n');
623 break;
624 case 'r':
625 builder.append('\r');
626 break;
627 case 't':
628 builder.append('\t');
629 break;
630 case 'u':
631 if (currentPos + 4 > endPos)
632 {
633 final String msg = String.format(
634 "End of input in a string value that began at " +
635 "position %d", markPos);
636 throw new IllegalArgumentException(msg);
637 }
638 final String hexChars =
639 filterString.substring(currentPos, currentPos + 4);
640 builder.append((char)Integer.parseInt(hexChars, 16));
641 currentPos += 4;
642 break;
643 default:
644 final String msg = String.format(
645 "Unrecognized escape sequence '\\%c' in a string value " +
646 "at position %d", escapeChar, currentPos - 2);
647 throw new IllegalArgumentException(msg);
648 }
649 break;
650
651 case '"':
652 currentPos++;
653 skipWhitespace();
654 return builder.toString();
655
656 default:
657 builder.append(c);
658 currentPos++;
659 break;
660 }
661 }
662
663 final String msg = String.format(
664 "End of input in a string value that began at " +
665 "position %d", markPos);
666 throw new IllegalArgumentException(msg);
667 }
668 else
669 {
670 loop:
671 while (currentPos < endPos)
672 {
673 final char c = filterString.charAt(currentPos);
674 switch (c)
675 {
676 case ' ':
677 case '(':
678 case ')':
679 break loop;
680
681 case '+':
682 case '-':
683 case '.':
684 case '0':
685 case '1':
686 case '2':
687 case '3':
688 case '4':
689 case '5':
690 case '6':
691 case '7':
692 case '8':
693 case '9':
694 case 'A':
695 case 'B':
696 case 'C':
697 case 'D':
698 case 'E':
699 case 'F':
700 case 'G':
701 case 'H':
702 case 'I':
703 case 'J':
704 case 'K':
705 case 'L':
706 case 'M':
707 case 'N':
708 case 'O':
709 case 'P':
710 case 'Q':
711 case 'R':
712 case 'S':
713 case 'T':
714 case 'U':
715 case 'V':
716 case 'W':
717 case 'X':
718 case 'Y':
719 case 'Z':
720 case 'a':
721 case 'b':
722 case 'c':
723 case 'd':
724 case 'e':
725 case 'f':
726 case 'g':
727 case 'h':
728 case 'i':
729 case 'j':
730 case 'k':
731 case 'l':
732 case 'm':
733 case 'n':
734 case 'o':
735 case 'p':
736 case 'q':
737 case 'r':
738 case 's':
739 case 't':
740 case 'u':
741 case 'v':
742 case 'w':
743 case 'x':
744 case 'y':
745 case 'z':
746 // These are all OK.
747 currentPos++;
748 break;
749
750 case '/':
751 case ':':
752 case ';':
753 case '<':
754 case '=':
755 case '>':
756 case '?':
757 case '@':
758 case '[':
759 case '\\':
760 case ']':
761 case '^':
762 case '_':
763 case '`':
764 // These are not allowed, but they are explicitly called out because
765 // they are included in the range of values between '-' and 'z', and
766 // making sure all possible characters are included can help make
767 // the switch statement more efficient. We'll fall through to the
768 // default clause to reject them.
769 default:
770 final String msg = String.format(
771 "Invalid character '%c' in a number or boolean value at " +
772 "position %d",
773 c, currentPos);
774 throw new IllegalArgumentException(msg);
775 }
776 }
777
778 final String s = filterString.substring(markPos, currentPos);
779 skipWhitespace();
780 final Object value = JSONObject.stringToValue(s);
781
782 if (value.equals(JSONObject.NULL) || value instanceof String)
783 {
784 final String msg = String.format(
785 "Invalid filter value beginning at position %d", markPos);
786 throw new IllegalArgumentException(msg);
787 }
788
789 return value;
790 }
791 }
792
793
794
795 /**
796 * Determine if the end of the input has been reached.
797 *
798 * @return {@code true} if the end of the input has been reached.
799 */
800 private boolean endOfInput()
801 {
802 return currentPos == endPos;
803 }
804
805
806
807 /**
808 * Skip over any whitespace at the current position.
809 */
810 private void skipWhitespace()
811 {
812 while (currentPos < endPos && filterString.charAt(currentPos) == ' ')
813 {
814 currentPos++;
815 }
816 }
817 }