001/* 002 * Copyright 2011-2013 UnboundID Corp. 003 * 004 * This program is free software; you can redistribute it and/or modify 005 * it under the terms of the GNU General Public License (GPLv2 only) 006 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 007 * as published by the Free Software Foundation. 008 * 009 * This program is distributed in the hope that it will be useful, 010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 012 * GNU General Public License for more details. 013 * 014 * You should have received a copy of the GNU General Public License 015 * along with this program; if not, see <http://www.gnu.org/licenses>. 016 */ 017 018package com.unboundid.scim.sdk; 019 020import org.json.JSONObject; 021 022import java.text.MessageFormat; 023import java.util.ArrayList; 024import java.util.Arrays; 025import java.util.List; 026import java.util.Stack; 027 028 029 030/** 031 * A parser for SCIM filter expressions. 032 */ 033public class FilterParser 034{ 035 /** 036 * The filter to be parsed. 037 */ 038 private final String filterString; 039 040 /** 041 * The default schema that should be assumed when parsing attributes with 042 * no schema explicitly defined in the URN. 043 */ 044 private final String defaultSchema; 045 046 /** 047 * The position one higher than the last character. 048 */ 049 private int endPos; 050 051 /** 052 * The current character position. 053 */ 054 private int currentPos; 055 056 /** 057 * The position marking the first character of the previous word or value. 058 */ 059 private int markPos; 060 061 062 063 /** 064 * Base class for expression stack nodes. The expression stack is needed to 065 * employ the shunting-yard algorithm to parse the filter expression. 066 */ 067 class Node 068 { 069 private final int pos; 070 071 072 073 /** 074 * Create a new node. 075 * 076 * @param pos The position of the node in the filter string. 077 */ 078 public Node(final int pos) 079 { 080 this.pos = pos; 081 } 082 083 084 085 /** 086 * Retrieve the position of the node in the filter string. 087 * @return The position of the node in the filter string. 088 */ 089 public int getPos() 090 { 091 return pos; 092 } 093 } 094 095 096 097 /** 098 * A node representing a filter component. 099 */ 100 class FilterNode extends Node 101 { 102 private final SCIMFilter filterComponent; 103 104 105 106 /** 107 * Create a new filter component node. 108 * 109 * @param filterComponent The filter component. 110 * @param pos The position of the node in the filter string. 111 */ 112 public FilterNode(final SCIMFilter filterComponent, 113 final int pos) 114 { 115 super(pos); 116 this.filterComponent = filterComponent; 117 } 118 119 120 121 /** 122 * Retrieve the filter component. 123 * 124 * @return The filter component. 125 */ 126 public SCIMFilter getFilterComponent() 127 { 128 return filterComponent; 129 } 130 131 132 133 @Override 134 public String toString() 135 { 136 return "FilterNode{" + 137 "filterComponent=" + filterComponent + 138 "} " + super.toString(); 139 } 140 } 141 142 143 144 /** 145 * A node representing a logical operator. 146 */ 147 class OperatorNode extends Node 148 { 149 private final SCIMFilterType filterType; 150 151 /** 152 * Create a new logical operator node. 153 * 154 * @param filterType The type of operator, either SCIMFilterType.AND or 155 * SCIMFilterType.OR. 156 * @param pos The position of the node in the filter string. 157 */ 158 public OperatorNode(final SCIMFilterType filterType, 159 final int pos) 160 { 161 super(pos); 162 this.filterType = filterType; 163 } 164 165 166 167 /** 168 * Retrieve the type of operator. 169 * 170 * @return The type of operator, either SCIMFilterType.AND or 171 * SCIMFilterType.OR. 172 */ 173 public SCIMFilterType getFilterType() 174 { 175 return filterType; 176 } 177 178 179 180 /** 181 * Retrieve the precedence of the operator. 182 * 183 * @return The precedence of the operator. 184 */ 185 public int getPrecedence() 186 { 187 switch (filterType) 188 { 189 case AND: 190 return 2; 191 192 case OR: 193 default: 194 return 1; 195 } 196 } 197 198 199 200 @Override 201 public String toString() 202 { 203 return "OperatorNode{" + 204 "filterType=" + filterType + 205 "} " + super.toString(); 206 } 207 } 208 209 210 211 /** 212 * A node representing an opening parenthesis. 213 */ 214 class LeftParenthesisNode extends Node 215 { 216 /** 217 * Create a new opening parenthesis node. 218 * 219 * @param pos The position of the parenthesis in the filter string. 220 */ 221 public LeftParenthesisNode(final int pos) 222 { 223 super(pos); 224 } 225 } 226 227 228 229 /** 230 * Create a new instance of a filter parser. 231 * 232 * @param filterString The filter to be parsed. 233 * @param defaultSchema The default schema that should be assumed when parsing 234 * attributes without the schema explicitly defined in 235 * the URN. 236 */ 237 public FilterParser(final String filterString, final String defaultSchema) 238 { 239 this.filterString = filterString; 240 this.endPos = filterString.length(); 241 this.currentPos = 0; 242 this.markPos = 0; 243 this.defaultSchema = defaultSchema; 244 } 245 246 247 248 /** 249 * Parse the filter provided in the constructor. 250 * 251 * @return A parsed SCIM filter. 252 * 253 * @throws SCIMException If the filter string could not be parsed. 254 */ 255 public SCIMFilter parse() 256 throws SCIMException 257 { 258 try 259 { 260 return readFilter(); 261 } 262 catch (Exception e) 263 { 264 Debug.debugException(e); 265 throw SCIMException.createException( 266 400, MessageFormat.format("Invalid filter ''{0}'': {1}", 267 filterString, e.getMessage())); 268 } 269 } 270 271 272 273 /** 274 * Read a filter component at the current position. A filter component is 275 * <pre> 276 * attribute attribute-operator [value] 277 * </pre> 278 * Most attribute operators require a value but 'pr' (presence) requires 279 * no value. 280 * 281 * @return The parsed filter component. 282 */ 283 private SCIMFilter readFilterComponent() 284 { 285 String word = readWord(); 286 if (word == null) 287 { 288 final String msg = String.format( 289 "End of input at position %d but expected a filter expression", 290 markPos); 291 throw new IllegalArgumentException(msg); 292 } 293 294 final AttributePath filterAttribute; 295 try 296 { 297 filterAttribute = AttributePath.parse(word, defaultSchema); 298 } 299 catch (final Exception e) 300 { 301 Debug.debugException(e); 302 final String msg = String.format( 303 "Expected an attribute reference at position %d: %s", 304 markPos, e.getMessage()); 305 throw new IllegalArgumentException(msg); 306 } 307 308 final String operator = readWord(); 309 if (operator == null) 310 { 311 final String msg = String.format( 312 "End of input at position %d but expected an attribute operator", 313 markPos); 314 throw new IllegalArgumentException(msg); 315 } 316 317 final SCIMFilterType filterType; 318 if (operator.equalsIgnoreCase("eq")) 319 { 320 filterType = SCIMFilterType.EQUALITY; 321 } 322 else if (operator.equalsIgnoreCase("co")) 323 { 324 filterType = SCIMFilterType.CONTAINS; 325 } 326 else if (operator.equalsIgnoreCase("sw")) 327 { 328 filterType = SCIMFilterType.STARTS_WITH; 329 } 330 else if (operator.equalsIgnoreCase("pr")) 331 { 332 filterType = SCIMFilterType.PRESENCE; 333 } 334 else if (operator.equalsIgnoreCase("gt")) 335 { 336 filterType = SCIMFilterType.GREATER_THAN; 337 } 338 else if (operator.equalsIgnoreCase("ge")) 339 { 340 filterType = SCIMFilterType.GREATER_OR_EQUAL; 341 } 342 else if (operator.equalsIgnoreCase("lt")) 343 { 344 filterType = SCIMFilterType.LESS_THAN; 345 } 346 else if (operator.equalsIgnoreCase("le")) 347 { 348 filterType = SCIMFilterType.LESS_OR_EQUAL; 349 } 350 else 351 { 352 final String msg = String.format( 353 "Unrecognized attribute operator '%s' at position %d. " + 354 "Expected: eq,co,sw,pr,gt,ge,lt,le", operator, markPos); 355 throw new IllegalArgumentException(msg); 356 } 357 358 final Object filterValue; 359 if (!filterType.equals(SCIMFilterType.PRESENCE)) 360 { 361 filterValue = readValue(); 362 if (filterValue == null) 363 { 364 final String msg = String.format( 365 "End of input at position %d while expecting a value for " + 366 "operator %s", markPos, operator); 367 throw new IllegalArgumentException(msg); 368 } 369 } 370 else 371 { 372 filterValue = null; 373 } 374 375 return new SCIMFilter( 376 filterType, filterAttribute, 377 filterValue != null ? filterValue.toString() : null, 378 (filterValue != null) && (filterValue instanceof String), 379 null); 380 } 381 382 383 384 /** 385 * Read a filter expression. 386 * 387 * @return The SCIM filter. 388 */ 389 private SCIMFilter readFilter() 390 { 391 final Stack<Node> expressionStack = new Stack<Node>(); 392 393 // Employ the shunting-yard algorithm to parse into reverse polish notation, 394 // where the operands are filter components and the operators are the 395 // logical AND and OR operators. This algorithm ensures that operator 396 // precedence and parentheses are respected. 397 final List<Node> reversePolish = new ArrayList<Node>(); 398 for (String word = readWord(); word != null; word = readWord()) 399 { 400 if (word.equalsIgnoreCase("and") || word.equalsIgnoreCase("or")) 401 { 402 final OperatorNode currentOperator; 403 if (word.equalsIgnoreCase("and")) 404 { 405 currentOperator = new OperatorNode(SCIMFilterType.AND, markPos); 406 } 407 else 408 { 409 currentOperator = new OperatorNode(SCIMFilterType.OR, markPos); 410 } 411 while (!expressionStack.empty() && 412 (expressionStack.peek() instanceof OperatorNode)) 413 { 414 final OperatorNode previousOperator = 415 (OperatorNode)expressionStack.peek(); 416 if (previousOperator.getPrecedence() < 417 currentOperator.getPrecedence()) 418 { 419 break; 420 } 421 reversePolish.add(expressionStack.pop()); 422 } 423 expressionStack.push(currentOperator); 424 } 425 else if (word.equals("(")) 426 { 427 expressionStack.push(new LeftParenthesisNode(markPos)); 428 } 429 else if (word.equals(")")) 430 { 431 while (!expressionStack.empty() && 432 !(expressionStack.peek() instanceof LeftParenthesisNode)) 433 { 434 reversePolish.add(expressionStack.pop()); 435 } 436 if (expressionStack.empty()) 437 { 438 final String msg = 439 String.format("No opening parenthesis matching closing " + 440 "parenthesis at position %d", markPos); 441 throw new IllegalArgumentException(msg); 442 } 443 expressionStack.pop(); 444 } 445 else 446 { 447 rewind(); 448 final int pos = currentPos; 449 final SCIMFilter filterComponent = readFilterComponent(); 450 reversePolish.add(new FilterNode(filterComponent, pos)); 451 } 452 } 453 454 while (!expressionStack.empty()) 455 { 456 final Node node = expressionStack.pop(); 457 if (node instanceof LeftParenthesisNode) 458 { 459 final String msg = 460 String.format("No closing parenthesis matching opening " + 461 "parenthesis at position %d", node.getPos()); 462 throw new IllegalArgumentException(msg); 463 } 464 reversePolish.add(node); 465 } 466 467 // Evaluate the reverse polish notation to create a single complex filter. 468 final Stack<FilterNode> filterStack = new Stack<FilterNode>(); 469 for (final Node node : reversePolish) 470 { 471 if (node instanceof OperatorNode) 472 { 473 final FilterNode rightOperand = filterStack.pop(); 474 final FilterNode leftOperand = filterStack.pop(); 475 476 final OperatorNode operatorNode = (OperatorNode)node; 477 if (operatorNode.getFilterType().equals(SCIMFilterType.AND)) 478 { 479 final SCIMFilter filter = SCIMFilter.createAndFilter( 480 Arrays.asList(leftOperand.getFilterComponent(), 481 rightOperand.getFilterComponent())); 482 filterStack.push(new FilterNode(filter, leftOperand.getPos())); 483 } 484 else 485 { 486 final SCIMFilter filter = SCIMFilter.createOrFilter( 487 Arrays.asList(leftOperand.getFilterComponent(), 488 rightOperand.getFilterComponent())); 489 filterStack.push(new FilterNode(filter, leftOperand.getPos())); 490 } 491 } 492 else 493 { 494 filterStack.push((FilterNode)node); 495 } 496 } 497 498 if (filterStack.size() == 0) 499 { 500 final String msg = String.format("Empty filter expression"); 501 throw new IllegalArgumentException(msg); 502 } 503 else if (filterStack.size() > 1) 504 { 505 final String msg = String.format( 506 "Unexpected characters at position %d", expressionStack.get(1).pos); 507 throw new IllegalArgumentException(msg); 508 } 509 510 return filterStack.get(0).filterComponent; 511 } 512 513 514 515 /** 516 * Read a word at the current position. A word is a consecutive sequence of 517 * characters terminated by whitespace or a parenthesis, or a single opening 518 * or closing parenthesis. Whitespace before and after the word is consumed. 519 * The start of the word is saved in {@code markPos}. 520 * 521 * @return The word at the current position, or {@code null} if the end of 522 * the input has been reached. 523 */ 524 private String readWord() 525 { 526 skipWhitespace(); 527 markPos = currentPos; 528 529 loop: 530 while (currentPos < endPos) 531 { 532 final char c = filterString.charAt(currentPos); 533 switch (c) 534 { 535 case '(': 536 case ')': 537 if (currentPos == markPos) 538 { 539 currentPos++; 540 } 541 break loop; 542 543 case ' ': 544 break loop; 545 546 default: 547 currentPos++; 548 break; 549 } 550 } 551 552 if (currentPos - markPos == 0) 553 { 554 return null; 555 } 556 557 final String word = filterString.substring(markPos, currentPos); 558 559 skipWhitespace(); 560 return word; 561 } 562 563 564 565 /** 566 * Rewind the current position to the start of the previous word or value. 567 */ 568 private void rewind() 569 { 570 currentPos = markPos; 571 } 572 573 574 575 /** 576 * Read a value at the current position. A value can be a number, or a 577 * boolean value (the words true or false), or a string value in double 578 * quotes, using the same syntax as for JSON values. Whitespace before and 579 * after the value is consumed. The start of the value is saved in 580 * {@code markPos}. 581 * 582 * @return A Boolean, Double, Integer, Long or String representing the value 583 * at the current position, or {@code null} if the end of the input 584 * has already been reached. 585 */ 586 public Object readValue() 587 { 588 skipWhitespace(); 589 markPos = currentPos; 590 591 if (currentPos == endPos) 592 { 593 return null; 594 } 595 596 if (filterString.charAt(currentPos) == '"') 597 { 598 currentPos++; 599 600 final StringBuilder builder = new StringBuilder(); 601 while (currentPos < endPos) 602 { 603 final char c = filterString.charAt(currentPos); 604 switch (c) 605 { 606 case '\\': 607 currentPos++; 608 if (endOfInput()) 609 { 610 final String msg = String.format( 611 "End of input in a string value that began at " + 612 "position %d", markPos); 613 throw new IllegalArgumentException(msg); 614 } 615 final char escapeChar = filterString.charAt(currentPos); 616 currentPos++; 617 switch (escapeChar) 618 { 619 case '"': 620 case '/': 621 case '\'': 622 case '\\': 623 builder.append(escapeChar); 624 break; 625 case 'b': 626 builder.append('\b'); 627 break; 628 case 'f': 629 builder.append('\f'); 630 break; 631 case 'n': 632 builder.append('\n'); 633 break; 634 case 'r': 635 builder.append('\r'); 636 break; 637 case 't': 638 builder.append('\t'); 639 break; 640 case 'u': 641 if (currentPos + 4 > endPos) 642 { 643 final String msg = String.format( 644 "End of input in a string value that began at " + 645 "position %d", markPos); 646 throw new IllegalArgumentException(msg); 647 } 648 final String hexChars = 649 filterString.substring(currentPos, currentPos + 4); 650 builder.append((char)Integer.parseInt(hexChars, 16)); 651 currentPos += 4; 652 break; 653 default: 654 final String msg = String.format( 655 "Unrecognized escape sequence '\\%c' in a string value " + 656 "at position %d", escapeChar, currentPos - 2); 657 throw new IllegalArgumentException(msg); 658 } 659 break; 660 661 case '"': 662 currentPos++; 663 skipWhitespace(); 664 return builder.toString(); 665 666 default: 667 builder.append(c); 668 currentPos++; 669 break; 670 } 671 } 672 673 final String msg = String.format( 674 "End of input in a string value that began at " + 675 "position %d", markPos); 676 throw new IllegalArgumentException(msg); 677 } 678 else 679 { 680 loop: 681 while (currentPos < endPos) 682 { 683 final char c = filterString.charAt(currentPos); 684 switch (c) 685 { 686 case ' ': 687 case '(': 688 case ')': 689 break loop; 690 691 case '+': 692 case '-': 693 case '.': 694 case '0': 695 case '1': 696 case '2': 697 case '3': 698 case '4': 699 case '5': 700 case '6': 701 case '7': 702 case '8': 703 case '9': 704 case 'A': 705 case 'B': 706 case 'C': 707 case 'D': 708 case 'E': 709 case 'F': 710 case 'G': 711 case 'H': 712 case 'I': 713 case 'J': 714 case 'K': 715 case 'L': 716 case 'M': 717 case 'N': 718 case 'O': 719 case 'P': 720 case 'Q': 721 case 'R': 722 case 'S': 723 case 'T': 724 case 'U': 725 case 'V': 726 case 'W': 727 case 'X': 728 case 'Y': 729 case 'Z': 730 case 'a': 731 case 'b': 732 case 'c': 733 case 'd': 734 case 'e': 735 case 'f': 736 case 'g': 737 case 'h': 738 case 'i': 739 case 'j': 740 case 'k': 741 case 'l': 742 case 'm': 743 case 'n': 744 case 'o': 745 case 'p': 746 case 'q': 747 case 'r': 748 case 's': 749 case 't': 750 case 'u': 751 case 'v': 752 case 'w': 753 case 'x': 754 case 'y': 755 case 'z': 756 // These are all OK. 757 currentPos++; 758 break; 759 760 case '/': 761 case ':': 762 case ';': 763 case '<': 764 case '=': 765 case '>': 766 case '?': 767 case '@': 768 case '[': 769 case '\\': 770 case ']': 771 case '^': 772 case '_': 773 case '`': 774 // These are not allowed, but they are explicitly called out because 775 // they are included in the range of values between '-' and 'z', and 776 // making sure all possible characters are included can help make 777 // the switch statement more efficient. We'll fall through to the 778 // default clause to reject them. 779 default: 780 final String msg = String.format( 781 "Invalid character '%c' in a number or boolean value at " + 782 "position %d", 783 c, currentPos); 784 throw new IllegalArgumentException(msg); 785 } 786 } 787 788 final String s = filterString.substring(markPos, currentPos); 789 skipWhitespace(); 790 final Object value = JSONObject.stringToValue(s); 791 792 if (value.equals(JSONObject.NULL) || value instanceof String) 793 { 794 final String msg = String.format( 795 "Invalid filter value beginning at position %d", markPos); 796 throw new IllegalArgumentException(msg); 797 } 798 799 return value; 800 } 801 } 802 803 804 805 /** 806 * Determine if the end of the input has been reached. 807 * 808 * @return {@code true} if the end of the input has been reached. 809 */ 810 private boolean endOfInput() 811 { 812 return currentPos == endPos; 813 } 814 815 816 817 /** 818 * Skip over any whitespace at the current position. 819 */ 820 private void skipWhitespace() 821 { 822 while (currentPos < endPos && filterString.charAt(currentPos) == ' ') 823 { 824 currentPos++; 825 } 826 } 827}