/*
 * Copyright 2011-2016 UnboundID Corp.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (GPLv2 only)
 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses>.
 */

package com.unboundid.scim.sdk;

import org.json.JSONObject;

import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Stack;



/**
 * A parser for SCIM filter expressions.
 * <p>
 * A parser instance carries mutable position state ({@code currentPos},
 * {@code markPos}) that is advanced while reading, so each instance parses
 * the single filter string it was constructed with.
 */
public class FilterParser
{
  /**
   * The filter to be parsed.
   */
  private final String filterString;

  /**
   * The default schema that should be assumed when parsing attributes with
   * no schema explicitly defined in the URN.
   */
  private final String defaultSchema;

  /**
   * The position one higher than the last character.
   */
  private int endPos;

  /**
   * The current character position.
   */
  private int currentPos;

  /**
   * The position marking the first character of the previous word or value.
   */
  private int markPos;



  /**
   * Base class for expression stack nodes. The expression stack is needed to
   * employ the shunting-yard algorithm to parse the filter expression.
   */
  class Node
  {
    private final int pos;



    /**
     * Create a new node.
     *
     * @param pos  The position of the node in the filter string.
     */
    public Node(final int pos)
    {
      this.pos = pos;
    }



    /**
     * Retrieve the position of the node in the filter string.
     * @return  The position of the node in the filter string.
     */
    public int getPos()
    {
      return pos;
    }
  }



  /**
   * A node representing a filter component.
   */
  class FilterNode extends Node
  {
    private final SCIMFilter filterComponent;



    /**
     * Create a new filter component node.
     *
     * @param filterComponent  The filter component.
     * @param pos              The position of the node in the filter string.
     */
    public FilterNode(final SCIMFilter filterComponent,
                      final int pos)
    {
      super(pos);
      this.filterComponent = filterComponent;
    }



    /**
     * Retrieve the filter component.
     *
     * @return  The filter component.
     */
    public SCIMFilter getFilterComponent()
    {
      return filterComponent;
    }



    @Override
    public String toString()
    {
      return "FilterNode{" +
             "filterComponent=" + filterComponent +
             "} " + super.toString();
    }
  }



  /**
   * A node representing a logical operator.
   */
  class OperatorNode extends Node
  {
    private final SCIMFilterType filterType;

    /**
     * Create a new logical operator node.
     *
     * @param filterType   The type of operator, either SCIMFilterType.AND or
     *                     SCIMFilterType.OR.
     * @param pos          The position of the node in the filter string.
     */
    public OperatorNode(final SCIMFilterType filterType,
                        final int pos)
    {
      super(pos);
      this.filterType = filterType;
    }



    /**
     * Retrieve the type of operator.
     *
     * @return  The type of operator, either SCIMFilterType.AND or
     *          SCIMFilterType.OR.
     */
    public SCIMFilterType getFilterType()
    {
      return filterType;
    }



    /**
     * Retrieve the precedence of the operator. A higher number binds more
     * tightly: AND yields 2, everything else (including OR) yields 1.
     *
     * @return  The precedence of the operator.
     */
    public int getPrecedence()
    {
      switch (filterType)
      {
        case AND:
          return 2;

        case OR:
        default:
          return 1;
      }
    }



    @Override
    public String toString()
    {
      return "OperatorNode{" +
             "filterType=" + filterType +
             "} " + super.toString();
    }
  }



  /**
   * A node representing an opening parenthesis.
   */
  class LeftParenthesisNode extends Node
  {
    /**
     * Create a new opening parenthesis node.
     *
     * @param pos  The position of the parenthesis in the filter string.
     */
    public LeftParenthesisNode(final int pos)
    {
      super(pos);
    }
  }



  /**
   * Create a new instance of a filter parser.
   *
   * @param filterString  The filter to be parsed.
   * @param defaultSchema The default schema that should be assumed when parsing
   *                      attributes without the schema explicitly defined in
   *                      the URN.
   */
  public FilterParser(final String filterString, final String defaultSchema)
  {
    this.filterString = filterString;
    this.endPos = filterString.length();
    this.currentPos = 0;
    this.markPos = 0;
    this.defaultSchema = defaultSchema;
  }



  /**
   * Parse the filter provided in the constructor.
   *
   * @return  A parsed SCIM filter.
   *
   * @throws  SCIMException  If the filter string could not be parsed. Any
   *                         exception raised while reading the filter is
   *                         reported with status code 400.
   */
  public SCIMFilter parse()
      throws SCIMException
  {
    try
    {
      return readFilter();
    }
    catch (Exception e)
    {
      Debug.debugException(e);
      throw SCIMException.createException(
          400, MessageFormat.format("Invalid filter ''{0}'': {1}",
                                    filterString, e.getMessage()));
    }
  }



  /**
   * Read a filter component at the current position. A filter component is
   * <pre>
   * attribute attribute-operator [value]
   * </pre>
   * Most attribute operators require a value but 'pr' (presence) requires
   * no value.
   *
   * @return  The parsed filter component.
   */
  private SCIMFilter readFilterComponent()
  {
    String word = readWord();
    if (word == null)
    {
      final String msg = String.format(
          "End of input at position %d but expected a filter expression",
          markPos);
      throw new IllegalArgumentException(msg);
    }

    final AttributePath filterAttribute;
    try
    {
      filterAttribute = AttributePath.parse(word, defaultSchema);
    }
    catch (final Exception e)
    {
      Debug.debugException(e);
      final String msg = String.format(
          "Expected an attribute reference at position %d: %s",
          markPos, e.getMessage());
      throw new IllegalArgumentException(msg);
    }

    final String operator = readWord();
    if (operator == null)
    {
      final String msg = String.format(
          "End of input at position %d but expected an attribute operator",
          markPos);
      throw new IllegalArgumentException(msg);
    }

    final SCIMFilterType filterType;
    if (operator.equalsIgnoreCase("eq"))
    {
      filterType = SCIMFilterType.EQUALITY;
    }
    else if (operator.equalsIgnoreCase("co"))
    {
      filterType = SCIMFilterType.CONTAINS;
    }
    else if (operator.equalsIgnoreCase("sw"))
    {
      filterType = SCIMFilterType.STARTS_WITH;
    }
    else if (operator.equalsIgnoreCase("pr"))
    {
      filterType = SCIMFilterType.PRESENCE;
    }
    else if (operator.equalsIgnoreCase("gt"))
    {
      filterType = SCIMFilterType.GREATER_THAN;
    }
    else if (operator.equalsIgnoreCase("ge"))
    {
      filterType = SCIMFilterType.GREATER_OR_EQUAL;
    }
    else if (operator.equalsIgnoreCase("lt"))
    {
      filterType = SCIMFilterType.LESS_THAN;
    }
    else if (operator.equalsIgnoreCase("le"))
    {
      filterType = SCIMFilterType.LESS_OR_EQUAL;
    }
    else
    {
      final String msg = String.format(
          "Unrecognized attribute operator '%s' at position %d. " +
          "Expected: eq,co,sw,pr,gt,ge,lt,le", operator, markPos);
      throw new IllegalArgumentException(msg);
    }

    // Every operator except presence must be followed by a value.
    final String filterValueString;
    if (!filterType.equals(SCIMFilterType.PRESENCE))
    {
      filterValueString = readValue();
      if (filterValueString == null)
      {
        final String msg = String.format(
            "End of input at position %d while expecting a value for " +
            "operator %s", markPos, operator);
        throw new IllegalArgumentException(msg);
      }
    }
    else
    {
      filterValueString = null;
    }

    return new SCIMFilter(
        filterType, filterAttribute, filterValueString,
        (filterValueString != null), null);
  }



  /**
   * Read a filter expression.
   *
   * @return  The SCIM filter.
   */
  private SCIMFilter readFilter()
  {
    return evaluateReversePolish(readReversePolish());
  }



  /**
   * Read the remaining input and arrange it in reverse polish notation using
   * the shunting-yard algorithm, where the operands are filter components and
   * the operators are the logical AND and OR operators. This algorithm
   * ensures that operator precedence and parentheses are respected.
   *
   * @return  The filter component and operator nodes in reverse polish order.
   */
  private List<Node> readReversePolish()
  {
    final Stack<Node> expressionStack = new Stack<Node>();
    final List<Node> reversePolish = new ArrayList<Node>();

    for (String word = readWord(); word != null; word = readWord())
    {
      if (word.equalsIgnoreCase("and") || word.equalsIgnoreCase("or"))
      {
        final OperatorNode currentOperator;
        if (word.equalsIgnoreCase("and"))
        {
          currentOperator = new OperatorNode(SCIMFilterType.AND, markPos);
        }
        else
        {
          currentOperator = new OperatorNode(SCIMFilterType.OR, markPos);
        }

        // Operators already on the stack with equal or higher precedence
        // are emitted before the current operator is stacked.
        while (!expressionStack.empty() &&
               (expressionStack.peek() instanceof OperatorNode))
        {
          final OperatorNode previousOperator =
              (OperatorNode)expressionStack.peek();
          if (previousOperator.getPrecedence() <
              currentOperator.getPrecedence())
          {
            break;
          }
          reversePolish.add(expressionStack.pop());
        }
        expressionStack.push(currentOperator);
      }
      else if (word.equals("("))
      {
        expressionStack.push(new LeftParenthesisNode(markPos));
      }
      else if (word.equals(")"))
      {
        while (!expressionStack.empty() &&
               !(expressionStack.peek() instanceof LeftParenthesisNode))
        {
          reversePolish.add(expressionStack.pop());
        }
        if (expressionStack.empty())
        {
          final String msg =
              String.format("No opening parenthesis matching closing " +
                            "parenthesis at position %d", markPos);
          throw new IllegalArgumentException(msg);
        }

        // Discard the matching opening parenthesis.
        expressionStack.pop();
      }
      else
      {
        // The word starts a filter component, so go back and read the whole
        // component from its first character.
        rewind();
        final int pos = currentPos;
        final SCIMFilter filterComponent = readFilterComponent();
        reversePolish.add(new FilterNode(filterComponent, pos));
      }
    }

    while (!expressionStack.empty())
    {
      final Node node = expressionStack.pop();
      if (node instanceof LeftParenthesisNode)
      {
        final String msg =
            String.format("No closing parenthesis matching opening " +
                          "parenthesis at position %d", node.getPos());
        throw new IllegalArgumentException(msg);
      }
      reversePolish.add(node);
    }

    return reversePolish;
  }



  /**
   * Evaluate nodes in reverse polish notation to create a single complex
   * filter.
   *
   * @param reversePolish  The filter component and operator nodes in reverse
   *                       polish order.
   *
   * @return  The single filter that the nodes reduce to.
   *
   * @throws IllegalArgumentException  If the expression is empty, if a
   *                                   logical operator does not have two
   *                                   operands, or if more than one filter
   *                                   remains after evaluation.
   */
  private SCIMFilter evaluateReversePolish(final List<Node> reversePolish)
  {
    final Stack<FilterNode> filterStack = new Stack<FilterNode>();
    for (final Node node : reversePolish)
    {
      if (!(node instanceof OperatorNode))
      {
        filterStack.push((FilterNode)node);
        continue;
      }

      // Check before popping so that a dangling operator is reported with
      // its position rather than as an EmptyStackException.
      if (filterStack.size() < 2)
      {
        final String msg = String.format(
            "Missing filter expression for the logical operator at " +
            "position %d", node.getPos());
        throw new IllegalArgumentException(msg);
      }

      final FilterNode rightOperand = filterStack.pop();
      final FilterNode leftOperand = filterStack.pop();
      final List<SCIMFilter> operands =
          Arrays.asList(leftOperand.getFilterComponent(),
                        rightOperand.getFilterComponent());

      final OperatorNode operatorNode = (OperatorNode)node;
      final SCIMFilter filter;
      if (operatorNode.getFilterType().equals(SCIMFilterType.AND))
      {
        filter = SCIMFilter.createAndFilter(operands);
      }
      else
      {
        filter = SCIMFilter.createOrFilter(operands);
      }
      filterStack.push(new FilterNode(filter, leftOperand.getPos()));
    }

    if (filterStack.isEmpty())
    {
      throw new IllegalArgumentException("Empty filter expression");
    }
    else if (filterStack.size() > 1)
    {
      // The second remaining entry is the first filter that was not joined
      // to the rest of the expression by a logical operator.
      final String msg = String.format(
          "Unexpected characters at position %d",
          filterStack.get(1).getPos());
      throw new IllegalArgumentException(msg);
    }

    return filterStack.get(0).getFilterComponent();
  }



  /**
   * Read a word at the current position. A word is a consecutive sequence of
   * characters terminated by whitespace or a parenthesis, or a single opening
   * or closing parenthesis. Whitespace before and after the word is consumed.
   * The start of the word is saved in {@code markPos}.
   *
   * @return The word at the current position, or {@code null} if the end of
   *         the input has been reached.
   */
  private String readWord()
  {
    skipWhitespace();
    markPos = currentPos;

    loop:
    while (currentPos < endPos)
    {
      final char c = filterString.charAt(currentPos);
      switch (c)
      {
        case '(':
        case ')':
          // A parenthesis is a word by itself when it comes first, and
          // otherwise terminates the word without being consumed.
          if (currentPos == markPos)
          {
            currentPos++;
          }
          break loop;

        case ' ':
          break loop;

        default:
          currentPos++;
          break;
      }
    }

    if (currentPos - markPos == 0)
    {
      return null;
    }

    final String word = filterString.substring(markPos, currentPos);

    skipWhitespace();
    return word;
  }



  /**
   * Rewind the current position to the start of the previous word or value.
   */
  private void rewind()
  {
    currentPos = markPos;
  }



  /**
   * Read a value at the current position. A value can be a number, a datetime
   * or a boolean value (the words true or false), or a string value in double
   * quotes, using the same syntax as for JSON values. Whitespace before and
   * after the value is consumed. The start of the value is saved in
   * {@code markPos}.
   *
   * @return A String representing the value at the current position, or
   *         {@code null} if the end of the input has already been reached.
   *
   * @throws IllegalArgumentException  If the value is malformed.
   */
  public String readValue()
  {
    skipWhitespace();
    markPos = currentPos;

    if (currentPos == endPos)
    {
      return null;
    }

    if (filterString.charAt(currentPos) == '"')
    {
      return readQuotedValue();
    }
    else
    {
      return readUnquotedValue();
    }
  }



  /**
   * Read a double-quoted string value whose opening quote is at the current
   * position. Escape sequences are replaced by the characters they denote,
   * and whitespace after the closing quote is consumed.
   *
   * @return  The content of the string without the enclosing quotes.
   */
  private String readQuotedValue()
  {
    // Step over the opening quote.
    currentPos++;

    final StringBuilder builder = new StringBuilder();
    while (currentPos < endPos)
    {
      final char c = filterString.charAt(currentPos);
      currentPos++;
      switch (c)
      {
        case '\\':
          builder.append(readEscapedChar());
          break;

        case '"':
          skipWhitespace();
          return builder.toString();

        default:
          builder.append(c);
          break;
      }
    }

    throw unterminatedStringValue();
  }



  /**
   * Read the remainder of an escape sequence in a string value. The current
   * position must be immediately after the backslash.
   *
   * @return  The character denoted by the escape sequence.
   */
  private char readEscapedChar()
  {
    if (endOfInput())
    {
      throw unterminatedStringValue();
    }

    final char escapeChar = filterString.charAt(currentPos);
    currentPos++;
    switch (escapeChar)
    {
      case '"':
      case '/':
      case '\'':
      case '\\':
        return escapeChar;
      case 'b':
        return '\b';
      case 'f':
        return '\f';
      case 'n':
        return '\n';
      case 'r':
        return '\r';
      case 't':
        return '\t';
      case 'u':
        return readUnicodeEscapeDigits();
      default:
        // currentPos is now two past the backslash that began the sequence.
        final String msg = String.format(
            "Unrecognized escape sequence '\\%c' in a string value " +
            "at position %d", escapeChar, currentPos - 2);
        throw new IllegalArgumentException(msg);
    }
  }



  /**
   * Read the four hexadecimal digits of a unicode escape sequence. The
   * current position must be immediately after the 'u' character.
   *
   * @return  The character whose code unit is given by the four digits.
   */
  private char readUnicodeEscapeDigits()
  {
    if (currentPos + 4 > endPos)
    {
      throw unterminatedStringValue();
    }

    int codeUnit = 0;
    for (int i = 0; i < 4; i++)
    {
      // Check each character individually so that a sign character, which
      // Integer.parseInt would accept, is rejected.
      final int digit =
          Character.digit(filterString.charAt(currentPos + i), 16);
      if (digit < 0)
      {
        final String msg = String.format(
            "Invalid hexadecimal digits '%s' in a unicode escape sequence " +
            "at position %d",
            filterString.substring(currentPos, currentPos + 4),
            currentPos - 2);
        throw new IllegalArgumentException(msg);
      }
      codeUnit = (codeUnit << 4) | digit;
    }

    currentPos += 4;
    return (char)codeUnit;
  }



  /**
   * Create the exception reported when the input ends inside a string value.
   *
   * @return  The exception to be thrown.
   */
  private IllegalArgumentException unterminatedStringValue()
  {
    final String msg = String.format(
        "End of input in a string value that began at " +
        "position %d", markPos);
    return new IllegalArgumentException(msg);
  }



  /**
   * Read a value that is not enclosed in quotes, starting at {@code markPos}.
   * The value ends at a space, a parenthesis or the end of the input, and
   * whitespace after it is consumed. The text must be something other than a
   * string or null when interpreted by {@code JSONObject.stringToValue}.
   *
   * @return  The text of the value exactly as it appears in the filter.
   */
  private String readUnquotedValue()
  {
    while (currentPos < endPos)
    {
      final char c = filterString.charAt(currentPos);
      if (c == ' ' || c == '(' || c == ')')
      {
        break;
      }

      if (!isUnquotedValueChar(c))
      {
        final String msg = String.format(
            "Invalid character '%c' in a number or boolean value at " +
            "position %d",
            c, currentPos);
        throw new IllegalArgumentException(msg);
      }
      currentPos++;
    }

    final String s = filterString.substring(markPos, currentPos);
    skipWhitespace();
    final Object value = JSONObject.stringToValue(s);
    if (value.equals(JSONObject.NULL) || value instanceof String)
    {
      final String msg = String.format(
          "Invalid filter value beginning at position %d", markPos);
      throw new IllegalArgumentException(msg);
    }

    return s;
  }



  /**
   * Determine whether a character may appear in a value that is not enclosed
   * in quotes. The permitted characters are '+', '-', '.', the digits 0-9
   * and the letters A-Z and a-z.
   *
   * @param c  The character to be tested.
   *
   * @return  {@code true} if the character is permitted.
   */
  private static boolean isUnquotedValueChar(final char c)
  {
    return c == '+' || c == '-' || c == '.' ||
           (c >= '0' && c <= '9') ||
           (c >= 'A' && c <= 'Z') ||
           (c >= 'a' && c <= 'z');
  }



  /**
   * Determine if the end of the input has been reached.
   *
   * @return  {@code true} if the end of the input has been reached.
   */
  private boolean endOfInput()
  {
    return currentPos == endPos;
  }



  /**
   * Skip over any whitespace at the current position. Only the space
   * character is treated as whitespace.
   */
  private void skipWhitespace()
  {
    while (currentPos < endPos && filterString.charAt(currentPos) == ' ')
    {
      currentPos++;
    }
  }
}