001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.camel.util; 018 019import java.util.BitSet; 020import java.util.List; 021 022/** 023 * Encoder for unsafe URI characters. 024 * <p/> 025 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article. 026 */ 027public final class UnsafeUriCharactersEncoder { 028 private static BitSet unsafeCharactersFastParser; 029 private static BitSet unsafeCharactersRfc1738; 030 private static BitSet unsafeCharactersHttp; 031 private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 032 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'}; 033 034 static { 035 unsafeCharactersFastParser = new BitSet(14); 036 unsafeCharactersFastParser.set(' '); 037 unsafeCharactersFastParser.set('"'); 038 unsafeCharactersFastParser.set('<'); 039 unsafeCharactersFastParser.set('>'); 040 unsafeCharactersFastParser.set('%'); 041 unsafeCharactersFastParser.set('{'); 042 unsafeCharactersFastParser.set('}'); 043 unsafeCharactersFastParser.set('|'); 044 unsafeCharactersFastParser.set('\\'); 045 unsafeCharactersFastParser.set('^'); 046 unsafeCharactersFastParser.set('~'); 047 unsafeCharactersFastParser.set('['); 048 unsafeCharactersFastParser.set(']'); 049 unsafeCharactersFastParser.set('`'); 050 // we allow # as a safe when using the fast parser as its used for 051 // looking up beans in the registry (foo=#myBar) 052 } 053 054 static { 055 unsafeCharactersRfc1738 = new BitSet(15); 056 unsafeCharactersRfc1738.set(' '); 057 unsafeCharactersRfc1738.set('"'); 058 unsafeCharactersRfc1738.set('<'); 059 unsafeCharactersRfc1738.set('>'); 060 unsafeCharactersRfc1738.set('#'); 061 unsafeCharactersRfc1738.set('%'); 062 unsafeCharactersRfc1738.set('{'); 063 unsafeCharactersRfc1738.set('}'); 064 unsafeCharactersRfc1738.set('|'); 065 unsafeCharactersRfc1738.set('\\'); 066 unsafeCharactersRfc1738.set('^'); 067 unsafeCharactersRfc1738.set('~'); 068 unsafeCharactersRfc1738.set('['); 069 unsafeCharactersRfc1738.set(']'); 070 unsafeCharactersRfc1738.set('`'); 071 } 072 073 static { 074 unsafeCharactersHttp = new BitSet(13); 075 unsafeCharactersHttp.set(' '); 076 unsafeCharactersHttp.set('"'); 077 unsafeCharactersHttp.set('<'); 078 unsafeCharactersHttp.set('>'); 079 unsafeCharactersHttp.set('#'); 080 unsafeCharactersHttp.set('%'); 081 unsafeCharactersHttp.set('{'); 082 unsafeCharactersHttp.set('}'); 083 unsafeCharactersHttp.set('|'); 084 unsafeCharactersHttp.set('\\'); 085 unsafeCharactersHttp.set('^'); 086 unsafeCharactersHttp.set('~'); 087 unsafeCharactersHttp.set('`'); 088 } 089 090 private UnsafeUriCharactersEncoder() { 091 // util class 092 } 093 094 public static boolean isSafeFastParser(char ch) { 095 return !unsafeCharactersFastParser.get(ch); 096 } 097 098 public static String encode(String s) { 099 return encode(s, unsafeCharactersRfc1738); 100 } 101 102 public static String encodeHttpURI(String s) { 103 return encode(s, unsafeCharactersHttp); 104 } 105 106 public static String encode(String s, BitSet unsafeCharacters) { 107 return encode(s, unsafeCharacters, false); 108 } 109 110 public static String encode(String s, boolean checkRaw) { 111 return encode(s, unsafeCharactersRfc1738, checkRaw); 112 } 113 114 public static String encodeHttpURI(String s, boolean checkRaw) { 115 return encode(s, unsafeCharactersHttp, checkRaw); 116 } 117 118 // Just skip the encode for isRAW part 119 public static String encode(String s, BitSet unsafeCharacters, boolean checkRaw) { 120 if (s == null) { 121 return null; 122 } 123 int len = s.length(); 124 if (len == 0) { 125 return s; 126 } 127 128 // first check whether we actually need to encode 129 boolean safe = true; 130 for (int i = 0; i < len; i++) { 131 char ch = s.charAt(i); 132 // just deal with the ascii character 133 if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) { 134 safe = false; 135 break; 136 } 137 } 138 if (safe) { 139 return s; 140 } 141 142 List<Pair<Integer>> rawPairs = null; 143 if (checkRaw) { 144 rawPairs = URISupport.scanRaw(s); 145 } 146 147 // add a bit of extra space as initial capacity 148 int initial = len + 8; 149 150 // okay there are some unsafe characters so we do need to encode 151 // see details at: http://en.wikipedia.org/wiki/Url_encode 152 StringBuilder sb = new StringBuilder(initial); 153 for (int i = 0; i < len; i++) { 154 char ch = s.charAt(i); 155 if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) { 156 // special for % sign as it may be a decimal encoded value 157 if (ch == '%') { 158 char next = i + 1 < len ? s.charAt(i + 1) : ' '; 159 char next2 = i + 2 < len ? s.charAt(i + 2) : ' '; 160 161 if (isHexDigit(next) && isHexDigit(next2) && !URISupport.isRaw(i, rawPairs)) { 162 // its already encoded (decimal encoded) so just append as is 163 sb.append(ch); 164 } else { 165 // must escape then, as its an unsafe character 166 appendEscape(sb, (byte)ch); 167 } 168 } else { 169 // must escape then, as its an unsafe character 170 appendEscape(sb, (byte)ch); 171 } 172 } else { 173 sb.append(ch); 174 } 175 } 176 return sb.toString(); 177 } 178 179 private static void appendEscape(StringBuilder sb, byte b) { 180 sb.append('%'); 181 sb.append(HEX_DIGITS[(b >> 4) & 0x0f]); 182 sb.append(HEX_DIGITS[(b >> 0) & 0x0f]); 183 } 184 185 private static boolean isHexDigit(char ch) { 186 // 0..9 A..F a..f 187 return ch >= 48 && ch <= 57 || ch >= 65 && ch <= 70 || ch >= 97 && ch <= 102; 188 } 189 190}