/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017package org.apache.camel.util;
018
019import java.util.BitSet;
020import java.util.List;
021
022/**
023 * Encoder for unsafe URI characters.
024 * <p/>
025 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
026 */
027public final class UnsafeUriCharactersEncoder {
028    private static BitSet unsafeCharactersFastParser;
029    private static BitSet unsafeCharactersRfc1738;
030    private static BitSet unsafeCharactersHttp;
031    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
032                                              'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'};
033
034    static {
035        unsafeCharactersFastParser = new BitSet(14);
036        unsafeCharactersFastParser.set(' ');
037        unsafeCharactersFastParser.set('"');
038        unsafeCharactersFastParser.set('<');
039        unsafeCharactersFastParser.set('>');
040        unsafeCharactersFastParser.set('%');
041        unsafeCharactersFastParser.set('{');
042        unsafeCharactersFastParser.set('}');
043        unsafeCharactersFastParser.set('|');
044        unsafeCharactersFastParser.set('\\');
045        unsafeCharactersFastParser.set('^');
046        unsafeCharactersFastParser.set('~');
047        unsafeCharactersFastParser.set('[');
048        unsafeCharactersFastParser.set(']');
049        unsafeCharactersFastParser.set('`');
050        // we allow # as a safe when using the fast parser as its used for
051        // looking up beans in the registry (foo=#myBar)
052    }
053
054    static {
055        unsafeCharactersRfc1738 = new BitSet(15);
056        unsafeCharactersRfc1738.set(' ');
057        unsafeCharactersRfc1738.set('"');
058        unsafeCharactersRfc1738.set('<');
059        unsafeCharactersRfc1738.set('>');
060        unsafeCharactersRfc1738.set('#');
061        unsafeCharactersRfc1738.set('%');
062        unsafeCharactersRfc1738.set('{');
063        unsafeCharactersRfc1738.set('}');
064        unsafeCharactersRfc1738.set('|');
065        unsafeCharactersRfc1738.set('\\');
066        unsafeCharactersRfc1738.set('^');
067        unsafeCharactersRfc1738.set('~');
068        unsafeCharactersRfc1738.set('[');
069        unsafeCharactersRfc1738.set(']');
070        unsafeCharactersRfc1738.set('`');
071    }
072
073    static {
074        unsafeCharactersHttp = new BitSet(13);
075        unsafeCharactersHttp.set(' ');
076        unsafeCharactersHttp.set('"');
077        unsafeCharactersHttp.set('<');
078        unsafeCharactersHttp.set('>');
079        unsafeCharactersHttp.set('#');
080        unsafeCharactersHttp.set('%');
081        unsafeCharactersHttp.set('{');
082        unsafeCharactersHttp.set('}');
083        unsafeCharactersHttp.set('|');
084        unsafeCharactersHttp.set('\\');
085        unsafeCharactersHttp.set('^');
086        unsafeCharactersHttp.set('~');
087        unsafeCharactersHttp.set('`');
088    }
089
090    private UnsafeUriCharactersEncoder() {
091        // util class
092    }
093
094    public static boolean isSafeFastParser(char ch) {
095        return !unsafeCharactersFastParser.get(ch);
096    }
097
098    public static String encode(String s) {
099        return encode(s, unsafeCharactersRfc1738);
100    }
101    
102    public static String encodeHttpURI(String s) {
103        return encode(s, unsafeCharactersHttp);
104    }
105    
106    public static String encode(String s, BitSet unsafeCharacters) {
107        return encode(s, unsafeCharacters, false);
108    }
109    
110    public static String encode(String s, boolean checkRaw) {
111        return encode(s, unsafeCharactersRfc1738, checkRaw);
112    }
113    
114    public static String encodeHttpURI(String s, boolean checkRaw) {
115        return encode(s, unsafeCharactersHttp, checkRaw);
116    }
117
118    // Just skip the encode for isRAW part
119    public static String encode(String s, BitSet unsafeCharacters, boolean checkRaw) {
120        if (s == null) {
121            return null;
122        }
123        int len = s.length();
124        if (len == 0) {
125            return s;
126        }
127
128        // first check whether we actually need to encode
129        boolean safe = true;
130        for (int i = 0; i < len; i++) {
131            char ch = s.charAt(i);
132            // just deal with the ascii character
133            if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) {
134                safe = false;
135                break;
136            }
137        }
138        if (safe) {
139            return s;
140        }
141
142        List<Pair<Integer>> rawPairs = null;
143        if (checkRaw) {
144            rawPairs = URISupport.scanRaw(s);
145        }
146
147        // add a bit of extra space as initial capacity
148        int initial = len + 8;
149
150        // okay there are some unsafe characters so we do need to encode
151        // see details at: http://en.wikipedia.org/wiki/Url_encode
152        StringBuilder sb = new StringBuilder(initial);
153        for (int i = 0; i < len; i++) {
154            char ch = s.charAt(i);
155            if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) {
156                // special for % sign as it may be a decimal encoded value
157                if (ch == '%') {
158                    char next = i + 1 < len ? s.charAt(i + 1) : ' ';
159                    char next2 = i + 2 < len ? s.charAt(i + 2) : ' ';
160
161                    if (isHexDigit(next) && isHexDigit(next2) && !URISupport.isRaw(i, rawPairs)) {
162                        // its already encoded (decimal encoded) so just append as is
163                        sb.append(ch);
164                    } else {
165                        // must escape then, as its an unsafe character
166                        appendEscape(sb, (byte)ch);
167                    }
168                } else {
169                    // must escape then, as its an unsafe character
170                    appendEscape(sb, (byte)ch);
171                }
172            } else {
173                sb.append(ch);
174            }
175        }
176        return sb.toString();
177    }
178
179    private static void appendEscape(StringBuilder sb, byte b) {
180        sb.append('%');
181        sb.append(HEX_DIGITS[(b >> 4) & 0x0f]);
182        sb.append(HEX_DIGITS[(b >> 0) & 0x0f]);
183    }
184
185    private static boolean isHexDigit(char ch) {
186        // 0..9 A..F a..f
187        return ch >= 48 && ch <= 57 || ch >= 65 && ch <= 70 || ch >= 97 && ch <= 102;
188    }
189
190}