Source code

001package co.codewizards.cloudstore.core.util;
002
003import static java.util.Objects.*;
004
005import java.io.UnsupportedEncodingException;
006import java.nio.charset.Charset;
007import java.nio.charset.IllegalCharsetNameException;
008import java.nio.charset.StandardCharsets;
009import java.nio.charset.UnsupportedCharsetException;
010
/**
 * URL-decoder corresponding to {@link UrlEncoder}.
 * <p>
 * In contrast to the {@link java.net.URLDecoder URLDecoder}, this class does <b>not</b> decode
 * '+' (plus) into ' ' (space)! Only percent-escapes of the form {@code %xy} are decoded; every
 * other character is copied to the result unchanged.
 * <p>
 * Additionally, this class does not use the default encoding, but always UTF-8, if not specified
 * otherwise.
 * <p>
 * The reason for this class is that {@link java.io.File#toURI() File.toURI()}
 * does not encode a "+" sign. Therefore, our URL-encoding and decoding must
 * not handle the "+" specifically.
 * <p>
 * Another reason is <a href="https://java.net/jira/browse/JERSEY-417">JERSEY-417</a>.
 * I originally used {@code org.glassfish.jersey.uri.UriComponent.encode(String, Type)}
 * at some code locations, but since not all code locations have a dependency on Jersey,
 * I decided to switch consistently everywhere to {@link UrlEncoder} and {@code UrlDecoder}.
 * <p>
 * This class was copied from {@link java.net.URLDecoder URLDecoder} and changed to fit our needs.
 * @see UrlEncoder
 * @author Marco หงุ่ยตระกูล-Schulze - marco at codewizards dot co
 */
public final class UrlDecoder {

    private static final String ILLEGAL_HEX_PREFIX =
            "URLDecoder: Illegal hex characters in escape (%) pattern - ";

    private static final String INCOMPLETE_ESCAPE =
            "URLDecoder: Incomplete trailing escape (%) pattern";

    private UrlDecoder() {
    }

    /**
     * Decodes all percent-escapes ({@code %xy}) in the given string using UTF-8.
     * A '+' is <b>not</b> converted into a space.
     *
     * @param s the {@code String} to decode; must not be {@code null}
     * @return the decoded {@code String}; the very same instance as {@code s}, if it contains no '%'
     * @throws NullPointerException if {@code s} is {@code null}
     * @throws IllegalArgumentException if {@code s} contains an incomplete or non-hexadecimal escape
     * @see UrlEncoder#encode(String)
     */
    public static String decode(String s) {
        return decode(s, StandardCharsets.UTF_8);
    }

    /**
     * Decodes all percent-escapes ({@code %xy}) in the given string using the named
     * encoding scheme.
     * The supplied encoding is used to determine
     * what characters are represented by any consecutive sequences of the
     * form "<i>{@code %xy}</i>". A '+' is <b>not</b> converted into a space.
     * <p>
     * <em><strong>Note:</strong> The <a href=
     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
     * World Wide Web Consortium Recommendation</a> states that
     * UTF-8 should be used. Not doing so may introduce
     * incompatibilities.</em>
     *
     * @param s the {@code String} to decode; must not be {@code null}
     * @param enc the name of a supported character encoding; must not be {@code null}
     * @return the decoded {@code String}; the very same instance as {@code s}, if it contains no '%'
     * @throws UnsupportedEncodingException if {@code enc} is not a legal charset name or
     *             the named charset is not supported; the original charset exception is
     *             attached as the cause
     * @throws NullPointerException if {@code s} or {@code enc} is {@code null}
     * @throws IllegalArgumentException if {@code s} contains an incomplete or non-hexadecimal escape
     * @see UrlEncoder#encode(String, String)
     * @deprecated UTF-8 should be used; it is thus recommended to invoke {@link #decode(String)} instead.
     */
    @Deprecated
    public static String decode(String s, String enc) throws UnsupportedEncodingException {
        requireNonNull(s, "s");
        requireNonNull(enc, "enc");

        final Charset charset;
        try {
            charset = Charset.forName(enc);
        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
            // UnsupportedEncodingException has no cause-taking constructor, hence initCause(...).
            UnsupportedEncodingException unsupported = new UnsupportedEncodingException(enc);
            unsupported.initCause(e);
            throw unsupported;
        }
        return decode(s, charset);
    }

    /**
     * Decodes all percent-escapes ({@code %xy}) in the given string using a specific
     * encoding scheme.
     * The supplied encoding is used to determine
     * what characters are represented by any consecutive sequences of the
     * form "<i>{@code %xy}</i>". A '+' is <b>not</b> converted into a space.
     * <p>
     * Each escape must consist of '%' followed by exactly two ASCII hexadecimal digits
     * ({@code 0-9}, {@code a-f}, {@code A-F}). Sign characters or other non-hex characters
     * inside an escape (e.g. {@code %+1}) are rejected.
     * <p>
     * <em><strong>Note:</strong> The <a href=
     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
     * World Wide Web Consortium Recommendation</a> states that
     * UTF-8 should be used. Not doing so may introduce
     * incompatibilities.</em>
     *
     * @param s the {@code String} to decode; must not be {@code null}
     * @param charset the character encoding used to convert the decoded bytes into characters;
     *             must not be {@code null}
     * @return the decoded {@code String}; the very same instance as {@code s}, if it contains no '%'
     * @throws NullPointerException if {@code s} or {@code charset} is {@code null}
     * @throws IllegalArgumentException if {@code s} contains an incomplete or non-hexadecimal escape
     * @see UrlEncoder#encode(String, Charset)
     * @deprecated UTF-8 should be used; it is thus recommended to invoke {@link #decode(String)} instead.
     */
    @Deprecated
    public static String decode(String s, Charset charset) {
        requireNonNull(s, "s");
        requireNonNull(charset, "charset");

        final int numChars = s.length();
        final int firstEscape = s.indexOf('%');
        if (firstEscape < 0) {
            return s; // nothing to decode - hand back the very same instance
        }

        final StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
        sb.append(s, 0, firstEscape);

        // (numChars - firstEscape) / 3 is an upper bound for the length of any run of
        // consecutive escapes, because every escape occupies 3 characters.
        final byte[] bytes = new byte[(numChars - firstEscape) / 3];

        int i = firstEscape;
        while (i < numChars) {
            final char c = s.charAt(i);
            if (c != '%') {
                sb.append(c);
                i++;
                continue;
            }

            // Collect the whole run of consecutive escapes before converting, since a single
            // character may be represented by multiple bytes in the given charset.
            int pos = 0;
            do {
                if (i + 2 >= numChars) {
                    throw new IllegalArgumentException(INCOMPLETE_ESCAPE);
                }
                final int high = hexValue(s.charAt(i + 1));
                final int low = hexValue(s.charAt(i + 2));
                if (high < 0 || low < 0) {
                    throw new IllegalArgumentException(
                            ILLEGAL_HEX_PREFIX + "For input string: \"" + s.substring(i + 1, i + 3) + "\"");
                }
                bytes[pos++] = (byte) ((high << 4) | low);
                i += 3;
            } while (i < numChars && s.charAt(i) == '%');

            sb.append(new String(bytes, 0, pos, charset));
        }
        return sb.toString();
    }

    /**
     * Returns the numeric value (0-15) of the given ASCII hexadecimal digit or -1, if the
     * character is not one of {@code 0-9}, {@code a-f}, {@code A-F}.
     */
    private static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    }
}