package co.codewizards.cloudstore.core.util;

import static java.util.Objects.*;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;

/**
 * URL-decoder corresponding to {@link UrlEncoder}.
 * <p>
 * In contrast to the {@link java.net.URLDecoder URLDecoder}, this class does <b>not</b> decode
 * '+' (plus) into ' ' (space)!
 * <p>
 * Additionally, this class does not use the default encoding, but always UTF-8, if not specified
 * otherwise.
 * <p>
 * The reason for this class is that {@link java.io.File#toURI() File.toURI()}
 * does not encode a "+" sign. Therefore, our URL-encoding and decoding must
 * not handle the "+" specifically.
 * <p>
 * Another reason is <a href="https://java.net/jira/browse/JERSEY-417">JERSEY-417</a>.
 * I originally used {@code org.glassfish.jersey.uri.UriComponent.encode(String, Type)}
 * at some code locations, but since not all code locations have a dependency on Jersey,
 * I decided to switch consistently everywhere to {@link UrlEncoder} and {@code UrlDecoder}.
 * <p>
 * This class was copied from {@link java.net.URLDecoder URLDecoder} and changed to fit our needs.
 * @see UrlEncoder
 * @author Marco หงุ่ยตระกูล-Schulze - marco at codewizards dot co
 */
public final class UrlDecoder {

	private UrlDecoder() {
	}

	/**
	 * Decodes a percent-encoded string using UTF-8.
	 * <p>
	 * Only escape sequences of the form "<i>{@code %xy}</i>" are decoded; a '+' is
	 * copied to the result unchanged.
	 *
	 * @param s the {@code String} to decode. Must not be {@code null}.
	 * @return the newly decoded {@code String}, or {@code s} itself if it contains no '%'.
	 * @throws NullPointerException if {@code s} is {@code null}.
	 * @throws IllegalArgumentException if {@code s} contains an incomplete escape sequence
	 * or an escape sequence with characters other than hexadecimal digits.
	 * @see UrlEncoder#encode(String)
	 */
	public static String decode(String s) {
		return decode(s, StandardCharsets.UTF_8);
	}

	/**
	 * Decodes a percent-encoded string using the character encoding with the given name.
	 * <p>
	 * The supplied encoding is used to determine what characters are represented
	 * by any consecutive sequences of the form "<i>{@code %xy}</i>".
	 * A '+' is copied to the result unchanged.
	 * <p>
	 * <em><strong>Note:</strong> The <a href=
	 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
	 * World Wide Web Consortium Recommendation</a> states that
	 * UTF-8 should be used. Not doing so may introduce
	 * incompatibilities.</em>
	 *
	 * @param s the {@code String} to decode. Must not be {@code null}.
	 * @param enc the name of a supported {@link Charset character encoding}.
	 * Must not be {@code null}.
	 * @return the newly decoded {@code String}, or {@code s} itself if it contains no '%'.
	 * @throws UnsupportedEncodingException if the named character encoding is illegal
	 * or not supported. The original exception is attached as cause.
	 * @throws NullPointerException if {@code s} or {@code enc} is {@code null}.
	 * @throws IllegalArgumentException if {@code s} contains an incomplete escape sequence
	 * or an escape sequence with characters other than hexadecimal digits.
	 * @see UrlEncoder#encode(String, String)
	 * @deprecated UTF-8 should be used; it is thus recommended to invoke {@link #decode(String)} instead.
	 */
	@Deprecated
	public static String decode(String s, String enc) throws UnsupportedEncodingException {
		requireNonNull(s, "s");
		requireNonNull(enc, "enc");

		final Charset charset;
		try {
			charset = Charset.forName(enc);
		} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
			// UnsupportedEncodingException has no (message, cause) constructor.
			final UnsupportedEncodingException x = new UnsupportedEncodingException(enc);
			x.initCause(e);
			throw x;
		}
		return decode(s, charset);
	}

	/**
	 * Decodes a percent-encoded string using the given character encoding.
	 * <p>
	 * The supplied encoding is used to determine what characters are represented
	 * by any consecutive sequences of the form "<i>{@code %xy}</i>".
	 * A '+' is copied to the result unchanged.
	 * <p>
	 * <em><strong>Note:</strong> The <a href=
	 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
	 * World Wide Web Consortium Recommendation</a> states that
	 * UTF-8 should be used. Not doing so may introduce
	 * incompatibilities.</em>
	 *
	 * @param s the {@code String} to decode. Must not be {@code null}.
	 * @param charset the {@link Charset character encoding}. Must not be {@code null}.
	 * @return the newly decoded {@code String}, or {@code s} itself if it contains no '%'.
	 * @throws NullPointerException if {@code s} or {@code charset} is {@code null}.
	 * @throws IllegalArgumentException if {@code s} contains an incomplete escape sequence
	 * (e.g. a trailing "%x") or an escape sequence with characters other than
	 * the hexadecimal digits 0-9, a-f, A-F.
	 * @see UrlEncoder#encode(String, Charset)
	 * @deprecated UTF-8 should be used; it is thus recommended to invoke {@link #decode(String)} instead.
	 */
	@Deprecated
	public static String decode(String s, Charset charset) {
		requireNonNull(s, "s");
		requireNonNull(charset, "charset");

		final int length = s.length();
		int i = s.indexOf('%');
		if (i < 0) {
			return s; // no escape sequence => nothing to decode
		}

		final StringBuilder sb = new StringBuilder(length > 500 ? length / 2 : length);
		sb.append(s, 0, i);

		// Every escape sequence consumes 3 chars, hence (length - i) / 3 is an
		// upper bound for the number of bytes in any run of escapes from here on.
		final byte[] bytes = new byte[(length - i) / 3];

		while (i < length) {
			char c = s.charAt(i);
			if (c != '%') {
				sb.append(c);
				i++;
				continue;
			}

			/*
			 * Starting with this instance of %, process all consecutive
			 * substrings of the form %xy. Each substring %xy yields one byte.
			 * All consecutive bytes obtained this way are converted together,
			 * because a single character may be encoded by multiple bytes.
			 */
			int pos = 0;
			while (i + 2 < length && c == '%') {
				final int hi = hexValue(s.charAt(i + 1));
				final int lo = hexValue(s.charAt(i + 2));
				if (hi < 0 || lo < 0) {
					throw new IllegalArgumentException(
							"URLDecoder: Illegal hex characters in escape (%) pattern - For input string: \""
							+ s.substring(i + 1, i + 3) + "\"");
				}
				bytes[pos++] = (byte) ((hi << 4) | lo);
				i += 3;
				if (i < length) {
					c = s.charAt(i);
				}
			}

			// Still looking at a '%' means fewer than 2 chars follow it,
			// i.e. a trailing, incomplete escape sequence such as "%x".
			if (i < length && c == '%') {
				throw new IllegalArgumentException(
						"URLDecoder: Incomplete trailing escape (%) pattern");
			}

			sb.append(new String(bytes, 0, pos, charset));
		}

		return sb.toString();
	}

	/**
	 * Gets the numeric value of an ASCII hexadecimal digit.
	 * <p>
	 * In contrast to {@link Integer#parseInt(String, int)}, neither signs ('+', '-')
	 * nor non-ASCII digits are accepted.
	 *
	 * @param c the character to convert.
	 * @return the value 0..15, or -1 if {@code c} is not one of 0-9, a-f, A-F.
	 */
	private static int hexValue(char c) {
		if (c >= '0' && c <= '9') {
			return c - '0';
		}
		if (c >= 'a' && c <= 'f') {
			return c - 'a' + 10;
		}
		if (c >= 'A' && c <= 'F') {
			return c - 'A' + 10;
		}
		return -1;
	}
}