/* URLDecoder.java -- Class to decode URL's from encoded form.
   Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version.
*/

package java.net;

import gnu.java.lang.CPStringBuilder;

import java.io.UnsupportedEncodingException;


/**
 * This utility class contains static methods that convert a
 * string encoded in the x-www-form-urlencoded format to the original
 * text.  The x-www-form-urlencoded format replaces certain disallowed
 * characters with encoded equivalents.  All upper case and lower case
 * letters in the US alphabet remain as is, the space character (' ')
 * is replaced with '+' sign, and all other characters are converted to a
 * "%XX" format where XX is the hexadecimal representation of that character
 * in a given character encoding (default is "UTF-8").
 * <p>
 * This class is very useful for decoding strings sent to CGI scripts.
 *
 * Written using on-line Java Platform 1.2/1.4 API Specification.
 * Status:  Believed complete and correct.
 *
 * @since 1.2
 *
 * @author Warren Levy (warrenl@cygnus.com)
 * @author Aaron M. Renn (arenn@urbanophile.com) (documentation comments)
 * @author Mark Wielaard (mark@klomp.org)
 */
public class URLDecoder
{
  /**
   * Public constructor.  Note that this class has only static methods.
   */
  public URLDecoder()
  {
  }

  /**
   * This method translates the passed in string from x-www-form-urlencoded
   * format using the default encoding "UTF-8" to decode the hex encoded
   * unsafe characters.
   *
   * @param s the String to convert
   *
   * @return the converted String
   *
   * @deprecated Use {@link #decode(String, String)} and name the
   * character encoding explicitly.
   */
  public static String decode(String s)
  {
    try
      {
        return decode(s, "UTF-8");
      }
    catch (UnsupportedEncodingException uee)
      {
        // Should never happen since UTF-8 encoding should always be supported.
        // If it does, hand the input back undecoded rather than failing.
        return s;
      }
  }

  /**
   * This method translates the passed in string from x-www-form-urlencoded
   * format using the given character encoding to decode the hex encoded
   * unsafe characters.
   * <p>
   * Every '+' in the input is turned into a space before the "%XX"
   * escapes are processed, so an escaped plus sign ("%2B") still decodes
   * to '+'.  Runs of consecutive escapes are collected and converted
   * together so that multi-byte characters are decoded as a unit.
   * <p>
   * This implementation will decode the string even if it contains
   * unsafe characters (characters that should have been encoded) or if the
   * two characters following a % do not represent a hex encoded byte.
   * In those cases the unsafe character or the % character will be added
   * verbatim to the decoded result.  Both characters after the % must be
   * hexadecimal digits; a sign character is not accepted, so "%-1" is
   * copied through unchanged instead of being read as a negative number.
   *
   * @param s the String to convert
   * @param encoding the character encoding to use to decode the hex encoded
   * unsafe characters
   *
   * @return the converted String
   *
   * @exception UnsupportedEncodingException If the named encoding is not
   * supported.  The encoding is first used when a '%' is encountered, so
   * an input without any '%' is returned without the encoding being checked.
   * @exception NullPointerException If <code>s</code> is null
   *
   * @since 1.4
   */
  public static String decode(String s, String encoding)
    throws UnsupportedEncodingException
  {
    // First convert all '+' characters to spaces.  This keeps the length
    // and the position of every '%' unchanged.
    String str = s.replace('+', ' ');

    // Then go through the whole string looking for byte encoded characters
    int i;
    int start = 0;
    byte[] bytes = null;
    int length = str.length();
    CPStringBuilder result = new CPStringBuilder(length);
    while ((i = str.indexOf('%', start)) >= 0)
      {
        // Add all non-encoded characters to the result buffer
        result.append(str.substring(start, i));
        start = i;

        // Find the end of the run of consecutive "%XX" candidates.  A
        // candidate needs two characters after the '%', hence i + 2 < length.
        while ((i + 2 < length) && (str.charAt(i) == '%'))
          i += 3;

        // Make sure the scratch buffer can hold one byte per candidate;
        // it is reused across runs and only ever grows.
        int count = (i - start) / 3;
        if ((bytes == null) || (bytes.length < count))
          bytes = new byte[count];

        // Decode the candidates, stopping at the first one that is not
        // made of two hex digits.  Checking each digit individually (and
        // not using Integer.parseInt) rejects a leading sign such as "-1".
        int index = 0;
        while (start < i)
          {
            int high = Character.digit(str.charAt(start + 1), 16);
            int low = Character.digit(str.charAt(start + 2), 16);
            if (high < 0 || low < 0)
              break;
            bytes[index] = (byte) ((high << 4) | low);
            index++;
            start += 3;
          }

        // Add the bytes as characters according to the given encoding.
        // This is done even for zero bytes so that an unsupported encoding
        // is reported as soon as a '%' has been seen.
        result.append(new String(bytes, 0, index, encoding));

        // Make sure we skip to just after a % sign
        // There might not have been enough encoded characters after the %
        // or the hex chars were not actually hex chars.
        if (start < length && str.charAt(start) == '%')
          {
            result.append('%');
            start++;
          }
      }

    // Add any characters left
    if (start < length)
      result.append(str.substring(start));

    return result.toString();
  }
} // class URLDecoder