sunlabs.brazil.util.http.HttpRequest Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of sunlabs.brazil Show documentation
Extremely small footprint Java HTTP stack.
The newest version!
/*
 * HttpRequest.java
 *
 * Brazil project web application toolkit,
 * export version: 2.3 
 * Copyright (c) 1999-2007 Sun Microsystems, Inc.
 *
 * Sun Public License Notice
 *
 * The contents of this file are subject to the Sun Public License Version 
 * 1.0 (the "License"). You may not use this file except in compliance with 
 * the License. A copy of the License is included as the file "license.terms",
 * and also available at http://www.sun.com/
 * 
 * The Original Code is from:
 *    Brazil project web application toolkit release 2.3.
 * The Initial Developer of the Original Code is: cstevens.
 * Portions created by cstevens are Copyright (C) Sun Microsystems, Inc.
 * All Rights Reserved.
 * 
 * Contributor(s): cstevens, drach, suhler.
 *
 * Version:  2.7
 * Created by cstevens on 99/09/15
 * Last modified by suhler on 07/03/26 13:53:18
 *
 * Version Histories:
 *
 * 2.7 07/03/26-13:53:18 (suhler)
 *   doc updates
 *
 * 2.6 07/03/26-13:44:17 (suhler)
 *   add sample main() to act as a simple "wget"
 *
 * 2.5 04/11/30-15:19:40 (suhler)
 *   fixed sccs version string
 *
 * 2.4 03/08/01-16:18:01 (suhler)
 *   fixes for javadoc
 *
 * 2.3 03/05/12-16:26:13 (suhler)
 *   Merged changes between child workspace "/home/suhler/brazil/naws" and
 *   parent workspace "/net/mack.eng/export/ws/brazil/naws".
 *
 * 1.20.1.1 03/04/17-10:03:47 (suhler)
 *   no changes made
 *
 * 2.2 03/04/15-17:29:08 (drach)
 *   Add protected modifier to variable connected so subclasses outside
 *   package can access it.
 *
 * 2.1 02/10/01-16:36:54 (suhler)
 *   version change
 *
 * 1.20 02/07/23-08:31:15 (suhler)
 *   check for no content type
 *
 * 1.19 02/07/11-15:37:33 (suhler)
 *   add encoding diagnostics
 *
 * 1.18 02/07/11-15:03:40 (suhler)
 *   add getContent() and getEncoding() convenience methods for dealing
 *   with charset encoding
 *
 * 1.17 02/04/29-17:04:41 (suhler)
 *   added public static boolean displayAllHeaders to turn on
 *   http header debugging during development.
 *
 * 1.16 02/04/24-13:36:21 (suhler)
 *   doc lint
 *
 * 1.15 02/02/26-14:42:14 (suhler)
 *   doc lint
 *
 * 1.14 02/02/26-14:32:38 (suhler)
 *   typo
 *
 * 1.13 02/02/26-14:25:52 (suhler)
 *   added "addHeaders" convenience method for adding http headers from
 *   properties objects
 *
 * 1.12 00/07/11-11:23:47 (cstevens)
 *   Some servers send "HTTP/1.0 100 Continue" in response to an HTTP/1.1 POST!
 *
 * 1.11 00/07/06-15:03:10 (cstevens)
 *   Although HTTP/1.1 chunking spec says that there is one "\r\n" between
 *   chunks, some servers (for example, maps.yahoo.com) send more than one blank
 *   line between chunks.  So, read and skip all the blank lines seen between
 *   chunks.
 *
 * 1.10 99/11/30-09:48:14 (suhler)
 *   remove diagnostics
 *
 * 1.9 99/11/09-20:23:23 (cstevens)
 *   bugs revealed by writing tests.
 *
 * 1.8 99/10/26-18:56:38 (cstevens)
 *   Change MimeHeaders so it uses "put" instead of "set", to be compatible with
 *   names chosen by Hashtable and StringMap.
 *
 * 1.7 99/10/14-14:16:31 (cstevens)
 *   merge issues.
 *
 * 1.6 99/10/14-13:19:18 (cstevens)
 *   Merged changes between child workspace "/home/cstevens/ws/brazil/naws" and
 *   parent workspace "/export/ws/brazil/naws".
 *
 * 1.4.1.2 99/10/14-13:01:06 (cstevens)
 *   Documentation.
 *   Fold TimedThread and the default HttpSocketPool into this file, since they are
 *   not used outside of this file (at this time).
 *
 * 1.5 99/10/11-12:38:38 (suhler)
 *   Merged changes between child workspace "/home/suhler/brazil/naws" and
 *   parent workspace "/net/mack.eng/export/ws/brazil/naws".
 *
 * 1.4.1.1 99/10/08-16:54:45 (cstevens)
 *   documentation
 *   Move logic for removing point-to-point headers into the HttpRequest as a
 *   static method.
 *
 * 1.4 99/10/07-13:17:55 (cstevens)
 *   Documentation for HttpRequest (in progress).
 *
 * 1.3.1.1 99/10/06-12:31:57 (suhler)
 *   comment out debugging
 *
 * 1.3 99/09/15-15:57:16 (cstevens)
 *   debugging
 *
 * 1.2 99/09/15-14:52:02 (cstevens)
 *   import *;
 *
 * 1.2 99/09/15-14:39:36 (Codemgr)
 *   SunPro Code Manager data about conflicts, renames, etc...
 *   Name history : 2 1 request/HttpRequest.java
 *   Name history : 1 0 util/http/HttpRequest.java
 *
 * 1.1 99/09/15-14:39:35 (cstevens)
 *   date and time created 99/09/15 14:39:35 by cstevens
 *
 */

package sunlabs.brazil.util.http;

import sunlabs.brazil.server.Server;

import sunlabs.brazil.util.SocketFactory;
import sunlabs.brazil.util.regexp.Regexp;

import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PushbackInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.Properties;
import java.util.Vector;
import java.util.StringTokenizer;

/**
 * Sends an HTTP request to some target host and gets the answer back.
 * Similar to the URLConnection class.
 * 
 * Caches connections to hosts, and reuses them if possible.  Talks
 * HTTP/1.1 to the hosts, in order to keep alive connections as much
 * as possible.
 * 

 * The sequence of events for using an HttpRequest is similar
 * to how URLConnection is used:
 * 

 *  A new HttpRequest object is constructed.
 * 
 The setup parameters are modified:
 *     
 *      {@link #setMethod setMethod}
 *     
 {@link #setRequestHeader setRequestHeader}
 *     
 {@link #getOutputStream getOutputStream}
 *     
 * 
 The host (or proxy) is contacted and the HTTP request is issued:
 *     
 *      {@link #connect connect}
 *     
 {@link #getInputStream getInputStream}
 *     
 * 
 The response headers and body are examined:
 *     
 *      {@link #getResponseCode getResponseCode}
 *     
 {@link #getResponseHeader getResponseHeader}
 *     
 {@link #getContentLength getContentLength}
 *     
 * 
 The connection is closed:
 *     
 *      {@link #close close}
 *     
 * 
 * 
 * In the common case, all the setup parameters are initialized to sensible
 * values and won't need to be modified.  Most users will only need to
 * construct a new HttpRequest object and then call
 * getInputStream to read the contents.  The rest of the
 * member variables and methods are only needed for advanced behavior.
 * 

 * The HttpRequest class is intended to be a replacement for the
 * URLConnection class.  It operates at a lower level and makes
 * fewer decisions on behavior.  Some differences between the
 * HttpRequest class and the URLConnection class
 * follow: 

 *  there are no undocumented global variables (specified in
 *	System.getProperties) that modify the behavior of
 *	HttpRequest.
 * 
 HttpRequest does not automatically follow redirects.
 * 
 HttpRequest does not turn HTTP responses with a status
 *	code other than "200 OK" into IOExceptions.  Sometimes
 *	it may be necessary and even quite useful to examine the results of
 *	an "unsuccessful" HTTP request.
 * 
 HttpRequest issues HTTP/1.1 requests and handles
 *	HTTP/0.9, HTTP/1.0, and HTTP/1.1 responses.
 * 
 the URLConnection class leaks open sockets if there is
 *	an error reading the response or if the target does not use
 *	Keep-Alive, and depends upon the garabge collector to close and
 *	release the open socket in these cases, which is unreliable because
 *	it may lead to intermittently running out of sockets if the garbage
 *	collector doesn't run often enough.
 * 
 If the user doesn't read all the data from an
 *	URLConnection, there are bugs in its implementation
 *	(as of JDK1.2) that may cause the program to block forever and/or
 *	read an insufficient amount of data before trying to reuse the
 *	underlying socket.
 * 
 * 
 * A number of the fields in the HttpRequest object are public,
 * by design.  Most of the methods mentioned above are convenience methods;
 * the underlying data fields are meant to be accessed for more complicated
 * operations, such as changing the socket factory or accessing the raw HTTP
 * response line.  Note however, that the order of the methods described
 * above is important.  For instance, the user cannot examine the response
 * headers (by calling getResponseHeader or by examining the
 * variable responseHeaders) without first having connected to
 * the host.
 * 

 * However, if the user wants to modify the default behavior, the
 * HttpRequest uses the value of a number of variables and
 * automatically sets some HTTP headers when sending the request.  The user
 * can change these settings up until the time connect is
 * called, as follows: 

 *  variable {@link #version}
 *	
 By default, the HttpRequest issues HTTP/1.1
 *	requests.  The user can set version to change this to
 *	HTTP/1.0.
 * 
 variable {@link #method}
 *	
 If method is null (the default),
 *	the HttpRequest decides what the HTTP request method
 *	should be as follows:  If the user has called
 *	getOutputStream, then the method will be "POST",
 *	otherwise the method will be "GET".
 * 
 variable {@link #proxyHost}
 *	
 If the proxy host is specified, the HTTP request will be
 *	sent via the specified proxy: 
 *	 connect opens a connection to the proxy.
 *	
 uses the "Proxy-Connection" header to keep alive the connection.
 *	
 sends a fully qualified URL in the request line, for example
 *	    "http://www.foo.com/index.html".  The fully qualified URL
 *	    tells the proxy to forward the request to the specified host.
 *      
 *	Otherwise, the HTTP request will go directly to the host: 
 *	 connect opens a connection to the remote host.
 *	
 uses the "Connection" header to keep alive the connection.
 *	
 sends a host-relative URL in the request line, for example
 *	    "/index.html".  The relative URL is derived from the fully
 *	    qualified URL used to construct this HttpRequest.
 *	
 * 
 header "Connection" or "Proxy-Connection"
 *	
 The HttpRequest sets the appropriate connection
 *	header to "Keep-Alive" to keep alive the connection to the host or
 *	proxy (respectively).  By setting the appropriate connection header,
 *	the user can control whether the HttpRequest tries to
 *	use Keep-Alives.
 * 
 header "Host"
 *	
 The HTTP/1.1 protocol requires that the "Host" header be set
 *	to the name of the machine being contacted.  By default, this is
 *	derived from the URL used to construct the HttpRequest,
 *	and is set automatically if the user does not set it.
 * 
 header "Content-Length"
 *	
 If the user calls getOutputStream and writes some
 *	data to it, the "Content-Length" header will be set to the amount of
 *	data that has been written at the time that connect
 *	is called.
 * 
 * 
 * Once all data has been read from the remote host, the underlying socket
 * may be automatically recycled and used again for subsequent requests to
 * the same remote host.  If the user is not planning on reading all the data
 * from the remote host, the user should call close to release
 * the socket.  Although it happens under the covers, the user should be
 * aware that if an IOException occurs or once data has been read normally
 * from the remote host, close is called automatically.  This
 * is to ensure that the minimal number of sockets are left open at any time.
 * 
 * The input stream that getInputStream provides automatically
 * hides whether the remote host is providing HTTP/1.1 "chunked" encoding or
 * regular streaming data.  The user can simply read until reaching the
 * end of the input stream, which signifies that all the available data from
 * this request has been read.  If reading from a "chunked" source, the
 * data is automatically de-chunked as it is presented to the user.  Currently,
 * no access is provided to the underlying raw input stream.
 *
 * @author      Colin Stevens ([email protected])
 * @version		2.7
 */
public class HttpRequest
{
    /**
     * Timeout (in msec) to drain an input stream that has been closed before
     * the entire HTTP response has been read.
     * 

     * If the user closes the HttpRequest before reading all of
     * the data, but the remote host has agreed to keep this socket alive, we
     * need to read and discard the rest of the response before issuing a new
     * request.  If it takes longer than DRAIN_TIMEOUT to read
     * and discard the data, we will just forcefully close the connection to
     * the remote host rather than waiting to read any more.
     * 

     * Default value is 10000.
     */
    public static int DRAIN_TIMEOUT = 10000;

    /**
     * Maximum length of a line in the HTTP response headers (sanity check).
     * 

     * If an HTTP response line is longer than this, the response is
     * considered to be malformed.
     * 

     * Default value is 1000.
     */
    public static int LINE_LIMIT = 1000;

    /**
     * The default HTTP version string to send to the remote host when
     * issuing requests.
     * 

     * The default value can be overridden on a per-request basis by
     * setting the version instance variable.
     * 

     * Default value is "HTTP/1.1".
     *
     * @see #version
     */
    public static String defaultHTTPVersion = "HTTP/1.1";

    /**
     * The default proxy host for HTTP requests.  If non-null,
     * then all new HTTP requests will be sent via this proxy.  If
     * null, then all new HTTP requests are sent directly to
     * the host specified when the HttpRequest object was
     * constructed.
     * 

     * The default value can be overridden on a per-request basis by
     * calling the setProxy method or setting the
     * proxyHost instance variables.
     * 

     * Default value is null.
     *
     * @see #defaultProxyPort
     * @see #proxyHost
     * @see #setProxy
     */
    public static String defaultProxyHost = null;

    /**
     * The default proxy port for HTTP requests.  
     * 

     * Default value is 80.
     *
     * @see #defaultProxyHost
     * @see #proxyPort
     */
    public static int defaultProxyPort = 80;

    /**
     * The factory for constructing new Sockets objects used to connect to
     * remote hosts when issuing HTTP requests.  The user can set this
     * to provide a new type of socket, such as SSL sockets.
     * 

     * Default value is null, which signifies plain sockets.
     */
    public static SocketFactory socketFactory = null;

    /**
     * The cache of idle sockets.  Once a request has been handled, the
     * now-idle socket can be remembered and reused later if another HTTP
     * request is made to the same remote host.  
     */
    public static HttpSocketPool pool = new SimpleHttpSocketPool();

    /**
     * The URL used to construct this HttpRequest.
     */
    public URL url;

    /**
     * The host extracted from the URL used to construct this
     * HttpRequest.
     *
     * @see #url
     */
    public String host;

    /**
     * The port extracted from the URL used to construct this
     * HttpRequest.
     *
     * @see #url
     */
    public int port;

    /**
     * If non-null, sends this HTTP request via the specified
     * proxy host and port.
     * 

     * Initialized from defaultProxyHost, but may be changed
     * by the user at any time up until the HTTP request is actually sent.
     * 
     * @see #defaultProxyHost
     * @see #proxyPort
     * @see #setProxy
     * @see #connect
     */
    public String proxyHost;

    /**
     * The proxy port.
     *
     * @see #proxyHost
     */
    public int proxyPort;

    protected boolean connected;
    boolean eof;
    HttpSocket hs;

    /**
     * The HTTP method, such as "GET", "POST", or "HEAD".
     * 

     * May be set by the user at any time up until the HTTP request is
     * actually sent.
     */
    public String method;

    /**
     * The HTTP version string.
     * 

     * Initialized from defaultHTTPVersion, but may be changed
     * by the user at any time up until the HTTP request is actually sent.
     */
    public String version;

    /**
     * The headers for the HTTP request.  All of these headers will be sent
     * when the connection is actually made.
     */
    public MimeHeaders requestHeaders;

    /**
     * setting this to "true" causing all http headers to be printed
     * on the standard error stream;  useful for debugging client/server
     * interactions.
     */
    public static boolean displayAllHeaders = false;

    ByteArrayOutputStream postData;

    String uri;
    String connectionHeader;

    HttpInputStream in;
    InputStream under;

    /**
     * The status line from the HTTP response.  This field is not valid until
     * after connect has been called and the HTTP response has
     * been read.
     */
    public String status;

    /**
     * The headers that were present in the HTTP response.  This field is
     * not valid until after connect has been called and the
     * HTTP response has been read.
     */
    public MimeHeaders responseHeaders;

    /*
     * Cached value of keep-alive from the response headers.
     */
    boolean keepAlive;

    /**
     * An artifact of HTTP/1.1 chunked encoding.  At the end of an HTTP/1.1
     * chunked response, there may be more MimeHeaders.  It is only possible
     * to access these MimeHeaders after all the data from the input stream
     * returned by getInputStream has been read.  At that point,
     * this field will automatically be initialized to the set of any headers
     * that were found.  If not reading from an HTTP/1.1 chunked source, then
     * this field is irrelevant and will remain null.
     */
    public MimeHeaders responseTrailers;

    /**
     * Creates a new HttpRequest object that will send an
     * HTTP request to fetch the resource represented by the URL.
     * 

     * The host specified by the URL is not contacted at this time.
     *
     * @param	url
     *		A fully qualified "http:" URL.
     *
     * @throws	IllegalArgumentException
     *		if url is not an "http:" URL.
     */
    public
    HttpRequest(URL url)
    {
	if (url.getProtocol().equals("http") == false) {
	    throw new IllegalArgumentException(url.toString());
	}

	this.url = url;

	this.host = url.getHost();
	this.port = url.getPort();
	if (this.port < 0) {
	    this.port = 80;
	}
	this.proxyHost = defaultProxyHost;
	this.proxyPort = defaultProxyPort;

	this.version = defaultHTTPVersion;
	this.requestHeaders = new MimeHeaders();
	this.responseHeaders = new MimeHeaders();
    }

    /**
     * Creates a new HttpRequest object that will send an
     * HTTP request to fetch the resource represented by the URL.
     * 

     * The host specified by the URL is not contacted at this time.
     *
     * @param	url
     *		A string representing a fully qualified "http:" URL.
     *
     * @throws	IllegalArgumentException
     *		if url is not a well-formed "http:" URL.
     */
    public
    HttpRequest(String url)
    {
	this(toURL(url));
    }

    /*
     * Artifact of Java: cannot implement HttpRequest(String) as follows
     * because this(new URL(url)) must be first line in
     * constructor; it can't be inside of try statement:
     *
     * public HttpRequest(String url) {
     *     try {
     *         this(new URL(url));
     *     } catch (MalformedURLException e) {
     *         throw new IllegalArgumentException(url);
     *     }
     * }
     */
    private static URL
    toURL(String url)
    {
	try {
	    return new URL(url);
	} catch (MalformedURLException e) {
	    throw new IllegalArgumentException(url);
	}
    }

    /**
     * Sets the HTTP method to the specified value.  Some of the normal
     * HTTP methods are "GET", "POST", "HEAD", "PUT", "DELETE", but the
     * user can set the method to any value desired.
     * 

     * If this method is called, it must be called before connect
     * is called.  Otherwise it will have no effect.
     *
     * @param	method
     *		The string for the HTTP method, or null to
     *		allow this HttpRequest to pick the method for
     *		itself.
     */
    public void
    setMethod(String method)
    {
	this.method = method;
    }

    /**
     * Sets the proxy for this request.  The HTTP proxy request will be sent
     * to the specified proxy host.
     * 

     * If this method is called, it must be called before connect
     * is called.  Otherwise it will have no effect.
     *
     * @param	proxyHost
     *		The proxy that will handle the request, or null
     *		to not use a proxy.
     *
     * @param	proxyPort
     *		The port on the proxy, for the proxy request.  Ignored if
     *		proxyHost is null.
     */
    public void
    setProxy(String proxyHost, int proxyPort)
    {
	this.proxyHost = proxyHost;
	this.proxyPort = proxyPort;
    }

    /**
     * Sets a request header in the HTTP request that will be issued.  In
     * order to do fancier things like appending a value to an existing
     * request header, the user may directly access the 
     * requestHeaders variable.
     * 

     * If this method is called, it must be called before connect
     * is called.  Otherwise it will have no effect.
     *
     * @param	key
     *		The header name.  
     *
     * @param	value
     *		The value for the request header.
     *
     * @see	#requestHeaders
     */
    public void
    setRequestHeader(String key, String value)
    {
	requestHeaders.put(key, value);
    }

    /**
     * Gets an output stream that can be used for uploading data to the
     * host.  
     * 

     * If this method is called, it must be called before connect
     * is called.  Otherwise it will have no effect.
     * 

     * Currently the implementation is not as good as it could be.  The
     * user should avoid uploading huge amounts of data, for some definition
     * of huge.
     */
    public OutputStream
    getOutputStream()
	throws IOException
    {
	if (postData == null) {
	    postData = new ByteArrayOutputStream();
	}
	return postData;
    }

    /**
     * Connect to the target host (or proxy), send the request, and read the
     * response headers.  Any setup routines must be called before the call
     * to this method, and routines to examine the result must be called after
     * this method.
     * 

     *
     * @throws	UnknownHostException
     *		if the target host (or proxy) could not be contacted.
     *
     * @throws	IOException
     *		if there is a problem writing the HTTP request or reading
     *		the HTTP response headers.
     */
    public void
    connect()
	throws UnknownHostException, IOException
    {
	if (connected) {
	    return;
	}
	connected = true;
	
	prepareHeaders();
	openSocket(true);
	try {
	    try {
		sendRequest();
		readStatusLine();
	    } catch (IOException e) {
		if (hs.firstTime) {
		    throw e;
		}
		closeSocket(false);
		openSocket(false);
		sendRequest();
		readStatusLine();
	    }
	    responseHeaders.read(in);

            if (displayAllHeaders) {
               System.err.println(status);
               responseHeaders.print(System.err);
               System.err.println();
	    }
	} catch (IOException e) {
	    closeSocket(false);
	    throw e;
	}
	parseResponse();
    }

    void
    prepareHeaders()
    {
	if (postData != null) {
	    if (method == null) {
		method = "POST";
	    }
	    setRequestHeader("Content-Length",
		    Integer.toString(postData.size()));
	}
	if (method == null) {
	    method = "GET";
	}

	if (proxyHost == null) {
	    uri = url.getFile();
	    connectionHeader = "Connection";
	} else {
	    uri = url.toString();
	    connectionHeader = "Proxy-Connection";
	}

	requestHeaders.putIfNotPresent(connectionHeader, "Keep-Alive");
	requestHeaders.putIfNotPresent("Host", host + ":" + port);
    }

    void
    openSocket(boolean reuse)
	throws IOException
    {
	String targetHost;
	int targetPort;

	if (proxyHost != null) {
	    targetHost = proxyHost;
	    targetPort = proxyPort;
	} else {
	    targetHost = host;
	    targetPort = port;
	}

	hs = pool.get(targetHost, targetPort, reuse);
	under = hs.in;
	in = new HttpInputStream(under);
    }

    void
    closeSocket(boolean reuse)
    {
	if (hs != null) {
	    HttpSocket tmp = hs;
	    hs = null;

	    keepAlive &= reuse;

	    /*
	     * Before we can reuse a keep-alive socket, we must first drain
	     * the input stream if there is any data left in it.  The soft
	     * 'eof' flag will have been set if we have already read all the
	     * data that we're supposed to read and the socket is ready to be
	     * recycled now.
	     */

	    if (keepAlive && !eof) {
		new BackgroundCloser(tmp, under, DRAIN_TIMEOUT).start();
	    } else {
		pool.close(tmp, keepAlive);
	    }
	}
    }

    class BackgroundCloser extends Thread 
    {
	HttpSocket hs;
	InputStream in;
	int timeout;
	Killer killer;

	BackgroundCloser(HttpSocket hs, InputStream in, int timeout)
	{
	    this.hs = hs;
	    this.in = in;
	    this.timeout = timeout;
	}

	public void start()
	{
	    killer = new Killer(this);
	    killer.start();
	    super.start();
	}

	public void run()
	{
	    try {
		byte[] buf = new byte[4096];

		while (true) {
		    if (in.read(buf, 0, buf.length) < 0) {
			break;
		    }
		}
	    } catch (IOException e) {
		keepAlive = false;
	    }
	    pool.close(hs, keepAlive);
	    killer.interrupt();
	}
    }

    static class Killer extends Thread
    {
	BackgroundCloser b;
	int timeout;
	
	Killer(BackgroundCloser b)
	{
	    this.b = b;
	}
	public void run()
	{
	    try {
		Thread.sleep(b.timeout);
		b.interrupt();
	    } catch (Exception e) {}
	}
    }

    void
    sendRequest()
	throws IOException
    {
        if (displayAllHeaders) {
            System.err.print(method + " " + uri + " " + version + "\r\n");
            requestHeaders.print(System.err);
            System.err.print("\r\n");
        }

	PrintStream p = new PrintStream(hs.out);
	p.print(method + " " + uri + " " + version + "\r\n");
	requestHeaders.print(p);
	p.print("\r\n");

	if (postData != null) {
	    postData.writeTo(p);
	    postData = null;			// Release memory.
	}
	p.flush();
    }

    void
    readStatusLine()
	throws IOException
    {
	while (true) {
	    status = in.readLine(LINE_LIMIT);
	    if (status == null) {
		throw new EOFException();
	    }
	    if (status.startsWith("HTTP/1.1 100")
                    || status.startsWith("HTTP/1.0 100")) {
		/*
		 * Ignore the "100 Continue" response that some HTTP/1.1
		 * servers send.  We can't depend upon it being sent, because
		 * we might be talking to an HTTP/1.0 server or an HTTP/1.1
		 * server that doesn't send the "100 Continue" response, so
		 * we can't use the response for any decision making, such as
		 * not sending the post data.
                 *
                 * www.u-net.com sends "HTTP/1.0 100 Continue"!
		 */

		while (true) {
		    status = in.readLine();
		    if ((status == null) || (status.length() == 0)) {
			break;
		    }
		}
	    } else if (status.startsWith("HTTP/1.")) {
		return;
	    } else if (status.length() == 0) {
//		System.out.println(this + ": got a blank line");
	    } else if (status.length() == LINE_LIMIT) {
		throw new IOException("malformed server response");
	    } else if (hs.firstTime) {
		/*
		 * Some servers don't send back any headers, even if they
		 * accept a HTTP/1.0 or greater request!  We have to push
		 * back this line, so it can be re-read as the body.
		 * Since this is coming back with no headers, the content
		 * length will be unknown and so the socket will be closed.
		 */

// System.out.println("receiving HTTP/0.9 response");
		PushbackInputStream pin = new PushbackInputStream(hs.in,
			status.length() + 4);

		pin.unread('\n'); 
		pin.unread('\r');
		for (int i = status.length(); --i >= 0; ) {
		    pin.unread(status.charAt(i));
		}

		/*
		 * And push back a blank line, so the user thinks it got to
		 * the end of the headers
		 */
		pin.unread('\n');
		pin.unread('\r');

		status = "HTTP/1.0 200 OK";
		hs.in = pin;
		under = pin;
		in = new HttpInputStream(under);
		break;
	    } else {
		/*
		 * If we see funny responses (missing headers, etc.) from a
		 * socket that we've reused, then we probably got out of sync
		 * with the remote host (e.g., didn't read enough from the
		 * last response), and should abort this request.
		 */

		throw new IOException("malformed server response");
	    }
	}
    }

    void
    parseResponse()
    {
	String str;

	str = getResponseHeader(connectionHeader);
	if (str != null) {
	    keepAlive = str.equalsIgnoreCase("Keep-Alive");
	} else if (status.startsWith("HTTP/1.1")) {
	    keepAlive = true;
	} else {
	    keepAlive = false;
	}

	str = getResponseHeader("Transfer-Encoding");
	if ((str != null) && str.equals("chunked")) {
	    under = new UnchunkingInputStream(this);
	    in = new RecycleInputStream(this, under);
	    return;
	}

	int contentLength = getContentLength();
	if (contentLength < 0) {
	    /*
	     * Some servers leave off the content length for return codes
	     * known to require no content.
	     */

	    if (status.indexOf("304") > 0 || status.indexOf("204") > 0) {
		responseHeaders.put("Content-Length", "0");
		contentLength = 0;
	    }
	}

	if ((contentLength == 0) || method.equals("HEAD")) {
	    under = new NullInputStream();
	    in = new HttpInputStream(under);
	    closeSocket(keepAlive);
	} else if (contentLength > 0) {
	    under = new LimitInputStream(this, contentLength);
	    in = new RecycleInputStream(this, under);
	} else {
	    keepAlive = false;
	    in = new RecycleInputStream(this, under);
	}
    }

    /**
     * Gets an input stream that can be used to read the body of the
     * HTTP response.  Unlike the other convenience methods for accessing
     * the HTTP response, this one automatically connects to the
     * target host if not already connected.
     * 

     * The input stream that getInputStream provides
     * automatically hides the differences between "Content-Length", no
     * "Content-Length", and "chunked" for HTTP/1.0 and HTTP/1.1 responses.
     * In all cases, the user can simply read until reaching the end of the
     * input stream, which signifies that all the available data from this
     * request has been read.  (If reading from a "chunked" source, the data
     * is automatically de-chunked as it is presented to the user.  There is
     * no way to access the raw underlying stream that contains the HTTP/1.1
     * chunking packets.)
     *
     * @throws	IOException
     *		if there is problem connecting to the target.
     *
     * @see	#connect
     */
    public HttpInputStream
    getInputStream()
	throws IOException
    {
	connect();
	return in;
    }

    /**
     * Gracefully closes this HTTP request when user is done with it.
     * 

     * The user can either call this method or close on the
     * input stream obtained from the getInputStream
     * method -- the results are the same.
     * 

     * When all the response data is read from the input stream, the
     * input stream is automatically closed (recycled).  If the user is
     * not going to read all the response data from input stream, the user
     * must call close to
     * release the resources associated with the open request.  Otherwise
     * the program may consume all available sockets, waiting forever for
     * the user to finish reading.
     * 

     * Note that the input stream is automatically closed if the input
     * stream throws an exception while reading.
     * 

     * In order to interrupt a pending I/O operation in another thread
     * (for example, to stop a request that is taking too long), the user
     * should call disconnect or interrupt the blocked thread.
     * The user should not call close in this case because
     * close will not interrupt the pending I/O operation.
     * 

     * Closing the request multiple times is allowed.
     * 

     * In order to make sure that open sockets are not left lying around
     * the user should use code similar to the following:
     * 
     * OutputStream out = ...
     * HttpRequest http = new HttpRequest("http://bob.com/index.html");
     * try {
     *     HttpInputStream in = http.getInputStream();
     *     in.copyTo(out);
     * } finally {
     *     // Copying to "out" could have failed.  Close "http" in case
     *     // not all the data has been read from it yet.
     *     http.close();
     * }
     * 
     */

    public void
    close()
    {
	closeSocket(true);
    }

    /**
     * Interrupts this HTTP request.  Can be used to halt an in-progress
     * HTTP request from another thread, by causing it to
     * throw an InterruptedIOException during the connect
     * or while reading from the input stream, depending upon what state
     * this HTTP request is in when it is disconnected.
     *
     * @see	#close
     */
    public void
    disconnect()
    {
	closeSocket(false);
    }

    /**
     * Gets the HTTP response status code.  From responses like:
     *      * HTTP/1.0 200 OK
     * HTTP/1.0 401 Unauthorized
     * 
     * this method extracts the integers 200 and 401
     * respectively.  Returns -1 if the response status code
     * was malformed.
     * 
     * If this method is called, it must be called after connect
     * has been called.  Otherwise the information is not yet available and
     * this method will return -1.
     * 

     * For advanced features, the user can directly access the
     * status variable.
     *
     * @return	The integer status code from the HTTP response.
     *
     * @see	#connect
     * @see	#status
     */
    public int
    getResponseCode()
    {
	try {
	    int start = status.indexOf(' ') + 1;
	    int end = status.indexOf(' ', start + 1);
	    if (end < 0) {
		/*
		 * Sometimes the status line has the status code but no
		 * status phrase.
		 */
		end = status.length();
	    }
	    return Integer.parseInt(status.substring(start, end));
	} catch (Exception e) {
	    return -1;
	}
    }

    /**
     * Gets the value associated with the given case-insensitive header name
     * from the HTTP response.
     * 

     * If this method is called, it must be called after connect
     * has been called.  Otherwise the information is not available and
     * this method will return null.
     * 

     * For advanced features, such as enumerating over all response headers,
     * the user should directly access the responseHeaders
     * variable.
     *
     * @param	key
     *		The case-insensitive name of the response header.
     *
     * @return	The value associated with the given name, or null
     *		if there is no such header in the response.
     *
     * @see	#connect
     * @see	#responseHeaders
     */
    public String
    getResponseHeader(String key)
    {
	return responseHeaders.get(key);
    }

    /**
     * Convenience method to get the "Content-Length" header from the
     * HTTP response.
     * 

     * If this method is called, it must be called after connect
     * has been called.  Otherwise the information is not available and
     * this method will return -1.
     *
     * @return	The content length specified in the response headers, or
     *		-1 if the length was not specified or malformed
     *		(not a number).
     *
     * @see	#connect
     * @see	#getResponseHeader
     */
    public int
    getContentLength()
    {
	try {
	    return Integer.parseInt(responseHeaders.get("Content-Length"));
	} catch (Exception e) {
	    return -1;
	}
    }

    /**
     * Removes all the point-to-point (hop-by-hop) headers from
     * the given mime headers.
     *
     * @param	headers
     *		The mime headers to be modified.
     *
     * @param	response
     *		true to remove the point-to-point response
     *		headers, false to remove the point-to-point
     *		request headers.
     *
     * @see RFC 2068
     */
    public static void
    removePointToPointHeaders(MimeHeaders headers, boolean response)
    {
	headers.remove("Connection");
	headers.remove("Proxy-Connection");
	headers.remove("Keep-Alive");
	headers.remove("Upgrade");

	if (response == false) {
	    headers.remove("Proxy-Authorization");
	} else {
	    headers.remove("Proxy-Authenticate");
	    headers.remove("Public");
	    headers.remove("Transfer-Encoding");
	}
    }

    /**
     * Convenience method for adding request headers by looking them
     * up in a properties object.
     * @param tokens	a white space delimited set of tokens that refer
     *			to headers that will be added to the HTTP request.
     * @param props	Keys of the form [token].name and
     *			[token].value are used to lookup additional
     *			HTTP headers to be added to the request.
     * @return		The number of headers added to the request
     * @see #setRequestHeader
     */

    public int
    addHeaders(String tokens, Properties props) {
	int count = 0;
	StringTokenizer st = new StringTokenizer(tokens);
	while (st.hasMoreTokens()) {
	    String token = st.nextToken();
	    String name = props.getProperty(token + ".name");
	    String value = props.getProperty(token + ".value");
	    if (name!=null && value!=null) {
		setRequestHeader(name, value);
		count++;
	    }
	}
	return count;
    }

    /**
     * Get the content as a string.  Uses the character
     * encoding specified in the HTTP headers if available.
     * Otherwise the supplied encoding is used, or (if
     * encoding is null), the platform default encoding.
     * @param encoding		The ISO character encoding to use, if
     *				the encoding can't be determined by
     *				context.
     * @return			The content as a string.
     */

    public String getContent(String encoding)
		throws IOException, UnsupportedEncodingException {
	HttpInputStream in = getInputStream();
	ByteArrayOutputStream out = new ByteArrayOutputStream();
	in.copyTo(out);
	in.close();

	String enc = getEncoding();
	if (enc == null) {
	    enc = encoding;
	}
	String result;
	if (enc != null) {
	   result = out.toString(enc);
	} else {
	   result = out.toString();
	}
	out.close();
	return result;
    }

    /**
     * Return the content as a string.
     */

    public String getContent()
		throws IOException, UnsupportedEncodingException {
	    return getContent(null);
    }

    /**
     * Get the ISO character encoding (if any) associated with this
     * text stream,
     * or "null" if none found.  Response headers must be available.
     */

    Regexp encExp = new Regexp("^text/.*;[ \t]*charset=([^ \t;]*)",true);
    public String getEncoding() {
	String type = getResponseHeader("content-type");
	if (type == null) {
	    return null;
	} else {
	    type = type.trim();
	}
	if (displayAllHeaders) {
	    System.err.println("Looking for encoding in: " + type);
	}
	String subs[] = new String[2];
	if (type != null && encExp.match(type, subs)) {
	    return subs[1];
	} else {
	    return null;
	}
    }

    /**
     * Grab http document(s) and save them in the filesystem.
     * This is a simple batch HTTP url fetcher.  Usage:
     * 
     * java ... sunlabs.brazil.request.HttpRequest [-v(erbose)] [-h(headers)] [-p] url...
     * 
     * 
     * -v
Verbose.  Print the target URL and destination file on stderr
     * 
-h
Print all the HTTP headers on stderr
     * 
-phttp://proxyhost:port
The following url's are to be fetched
     * via a proxy.
     * 
     * The options and url's may be given in any order.  Use "-p" by itself
     * to disable the proxy for all following requests.
     * 
     * There are many limitations: only HTTP GET requests are supported, the
     * output filename is derived autmatically from the URL and can't be 
     * overridden, if a destination file already exists, it is overwritten.
     */

    public static void
    main(String[] args) throws Exception {
	String proxyHost = null;
	int proxyPort = 80;
	boolean isVerbose = false;

	if (args.length == 0) {
	    System.err.println("Usage: [-v(erbose) -h(headers) -p] url...");
	    System.exit(1);
	}
	for (int i=0; i 7) {
			    URL url = new URL(arg.substring(1));
			    proxyHost = url.getHost();
			    proxyPort = url.getPort();
			    if (proxyPort < 0) {
				proxyPort = 80;
			    }
			} else {
			    proxyHost = null;
			}
			break;
		    default:
			System.err.println("Invalid argument, ignored: -" +
			    arg);
		}
		continue;
	    }
	    try {
		HttpRequest target = new HttpRequest(args[i]);
		String name = url2file(args[i]);
		if (isVerbose) {
		    System.err.println("Fetching (" + args[i] + ") to (" +
			name + ")");
		}
		target.setProxy(proxyHost, proxyPort);
		HttpInputStream in = target.getInputStream();
		FileOutputStream out = new FileOutputStream(name);
		in.copyTo(out);
		in.close();
		out.close();
	    } catch (IOException e) {
		System.err.println("Error fetching " + args[i] +
			": " + e.getMessage());
	    }
	}
    }

    /* Invent a url from a file name. */

    static String url2file(String url) throws IOException {
	String path = HttpUtil.extractUrlPath(url);
	if (path==null) {
	    throw new IOException("Invalid url: " + url);
	}
	return path.substring(1).replace('/', '_');
    }
}

class RecycleInputStream
    extends HttpInputStream
{
    HttpRequest target;
    boolean closed;

    public
    RecycleInputStream(HttpRequest target, InputStream in)
    {
	super(in);
	this.target = target;
    }

    /**
     * Reads from the underlying input stream, which might be a raw
     * input stream, a limit input stream, or an unchunking input stream.
     * If we get EOF or there is an error reading, close the socket.
     */
    public int
    read()
	throws IOException
    {
	if (closed) {
	    return -1;
	}
	try {
	    int ch = in.read();
	    if (ch < 0) {
		close(false);
	    }
	    return ch;
	} catch (IOException e) {
	    close(false);
	    throw e;
	}
    }

    public int
    read(byte[] buf, int off, int len)
	throws IOException
    {
	if (closed) {
	    return -1;
	}
	try {
	    int count = in.read(buf, off, len);
	    if (count < 0) {
		close(false);
	    }
	    return count;
	} catch (IOException e) {
	    close(false);
	    throw e;
	}
    }

    private void
    close(boolean reuse)
    {
	if (closed == false) {
	    closed = true;
	    target.closeSocket(reuse);
	}
    }

    public void
    close()
    {
	close(true);
    }
}

class NullInputStream
    extends InputStream
{
    public int
    read()
    {
	return -1;
    }

    public int
    read(char[] buf, int off, int len)
    {
	return -1;
    }
}

class LimitInputStream
    extends HttpInputStream
{
    HttpRequest target;
    int limit;

    public
    LimitInputStream(HttpRequest target, int limit)
    {
	super(target.hs.in);
	this.target = target;
	this.limit = limit;
    }

    public int
    read()
	throws IOException
    {
	if (limit <= 0) {
	    return -1;
	}
	
	int ch = in.read();
	if ((ch >= 0) && (--limit <= 0)) {
	    target.eof = true;
	    target.closeSocket(true);
	}
	return ch;
    }

    public int
    read(byte[] buf, int off, int len)
	throws IOException
    {
	if (limit <= 0) {
	    return -1;
	}

	len = Math.min(len, limit);
	int count = in.read(buf, off, len);
	if (count < 0) {
	    limit = 0;
	    return -1;
	}
	limit -= count;
	if (limit <= 0) {
	    target.eof = true;
	    target.closeSocket(true);
	}
	return count;
    }
}

class UnchunkingInputStream
    extends HttpInputStream
{
    HttpRequest target;
    boolean eof;
    int bytesLeft;

    public
    UnchunkingInputStream(HttpRequest target)
    {
	super(target.in);
	this.target = target;
    }

    public int
    read()
	throws IOException
    {
	if ((bytesLeft <= 0) && (getChunkSize() == false)) {
	    return -1;
	}
	bytesLeft--;
	return in.read();
    }

    public int
    read(byte[] buf, int off, int len)
	throws IOException
    {
	int total = 0;
	while (true) {
	    if ((bytesLeft <= 0) && (getChunkSize() == false)) {
		break;
	    }
	    int count = super.read(buf, off, Math.min(bytesLeft, len));
	    total += count;
	    off += count;
	    bytesLeft -= count;
	    len -= count;

	    if ((len <= 0) || (available() == 0)) {
		break;
	    }
	}

	return (total == 0) ? -1 : total;
    }

    private boolean
    getChunkSize()
	throws IOException
    {
	if (eof) {
	    return false;
	}

	/*
	 * Although HTTP/1.1 chunking spec says that there is one "\r\n"
	 * between chunks, some servers (for example, maps.yahoo.com) 
	 * send more than one blank line between chunks.  So, read and skip
	 * all the blank lines seen between chunks.
	 */

	String line;
	do {
	    // Sanity check: limit chars when expecting a chunk size.

	    line = ((HttpInputStream) in).readLine(HttpRequest.LINE_LIMIT);
	} while ((line != null) && (line.length() == 0));

	try {
	    bytesLeft = Integer.parseInt(line.trim(), 16);
	} catch (Exception e) {
	    throw new IOException("malformed chunk");
	}
	if (bytesLeft == 0) {
	    eof = true;
	    target.responseTrailers = new MimeHeaders((HttpInputStream) in);
	    target.eof = true;
	    target.closeSocket(true);
	    return false;
	}

	return true;
    }
}

class SimpleHttpSocketPool
    implements Runnable, HttpSocketPool
{
    public int maxIdle = 10;	// size of the socket pool
    public int maxAge  = 20000;	// max age of idle socket (mseconds)
    public int reapInterval=10000;// interval (in msec) to run reaper thread

    // pool of idle connections
    Vector idle = new Vector();

    /**
     * Start the background thread that removes old connections
     */

    Thread reaper;

    public
    SimpleHttpSocketPool()
    {
	reaper = new Thread(this);
	reaper.setDaemon(true);
	reaper.start();
    }

    /**
     * Get a potentially "pooled" target object.
     * Call this instead of the constructor to use the pool.
     * @param host	the target content server (or web proxy)
     * @param port	target web server port
     * @param proxy	if true, use telnet passthru mode.
     */

    public HttpSocket
    get(String host, int port, boolean reuse)
	throws IOException, UnknownHostException
    {
	host = host.toLowerCase();

	if (reuse) {
	    synchronized (idle) {
		/*
		 * Start at end to reuse the most recent socket, which is
		 * hopefully the most likely to still be alive.
		 */

		int i = idle.size();
		while (--i >= 0) {
		    HttpSocket hs = (HttpSocket) idle.elementAt(i);
		    if (hs.host.equals(host) && (hs.port == port)) {
			idle.removeElementAt(i);
/*System.out.println("reusing:" + hs);*/
			hs.timesUsed++;
			return hs;
		    }
		}
	    }
	}

	HttpSocket hs = new HttpSocket(host, port);

/*System.out.println("new:" + hs);*/

	return hs;
    }

    public void
    close(HttpSocket hs, boolean reuse)
    {
	if (reuse) {
/*System.out.println("recycling: " + hs);*/
	    synchronized (idle) {
		if (idle.size() >= maxIdle) {
		    HttpSocket bump = (HttpSocket) idle.firstElement();
		    idle.removeElementAt(0);
		    bump.close();
		}
		hs.firstTime = false;
		hs.lastUsed = System.currentTimeMillis();
		idle.addElement(hs);
	    }
	} else {
/*System.out.println("closing: " + hs);*/
	    hs.close();
	}
    }

    int lastSize = -1;

    public void
    run()
    {
	while(true) {
	    try {
		Thread.sleep(reapInterval);
	    } catch (InterruptedException e) {
		break;
	    }

	    /*
	     * expire after age seconds
	     */

	    long expired = System.currentTimeMillis() - maxAge;
	    boolean any = false;
	    synchronized (idle) {
		while (idle.size() > 0) {
		    HttpSocket hs = (HttpSocket) idle.firstElement();
		    if (hs.lastUsed >= expired) {
			break;
		    }
		    any = true;
		    idle.removeElementAt(0);
		    hs.close();
		}
	    }

if (false) {
  if (idle.size() > 0 || lastSize != 0) {
    long now = System.currentTimeMillis();
    System.out.print("socket cache:");
    for (int i = 0; i < idle.size(); i++) {
	HttpSocket hs = (HttpSocket) idle.elementAt(i);
	System.out.print(" (" + hs + " " + (now - hs.lastUsed)/1000 + ")");
    }
    System.out.println();
    lastSize = idle.size();
  }
}
	}
    }

    public String
    toString()
    {
	if (idle == null) {
	    return "(null)";
	}
	StringBuffer sb = new StringBuffer();
	for (int i = 0; i < idle.size(); i++) {
	    HttpSocket hs = (HttpSocket) idle.elementAt(i);
	    sb.append(hs.toString() + ", ");
	}
	return sb.toString();
    }
}