sunlabs.brazil.util.http.HttpRequest Maven / Gradle / Ivy
Show all versions of sunlabs.brazil Show documentation
/*
* HttpRequest.java
*
* Brazil project web application toolkit,
* export version: 2.3
* Copyright (c) 1999-2007 Sun Microsystems, Inc.
*
* Sun Public License Notice
*
* The contents of this file are subject to the Sun Public License Version
* 1.0 (the "License"). You may not use this file except in compliance with
* the License. A copy of the License is included as the file "license.terms",
* and also available at http://www.sun.com/
*
* The Original Code is from:
* Brazil project web application toolkit release 2.3.
* The Initial Developer of the Original Code is: cstevens.
* Portions created by cstevens are Copyright (C) Sun Microsystems, Inc.
* All Rights Reserved.
*
* Contributor(s): cstevens, drach, suhler.
*
* Version: 2.7
* Created by cstevens on 99/09/15
* Last modified by suhler on 07/03/26 13:53:18
*
* Version Histories:
*
* 2.7 07/03/26-13:53:18 (suhler)
* doc updates
*
* 2.6 07/03/26-13:44:17 (suhler)
* add sample main() to act as a simple "wget"
*
* 2.5 04/11/30-15:19:40 (suhler)
* fixed sccs version string
*
* 2.4 03/08/01-16:18:01 (suhler)
* fixes for javadoc
*
* 2.3 03/05/12-16:26:13 (suhler)
* Merged changes between child workspace "/home/suhler/brazil/naws" and
* parent workspace "/net/mack.eng/export/ws/brazil/naws".
*
* 1.20.1.1 03/04/17-10:03:47 (suhler)
* no changes made
*
* 2.2 03/04/15-17:29:08 (drach)
* Add protected modifier to variable connected so subclasses outside
* package can access it.
*
* 2.1 02/10/01-16:36:54 (suhler)
* version change
*
* 1.20 02/07/23-08:31:15 (suhler)
* check for no content type
*
* 1.19 02/07/11-15:37:33 (suhler)
* add encoding diagnostics
*
* 1.18 02/07/11-15:03:40 (suhler)
* add getContent() and getEncoding() convenience methods for dealing
* with charset encoding
*
* 1.17 02/04/29-17:04:41 (suhler)
* added public static boolean displayAllHeaders to turn on
* http header debugging during development.
*
* 1.16 02/04/24-13:36:21 (suhler)
* doc lint
*
* 1.15 02/02/26-14:42:14 (suhler)
* doc lint
*
* 1.14 02/02/26-14:32:38 (suhler)
* typo
*
* 1.13 02/02/26-14:25:52 (suhler)
* added "addHeaders" convenience method for adding http headers from
* properties objects
*
* 1.12 00/07/11-11:23:47 (cstevens)
* Some servers send "HTTP/1.0 100 Continue" in response to an HTTP/1.1 POST!
*
* 1.11 00/07/06-15:03:10 (cstevens)
* Although HTTP/1.1 chunking spec says that there is one "\r\n" between
* chunks, some servers (for example, maps.yahoo.com) send more than one blank
* line between chunks. So, read and skip all the blank lines seen between
* chunks.
*
* 1.10 99/11/30-09:48:14 (suhler)
* remove diagnostics
*
* 1.9 99/11/09-20:23:23 (cstevens)
* bugs revealed by writing tests.
*
* 1.8 99/10/26-18:56:38 (cstevens)
* Change MimeHeaders so it uses "put" instead of "set", to be compatible with
* names chosen by Hashtable and StringMap.
*
* 1.7 99/10/14-14:16:31 (cstevens)
* merge issues.
*
* 1.6 99/10/14-13:19:18 (cstevens)
* Merged changes between child workspace "/home/cstevens/ws/brazil/naws" and
* parent workspace "/export/ws/brazil/naws".
*
* 1.4.1.2 99/10/14-13:01:06 (cstevens)
* Documentation.
* Fold TimedThread and the default HttpSocketPool into this file, since they are
* not used outside of this file (at this time).
*
* 1.5 99/10/11-12:38:38 (suhler)
* Merged changes between child workspace "/home/suhler/brazil/naws" and
* parent workspace "/net/mack.eng/export/ws/brazil/naws".
*
* 1.4.1.1 99/10/08-16:54:45 (cstevens)
* documentation
* Move logic for removing point-to-point headers into the HttpRequest as a
* static method.
*
* 1.4 99/10/07-13:17:55 (cstevens)
* Documentation for HttpRequest (in progress).
*
* 1.3.1.1 99/10/06-12:31:57 (suhler)
* comment out debugging
*
* 1.3 99/09/15-15:57:16 (cstevens)
* debugging
*
* 1.2 99/09/15-14:52:02 (cstevens)
* import *;
*
* 1.2 99/09/15-14:39:36 (Codemgr)
* SunPro Code Manager data about conflicts, renames, etc...
* Name history : 2 1 request/HttpRequest.java
* Name history : 1 0 util/http/HttpRequest.java
*
* 1.1 99/09/15-14:39:35 (cstevens)
* date and time created 99/09/15 14:39:35 by cstevens
*
*/
package sunlabs.brazil.util.http;
import sunlabs.brazil.server.Server;
import sunlabs.brazil.util.SocketFactory;
import sunlabs.brazil.util.regexp.Regexp;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.PushbackInputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.Properties;
import java.util.Vector;
import java.util.StringTokenizer;
/**
* Sends an HTTP request to some target host and gets the answer back.
* Similar to the URLConnection
class.
*
* Caches connections to hosts, and reuses them if possible. Talks
* HTTP/1.1 to the hosts, in order to keep alive connections as much
* as possible.
*
* The sequence of events for using an HttpRequest
is similar
* to how URLConnection
is used:
*
* - A new
HttpRequest
object is constructed.
* - The setup parameters are modified:
*
* - {@link #setMethod setMethod}
*
- {@link #setRequestHeader setRequestHeader}
*
- {@link #getOutputStream getOutputStream}
*
* - The host (or proxy) is contacted and the HTTP request is issued:
*
* - {@link #connect connect}
*
- {@link #getInputStream getInputStream}
*
* - The response headers and body are examined:
*
* - {@link #getResponseCode getResponseCode}
*
- {@link #getResponseHeader getResponseHeader}
*
- {@link #getContentLength getContentLength}
*
* - The connection is closed:
*
* - {@link #close close}
*
*
*
* In the common case, all the setup parameters are initialized to sensible
* values and won't need to be modified. Most users will only need to
* construct a new HttpRequest
object and then call
* getInputStream
to read the contents. The rest of the
* member variables and methods are only needed for advanced behavior.
*
* The HttpRequest
class is intended to be a replacement for the
* URLConnection
class. It operates at a lower level and makes
* fewer decisions on behavior. Some differences between the
* HttpRequest
class and the URLConnection
class
* follow:
* - there are no undocumented global variables (specified in
*
System.getProperties
) that modify the behavior of
* HttpRequest
.
* -
HttpRequest
does not automatically follow redirects.
* -
HttpRequest
does not turn HTTP responses with a status
* code other than "200 OK" into IOExceptions
. Sometimes
* it may be necessary and even quite useful to examine the results of
* an "unsuccessful" HTTP request.
* -
HttpRequest
issues HTTP/1.1 requests and handles
* HTTP/0.9, HTTP/1.0, and HTTP/1.1 responses.
* - the
URLConnection
class leaks open sockets if there is
* an error reading the response or if the target does not use
* Keep-Alive, and depends upon the garbage collector to close and
* release the open socket in these cases, which is unreliable because
* it may lead to intermittently running out of sockets if the garbage
* collector doesn't run often enough.
* - If the user doesn't read all the data from an
*
URLConnection
, there are bugs in its implementation
* (as of JDK1.2) that may cause the program to block forever and/or
* read an insufficient amount of data before trying to reuse the
* underlying socket.
*
*
* A number of the fields in the HttpRequest
object are public,
* by design. Most of the methods mentioned above are convenience methods;
* the underlying data fields are meant to be accessed for more complicated
* operations, such as changing the socket factory or accessing the raw HTTP
* response line. Note however, that the order of the methods described
* above is important. For instance, the user cannot examine the response
* headers (by calling getResponseHeader
or by examining the
* variable responseHeaders
) without first having connected to
* the host.
*
* However, if the user wants to modify the default behavior, the
* HttpRequest
uses the value of a number of variables and
* automatically sets some HTTP headers when sending the request. The user
* can change these settings up until the time connect
is
* called, as follows:
* - variable {@link #version}
*
- By default, the
HttpRequest
issues HTTP/1.1
* requests. The user can set version
to change this to
* HTTP/1.0.
* - variable {@link #method}
*
- If
method
is null
(the default),
* the HttpRequest
decides what the HTTP request method
* should be as follows: If the user has called
* getOutputStream
, then the method will be "POST",
* otherwise the method will be "GET".
* - variable {@link #proxyHost}
*
- If the proxy host is specified, the HTTP request will be
* sent via the specified proxy:
* -
connect
opens a connection to the proxy.
* - uses the "Proxy-Connection" header to keep alive the connection.
*
- sends a fully qualified URL in the request line, for example
* "http://www.foo.com/index.html". The fully qualified URL
* tells the proxy to forward the request to the specified host.
*
* Otherwise, the HTTP request will go directly to the host:
* -
connect
opens a connection to the remote host.
* - uses the "Connection" header to keep alive the connection.
*
- sends a host-relative URL in the request line, for example
* "/index.html". The relative URL is derived from the fully
* qualified URL used to construct this
HttpRequest
.
*
* - header "Connection" or "Proxy-Connection"
*
- The
HttpRequest
sets the appropriate connection
* header to "Keep-Alive" to keep alive the connection to the host or
* proxy (respectively). By setting the appropriate connection header,
* the user can control whether the HttpRequest
tries to
* use Keep-Alives.
* - header "Host"
*
- The HTTP/1.1 protocol requires that the "Host" header be set
* to the name of the machine being contacted. By default, this is
* derived from the URL used to construct the
HttpRequest
,
* and is set automatically if the user does not set it.
* - header "Content-Length"
*
- If the user calls
getOutputStream
and writes some
* data to it, the "Content-Length" header will be set to the amount of
* data that has been written at the time that connect
* is called.
*
*
* Once all data has been read from the remote host, the underlying socket
* may be automatically recycled and used again for subsequent requests to
* the same remote host. If the user is not planning on reading all the data
* from the remote host, the user should call close
to release
* the socket. Although it happens under the covers, the user should be
* aware that if an IOException occurs or once data has been read normally
* from the remote host, close
is called automatically. This
* is to ensure that the minimal number of sockets are left open at any time.
*
* The input stream that getInputStream
provides automatically
* hides whether the remote host is providing HTTP/1.1 "chunked" encoding or
* regular streaming data. The user can simply read until reaching the
* end of the input stream, which signifies that all the available data from
* this request has been read. If reading from a "chunked" source, the
* data is automatically de-chunked as it is presented to the user. Currently,
* no access is provided to the underlying raw input stream.
*
* @author Colin Stevens ([email protected])
* @version 2.7
*/
public class HttpRequest
{
/**
* Timeout (in msec) to drain an input stream that has been closed before
* the entire HTTP response has been read.
*
* If the user closes the HttpRequest
before reading all of
* the data, but the remote host has agreed to keep this socket alive, we
* need to read and discard the rest of the response before issuing a new
* request. If it takes longer than DRAIN_TIMEOUT
to read
* and discard the data, we will just forcefully close the connection to
* the remote host rather than waiting to read any more.
*
* Default value is 10000.
*/
public static int DRAIN_TIMEOUT = 10000;
/**
* Maximum length of a line in the HTTP response headers (sanity check).
*
* If an HTTP response line is longer than this, the response is
* considered to be malformed.
*
* Default value is 1000.
*/
public static int LINE_LIMIT = 1000;
/**
* The default HTTP version string to send to the remote host when
* issuing requests.
*
* The default value can be overridden on a per-request basis by
* setting the version
instance variable.
*
* Default value is "HTTP/1.1".
*
* @see #version
*/
public static String defaultHTTPVersion = "HTTP/1.1";
/**
* The default proxy host for HTTP requests. If non-null
,
* then all new HTTP requests will be sent via this proxy. If
* null
, then all new HTTP requests are sent directly to
* the host specified when the HttpRequest
object was
* constructed.
*
* The default value can be overridden on a per-request basis by
* calling the setProxy
method or setting the
* proxyHost
instance variables.
*
* Default value is null
.
*
* @see #defaultProxyPort
* @see #proxyHost
* @see #setProxy
*/
public static String defaultProxyHost = null;
/**
* The default proxy port for HTTP requests.
*
* Default value is 80
.
*
* @see #defaultProxyHost
* @see #proxyPort
*/
public static int defaultProxyPort = 80;
/**
* The factory for constructing new Sockets objects used to connect to
* remote hosts when issuing HTTP requests. The user can set this
* to provide a new type of socket, such as SSL sockets.
*
* Default value is null
, which signifies plain sockets.
*/
public static SocketFactory socketFactory = null;
/**
* The cache of idle sockets. Once a request has been handled, the
* now-idle socket can be remembered and reused later if another HTTP
* request is made to the same remote host.
*/
public static HttpSocketPool pool = new SimpleHttpSocketPool();
/**
* The URL used to construct this HttpRequest
.
*/
public URL url;
/**
* The host extracted from the URL used to construct this
* HttpRequest
.
*
* @see #url
*/
public String host;
/**
* The port extracted from the URL used to construct this
* HttpRequest
.
*
* @see #url
*/
public int port;
/**
* If non-null
, sends this HTTP request via the specified
* proxy host and port.
*
* Initialized from defaultProxyHost
, but may be changed
* by the user at any time up until the HTTP request is actually sent.
*
* @see #defaultProxyHost
* @see #proxyPort
* @see #setProxy
* @see #connect
*/
public String proxyHost;
/**
* The proxy port.
*
* @see #proxyHost
*/
public int proxyPort;
/*
* Set to true at the very start of connect(); once set, further calls to
* connect() return immediately. Note that it is set before the socket is
* opened, so it does not by itself mean that the request succeeded.
*/
protected boolean connected;
/*
* "Soft" end-of-data flag consulted by closeSocket(): when set, the socket
* is handed straight back to the pool instead of being drained first.
* It is not assigned anywhere in the visible part of this file --
* presumably the input stream wrappers set it; TODO confirm.
*/
boolean eof;
/*
* The pooled socket currently used by this request. Assigned by
* openSocket() and reset to null by closeSocket().
*/
HttpSocket hs;
/**
* The HTTP method, such as "GET", "POST", or "HEAD".
*
* May be set by the user at any time up until the HTTP request is
* actually sent.
*/
public String method;
/**
* The HTTP version string.
*
* Initialized from defaultHTTPVersion
, but may be changed
* by the user at any time up until the HTTP request is actually sent.
*/
public String version;
/**
* The headers for the HTTP request. All of these headers will be sent
* when the connection is actually made.
*/
public MimeHeaders requestHeaders;
/**
* Setting this to <code>true</code> causes all HTTP headers to be printed
* on the standard error stream; useful for debugging client/server
* interactions.
*/
public static boolean displayAllHeaders = false;
/*
* Buffer handed out by getOutputStream(). Its size becomes the
* "Content-Length" request header and its contents are written after the
* request headers by sendRequest(). Null if no body was requested.
*/
ByteArrayOutputStream postData;
/*
* The request target placed in the request line. Computed by
* prepareHeaders(): url.getFile() for direct requests, url.toString()
* when going through a proxy.
*/
String uri;
/*
* Name of the keep-alive header in use: "Connection" for direct requests,
* "Proxy-Connection" when a proxy is used. Set by prepareHeaders().
*/
String connectionHeader;
/*
* The stream returned to the user by getInputStream(). First wraps the raw
* socket input (openSocket) and is replaced by parseResponse() once the
* framing of the response body is known.
*/
HttpInputStream in;
/*
* The stream underneath "in". This is also the stream that closeSocket()
* passes to BackgroundCloser for draining.
*/
InputStream under;
/**
* The status line from the HTTP response. This field is not valid until
* after connect
has been called and the HTTP response has
* been read.
*/
public String status;
/**
* The headers that were present in the HTTP response. This field is
* not valid until after connect
has been called and the
* HTTP response has been read.
*/
public MimeHeaders responseHeaders;
/*
* Cached value of keep-alive from the response headers.
*/
boolean keepAlive;
/**
* An artifact of HTTP/1.1 chunked encoding. At the end of an HTTP/1.1
* chunked response, there may be more MimeHeaders. It is only possible
* to access these MimeHeaders after all the data from the input stream
* returned by getInputStream
has been read. At that point,
* this field will automatically be initialized to the set of any headers
* that were found. If not reading from an HTTP/1.1 chunked source, then
* this field is irrelevant and will remain null
.
*/
public MimeHeaders responseTrailers;
/**
* Creates a new HttpRequest
object that will send an
* HTTP request to fetch the resource represented by the URL.
*
* The host specified by the URL is not contacted at this time.
*
* @param url
* A fully qualified "http:" URL.
*
* @throws IllegalArgumentException
* if url
is not an "http:" URL.
*/
public
HttpRequest(URL url)
{
// Only plain "http:" URLs are supported; reject anything else up front.
if (!url.getProtocol().equals("http")) {
throw new IllegalArgumentException(url.toString());
}
// URL.getPort() is negative when the URL names no port: fall back to 80.
int explicitPort = url.getPort();
this.url = url;
this.host = url.getHost();
this.port = (explicitPort < 0) ? 80 : explicitPort;
// Per-request settings start out as copies of the class-wide defaults.
this.version = defaultHTTPVersion;
this.proxyHost = defaultProxyHost;
this.proxyPort = defaultProxyPort;
// Fresh, empty header sets for the request and the eventual response.
this.requestHeaders = new MimeHeaders();
this.responseHeaders = new MimeHeaders();
}
/**
* Creates a new HttpRequest
object that will send an
* HTTP request to fetch the resource represented by the URL.
*
* The host specified by the URL is not contacted at this time.
*
* @param url
* A string representing a fully qualified "http:" URL.
*
* @throws IllegalArgumentException
* if url
is not a well-formed "http:" URL.
*/
public
HttpRequest(String url)
{
// Delegates to HttpRequest(URL). The string is parsed by the static helper
// toURL() because a constructor call via this(...) cannot be wrapped in a
// try block; a malformed string surfaces as IllegalArgumentException.
this(toURL(url));
}
/*
* Artifact of Java: cannot implement HttpRequest(String) as follows
* because this(new URL(url))
must be first line in
* constructor; it can't be inside of try statement:
*
* public HttpRequest(String url) {
* try {
* this(new URL(url));
* } catch (MalformedURLException e) {
* throw new IllegalArgumentException(url);
* }
* }
*/
private static URL
toURL(String url)
{
// Translate the checked MalformedURLException into the unchecked
// IllegalArgumentException promised by HttpRequest(String); the
// offending string is used as the exception message.
URL parsed;
try {
parsed = new URL(url);
} catch (MalformedURLException e) {
throw new IllegalArgumentException(url);
}
return parsed;
}
/**
* Sets the HTTP method to the specified value. Some of the normal
* HTTP methods are "GET", "POST", "HEAD", "PUT", "DELETE", but the
* user can set the method to any value desired.
*
* If this method is called, it must be called before connect
* is called. Otherwise it will have no effect.
*
* @param method
* The string for the HTTP method, or null
to
* allow this HttpRequest
to pick the method for
* itself.
*/
public void
setMethod(String method)
{
// Only stored here. If still null when connect() runs, prepareHeaders()
// chooses "POST" (a body was requested via getOutputStream) or "GET".
this.method = method;
}
/**
* Sets the proxy for this request. The HTTP proxy request will be sent
* to the specified proxy host.
*
* If this method is called, it must be called before connect
* is called. Otherwise it will have no effect.
*
* @param proxyHost
* The proxy that will handle the request, or null
* to not use a proxy.
*
* @param proxyPort
* The port on the proxy, for the proxy request. Ignored if
* proxyHost
is null
.
*/
public void
setProxy(String proxyHost, int proxyPort)
{
// Only stored here. openSocket() connects to this host/port instead of
// the URL's host whenever proxyHost is non-null.
this.proxyHost = proxyHost;
this.proxyPort = proxyPort;
}
/**
* Sets a request header in the HTTP request that will be issued. In
* order to do fancier things like appending a value to an existing
* request header, the user may directly access the
* requestHeaders
variable.
*
* If this method is called, it must be called before connect
* is called. Otherwise it will have no effect.
*
* @param key
* The header name.
*
* @param value
* The value for the request header.
*
* @see #requestHeaders
*/
public void
setRequestHeader(String key, String value)
{
// Thin wrapper around put() on the public requestHeaders field; the
// headers are written to the socket later by sendRequest().
requestHeaders.put(key, value);
}
/**
* Gets an output stream that can be used for uploading data to the
* host.
*
* If this method is called, it must be called before connect
* is called. Otherwise it will have no effect.
*
* Currently the implementation is not as good as it could be. The
* user should avoid uploading huge amounts of data, for some definition
* of huge.
*/
public OutputStream
getOutputStream()
throws IOException
{
// The body is buffered entirely in memory. The buffer is created on first
// use and the same instance is returned on every later call.
ByteArrayOutputStream buffer = postData;
if (buffer == null) {
buffer = new ByteArrayOutputStream();
postData = buffer;
}
return buffer;
}
/**
* Connect to the target host (or proxy), send the request, and read the
* response headers. Any setup routines must be called before the call
* to this method, and routines to examine the result must be called after
* this method.
*
*
* @throws UnknownHostException
* if the target host (or proxy) could not be contacted.
*
* @throws IOException
* if there is a problem writing the HTTP request or reading
* the HTTP response headers.
*/
public void
connect()
throws UnknownHostException, IOException
{
// Idempotent: only the first call does any work.
if (connected) {
return;
}
// NOTE(review): the flag is set before anything can fail, so if
// openSocket() or the exchange below throws, later calls to connect()
// return immediately without retrying.
connected = true;
prepareHeaders();
// First attempt: allow the pool to hand back an idle, reused socket.
openSocket(true);
try {
try {
sendRequest();
readStatusLine();
} catch (IOException e) {
// A failure on a socket flagged firstTime is reported as is.
// (presumably firstTime marks a freshly opened, never reused
// socket -- confirm against HttpSocket.)
if (hs.firstTime) {
throw e;
}
// Otherwise discard the socket and retry exactly once, asking the pool
// not to reuse an idle socket this time (reuse == false).
closeSocket(false);
openSocket(false);
sendRequest();
readStatusLine();
}
responseHeaders.read(in);
if (displayAllHeaders) {
System.err.println(status);
responseHeaders.print(System.err);
System.err.println();
}
} catch (IOException e) {
// Any I/O failure (including one during the retry) releases the socket
// without keep-alive before the exception is propagated.
closeSocket(false);
throw e;
}
// Decide keep-alive and pick the input stream matching the body framing.
parseResponse();
}
/**
* Fills in every request setting the user left unspecified, just before
* the request is sent:
*
* - the method defaults to "POST" if a body was requested through
* getOutputStream, otherwise to "GET";
* - "Content-Length" is set to the size of the buffered body, if any;
* - the request target and the name of the keep-alive header are chosen
* depending on whether a proxy is used;
* - the keep-alive header and "Host" are added unless already present.
*/
void
prepareHeaders()
{
if (postData != null) {
if (method == null) {
method = "POST";
}
setRequestHeader("Content-Length",
Integer.toString(postData.size()));
}
if (method == null) {
method = "GET";
}
if (proxyHost == null) {
uri = url.getFile();
/*
* java.net.URL.getFile() returns "" for "http://host" and "?q=1" for
* "http://host?q=1". A direct request needs an absolute path in the
* request line, so make sure the target starts with "/"; otherwise
* the request line would read "GET  HTTP/1.1".
*/
if (!uri.startsWith("/")) {
uri = "/" + uri;
}
connectionHeader = "Connection";
} else {
// Proxies are given the fully qualified URL.
uri = url.toString();
connectionHeader = "Proxy-Connection";
}
requestHeaders.putIfNotPresent(connectionHeader, "Keep-Alive");
requestHeaders.putIfNotPresent("Host", host + ":" + port);
}
/**
* Obtains a socket from the pool and wraps its input stream.
*
* The socket goes to the proxy when proxyHost is set, otherwise to the
* host and port taken from the URL.
*
* @param reuse
* passed through to the pool's get() call.
*
* @throws IOException
* if the pool cannot provide a socket.
*/
void
openSocket(boolean reuse)
throws IOException
{
boolean viaProxy = (proxyHost != null);
String targetHost = viaProxy ? proxyHost : host;
int targetPort = viaProxy ? proxyPort : port;
hs = pool.get(targetHost, targetPort, reuse);
under = hs.in;
in = new HttpInputStream(under);
}
/**
* Releases the socket held by this request, if any. Calling it again
* once the socket has been released does nothing.
*
* @param reuse
* false forces the socket to be closed rather than kept
* alive; true leaves that decision to the keepAlive flag.
*/
void
closeSocket(boolean reuse)
{
if (hs == null) {
return;
}
HttpSocket socket = hs;
hs = null;
keepAlive = keepAlive && reuse;
if (!keepAlive || eof) {
/*
* Either the socket is not going to be kept alive, or the soft
* 'eof' flag says all the data we were supposed to read has
* already been read: hand the socket to the pool right away.
*/
pool.close(socket, keepAlive);
} else {
/*
* A keep-alive socket with unread response data must be drained
* before it can be reused; do that on a background thread, limited
* by DRAIN_TIMEOUT.
*/
new BackgroundCloser(socket, under, DRAIN_TIMEOUT).start();
}
}
/**
* Thread that reads and discards whatever is left of a response so that
* the socket can be given back to the pool. A companion Killer thread is
* started alongside it and interrupts this thread after "timeout"
* milliseconds.
*/
class BackgroundCloser extends Thread
{
HttpSocket hs;
InputStream in;
int timeout;
Killer killer;
BackgroundCloser(HttpSocket hs, InputStream in, int timeout)
{
this.timeout = timeout;
this.in = in;
this.hs = hs;
}
/* Starts the watchdog first, then this draining thread. */
public void start()
{
killer = new Killer(this);
killer.start();
super.start();
}
public void run()
{
byte[] scratch = new byte[4096];
try {
// Read until end of stream, throwing the data away.
while (in.read(scratch, 0, scratch.length) >= 0) {
continue;
}
} catch (IOException e) {
// A failed drain means the socket must not be reused.
keepAlive = false;
}
pool.close(hs, keepAlive);
// Finished: wake the watchdog so it does not linger.
killer.interrupt();
}
}
/**
* Watchdog for a BackgroundCloser: sleeps for the closer's timeout and
* then interrupts it. The closer interrupts this thread when it finishes
* first, which ends the sleep early.
*/
static class Killer extends Thread
{
BackgroundCloser b;
// NOTE(review): never assigned or read in this class; run() uses
// b.timeout instead.
int timeout;
Killer(BackgroundCloser b)
{
this.b = b;
}
public void run()
{
try {
Thread.sleep(b.timeout);
b.interrupt();
// Deliberately empty: being interrupted during the sleep is the normal
// "closer already finished" signal, and nothing needs to be done.
} catch (Exception e) {}
}
}
/**
* Writes the request line, the request headers, a blank line and the
* buffered body (if any) to the socket, then flushes.
*
* When displayAllHeaders is set, the request line and headers are
* also echoed to the standard error stream.
*
* Note that java.io.PrintStream records write errors internally instead of
* throwing them, so a broken connection is normally noticed only when the
* response is read.
*
* @throws IOException
* declared for callers; see the note above.
*/
void
sendRequest()
throws IOException
{
if (displayAllHeaders) {
System.err.print(method + " " + uri + " " + version + "\r\n");
requestHeaders.print(System.err);
System.err.print("\r\n");
}
PrintStream p = new PrintStream(hs.out);
p.print(method + " " + uri + " " + version + "\r\n");
requestHeaders.print(p);
p.print("\r\n");
if (postData != null) {
/*
* Keep the buffer after writing it. connect() calls sendRequest() a
* second time when the first attempt on a reused socket fails; if the
* buffer were dropped here, that retry would send the "Content-Length"
* header without the body.
*/
postData.writeTo(p);
}
p.flush();
}
/**
* Reads lines from the response until a usable status line is found and
* stores it in "status".
*
* Handled cases, in order: interim "100" responses are skipped together
* with their headers; a line starting with "HTTP/1." is accepted; blank
* lines are ignored; a line that fills the whole LINE_LIMIT is rejected;
* any other line on a socket flagged firstTime is treated as the start of
* a header-less (HTTP/0.9 style) body; any other line on a reused socket
* is rejected.
*
* @throws EOFException
* if the stream ends before a status line is seen.
*
* @throws IOException
* on a malformed response or a read error.
*/
void
readStatusLine()
throws IOException
{
while (true) {
status = in.readLine(LINE_LIMIT);
if (status == null) {
throw new EOFException();
}
if (status.startsWith("HTTP/1.1 100")
|| status.startsWith("HTTP/1.0 100")) {
/*
* Ignore the "100 Continue" response that some HTTP/1.1
* servers send. We can't depend upon it being sent, because
* we might be talking to an HTTP/1.0 server or an HTTP/1.1
* server that doesn't send the "100 Continue" response, so
* we can't use the response for any decision making, such as
* not sending the post data.
*
* www.u-net.com sends "HTTP/1.0 100 Continue"!
*/
// Skip the headers of the interim response up to the blank line.
// NOTE(review): unlike the read above, this readLine() call passes
// no LINE_LIMIT.
while (true) {
status = in.readLine();
if ((status == null) || (status.length() == 0)) {
break;
}
}
} else if (status.startsWith("HTTP/1.")) {
return;
} else if (status.length() == 0) {
// Blank line before the status line: ignore it and keep reading.
// System.out.println(this + ": got a blank line");
} else if (status.length() == LINE_LIMIT) {
throw new IOException("malformed server response");
} else if (hs.firstTime) {
/*
* Some servers don't send back any headers, even if they
* accept a HTTP/1.0 or greater request! We have to push
* back this line, so it can be re-read as the body.
* Since this is coming back with no headers, the content
* length will be unknown and so the socket will be closed.
*/
// System.out.println("receiving HTTP/0.9 response");
// Pushed-back bytes are read back in the reverse of the order in
// which they are unread, so the line's CRLF goes in first and the
// characters of the line follow from last to first.
PushbackInputStream pin = new PushbackInputStream(hs.in,
status.length() + 4);
pin.unread('\n');
pin.unread('\r');
for (int i = status.length(); --i >= 0; ) {
pin.unread(status.charAt(i));
}
/*
* And push back a blank line, so the user thinks it got to
* the end of the headers
*/
pin.unread('\n');
pin.unread('\r');
// Pretend a normal status line was received and re-wrap the streams
// so that the header parser and the user read from the pushback
// stream.
status = "HTTP/1.0 200 OK";
hs.in = pin;
under = pin;
in = new HttpInputStream(under);
break;
} else {
/*
* If we see funny responses (missing headers, etc.) from a
* socket that we've reused, then we probably got out of sync
* with the remote host (e.g., didn't read enough from the
* last response), and should abort this request.
*/
throw new IOException("malformed server response");
}
}
}
/**
* Examines the status line and response headers to decide (a) whether
* the connection may be kept alive and (b) which input stream presents
* the response body to the user:
*
* - "Transfer-Encoding: chunked": an unchunking stream;
* - known empty body (length 0, HEAD request, or a 204/304 status
* without "Content-Length"): an empty stream, and the socket is
* released immediately;
* - positive "Content-Length": a stream limited to that many bytes;
* - unknown length: the raw stream, with keep-alive turned off.
*/
void
parseResponse()
{
String str;
str = getResponseHeader(connectionHeader);
if (str != null) {
keepAlive = str.equalsIgnoreCase("Keep-Alive");
} else if (status.startsWith("HTTP/1.1")) {
// HTTP/1.1 connections are persistent unless stated otherwise.
keepAlive = true;
} else {
keepAlive = false;
}
str = getResponseHeader("Transfer-Encoding");
/*
* Transfer-coding names are case-insensitive, so "Chunked" or
* "CHUNKED" must be recognized as well.
*/
if ((str != null) && str.trim().equalsIgnoreCase("chunked")) {
under = new UnchunkingInputStream(this);
in = new RecycleInputStream(this, under);
return;
}
int contentLength = getContentLength();
if (contentLength < 0) {
/*
* Some servers leave off the content length for return codes
* known to require no content. Decide from the parsed status code
* so that "204"/"304" appearing elsewhere in the status line does
* not match; only when the code cannot be parsed, fall back to
* searching the whole status line.
*/
int code = getResponseCode();
boolean bodiless;
if (code >= 0) {
bodiless = (code == 304) || (code == 204);
} else {
bodiless = (status.indexOf("304") > 0)
|| (status.indexOf("204") > 0);
}
if (bodiless) {
responseHeaders.put("Content-Length", "0");
contentLength = 0;
}
}
if ((contentLength == 0) || method.equals("HEAD")) {
// Nothing to read: present an empty stream and release the socket now.
under = new NullInputStream();
in = new HttpInputStream(under);
closeSocket(keepAlive);
} else if (contentLength > 0) {
under = new LimitInputStream(this, contentLength);
in = new RecycleInputStream(this, under);
} else {
// Unknown length: the body ends when the connection is closed, so the
// socket cannot be reused.
keepAlive = false;
in = new RecycleInputStream(this, under);
}
}
/**
* Gets an input stream that can be used to read the body of the
* HTTP response. Unlike the other convenience methods for accessing
* the HTTP response, this one automatically connects to the
* target host if not already connected.
*
* The input stream that getInputStream
provides
* automatically hides the differences between "Content-Length", no
* "Content-Length", and "chunked" for HTTP/1.0 and HTTP/1.1 responses.
* In all cases, the user can simply read until reaching the end of the
* input stream, which signifies that all the available data from this
* request has been read. (If reading from a "chunked" source, the data
* is automatically de-chunked as it is presented to the user. There is
* no way to access the raw underlying stream that contains the HTTP/1.1
* chunking packets.)
*
* @throws IOException
* if there is problem connecting to the target.
*
* @see #connect
*/
public HttpInputStream
getInputStream()
throws IOException
{
// connect() does nothing if it has already been called, so the request
// is sent at most once.
// NOTE(review): connect() sets its "connected" flag before opening the
// socket; after a failed first attempt this method therefore returns
// whatever "in" currently holds, possibly null.
connect();
return in;
}
/**
* Gracefully closes this HTTP request when user is done with it.
*
* The user can either call this method or close
on the
* input stream obtained from the getInputStream
* method -- the results are the same.
*
* When all the response data is read from the input stream, the
* input stream is automatically closed (recycled). If the user is
* not going to read all the response data from input stream, the user
* must call close
to
* release the resources associated with the open request. Otherwise
* the program may consume all available sockets, waiting forever for
* the user to finish reading.
*
* Note that the input stream is automatically closed if the input
* stream throws an exception while reading.
*
* In order to interrupt a pending I/O operation in another thread
* (for example, to stop a request that is taking too long), the user
* should call disconnect
or interrupt the blocked thread.
* The user should not call close
in this case because
* close
will not interrupt the pending I/O operation.
*
* Closing the request multiple times is allowed.
*
* In order to make sure that open sockets are not left lying around
* the user should use code similar to the following:
*
* OutputStream out = ...
* HttpRequest http = new HttpRequest("http://bob.com/index.html");
* try {
* HttpInputStream in = http.getInputStream();
* in.copyTo(out);
* } finally {
* // Copying to "out" could have failed. Close "http" in case
* // not all the data has been read from it yet.
* http.close();
* }
*
*/
public void
close()
{
// reuse == true: the socket may go back to the pool if the response
// allowed keep-alive; closeSocket() does nothing when no socket is held.
closeSocket(true);
}
/**
* Interrupts this HTTP request. Can be used to halt an in-progress
* HTTP request from another thread, by causing it to
* throw an InterruptedIOException
during the connect
* or while reading from the input stream, depending upon what state
* this HTTP request is in when it is disconnected.
*
* @see #close
*/
public void
disconnect()
{
// reuse == false: closeSocket() clears keepAlive, so the socket is handed
// to the pool to be closed rather than kept for reuse.
closeSocket(false);
}
/**
* Gets the HTTP response status code. From responses like:
*
* HTTP/1.0 200 OK
* HTTP/1.0 401 Unauthorized
*
* this method extracts the integers 200
and 401
* respectively. Returns -1
if the response status code
* was malformed.
*
* If this method is called, it must be called after connect
* has been called. Otherwise the information is not yet available and
* this method will return -1
.
*
* For advanced features, the user can directly access the
* status
variable.
*
* @return The integer status code from the HTTP response.
*
* @see #connect
* @see #status
*/
public int
getResponseCode()
{
try {
// The code is the token after the first space of the status line.
int codeStart = status.indexOf(' ') + 1;
int codeEnd = status.indexOf(' ', codeStart + 1);
// Some status lines carry a code but no reason phrase; in that case
// the code runs to the end of the line.
String code = (codeEnd < 0)
? status.substring(codeStart)
: status.substring(codeStart, codeEnd);
return Integer.parseInt(code);
} catch (Exception e) {
// No status line yet (null) or not a number: report "unknown".
return -1;
}
}
/**
* Gets the value associated with the given case-insensitive header name
* from the HTTP response.
*
* If this method is called, it must be called after connect
* has been called. Otherwise the information is not available and
* this method will return null
.
*
* For advanced features, such as enumerating over all response headers,
* the user should directly access the responseHeaders
* variable.
*
* @param key
* The case-insensitive name of the response header.
*
* @return The value associated with the given name, or null
* if there is no such header in the response.
*
* @see #connect
* @see #responseHeaders
*/
public String
getResponseHeader(String key)
{
// Plain lookup in the public responseHeaders field.
return responseHeaders.get(key);
}
/**
* Convenience method to get the "Content-Length" header from the
* HTTP response.
*
* If this method is called, it must be called after connect
* has been called. Otherwise the information is not available and
* this method will return -1
.
*
* @return The content length specified in the response headers, or
* -1
if the length was not specified or malformed
* (not a number).
*
* @see #connect
* @see #getResponseHeader
*/
public int
getContentLength()
{
int length;
try {
String raw = responseHeaders.get("Content-Length");
length = Integer.parseInt(raw);
} catch (Exception e) {
// Header absent or not a number: the length is unknown.
length = -1;
}
return length;
}
/**
* Removes all the point-to-point (hop-by-hop) headers from
* the given mime headers.
*
* @param headers
* The mime headers to be modified.
*
* @param response
* true
to remove the point-to-point response
* headers, false
to remove the point-to-point
* request headers.
*
* @see "RFC 2068, Hypertext Transfer Protocol -- HTTP/1.1"
*/
public static void
removePointToPointHeaders(MimeHeaders headers, boolean response)
{
    /* Headers stripped regardless of direction. */
    String[] always = {
        "Connection", "Proxy-Connection", "Keep-Alive", "Upgrade"
    };
    for (int i = 0; i < always.length; i++) {
        headers.remove(always[i]);
    }

    if (response) {
        /* Response-only headers. */
        headers.remove("Proxy-Authenticate");
        headers.remove("Public");
        headers.remove("Transfer-Encoding");
    } else {
        /* Request-only header. */
        headers.remove("Proxy-Authorization");
    }
}
/**
* Convenience method for adding request headers by looking them
* up in a properties object.
* @param tokens a white space delimited set of tokens that refer
* to headers that will be added to the HTTP request.
* @param props Keys of the form <code>[token].name</code> and
* <code>[token].value</code> are used to lookup additional
* HTTP headers to be added to the request.
* @return The number of headers added to the request
* @see #setRequestHeader
*/
public int
addHeaders(String tokens, Properties props) {
    int added = 0;
    for (StringTokenizer words = new StringTokenizer(tokens);
            words.hasMoreTokens(); ) {
        String prefix = words.nextToken();
        String headerName = props.getProperty(prefix + ".name");
        String headerValue = props.getProperty(prefix + ".value");
        /* A header is only added when both halves are configured. */
        if (headerName == null || headerValue == null) {
            continue;
        }
        setRequestHeader(headerName, headerValue);
        added++;
    }
    return added;
}
/**
* Get the content as a string. Uses the character
* encoding specified in the HTTP headers if available.
* Otherwise the supplied encoding is used, or (if
* encoding is null), the platform default encoding.
* @param encoding The ISO character encoding to use, if
* the encoding can't be determined by
* context.
* @return The content as a string.
*/
public String getContent(String encoding)
    throws IOException, UnsupportedEncodingException {
    /* Drain the whole response body into memory. */
    HttpInputStream body = getInputStream();
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    body.copyTo(buffer);
    body.close();

    /* Charset from the response headers wins over the caller's. */
    String charset = getEncoding();
    if (charset == null) {
        charset = encoding;
    }

    /* With no charset at all, decode using the platform default. */
    String text = (charset == null)
        ? buffer.toString()
        : buffer.toString(charset);
    buffer.close();
    return text;
}
/**
* Return the content as a string.
*/
public String getContent()
    throws IOException, UnsupportedEncodingException {
    /* Delegate to getContent(String) with no caller-supplied encoding. */
    String fallback = null;
    return getContent(fallback);
}
/**
* Get the ISO character encoding (if any) associated with this
* text stream, or <code>null</code> if none is found. The response
* headers must be available.
*/
public String getEncoding() {
    String type = getResponseHeader("content-type");
    if (type == null) {
        return null;
    }
    type = type.trim();
    if (displayAllHeaders) {
        System.err.println("Looking for encoding in: " + type);
    }

    String subs[] = new String[2];
    if (!encExp.match(type, subs)) {
        return null;
    }

    String charset = subs[1];
    if (charset == null) {
        return null;
    }
    /*
     * A media-type parameter value may be a quoted string, e.g.
     * charset="utf-8".  Strip the quotes so the result is usable as a
     * charset name.
     */
    int last = charset.length() - 1;
    if (last >= 1 && charset.charAt(0) == '"'
            && charset.charAt(last) == '"') {
        charset = charset.substring(1, last);
    }
    /* "charset=" with nothing after it is the same as no charset. */
    return (charset.length() == 0) ? null : charset;
}

// Case-insensitive pattern that captures the charset parameter of a
// "text/..." content type.  Declared after getEncoding() so that the doc
// comment preceding this block documents the method, not this field.
Regexp encExp = new Regexp("^text/.*;[ \t]*charset=([^ \t;]*)",true);
/**
* Grab http document(s) and save them in the filesystem.
* This is a simple batch HTTP url fetcher. Usage:
* <pre>
* java ... sunlabs.brazil.util.http.HttpRequest [-v(erbose)] [-h(headers)] [-p[http://proxyhost:port]] url...
* </pre>
* <dl>
* <dt>-v
* <dd>Verbose. Print the target URL and destination file on stderr
* <dt>-h
* <dd>Print all the HTTP headers on stderr
* <dt>-phttp://proxyhost:port
* <dd>The following url's are to be fetched via a proxy.
* </dl>
* The options and url's may be given in any order. Use "-p" by itself
* to disable the proxy for all following requests.
* <p>
* There are many limitations: only HTTP GET requests are supported, the
* output filename is derived automatically from the URL and can't be
* overridden, if a destination file already exists, it is overwritten.
*/
public static void
main(String[] args) throws Exception {
// Command-line entry point: walks the arguments in order, updating the
// option state for "-..." arguments and fetching every other argument
// as a URL into a local file.
String proxyHost = null;
int proxyPort = 80;
boolean isVerbose = false;
if (args.length == 0) {
System.err.println("Usage: [-v(erbose) -h(headers) -p] url...");
System.exit(1);
}
// NOTE(review): the next line appears to be truncated in this copy of
// the file -- the loop condition, the test for a leading "-", the
// declaration of "arg", the switch header and its first cases are
// missing, so this method does not compile as shown.  Restore the lost
// text from the original source rather than guessing at it.
for (int i=0; i 7) {
// Proxy option with a URL attached: the first character of arg is
// skipped and the remainder is parsed as the proxy URL.
URL url = new URL(arg.substring(1));
proxyHost = url.getHost();
proxyPort = url.getPort();
// Negative port (none given in the proxy URL): fall back to 80.
if (proxyPort < 0) {
proxyPort = 80;
}
} else {
// Proxy option without a usable URL: clear the proxy host.
proxyHost = null;
}
break;
default:
System.err.println("Invalid argument, ignored: -" +
arg);
}
// Option handled; move on to the next argument.
continue;
}
// Not an option: treat the argument as a URL and fetch it.
try {
HttpRequest target = new HttpRequest(args[i]);
// Destination file name is derived from the URL by url2file().
String name = url2file(args[i]);
if (isVerbose) {
System.err.println("Fetching (" + args[i] + ") to (" +
name + ")");
}
// Hand over the current proxy settings (proxyHost may be null).
target.setProxy(proxyHost, proxyPort);
HttpInputStream in = target.getInputStream();
FileOutputStream out = new FileOutputStream(name);
in.copyTo(out);
in.close();
out.close();
} catch (IOException e) {
// A failed fetch is reported and the remaining arguments are
// still processed.
System.err.println("Error fetching " + args[i] +
": " + e.getMessage());
}
}
}
/* Invent a file name from a url. */
static String url2file(String url) throws IOException {
    String path = HttpUtil.extractUrlPath(url);
    if (path==null) {
        throw new IOException("Invalid url: " + url);
    }
    /*
     * Drop the first character of the path and flatten the remaining
     * '/' separators so the result is a single file name.
     */
    String name = "";
    if (path.length() > 1) {
        name = path.substring(1).replace('/', '_');
    }
    /*
     * An empty path or a bare "/" used to produce an out-of-bounds
     * substring or an empty file name that cannot be opened for
     * writing; use a conventional default name instead.
     */
    if (name.length() == 0) {
        name = "index.html";
    }
    return name;
}
}
/*
 * Input stream wrapper that hands the request's socket back to its
 * HttpRequest when the stream is closed, reaches EOF, or fails.
 */
class RecycleInputStream
    extends HttpInputStream
{
    HttpRequest target;
    boolean closed;

    public
    RecycleInputStream(HttpRequest target, InputStream in)
    {
        super(in);
        this.target = target;
    }

    /**
     * Reads one byte from the underlying input stream, which might be a
     * raw input stream, a limit input stream, or an unchunking input
     * stream.  On EOF or on a read error the socket is closed.
     */
    public int
    read()
        throws IOException
    {
        if (closed) {
            return -1;
        }
        int ch;
        try {
            ch = in.read();
        } catch (IOException e) {
            close(false);
            throw e;
        }
        if (ch < 0) {
            close(false);
        }
        return ch;
    }

    /**
     * Bulk variant of read(): same EOF and error handling.
     */
    public int
    read(byte[] buf, int off, int len)
        throws IOException
    {
        if (closed) {
            return -1;
        }
        int count;
        try {
            count = in.read(buf, off, len);
        } catch (IOException e) {
            close(false);
            throw e;
        }
        if (count < 0) {
            close(false);
        }
        return count;
    }

    /*
     * Releases the socket at most once; later calls are no-ops.
     */
    private void
    close(boolean reuse)
    {
        if (closed) {
            return;
        }
        closed = true;
        target.closeSocket(reuse);
    }

    /**
     * Explicit close by the caller: the socket is offered for reuse.
     */
    public void
    close()
    {
        close(true);
    }
}
/*
 * An input stream that is always at end-of-file.
 */
class NullInputStream
    extends InputStream
{
    /* Always reports end-of-file. */
    public int
    read()
    {
        return -1;
    }

    /*
     * Bulk read override.  The original class declared this method with a
     * char[] buffer, which only overloaded (never overrode) the byte[]
     * method of InputStream.  A zero-length request returns 0, as the
     * InputStream contract requires; anything else reports end-of-file.
     */
    public int
    read(byte[] buf, int off, int len)
    {
        return (len == 0) ? 0 : -1;
    }

    /*
     * Historical overload, not part of the InputStream API; retained only
     * so that any existing caller keeps compiling.
     */
    public int
    read(char[] buf, int off, int len)
    {
        return -1;
    }
}
/*
 * Input stream that delivers at most "limit" bytes from the request's
 * socket stream; once the limit is used up it flags EOF on the request
 * and releases the socket for reuse.
 */
class LimitInputStream
    extends HttpInputStream
{
    HttpRequest target;
    int limit;

    public
    LimitInputStream(HttpRequest target, int limit)
    {
        super(target.hs.in);
        this.target = target;
        this.limit = limit;
    }

    /* Reads a single byte, or returns -1 once the limit is exhausted. */
    public int
    read()
        throws IOException
    {
        if (limit > 0) {
            int ch = in.read();
            if (ch >= 0) {
                limit--;
                if (limit <= 0) {
                    /* That was the last byte of the body. */
                    target.eof = true;
                    target.closeSocket(true);
                }
            }
            return ch;
        }
        return -1;
    }

    /* Bulk read, never asking the underlying stream for more than remains. */
    public int
    read(byte[] buf, int off, int len)
        throws IOException
    {
        if (limit <= 0) {
            return -1;
        }
        int wanted = Math.min(len, limit);
        int count = in.read(buf, off, wanted);
        if (count < 0) {
            /* Underlying stream ended early: nothing more to deliver. */
            limit = 0;
            return -1;
        }
        limit -= count;
        if (limit <= 0) {
            target.eof = true;
            target.closeSocket(true);
        }
        return count;
    }
}
/*
 * Input stream that decodes an HTTP "chunked" body read from the request's
 * input stream, returning only the chunk data.  When the terminating
 * zero-length chunk is seen, the trailers are read into the request, the
 * request is flagged as at EOF and its socket is released for reuse.
 */
class UnchunkingInputStream
    extends HttpInputStream
{
    HttpRequest target;
    boolean eof;	/* true once the zero-length chunk has been read */
    int bytesLeft;	/* data bytes remaining in the current chunk */

    public
    UnchunkingInputStream(HttpRequest target)
    {
        super(target.in);
        this.target = target;
    }

    /*
     * Reads one data byte, fetching the next chunk header first when the
     * current chunk is used up.  Returns -1 after the last chunk.
     */
    public int
    read()
        throws IOException
    {
        if ((bytesLeft <= 0) && (getChunkSize() == false)) {
            return -1;
        }
        int ch = in.read();
        if (ch >= 0) {
            /* Only count bytes that were actually delivered. */
            bytesLeft--;
        }
        return ch;
    }

    /*
     * Reads up to len data bytes, crossing chunk boundaries while more
     * input is available without blocking.  Returns the number of bytes
     * stored, or -1 if none could be read.
     */
    public int
    read(byte[] buf, int off, int len)
        throws IOException
    {
        int total = 0;
        while (true) {
            if ((bytesLeft <= 0) && (getChunkSize() == false)) {
                break;
            }
            int count = super.read(buf, off, Math.min(bytesLeft, len));
            if (count < 0) {
                /*
                 * The underlying stream ended in the middle of a chunk.
                 * The -1 must not be folded into the running totals
                 * (doing so under-reported the bytes already stored);
                 * return what has been read so far.
                 */
                break;
            }
            total += count;
            off += count;
            bytesLeft -= count;
            len -= count;
            if ((len <= 0) || (available() == 0)) {
                break;
            }
        }
        return (total == 0) ? -1 : total;
    }

    /*
     * Reads the next chunk-size line into bytesLeft.  Returns true if a
     * non-empty chunk follows, false once the final (zero-length) chunk
     * has been seen.  Throws IOException("malformed chunk") if no valid
     * size can be read.
     */
    private boolean
    getChunkSize()
        throws IOException
    {
        if (eof) {
            return false;
        }

        /*
         * Although HTTP/1.1 chunking spec says that there is one "\r\n"
         * between chunks, some servers (for example, maps.yahoo.com)
         * send more than one blank line between chunks.  So, read and skip
         * all the blank lines seen between chunks.
         */
        String line;
        do {
            // Sanity check: limit chars when expecting a chunk size.
            line = ((HttpInputStream) in).readLine(HttpRequest.LINE_LIMIT);
        } while ((line != null) && (line.length() == 0));

        if (line == null) {
            throw new IOException("malformed chunk");
        }

        /*
         * The size may be followed by ";name=value" chunk extensions,
         * which are ignored.
         */
        int semi = line.indexOf(';');
        if (semi >= 0) {
            line = line.substring(0, semi);
        }

        int size;
        try {
            size = Integer.parseInt(line.trim(), 16);
        } catch (Exception e) {
            throw new IOException("malformed chunk");
        }
        if (size < 0) {
            /* A negative size would turn into a negative read length. */
            throw new IOException("malformed chunk");
        }
        bytesLeft = size;

        if (bytesLeft == 0) {
            eof = true;
            target.responseTrailers = new MimeHeaders((HttpInputStream) in);
            target.eof = true;
            target.closeSocket(true);
            return false;
        }
        return true;
    }
}
/*
 * A small pool of idle HttpSocket connections, kept oldest-first in a
 * Vector.  A daemon thread periodically closes connections that have been
 * idle for longer than maxAge.
 */
class SimpleHttpSocketPool
    implements Runnable, HttpSocketPool
{
    public int maxIdle = 10;		// size of the socket pool
    public int maxAge = 20000;		// max age of idle socket (mseconds)
    public int reapInterval=10000;	// interval (in msec) to run reaper thread

    // pool of idle connections; new entries are appended at the end
    Vector idle = new Vector();

    /**
     * The background thread that removes old connections.
     */
    Thread reaper;

    /**
     * Creates the pool and starts the reaper as a daemon thread.
     */
    public
    SimpleHttpSocketPool()
    {
        reaper = new Thread(this);
        reaper.setDaemon(true);
        reaper.start();
    }

    /**
     * Get a potentially "pooled" target object.
     * Call this instead of the constructor to use the pool.
     *
     * @param host	the target content server (or web proxy)
     * @param port	target web server port
     * @param reuse	if true, an idle pooled socket to the same host and
     *			port is returned when one exists; otherwise (or if
     *			none matches) a new socket is created.
     */
    public HttpSocket
    get(String host, int port, boolean reuse)
        throws IOException, UnknownHostException
    {
        /*
         * Lower-case with a fixed locale so the result does not depend on
         * the platform default locale.
         */
        host = host.toLowerCase(java.util.Locale.ENGLISH);

        if (reuse) {
            synchronized (idle) {
                /*
                 * Start at end to reuse the most recent socket, which is
                 * hopefully the most likely to still be alive.
                 */
                int i = idle.size();
                while (--i >= 0) {
                    HttpSocket hs = (HttpSocket) idle.elementAt(i);
                    if (hs.host.equals(host) && (hs.port == port)) {
                        idle.removeElementAt(i);
                        hs.timesUsed++;
                        return hs;
                    }
                }
            }
        }

        HttpSocket hs = new HttpSocket(host, port);
        return hs;
    }

    /**
     * Returns a socket to the pool, or closes it.
     *
     * @param hs	the socket being given up
     * @param reuse	if true the socket is kept for reuse (bumping the
     *			oldest idle sockets when the pool is full); if
     *			false, or if pooling is disabled because
     *			maxIdle is not positive, the socket is closed.
     */
    public void
    close(HttpSocket hs, boolean reuse)
    {
        /* Read once: maxIdle is a public field that may change. */
        int capacity = maxIdle;

        if (reuse && capacity > 0) {
            synchronized (idle) {
                /*
                 * Make room.  capacity > 0 guarantees the vector is not
                 * empty whenever this loop body runs; previously a
                 * maxIdle of 0 made firstElement() throw on an empty pool.
                 */
                while (idle.size() >= capacity) {
                    HttpSocket bump = (HttpSocket) idle.firstElement();
                    idle.removeElementAt(0);
                    bump.close();
                }
                hs.firstTime = false;
                hs.lastUsed = System.currentTimeMillis();
                idle.addElement(hs);
            }
        } else {
            hs.close();
        }
    }

    /*
     * Only referenced by debugging output that has since been removed;
     * retained so the set of fields of this class is unchanged.
     */
    int lastSize = -1;

    /**
     * Reaper loop: every reapInterval milliseconds, close the idle sockets
     * last used more than maxAge milliseconds ago.  Ends when interrupted.
     */
    public void
    run()
    {
        while (true) {
            try {
                Thread.sleep(reapInterval);
            } catch (InterruptedException e) {
                break;
            }

            long expired = System.currentTimeMillis() - maxAge;
            synchronized (idle) {
                /*
                 * Entries are appended with the current time, so the first
                 * one is the oldest: stop at the first unexpired entry.
                 */
                while (idle.size() > 0) {
                    HttpSocket hs = (HttpSocket) idle.firstElement();
                    if (hs.lastUsed >= expired) {
                        break;
                    }
                    idle.removeElementAt(0);
                    hs.close();
                }
            }
        }
    }

    /**
     * Lists the idle sockets, each followed by ", ".
     */
    public String
    toString()
    {
        if (idle == null) {
            return "(null)";
        }
        StringBuffer sb = new StringBuffer();
        /* Same lock as get/close/run, so the walk sees a stable vector. */
        synchronized (idle) {
            for (int i = 0; i < idle.size(); i++) {
                HttpSocket hs = (HttpSocket) idle.elementAt(i);
                sb.append(hs.toString() + ", ");
            }
        }
        return sb.toString();
    }
}