// org.jwat.common.HttpHeader Maven / Gradle / Ivy
/**
* Java Web Archive Toolkit - Software to read and validate ARC, WARC
* and GZip files. (http://jwat.org/)
* Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.jwat.common;
import java.io.IOException;
import java.io.PushbackInputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
/**
* This class represents a recognized HTTP response or request header with optional payload.
*
* @author lbihanic, selghissassi, nicl
*/
public class HttpHeader extends PayloadWithHeaderAbstract {

    /** Response header type. */
    public static final int HT_RESPONSE = 1;

    /** Request header type. */
    public static final int HT_REQUEST = 2;

    /** HTTP scheme. */
    public static final String PROTOCOL_HTTP = "http";

    /** HTTPS scheme. */
    public static final String PROTOCOL_HTTPS = "https";

    /**
     * Leading part of an HTTP-Version token. Despite the constant's name it
     * is matched as a prefix by {@link #isHttpVersionValid(String)}.
     */
    protected static final String HTTP_VERSION_SUFFIX = "HTTP/";

    /**
     * Content-type header name, upper-cased with {@link Locale#ROOT} so the
     * comparison in {@code readHeader} does not depend on the default locale.
     */
    protected static final String CONTENT_TYPE = "Content-Type".toUpperCase(Locale.ROOT);

    /** Header type of this object, {@link #HT_RESPONSE} or {@link #HT_REQUEST}. */
    public int headerType;

    /*
     * Request-Line.
     */

    /** HTTP Method. */
    public String method;

    /** HTTP Request-URI. */
    public String requestUri;

    /*
     * Http-Version.
     */

    /** HTTP-Version, stored as found in the start line (even when invalid). */
    public String httpVersion;

    /** HTTP-Version major. */
    public Integer httpVersionMajor;

    /** HTTP-Version minor. */
    public Integer httpVersionMinor;

    /*
     * Status-Line.
     */

    /** HTTP Status-Code as the raw string found in the Status-Line. */
    public String statusCodeStr;

    /** HTTP Status-Code parsed as an integer. */
    public Integer statusCode;

    /** HTTP Reason-Phrase. */
    public String reasonPhrase;

    /*
     * Header-Fields.
     */

    /** List of parsed header fields, in the order they were read. */
    protected List<HeaderLine> headerList = new LinkedList<HeaderLine>();

    /**
     * Map of parsed header fields keyed by the lower-cased
     * ({@link Locale#ROOT}) field name. Only the first occurrence of a name
     * is stored; later occurrences are appended to that entry's
     * {@code lines} list.
     */
    protected Map<String, HeaderLine> headerMap = new HashMap<String, HeaderLine>();

    /** HTTP content Content-type. */
    public String contentType;

    /**
     * Non public constructor. Instances are created through
     * {@link #processPayload(int, ByteCountingPushBackInputStream, long, String)}.
     */
    protected HttpHeader() {
    }

    /**
     * Boolean indicating whether a protocol is supported by this
     * payload inspector.
     * @param protocol protocol name, compared case-insensitively; may be null
     * @return true if the protocol is "http" or "https", false otherwise.
     */
    public static boolean isSupported(String protocol) {
        return PROTOCOL_HTTP.equalsIgnoreCase(protocol)
                || PROTOCOL_HTTPS.equalsIgnoreCase(protocol);
    }

    /**
     * Reads the HTTP protocol response or request and returns it as an object.
     * It is important to understand that the maximum size of a parsed header
     * is equal to the size of the PushbackInputStream's buffer!
     * @param headerType HTTP header type to parse, request or response
     * @param pbin payload input stream
     * @param length payload length
     * @param digestAlgorithm digest algorithm to use on payload or null if we
     * don't want a digest of the payload
     * @return <code>HttpHeader</code> based on the http headers
     * @throws IOException if an error occur while processing http header.
     * @throws IllegalArgumentException if <code>headerType</code> is not one
     * of the HT_* constants, <code>pbin</code> is null or <code>length</code>
     * is negative
     */
    public static HttpHeader processPayload(int headerType, ByteCountingPushBackInputStream pbin,
            long length, String digestAlgorithm) throws IOException {
        if (headerType != HT_RESPONSE && headerType != HT_REQUEST) {
            throw new IllegalArgumentException(
                    "Invalid 'headerType' argument: " + headerType);
        }
        if (pbin == null) {
            throw new IllegalArgumentException(
                    "The inputstream 'pbin' is null");
        }
        if (length < 0) {
            throw new IllegalArgumentException(
                    "The 'length' is less than zero: " + length);
        }
        HttpHeader hh = new HttpHeader();
        hh.headerType = headerType;
        hh.in_pb = pbin;
        hh.totalLength = length;
        hh.digestAlgorithm = digestAlgorithm;
        hh.diagnostics = new Diagnostics();
        // Inherited initialisation; NOTE(review): presumably this is what
        // ends up invoking readHeader() - confirm in PayloadWithHeaderAbstract.
        hh.initProcess();
        return hh;
    }

    /**
     * Reads the start line (Status-Line or Request-Line, depending on
     * {@link #headerType}) followed by the header fields up to the first
     * empty line. Header fields are collected in {@link #headerList} and
     * {@link #headerMap}, and a Content-Type field value is stored in
     * {@link #contentType}. Accumulated line reader error bits are reported
     * to <code>diagnostics</code>. On success the remaining payload length is
     * stored in <code>this.payloadLength</code>.
     * @param in recording input stream the header is read from
     * @param payloadLength total length of header plus payload
     * @return true if a valid HTTP header was read, false otherwise
     * @throws IOException if an error occur while reading the header
     * @throws IllegalStateException if {@link #headerType} is neither
     * {@link #HT_RESPONSE} nor {@link #HT_REQUEST}
     */
    @Override
    protected boolean readHeader(MaxLengthRecordingInputStream in, long payloadLength)
            throws IOException {
        // TODO too low!
        PushbackInputStream pbin = new PushbackInputStream(in, in_pb.getPushbackSize());
        HeaderLineReader hlr = HeaderLineReader.getHeaderLineReader();

        // Reader configuration for the start line.
        // NOTE(review): flags presumably turn off name/value splitting,
        // folding, quoted text and encoded-word handling - confirm against
        // HeaderLineReader.
        hlr.bNameValue = false;
        hlr.encoding = HeaderLineReader.ENC_ISO8859_1;
        hlr.bLWS = false;
        hlr.bQuotedText = false;
        hlr.bEncodedWords = false;

        boolean bValidHttpHeader = false;
        HeaderLine line = hlr.readLine(pbin);
        int bfErrors = 0;
        if (!hlr.bEof && line.type == HeaderLine.HLT_LINE
                && line.line != null && line.line.length() > 0) {
            // The invalid separator error bit is ignored for the start line.
            bfErrors = (line.bfErrors & ~HeaderLineReader.E_BIT_INVALID_SEPARATOR_CHAR);
            if (headerType == HT_RESPONSE) {
                bValidHttpHeader = isHttpStatusLineValid(line.line);
            } else if (headerType == HT_REQUEST) {
                bValidHttpHeader = isHttpRequestLineValid(line.line);
            } else {
                throw new IllegalStateException("Invalid headerType!");
            }
        }

        // Reader configuration for the header fields.
        hlr.bNameValue = true;
        hlr.bLWS = true;
        hlr.bQuotedText = true;
        hlr.bEncodedWords = true;

        // Header fields are only read when the start line was accepted.
        boolean bLoop = bValidHttpHeader;
        while (bLoop) {
            line = hlr.readLine(pbin);
            bfErrors |= line.bfErrors;
            if (!hlr.bEof) {
                switch (line.type) {
                case HeaderLine.HLT_HEADERLINE:
                    if (CONTENT_TYPE.equals(line.name.toUpperCase(Locale.ROOT))) {
                        contentType = line.value;
                    }
                    // A HeaderLine object contains a list of additional lines.
                    // Locale.ROOT keeps the key identical to the one built in
                    // getHeader() regardless of the JVM default locale.
                    String key = line.name.toLowerCase(Locale.ROOT);
                    HeaderLine firstLine = headerMap.get(key);
                    if (firstLine == null) {
                        headerMap.put(key, line);
                    } else {
                        firstLine.lines.add(line);
                    }
                    headerList.add(line);
                    break;
                case HeaderLine.HLT_LINE:
                    // An empty line terminates the header. Non-empty plain
                    // lines are only reflected through bfErrors.
                    if (line.line.length() == 0) {
                        bLoop = false;
                    }
                    break;
                }
            } else {
                // Accept truncated http header if it is the length of the payload.
                if ((bfErrors & HeaderLineReader.E_BIT_EOF) == 0
                        || in.record.size() != payloadLength) {
                    bValidHttpHeader = false;
                }
                bLoop = false;
            }
        }
        HeaderLineReader.report_error(bfErrors, diagnostics);
        if (bValidHttpHeader) {
            // What remains after the recorded header bytes is the payload.
            this.payloadLength = payloadLength - in.record.size();
        }
        return bValidHttpHeader;
    }

    /**
     * Checks a HTTP Response Status-Line for validity.
     * As a side effect {@link #httpVersion}, {@link #httpVersionMajor},
     * {@link #httpVersionMinor}, {@link #statusCodeStr}, {@link #statusCode}
     * and {@link #reasonPhrase} are populated with whatever could be parsed,
     * also when the line turns out to be invalid.
     * @param statusLine the Status-Line of the HTTP Response
     * @return true/false based on whether the Status-Line is valid or not.
     */
    protected boolean isHttpStatusLineValid(String statusLine) {
        if (statusLine == null || statusLine.length() == 0) {
            return false;
        }
        int versionEnd = statusLine.indexOf(' ');
        if (versionEnd == -1) {
            // No separator at all, the whole line is kept as the version.
            httpVersion = statusLine;
            return false;
        }
        if (versionEnd == 0) {
            // Line starts with a space, there is no version token.
            return false;
        }
        if (!isHttpVersionValid(statusLine.substring(0, versionEnd))) {
            return false;
        }
        int codeStart = versionEnd + 1;
        int codeEnd = statusLine.indexOf(' ', codeStart);
        if (codeEnd == -1) {
            // The Reason-Phrase is optional, the code runs to end of line.
            codeEnd = statusLine.length();
        }
        if (codeEnd <= codeStart) {
            // Empty Status-Code.
            return false;
        }
        statusCodeStr = statusLine.substring(codeStart, codeEnd);
        try {
            statusCode = Integer.parseInt(statusCodeStr);
        } catch (NumberFormatException ignored) {
            // A non numeric Status-Code makes the line invalid,
            // statusCodeStr keeps the offending text.
            return false;
        }
        if (statusCode < 100 || statusCode > 999) {
            return false;
        }
        if (codeEnd < statusLine.length()) {
            reasonPhrase = statusLine.substring(codeEnd + 1);
        }
        return true;
    }

    /**
     * Checks a HTTP-Version string for validity.
     * {@link #httpVersion} is always set to the argument. The major and
     * minor numbers are set when they can be parsed. The bare string "HTTP"
     * is leniently accepted as version 1.0.
     * @param versionString the HTTP-Version of the HTTP header, not null
     * @return true/false based on whether the HTTP-Version is valid or not.
     */
    protected boolean isHttpVersionValid(String versionString) {
        httpVersion = versionString;
        if (!versionString.startsWith(HTTP_VERSION_SUFFIX)) {
            if (versionString.equals("HTTP")) {
                // IA-local change: be lenient with broken version field.
                // See https://webarchive.jira.com/browse/WWM-160
                httpVersionMajor = 1;
                httpVersionMinor = 0;
                return true;
            }
            return false;
        }
        int prefixLength = HTTP_VERSION_SUFFIX.length();
        int dotIdx = versionString.indexOf('.', prefixLength);
        if (dotIdx <= 0) {
            return false;
        }
        boolean bIsHttpVersionValid = true;
        // Both numbers are parsed independently so that a broken major
        // does not prevent the minor from being recorded.
        try {
            httpVersionMajor = Integer.parseInt(versionString.substring(prefixLength, dotIdx));
            if (httpVersionMajor < 0) {
                bIsHttpVersionValid = false;
            }
        } catch (NumberFormatException ignored) {
            // Non numeric major version.
            bIsHttpVersionValid = false;
        }
        try {
            httpVersionMinor = Integer.parseInt(versionString.substring(dotIdx + 1));
            if (httpVersionMinor < 0) {
                bIsHttpVersionValid = false;
            }
        } catch (NumberFormatException ignored) {
            // Non numeric minor version.
            bIsHttpVersionValid = false;
        }
        return bIsHttpVersionValid;
    }

    /**
     * Checks a HTTP Request Request-Line for validity.
     * As a side effect {@link #method}, {@link #requestUri} and the
     * HTTP-Version fields are populated with whatever could be parsed,
     * also when the line turns out to be invalid.
     * @param requestLine the Request-Line of the HTTP Request
     * @return true/false based on whether the Request-Line is valid or not.
     */
    protected boolean isHttpRequestLineValid(String requestLine) {
        if (requestLine == null || requestLine.length() == 0) {
            return false;
        }
        int methodEnd = requestLine.indexOf(' ');
        if (methodEnd == -1) {
            // No separator at all, the whole line is kept as the method.
            method = requestLine;
            return false;
        }
        if (methodEnd == 0) {
            // Line starts with a space, there is no method token.
            return false;
        }
        method = requestLine.substring(0, methodEnd);

        int uriStart = methodEnd + 1;
        int uriEnd = requestLine.indexOf(' ', uriStart);
        if (uriEnd == -1) {
            // Request-URI present but the HTTP-Version is missing.
            requestUri = requestLine.substring(uriStart);
            return false;
        }
        if (uriEnd == uriStart) {
            // Two consecutive spaces, empty Request-URI.
            return false;
        }
        requestUri = requestLine.substring(uriStart, uriEnd);
        return isHttpVersionValid(requestLine.substring(uriEnd + 1));
    }

    /**
     * Returns the parsed header fields in the order they were read.
     * @return unmodifiable view of the list of header fields
     */
    @Override
    public List<HeaderLine> getHeaderList() {
        return Collections.unmodifiableList(headerList);
    }

    /**
     * Looks up a header field by name, ignoring case.
     * @param field header field name
     * @return the first header line read with that name, or null if
     * <code>field</code> is null, empty or no such header was read
     */
    @Override
    public HeaderLine getHeader(String field) {
        if (field != null && field.length() > 0) {
            // Same locale independent folding as used when filling the map.
            return headerMap.get(field.toLowerCase(Locale.ROOT));
        }
        return null;
    }

    /**
     * Result-Code string getter
     * @return the ResultCode
     */
    public String getProtocolStatusCodeStr() {
        return statusCodeStr;
    }

    /**
     * Result-Code integer getter
     * @return the ResultCode
     */
    public Integer getProtocolStatusCode() {
        return statusCode;
    }

    /**
     * protocolVersion getter
     * @return the protocolVersion
     */
    public String getProtocolVersion() {
        return httpVersion;
    }

    /**
     * Content-Type getter
     * @return the Content-Type
     */
    public String getProtocolContentType() {
        return contentType;
    }

    /**
     * Builds a short debug representation containing the status code,
     * HTTP version and content type, each only when it is not null.
     * @return debug string
     */
    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder(256);
        builder.append("\nHttpHeader : [\n");
        if (statusCode != null) {
            builder.append(", HttpResultCode: ")
                .append(statusCode);
        }
        if (httpVersion != null) {
            builder.append(", HttpProtocolVersion: ")
                .append(httpVersion);
        }
        if (contentType != null) {
            builder.append(", HttpContentType: ")
                .append(contentType);
        }
        builder.append("]\n");
        return builder.toString();
    }

}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy