// net.lightbody.bmp.util.BrowserMobHttpUtil (Maven / Gradle / Ivy)
// The newest version!
package net.lightbody.bmp.util;
import com.google.common.io.BaseEncoding;
import com.google.common.net.HostAndPort;
import com.google.common.net.MediaType;
import io.netty.buffer.ByteBuf;
import io.netty.handler.codec.http.HttpHeaders;
import io.netty.handler.codec.http.HttpRequest;
import io.netty.handler.codec.http.HttpResponse;
import net.lightbody.bmp.exception.DecompressionException;
import net.lightbody.bmp.exception.UnsupportedCharsetException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import java.util.zip.InflaterInputStream;
/**
* Utility class with static methods for processing HTTP requests and responses.
*/
public class BrowserMobHttpUtil {
private static final Logger log = LoggerFactory.getLogger(BrowserMobHttpUtil.class);
/**
* Default MIME content type if no Content-Type header is present. According to the HTTP 1.1 spec, section 7.2.1:
*
* Any HTTP/1.1 message containing an entity-body SHOULD include a Content-Type header field defining the media
* type of that body. If and only if the media type is not given by a Content-Type field, the recipient MAY
* attempt to guess the media type via inspection of its content and/or the name extension(s) of the URI used to
* identify the resource. If the media type remains unknown, the recipient SHOULD treat it as
* type "application/octet-stream".
*
*/
public static final String UNKNOWN_CONTENT_TYPE = "application/octet-stream";
/**
* The default charset when the Content-Type header does not specify a charset. According to RFC 7231 Appendix B:
*
* The default charset of ISO-8859-1 for text media types has been
* removed; the default is now whatever the media type definition says.
* Likewise, special treatment of ISO-8859-1 has been removed from the
* Accept-Charset header field.
*
*
* Technically, we would have to determine the charset on a per-content-type basis, but generally speaking, UTF-8 is a
* pretty safe default. (NOTE: In the previous HTTP/1.1 spec, section 3.7.1, the default charset was defined as ISO-8859-1.)
*/
public static final Charset DEFAULT_HTTP_CHARSET = StandardCharsets.UTF_8;
/**
* Buffer size when decompressing content.
*/
public static final int DECOMPRESS_BUFFER_SIZE = 16192;
/**
 * Returns the combined size of the header lines in the specified headers. Each header contributes the length of
 * its name, plus the length of its value, plus 4 (2 for the ": " separator and 2 for the CRLF ending the line).
 *
 * Lengths are measured with {@link String#length()}, i.e. in characters; this equals the byte count only when the
 * header names and values consist of single-byte characters. The additional CRLF that terminates the whole header
 * block is NOT included in the returned value.
 *
 * @param headers headers to size
 * @return combined length of all header lines, or 0 if there are no headers
 */
public static long getHeaderSize(HttpHeaders headers) {
    long headersSize = 0;
    // the entry type must be parameterized: with a raw Map.Entry, getKey()/getValue() are Objects without length()
    for (Map.Entry<String, String> header : headers.entries()) {
        // +2 for ': ', +2 for new line
        headersSize += header.getKey().length() + header.getValue().length() + 4;
    }
    return headersSize;
}
/**
 * Inflates a gzip-compressed byte array and returns the uncompressed bytes.
 *
 * @param fullMessage gzipped bytes to decompress
 * @return the decompressed bytes
 * @throws DecompressionException thrown if the fullMessage cannot be read or decompressed for any reason
 */
public static byte[] decompressContents(byte[] fullMessage) throws DecompressionException {
    InflaterInputStream gzipStream = null;
    try {
        gzipStream = new GZIPInputStream(new ByteArrayInputStream(fullMessage));

        // the compressed length is only the initial capacity; the output stream grows as needed
        ByteArrayOutputStream inflated = new ByteArrayOutputStream(fullMessage.length);
        byte[] chunk = new byte[DECOMPRESS_BUFFER_SIZE];

        // read() returns -1 once the end of the compressed stream has been reached
        for (int count = gzipStream.read(chunk); count >= 0; count = gzipStream.read(chunk)) {
            inflated.write(chunk, 0, count);
        }

        return inflated.toByteArray();
    } catch (IOException e) {
        throw new DecompressionException("Unable to decompress response", e);
    } finally {
        // a failure to close is deliberately only logged, so it never masks the result or the original error
        if (gzipStream != null) {
            try {
                gzipStream.close();
            } catch (IOException e) {
                log.warn("Unable to close gzip stream", e);
            }
        }
    }
}
/**
 * Returns true if the content type string indicates textual content. Currently these are any Content-Types that
 * start with one of the following:
 *
 * text/
 * application/x-javascript
 * application/javascript
 * application/json
 * application/xml
 * application/xhtml+xml
 *
 * The comparison ignores case, since media type and subtype tokens are case-insensitive (RFC 7231, section
 * 3.1.1.1); for example, "Text/HTML; charset=UTF-8" is considered textual.
 *
 * @param contentType contentType string to parse; may be null
 * @return true if the content type is textual; false otherwise, including when contentType is null
 */
public static boolean hasTextualContent(String contentType) {
    if (contentType == null) {
        return false;
    }

    String[] textualPrefixes = {
            "text/",
            "application/x-javascript",
            "application/javascript",
            "application/json",
            "application/xml",
            "application/xhtml+xml"
    };

    for (String prefix : textualPrefixes) {
        // case-insensitive startsWith; regionMatches returns false when contentType is shorter than the prefix
        if (contentType.regionMatches(true, 0, prefix, 0, prefix.length())) {
            return true;
        }
    }

    return false;
}
/**
 * Extracts all readable bytes from the ByteBuf as a byte array. The ByteBuf's reader index is left where it was,
 * and any reader mark previously set on the buffer is left untouched.
 *
 * @param content ByteBuf to read
 * @return byte array containing the readable bytes from the ByteBuf
 */
public static byte[] extractReadableBytes(ByteBuf content) {
    byte[] binaryContent = new byte[content.readableBytes()];

    // absolute-index copy starting at the current reader index: unlike markReaderIndex()/readBytes()/
    // resetReaderIndex(), this neither moves the reader index nor overwrites a mark the caller may rely on
    content.getBytes(content.readerIndex(), binaryContent);

    return binaryContent;
}
/**
 * Decodes the byte array into a String using the specified charset, which is required.
 *
 * @param content bytes to convert to a String
 * @param charset the character set of the content; must not be null
 * @return String containing the converted content
 * @throws IllegalArgumentException if charset is null
 */
public static String getContentAsString(byte[] content, Charset charset) {
    if (charset != null) {
        return new String(content, charset);
    }

    throw new IllegalArgumentException("Charset cannot be null");
}
/**
 * Reads the charset directly from the Content-Type header string. If the Content-Type header does not contain a charset,
 * is malformed or unparsable (including an illegal charset name or more than one charset parameter), or if the
 * header is null or empty, this method returns null.
 *
 * @param contentTypeHeader the Content-Type header string; can be null or empty
 * @return the character set indicated in the contentTypeHeader, or null if the charset is not present or is not parsable
 * @throws UnsupportedCharsetException if there is a charset specified in the content-type header, but it is not supported on this platform
 */
public static Charset readCharsetInContentTypeHeader(String contentTypeHeader) throws UnsupportedCharsetException {
    if (contentTypeHeader == null || contentTypeHeader.isEmpty()) {
        return null;
    }

    MediaType mediaType;
    try {
        mediaType = MediaType.parse(contentTypeHeader);
    } catch (IllegalArgumentException e) {
        log.info("Unable to parse Content-Type header: {}. Content-Type header will be ignored.", contentTypeHeader, e);
        return null;
    }

    try {
        return mediaType.charset().orNull();
    } catch (java.nio.charset.UnsupportedCharsetException e) {
        // a syntactically valid charset that this JVM does not provide: report it via the project's checked exception
        throw new UnsupportedCharsetException(e);
    } catch (java.nio.charset.IllegalCharsetNameException | IllegalStateException e) {
        // MediaType.charset() also rejects illegal charset names and multiple charset parameters; both are
        // malformed headers, which this method reports as "no charset" instead of leaking a runtime exception
        log.info("Unable to read charset from Content-Type header: {}. Charset will be ignored.", contentTypeHeader, e);
        return null;
    }
}
/**
 * Retrieves the raw path + query string from the specified request, i.e. exactly as it appears in the request,
 * with any percent-encoding left in place. The returned path will not include the scheme, host, or port.
 *
 * @param httpRequest HTTP request
 * @return the raw path + query string from the HTTP request
 * @throws URISyntaxException if the path could not be parsed (due to invalid characters in the URI, etc.)
 */
public static String getRawPathAndParamsFromRequest(HttpRequest httpRequest) throws URISyntaxException {
    String requestUri = httpRequest.getUri();

    if (!HttpUtil.startsWithHttpOrHttps(requestUri)) {
        // the URI is already just a path; parse it anyway (and discard the result) so that invalid URIs are
        // rejected the same way whether or not they contain a scheme
        new URI(requestUri);
        return requestUri;
    }

    // the request line carries a full URI (scheme, host, etc.), so strip away the non-path components
    return getRawPathAndParamsFromUri(requestUri);
}
/**
 * Retrieves the raw (still percent-encoded) path and query parameters from the URI, stripping out the scheme,
 * host, and port. The path will always begin with a leading '/'. For example,
 * 'http://example.com/some/resource?param%20name=param%20value' would return
 * '/some/resource?param%20name=param%20value'.
 *
 * If the URI has no path component (for example 'http://example.com' or 'http://example.com?a=b'), the path is
 * treated as '/', so those examples return '/' and '/?a=b' respectively.
 *
 * @param uriString the URI to parse, containing a scheme, host, port, path, and query parameters
 * @return the raw path and query parameters from the URI
 * @throws URISyntaxException if the specified URI is invalid or cannot be parsed
 */
public static String getRawPathAndParamsFromUri(String uriString) throws URISyntaxException {
    URI uri = new URI(uriString);

    String path = uri.getRawPath();
    // getRawPath() is "" when the URI stops at the authority and null for opaque URIs; an HTTP request target
    // needs at least "/", and concatenating a null path would otherwise yield the literal text "null"
    if (path == null || path.isEmpty()) {
        path = "/";
    }

    String query = uri.getRawQuery();
    if (query != null) {
        return path + '?' + query;
    } else {
        return path;
    }
}
/**
 * Returns true if the specified response is an HTTP redirect response, i.e. a 300, 301, 302, 303, 307, or 308.
 * Other 3xx codes (such as 304 Not Modified) are not treated as redirects.
 *
 * @param httpResponse HTTP response
 * @return true if the response is a redirect, otherwise false
 */
public static boolean isRedirect(HttpResponse httpResponse) {
    switch (httpResponse.getStatus().code()) {
        case 300:
        case 301:
        case 302:
        case 303:
        case 307:
        case 308: // Permanent Redirect (RFC 7538), the method-preserving counterpart of 301
            return true;

        default:
            return false;
    }
}
/**
 * Strips the port from a host+port string, but only when that port equals the specified port. If the string has
 * no port, or has a different port, it is returned unaltered. For example, if hostWithPort is the string
 * {@code www.website.com:443} and portNumber is 443, this method will return {@code www.website.com}.
 *
 * Note: The hostWithPort string is not a URI and should not contain a scheme or resource. This method does
 * not attempt to validate the specified host; it might throw IllegalArgumentException if there was a problem
 * parsing the hostname, but makes no guarantees. In general, it should be validated externally, if necessary.
 *
 * @param hostWithPort string containing a hostname and optional port
 * @param portNumber port to remove from the string
 * @return string with the specified port removed, or the original string if it did not contain the portNumber
 */
public static String removeMatchingPort(String hostWithPort, int portNumber) {
    HostAndPort hostAndPort = HostAndPort.fromString(hostWithPort);

    boolean portMatches = hostAndPort.hasPort() && hostAndPort.getPort() == portNumber;
    if (!portMatches) {
        return hostWithPort;
    }

    // HostAndPort.getHost() strips brackets from ipv6 addresses, so reparse using fromHost
    String hostOnly = hostAndPort.getHost();
    return HostAndPort.fromHost(hostOnly).toString();
}
/**
 * Builds the credentials token used for Basic Authorization on HTTP requests or for upstream proxy
 * authorization: the string "username:password", encoded as base64.
 *
 * @param username username to encode
 * @param password password to encode
 * @return a base-64 encoded string containing username:password
 */
public static String base64EncodeBasicCredentials(String username, String password) {
    // using UTF-8, which is the modern de facto standard, and which retains compatibility with US_ASCII for ASCII characters,
    // as required by RFC 7617, section 3: http://tools.ietf.org/html/rfc7617#section-3
    byte[] utf8Credentials = (username + ':' + password).getBytes(StandardCharsets.UTF_8);

    return BaseEncoding.base64().encode(utf8Credentials);
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy