All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.mike10004.vhs.BasicHeuristic Maven / Gradle / Ivy

There is a newer version: 0.32
Show newest version
package io.github.mike10004.vhs;

import com.google.common.base.MoreObjects;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableMultiset;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multiset;
import com.google.common.io.ByteSource;
import com.google.common.net.HttpHeaders;
import com.google.common.net.MediaType;
import io.github.mike10004.vhs.harbridge.FormDataPart;
import io.github.mike10004.vhs.harbridge.HttpMethod;
import io.github.mike10004.vhs.harbridge.ParsedRequest;
import io.github.mike10004.vhs.repackaged.org.apache.http.client.utils.URLEncodedUtils;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Pattern;

import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;

/**
 * Implementation of a heuristic that compares request headers, parameters, and bodies.
 * This is inspired by the JavaScript implementation of https://github.com/Stuk/server-replay.
 */
public class BasicHeuristic implements Heuristic {

    /**
     * Default character encoding for {@code x-www-form-urlencoded}.
     *
     * 

Browsermob has this note about default charsets: *

* The default charset when the Content-Type header does not specify a charset. According to RFC 7231 Appendix B: *
     *     The default charset of ISO-8859-1 for text media types has been
     *     removed; the default is now whatever the media type definition says.
     *     Likewise, special treatment of ISO-8859-1 has been removed from the
     *     Accept-Charset header field.
     * 
* * Technically, we would have to determine the charset on a per-content-type basis, but generally speaking, UTF-8 is a * pretty safe default. (NOTE: In the previous HTTP/1.1 spec, section 3.7.1, the default charset was defined as ISO-8859-1.) *
* * The media type {@code x-www-form-urlencoded} is not technically a text media type, but URLs * are supposed to be ASCII safe, and the media type definition https://www.iana.org/assignments/media-types/application/x-www-form-urlencoded * specifies the "7bit" encoding consideration. We default to UTF-8 because it is a superset of * ASCII and supports more characters than ISO-8859-1. */ private static final Charset DEFAULT_FORM_DATA_CHARSET = StandardCharsets.UTF_8; public static final int DEFAULT_THRESHOLD_EXCLUSIVE = 0; static final int DEFAULT_INCREMENT = 100; private final int increment; private final int halfIncrement; private final FormDataDecoder formDataDecoder; public BasicHeuristic() { this(DEFAULT_INCREMENT, new RepackagedHttpClientFormDataDecoder()); } @SuppressWarnings("unused") public BasicHeuristic(int increment) { this(increment, new RepackagedHttpClientFormDataDecoder()); } public BasicHeuristic(int increment, FormDataDecoder formDataDecoder) { this.increment = increment; checkArgument(increment % 2 == 0, "increment must be even: %s", increment); this.halfIncrement = this.increment / 2; this.formDataDecoder = requireNonNull(formDataDecoder); } interface FormDataDecoder { Multimap> decode(ByteSource body, MediaType contentType) throws IOException; } static class RepackagedHttpClientFormDataDecoder implements FormDataDecoder { @Override public Multimap> decode(ByteSource body, MediaType mediaType) throws IOException { /* * Content-type magic here is a little weird. I *think* that form data should * always be US-ASCII, just like query parameters are supposed to be. The request * body ought to be accompanied by a content-type header that specifies the charset, * but if the charset is not specified, what do we do? Assume ISO-8859-1 as we do * for other HTTP-transported data? Or assume UTF-8 as we frequently do with query * parameters? 
Currently, we're defaulting to ISO-8859-1, because that would seem * to be more in line with the HTTP spec, and I have no strong opinion. */ Charset charset = mediaType.charset().or(DEFAULT_FORM_DATA_CHARSET); String queryString = body.asCharSource(charset).read(); // It's possible that the charset for decoding parameters, specified as an argument // here, is not necessarily the same as the content-type charset List> params = URLEncodedUtils.parse(queryString, charset); if (params != null) { Multimap> mm = ArrayListMultimap.create(); params.stream() .map(p -> new SimpleImmutableEntry<>(p.getKey(), Optional.ofNullable(p.getValue()))) .forEach(p -> mm.put(p.getKey(), p.getValue())); return mm; } else { return ImmutableMultimap.of(); } } } protected int rateQuerySameness(@Nullable Multimap> entryQuery, @Nullable Multimap> requestQuery) { int points = 0; if (entryQuery == null && requestQuery == null) { points += increment; } else { //noinspection ConstantConditions entryQuery = MoreObjects.firstNonNull(entryQuery, ImmutableMultimap.of()); //noinspection ConstantConditions requestQuery = MoreObjects.firstNonNull(requestQuery, ImmutableMultimap.of()); for (String name : requestQuery.keySet()) { if (!entryQuery.containsKey(name)) { points -= halfIncrement; } else { Multiset> entryParamValues = stripProtocolFromOptionals(entryQuery.get(name)); Multiset> requestParamValues = stripProtocolFromOptionals(requestQuery.get(name)); if (entryParamValues.equals(requestParamValues)) { points += increment; } } } for (String name : entryQuery.keySet()) { if (!requestQuery.containsKey(name)) { points -= halfIncrement; } } } return Math.max(0, points); } @Override public int rate(ParsedRequest entryRequest, ParsedRequest request) { // String name; URI requestUrl = request.url; Multimap requestHeaders = request.indexedHeaders; // method, host and pathname must match if (requestUrl == null) { return 0; } if (entryRequest.method != request.method) { return 0; } if 
(!entryRequest.url.getHost().equals(requestUrl.getHost())) { return 0; } if (!entryRequest.url.getPath().equals(requestUrl.getPath())) { return 0; } int points = increment; // One point for matching above requirements points += rateQuerySameness(entryRequest.query, request.query); // each header Multimap entryHeaders = entryRequest.indexedHeaders; for (String name : requestHeaders.keySet()) { if (entryHeaders.containsKey(name)) { points += stripProtocolFromStrings(entryHeaders.get(name)).equals(stripProtocolFromStrings(requestHeaders.get(name))) ? increment : 0; } // TODO handle missing headers and adjust score appropriately } if (request.method == HttpMethod.POST || request.method == HttpMethod.PUT) { if (!request.isBodyPresent() && !entryRequest.isBodyPresent()) { points += halfIncrement; } else if (request.isBodyPresent() && entryRequest.isBodyPresent()) { points += rateBodySameness(entryRequest, request); } } return points; } protected int rateBodySameness(ParsedRequest entryRequest, ParsedRequest request) { ByteSource requestBody = getBodyAsByteSource(request); ByteSource entryBody = getBodyAsByteSource(entryRequest); @Nullable String entryContentType = entryRequest.getFirstHeaderValue(HttpHeaders.CONTENT_TYPE); @Nullable String requestContentType = request.getFirstHeaderValue(HttpHeaders.CONTENT_TYPE); return rateBodySameness(entryBody, entryContentType, requestBody, requestContentType); } @Nullable private Multimap> parseIfWwwFormData(ByteSource body, @Nullable String contentType) { if (contentType != null) { try { MediaType mediaType = MediaType.parse(contentType); if (MediaType.FORM_DATA.withoutParameters().equals(mediaType.withoutParameters())) { return formDataDecoder.decode(body, mediaType); } } catch (RuntimeException | IOException ignore) { LoggerFactory.getLogger(getClass()).debug("failed to decode body as form data params"); } } return null; } protected int rateBodySameness(ByteSource entryBody, @Nullable String entryContentType, ByteSource 
requestBody, @Nullable String requestContentType) { if (entryBody.sizeIfKnown().equals(requestBody.sizeIfKnown())) { if (entryBody.sizeIfKnown().isPresent() && entryBody.sizeIfKnown().get().equals(0L)) { return increment; } } // TODO examine content types here and return 0 score early if they're very different @Nullable Multimap> entryParams = parseIfWwwFormData(entryBody, entryContentType); if (entryParams != null) { @Nullable Multimap> requestParams = parseIfWwwFormData(requestBody, requestContentType); if (requestParams != null) { return rateQuerySameness(entryParams, requestParams); } } @Nullable Multiset entryFormData = parseIfMultipartFormData(entryBody, entryContentType); if (entryFormData != null) { @Nullable Multiset requestFormData = parseIfMultipartFormData(requestBody, requestContentType); if (requestFormData != null) { return rateFormDataSameness(entryFormData, requestFormData); } } try { return entryBody.contentEquals(requestBody) ? increment : 0; } catch (IOException e) { return 0; } } @Nullable @SuppressWarnings("unused") protected Multiset parseIfMultipartFormData(ByteSource body, @Nullable String contentType) { // TODO implement multipart/form-data parsing and comparison // and then remove "unused" warning suppression return null; } @SuppressWarnings("unused") protected int rateFormDataSameness(Multiset entryFormData, Multiset requestFormData) { // TODO implement rating form-data sameness // and then remove "unused" warning suppression return 0; } private static ByteSource getBodyAsByteSource(ParsedRequest request) { return new ByteSource() { @Override public InputStream openStream() throws IOException { return request.openBodyStream(); } }; } private static Multiset stripProtocolFromStrings(Collection strings) { return strings.stream().map(string -> string.replaceAll("^https?", "")).collect(ImmutableMultiset.toImmutableMultiset()); } private static Multiset> stripProtocolFromOptionals(Collection> strings) { return strings.stream() .map(stringOpt 
-> { if (stringOpt.isPresent()) { String string = stringOpt.get(); return Optional.of(string.replaceAll("^https?", "")); } else { return stringOpt; } }).collect(ImmutableMultiset.toImmutableMultiset()); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy