All downloads are free. The search and download features use the official Maven repository.

com.gargoylesoftware.htmlunit.Cache Maven / Gradle / Ivy

There is a newer version: 2.70.0
Show newest version
/*
 * Copyright (c) 2002-2018 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gargoylesoftware.htmlunit;

import java.io.Serializable;
import java.net.URL;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.client.utils.DateUtils;
import org.w3c.dom.css.CSSStyleSheet;

import com.gargoylesoftware.htmlunit.util.HeaderUtils;
import com.gargoylesoftware.htmlunit.util.UrlUtils;

/**
 * 

Simple cache implementation which caches compiled JavaScript files and parsed CSS snippets. Caching * compiled JavaScript files avoids unnecessary web requests and additional compilation overhead, while * caching parsed CSS snippets avoids very expensive CSS parsing.

* * @author Marc Guillemot * @author Daniel Gredler * @author Ahmed Ashour * @author Anton Demydenko */ public class Cache implements Serializable { /** The maximum size of the cache. */ private int maxSize_ = 40; private static final Pattern DATE_HEADER_PATTERN = Pattern.compile("-?\\d+"); private static final long DELAY = 10 * org.apache.commons.lang3.time.DateUtils.MILLIS_PER_MINUTE; /** * The map which holds the cached responses. Note that when keying on URLs, we key on the string version * of the URLs, rather than on the URLs themselves. This is done for performance, because a) the * {@link java.net.URL#hashCode()} method is synchronized, and b) the {@link java.net.URL#hashCode()} * method triggers DNS lookups of the URL hostnames' IPs. As of this writing, the HtmlUnit unit tests * run ~20% faster whey keying on strings rather than on {@link java.net.URL} instances. */ private final Map entries_ = Collections.synchronizedMap(new HashMap(maxSize_)); /** * A cache entry. */ private static class Entry implements Comparable, Serializable { private final String key_; private WebResponse response_; private Object value_; private long lastAccess_; private long createdAt_; Entry(final String key, final WebResponse response, final Object value) { key_ = key; response_ = response; value_ = value; createdAt_ = System.currentTimeMillis(); lastAccess_ = createdAt_; } /** * {@inheritDoc} */ @Override public int compareTo(final Entry other) { if (lastAccess_ < other.lastAccess_) { return -1; } if (lastAccess_ == other.lastAccess_) { return 0; } return 1; } /** * {@inheritDoc} */ @Override public boolean equals(final Object obj) { return obj instanceof Entry && lastAccess_ == ((Entry) obj).lastAccess_; } /** * {@inheritDoc} */ @Override public int hashCode() { return ((Long) lastAccess_).hashCode(); } /** * Updates the last access date. 
*/ public void touch() { lastAccess_ = System.currentTimeMillis(); } } /** * Caches the specified object, if the corresponding request and response objects indicate * that it is cacheable. * * @param request the request corresponding to the specified compiled script * @param response the response corresponding to the specified compiled script * @param toCache the object that is to be cached, if possible (may be for instance a compiled script or * simply a WebResponse) * @return whether the response was cached or not */ public boolean cacheIfPossible(final WebRequest request, final WebResponse response, final Object toCache) { if (isCacheable(request, response)) { final URL url = response.getWebRequest().getUrl(); if (url == null) { return false; } final Entry entry = new Entry(UrlUtils.normalize(url), response, toCache); entries_.put(entry.key_, entry); deleteOverflow(); return true; } return false; } /** * Caches the parsed version of the specified CSS snippet. We key the cache based on CSS snippets (rather * than requests and responses as is done above) because a) this allows us to cache inline CSS, b) CSS is * extremely expensive to parse, so we want to avoid it as much as possible, c) CSS files aren't usually * nearly as large as JavaScript files, so memory bloat won't be too bad, and d) caching on requests and * responses requires checking dynamicity (see {@link #isCacheableContent(WebResponse)}), and headers often * aren't set up correctly, disallowing caching when in fact it should be allowed. * * @param css the CSS snippet from which styleSheet is derived * @param styleSheet the parsed version of css */ public void cache(final String css, final CSSStyleSheet styleSheet) { final Entry entry = new Entry(css, null, styleSheet); entries_.put(entry.key_, entry); deleteOverflow(); } /** * Truncates the cache to the maximal number of entries. 
*/ protected void deleteOverflow() { synchronized (entries_) { while (entries_.size() > maxSize_) { final Entry oldestEntry = Collections.min(entries_.values()); entries_.remove(oldestEntry.key_); if (oldestEntry.response_ != null) { oldestEntry.response_.cleanUp(); } } } } /** * Determines if the specified response can be cached. * * @param request the performed request * @param response the received response * @return {@code true} if the response can be cached */ protected boolean isCacheable(final WebRequest request, final WebResponse response) { return HttpMethod.GET == response.getWebRequest().getHttpMethod() && isCacheableContent(response); } /** *

Perform prior validation for 'no-store' directive in Cache-Control header.

* *

Tries to guess if the content is dynamic or not.

* *

"Since origin servers do not always provide explicit expiration times, HTTP caches typically * assign heuristic expiration times, employing algorithms that use other header values (such as the * Last-Modified time) to estimate a plausible expiration time".

* *

The current implementation considers as dynamic content everything except responses with a * Last-Modified header with a date older than 10 minutes or with an Expires header * specifying expiration in more than 10 minutes.

* * @see RFC 7234 * @see RFC 2616 * @param response the response to examine * @return {@code true} if the response should be considered as cacheable */ protected boolean isCacheableContent(final WebResponse response) { if (HeaderUtils.containsNoStore(response)) { return false; } final Date lastModified = parseDateHeader(response, HttpHeader.LAST_MODIFIED); final Date expires = parseDateHeader(response, HttpHeader.EXPIRES); final long now = getCurrentTimestamp(); return expires != null && (expires.getTime() - now > DELAY) || (expires == null && lastModified != null && now - lastModified.getTime() > DELAY); } /** * Gets the current time stamp. As method to allow overriding it, when simulating an other time. * @return the current time stamp */ protected long getCurrentTimestamp() { return System.currentTimeMillis(); } /** * Parses and returns the specified date header of the specified response. This method * returns {@code null} if the specified header cannot be found or cannot be parsed as a date. * * @param response the response * @param headerName the header name * @return the specified date header of the specified response */ protected Date parseDateHeader(final WebResponse response, final String headerName) { final String value = response.getResponseHeaderValue(headerName); if (value == null) { return null; } final Matcher matcher = DATE_HEADER_PATTERN.matcher(value); if (matcher.matches()) { return new Date(); } return DateUtils.parseDate(value); } /** * Returns the cached response corresponding to the specified request. If there is * no corresponding cached object, this method returns {@code null}. * *

Calculates and check if object still fresh(RFC 7234) otherwise returns {@code null}.

* @see RFC 7234 * * @param request the request whose corresponding response is sought * @return the cached response corresponding to the specified request if any */ public WebResponse getCachedResponse(final WebRequest request) { final Entry cachedEntry = getCacheEntry(request); if (cachedEntry == null) { return null; } return cachedEntry.response_; } /** * Returns the cached object corresponding to the specified request. If there is * no corresponding cached object, this method returns {@code null}. * *

Calculates and check if object still fresh(RFC 7234) otherwise returns {@code null}.

* @see RFC 7234 * * @param request the request whose corresponding cached compiled script is sought * @return the cached object corresponding to the specified request if any */ public Object getCachedObject(final WebRequest request) { final Entry cachedEntry = getCacheEntry(request); if (cachedEntry == null) { return null; } return cachedEntry.value_; } private Entry getCacheEntry(final WebRequest request) { if (HttpMethod.GET != request.getHttpMethod()) { return null; } final URL url = request.getUrl(); if (url == null) { return null; } final Entry cachedEntry = entries_.get(UrlUtils.normalize(url)); if (cachedEntry == null) { return null; } // check if object still fresh if (checkFreshness(cachedEntry.response_, cachedEntry.createdAt_)) { synchronized (entries_) { cachedEntry.touch(); } return cachedEntry; } else { entries_.remove(UrlUtils.normalize(url)); } return null; } /** *

Check freshness return value if * a) s-maxage specified * b) max-age specified * c) expired specified * otherwise return {@code null}

* * @see RFC 7234 * * @param response * @param createdAt * @return freshnessLifetime */ private boolean checkFreshness(final WebResponse response, final long createdAt) { final long now = getCurrentTimestamp(); long freshnessLifetime = 0; if (!HeaderUtils.containsPrivate(response) && HeaderUtils.containsSMaxage(response)) { // check s-maxage freshnessLifetime = HeaderUtils.sMaxage(response); } else if (HeaderUtils.containsMaxAge(response)) { // check max-age freshnessLifetime = HeaderUtils.maxAge(response); } else if (response.getResponseHeaderValue(HttpHeader.EXPIRES) != null) { final Date expires = parseDateHeader(response, HttpHeader.EXPIRES); if (expires != null) { // use the same logic as in isCacheableContent() return expires.getTime() - now > DELAY; } } else { return true; } return now - createdAt < freshnessLifetime * org.apache.commons.lang3.time.DateUtils.MILLIS_PER_SECOND; } /** * Returns the cached parsed version of the specified CSS snippet. If there is no * corresponding cached stylesheet, this method returns {@code null}. * * @param css the CSS snippet whose cached stylesheet is sought * @return the cached stylesheet corresponding to the specified CSS snippet */ public CSSStyleSheet getCachedStyleSheet(final String css) { final Entry cachedEntry = entries_.get(css); if (cachedEntry == null) { return null; } synchronized (entries_) { cachedEntry.touch(); } return (CSSStyleSheet) cachedEntry.value_; } /** * Returns the cache's maximum size. This is the maximum number of files that will * be cached. The default is 25. * * @return the cache's maximum size */ public int getMaxSize() { return maxSize_; } /** * Sets the cache's maximum size. This is the maximum number of files that will * be cached. The default is 25. 
* * @param maxSize the cache's maximum size (must be >= 0) */ public void setMaxSize(final int maxSize) { if (maxSize < 0) { throw new IllegalArgumentException("Illegal value for maxSize: " + maxSize); } maxSize_ = maxSize; deleteOverflow(); } /** * Returns the number of entries in the cache. * * @return the number of entries in the cache */ public int getSize() { return entries_.size(); } /** * Clears the cache. */ public void clear() { synchronized (entries_) { for (final Entry entry : entries_.values()) { if (entry.response_ != null) { entry.response_.cleanUp(); } } entries_.clear(); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy