// org.htmlunit.Cache - source listing taken from an artifact browser (Maven / Gradle / Ivy).
// Artifact: xlt
/*
* Copyright (c) 2002-2024 Gargoyle Software Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.htmlunit;
import java.io.Serializable;
import java.net.URL;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.htmlunit.cssparser.dom.CSSStyleSheetImpl;
import org.htmlunit.httpclient.HttpClientConverter;
import org.htmlunit.util.HeaderUtils;
import org.htmlunit.util.UrlUtils;
/**
* Simple cache implementation which caches compiled JavaScript files and parsed CSS snippets. Caching
* compiled JavaScript files avoids unnecessary web requests and additional compilation overhead, while
* caching parsed CSS snippets avoids very expensive CSS parsing.
*
* @author Marc Guillemot
* @author Daniel Gredler
* @author Ahmed Ashour
* @author Anton Demydenko
* @author Ronald Brill
* @author Ashley Frieze
*/
public class Cache implements Serializable {

    /** The maximum size of the cache (number of entries); see {@link #setMaxSize(int)}. */
    private int maxSize_ = 40;

    /** Matches header values that consist only of an (optionally negative) integer, e.g. {@code 0} or {@code -1}. */
    private static final Pattern DATE_HEADER_PATTERN = Pattern.compile("-?\\d+");

    /** Ten minutes, in milliseconds; minimum remaining time before an {@code Expires} date to count as fresh. */
    static final long DELAY = 10 * org.apache.commons.lang3.time.DateUtils.MILLIS_PER_MINUTE;

    // for taking ten percent of a number in milliseconds and converting that to the amount in seconds
    private static final double TEN_PERCENT_OF_MILLISECONDS_IN_SECONDS = 0.0001;

    /**
     * The map which holds the cached responses. Note that when keying on URLs, we key on the string version
     * of the URLs, rather than on the URLs themselves. This is done for performance, because a) the
     * {@link java.net.URL#hashCode()} method is synchronized, and b) the {@link java.net.URL#hashCode()}
     * method triggers DNS lookups of the URL hostnames' IPs. As of this writing, the HtmlUnit unit tests
     * run ~20% faster when keying on strings rather than on {@link java.net.URL} instances.
     *
     * <p>The map is wrapped by {@link Collections#synchronizedMap(Map)}; compound operations and iterations
     * additionally synchronize on the map itself.</p>
     */
    private final Map<String, Entry> entries_ = Collections.synchronizedMap(new HashMap<>(maxSize_));

    /**
     * A cache entry. Entries are ordered (and compared for equality) by their last access time, which is
     * what {@link Cache#deleteOverflow()} uses to find the least recently used entry.
     */
    private static class Entry implements Comparable<Entry>, Serializable {
        private final String key_;
        private final WebResponse response_;
        private final Object value_;
        private long lastAccess_;
        private final long createdAt_;

        /**
         * Creates a new entry; creation time and last access time are both set to the current time.
         *
         * @param key the map key (normalized URL or CSS snippet)
         * @param response the response the value was built from, may be {@code null} (CSS snippets)
         * @param value the cached object
         */
        Entry(final String key, final WebResponse response, final Object value) {
            key_ = key;
            response_ = response;
            value_ = value;
            createdAt_ = System.currentTimeMillis();
            lastAccess_ = createdAt_;
        }

        /**
         * Orders entries by last access time, oldest first.
         * {@inheritDoc}
         */
        @Override
        public int compareTo(final Entry other) {
            return Long.compare(lastAccess_, other.lastAccess_);
        }

        /**
         * Two entries are equal when their last access times are equal (consistent with
         * {@link #compareTo(Entry)}).
         * {@inheritDoc}
         */
        @Override
        public boolean equals(final Object obj) {
            return obj instanceof Entry && lastAccess_ == ((Entry) obj).lastAccess_;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public int hashCode() {
            // same value as boxing to Long and calling hashCode(), without the allocation
            return Long.hashCode(lastAccess_);
        }

        /**
         * Updates the last access date.
         */
        public void touch() {
            lastAccess_ = System.currentTimeMillis();
        }

        /**
         * Is this cached entry still fresh?
         * @param now the current time
         * @return {@code true} if can keep in the cache
         * @see Cache#isWithinCacheWindow(WebResponse, long, long)
         */
        boolean isStillFresh(final long now) {
            return Cache.isWithinCacheWindow(response_, now, createdAt_);
        }
    }

    /**
     * Decides whether a response is still fresh. The first matching rule wins:
     * <ol>
     *   <li>{@code s-maxage} is specified (and the response is not marked private)</li>
     *   <li>{@code max-age} is specified</li>
     *   <li>{@code Expires} is specified: fresh only if the expiry date is more than {@link #DELAY}
     *       ahead of {@code now}; an unparsable value means "not fresh"</li>
     *   <li>{@code Last-Modified} is specified: the freshness lifetime is 10% of the difference between
     *       download time and modification time, truncated to whole seconds</li>
     * </ol>
     * For the rules based on a lifetime, the response is fresh while {@code now - createdAt} is smaller
     * than that lifetime. Without any of these headers the lifetime is zero and the result is {@code false}.
     *
     * @see <a href="https://tools.ietf.org/html/rfc7234">RFC 7234</a>
     *
     * @param response {@link WebResponse}
     * @param now the current time
     * @param createdAt when the request was downloaded
     * @return true if still fresh
     */
    static boolean isWithinCacheWindow(final WebResponse response, final long now, final long createdAt) {
        // lifetime in seconds; converted to milliseconds in the final comparison
        long freshnessLifetime = 0;
        if (!HeaderUtils.containsPrivate(response) && HeaderUtils.containsSMaxage(response)) {
            // check s-maxage
            freshnessLifetime = HeaderUtils.sMaxage(response);
        }
        else if (HeaderUtils.containsMaxAge(response)) {
            // check max-age
            freshnessLifetime = HeaderUtils.maxAge(response);
        }
        else if (response.getResponseHeaderValue(HttpHeader.EXPIRES) != null) {
            final Date expires = parseDateHeader(response, HttpHeader.EXPIRES);
            if (expires != null) {
                // use the same logic as in isCacheableContent()
                return expires.getTime() - now > DELAY;
            }
            // unparsable Expires header: lifetime stays 0, Last-Modified is not consulted
        }
        else if (response.getResponseHeaderValue(HttpHeader.LAST_MODIFIED) != null) {
            final Date lastModified = parseDateHeader(response, HttpHeader.LAST_MODIFIED);
            if (lastModified != null) {
                freshnessLifetime = (long) ((createdAt - lastModified.getTime())
                        * TEN_PERCENT_OF_MILLISECONDS_IN_SECONDS);
            }
        }
        return now - createdAt < freshnessLifetime * org.apache.commons.lang3.time.DateUtils.MILLIS_PER_SECOND;
    }

    /**
     * Caches the specified object, if the corresponding request and response objects indicate
     * that it is cacheable.
     *
     * @param request the request corresponding to the specified compiled script
     * @param response the response corresponding to the specified compiled script
     * @param toCache the object that is to be cached, if possible (may be for instance a compiled script or
     *        simply a WebResponse)
     * @return whether the response was cached or not
     */
    public boolean cacheIfPossible(final WebRequest request, final WebResponse response, final Object toCache) {
        if (isCacheable(request, response)) {
            final URL url = request.getUrl();
            if (url == null) {
                return false;
            }

            final Entry entry = new Entry(UrlUtils.normalize(url), response, toCache);
            entries_.put(entry.key_, entry);
            deleteOverflow();
            return true;
        }

        return false;
    }

    /**
     * Caches the parsed version of the specified CSS snippet. We key the cache based on CSS snippets (rather
     * than requests and responses as is done above) because a) this allows us to cache inline CSS, b) CSS is
     * extremely expensive to parse, so we want to avoid it as much as possible, c) CSS files aren't usually
     * nearly as large as JavaScript files, so memory bloat won't be too bad, and d) caching on requests and
     * responses requires checking dynamically (see {@link #isCacheableContent(WebResponse)}), and headers often
     * aren't set up correctly, disallowing caching when in fact it should be allowed.
     *
     * @param css the CSS snippet from which {@code styleSheet} is derived
     * @param styleSheet the parsed version of {@code css}
     */
    public void cache(final String css, final CSSStyleSheetImpl styleSheet) {
        // no response is associated with a CSS snippet
        final Entry entry = new Entry(css, null, styleSheet);
        entries_.put(entry.key_, entry);
        deleteOverflow();
    }

    /**
     * Truncates the cache to the maximal number of entries. The entries with the oldest last access
     * time are removed first; the response of a removed entry (if any) is cleaned up.
     */
    protected void deleteOverflow() {
        synchronized (entries_) {
            while (entries_.size() > maxSize_) {
                final Entry oldestEntry = Collections.min(entries_.values());
                entries_.remove(oldestEntry.key_);
                if (oldestEntry.response_ != null) {
                    oldestEntry.response_.cleanUp();
                }
            }
        }
    }

    /**
     * Determines if the specified response can be cached. This is the case for responses to GET requests
     * whose request URL is not the {@code about:blank} constant and whose content is considered cacheable
     * by {@link #isCacheableContent(WebResponse)}.
     *
     * @param request the performed request
     * @param response the received response
     * @return {@code true} if the response can be cached
     */
    protected boolean isCacheable(final WebRequest request, final WebResponse response) {
        return HttpMethod.GET == response.getWebRequest().getHttpMethod()
            // identity comparison against the shared constant (no URL.equals() call)
            && UrlUtils.URL_ABOUT_BLANK != request.getUrl()
            && isCacheableContent(response);
    }

    /**
     * Perform prior validation for 'no-store' directive in Cache-Control header.
     *
     * <p>Tries to guess if the content is dynamic or not.</p>
     *
     * <p>"Since origin servers do not always provide explicit expiration times, HTTP caches typically
     * assign heuristic expiration times, employing algorithms that use other header values (such as the
     * {@code Last-Modified} time) to estimate a plausible expiration time".</p>
     *
     * <p>The current implementation rejects responses flagged 'no-store' and otherwise delegates to
     * {@link #isWithinCacheWindow(WebResponse, long, long)} using the current time as both "now" and
     * download time. In effect a response is cacheable if it has a positive {@code s-maxage}/{@code max-age},
     * an {@code Expires} header specifying expiration in more than 10 minutes, or a {@code Last-Modified}
     * header old enough to yield a heuristic lifetime of at least one second.</p>
     *
     * @see <a href="https://tools.ietf.org/html/rfc7234">RFC 7234</a>
     * @see <a href="https://tools.ietf.org/html/rfc2616">RFC 2616</a>
     * @param response the response to examine
     * @return {@code true} if the response should be considered as cacheable
     */
    protected boolean isCacheableContent(final WebResponse response) {
        if (HeaderUtils.containsNoStore(response)) {
            return false;
        }

        final long now = getCurrentTimestamp();
        return isWithinCacheWindow(response, now, now);
    }

    /**
     * Gets the current time stamp. As method to allow overriding it, when simulating another time.
     * @return the current time stamp
     */
    protected long getCurrentTimestamp() {
        return System.currentTimeMillis();
    }

    /**
     * Parses and returns the specified date header of the specified response. This method
     * returns {@code null} if the specified header cannot be found or cannot be parsed as a date.
     * A value consisting only of an integer (such as {@code 0} or {@code -1}) yields the current date.
     *
     * @param response the response
     * @param headerName the header name
     * @return the specified date header of the specified response
     */
    protected static Date parseDateHeader(final WebResponse response, final String headerName) {
        final String value = response.getResponseHeaderValue(headerName);
        if (value == null) {
            return null;
        }
        final Matcher matcher = DATE_HEADER_PATTERN.matcher(value);
        if (matcher.matches()) {
            // plain numbers are not dates; "now" makes an Expires check against DELAY fail
            return new Date();
        }
        return HttpClientConverter.parseHttpDate(value);
    }

    /**
     * Returns the cached response corresponding to the specified request. If there is
     * no corresponding cached object, this method returns {@code null}.
     *
     * <p>Calculates and check if object still fresh(RFC 7234) otherwise returns {@code null}.</p>
     * @see <a href="https://tools.ietf.org/html/rfc7234">RFC 7234</a>
     *
     * @param request the request whose corresponding response is sought
     * @return the cached response corresponding to the specified request if any
     */
    public WebResponse getCachedResponse(final WebRequest request) {
        final Entry cachedEntry = getCacheEntry(request);
        if (cachedEntry == null) {
            return null;
        }
        return cachedEntry.response_;
    }

    /**
     * Returns the cached object corresponding to the specified request. If there is
     * no corresponding cached object, this method returns {@code null}.
     *
     * <p>Calculates and check if object still fresh(RFC 7234) otherwise returns {@code null}.</p>
     * @see <a href="https://tools.ietf.org/html/rfc7234">RFC 7234</a>
     *
     * @param request the request whose corresponding cached compiled script is sought
     * @return the cached object corresponding to the specified request if any
     */
    public Object getCachedObject(final WebRequest request) {
        final Entry cachedEntry = getCacheEntry(request);
        if (cachedEntry == null) {
            return null;
        }
        return cachedEntry.value_;
    }

    /**
     * Looks up the entry for a GET request by its normalized URL. A fresh entry is touched and
     * returned; a stale entry is removed from the map and {@code null} is returned.
     *
     * @param request the request to look up
     * @return the fresh entry or {@code null}
     */
    private Entry getCacheEntry(final WebRequest request) {
        if (HttpMethod.GET != request.getHttpMethod()) {
            return null;
        }

        final URL url = request.getUrl();
        if (url == null) {
            return null;
        }

        final String normalizedUrl = UrlUtils.normalize(url);
        final Entry cachedEntry = entries_.get(normalizedUrl);
        if (cachedEntry == null) {
            return null;
        }

        if (cachedEntry.isStillFresh(getCurrentTimestamp())) {
            // lastAccess_ is read by deleteOverflow() while holding this lock
            synchronized (entries_) {
                cachedEntry.touch();
            }
            return cachedEntry;
        }

        // NOTE(review): unlike deleteOverflow()/clear(), the stale response is not cleaned up here - confirm intended
        entries_.remove(normalizedUrl);
        return null;
    }

    /**
     * Returns the cached parsed version of the specified CSS snippet. If there is no
     * corresponding cached stylesheet, this method returns {@code null}.
     *
     * @param css the CSS snippet whose cached stylesheet is sought
     * @return the cached stylesheet corresponding to the specified CSS snippet
     */
    public CSSStyleSheetImpl getCachedStyleSheet(final String css) {
        final Entry cachedEntry = entries_.get(css);
        if (cachedEntry == null) {
            return null;
        }
        synchronized (entries_) {
            cachedEntry.touch();
        }
        return (CSSStyleSheetImpl) cachedEntry.value_;
    }

    /**
     * Returns the cache's maximum size. This is the maximum number of files that will
     * be cached. The default is {@code 40}.
     *
     * @return the cache's maximum size
     */
    public int getMaxSize() {
        return maxSize_;
    }

    /**
     * Sets the cache's maximum size. This is the maximum number of files that will
     * be cached. The default is {@code 40}. Entries exceeding the new size are evicted immediately.
     *
     * @param maxSize the cache's maximum size (must be {@code >= 0})
     * @throws IllegalArgumentException if {@code maxSize} is negative
     */
    public void setMaxSize(final int maxSize) {
        if (maxSize < 0) {
            throw new IllegalArgumentException("Illegal value for maxSize: " + maxSize);
        }
        maxSize_ = maxSize;
        deleteOverflow();
    }

    /**
     * Returns the number of entries in the cache.
     *
     * @return the number of entries in the cache
     */
    public int getSize() {
        return entries_.size();
    }

    /**
     * Clears the cache. The responses of all entries (if any) are cleaned up before removal.
     */
    public void clear() {
        synchronized (entries_) {
            for (final Entry entry : entries_.values()) {
                if (entry.response_ != null) {
                    entry.response_.cleanUp();
                }
            }
            entries_.clear();
        }
    }

    /**
     * Removes outdated entries from the cache. Entries without a response (as created by
     * {@link #cache(String, CSSStyleSheetImpl)}) are removed as well.
     */
    public void clearOutdated() {
        synchronized (entries_) {
            final long now = getCurrentTimestamp();

            entries_.entrySet().removeIf(entry -> entry.getValue().response_ == null
                                                    || !entry.getValue().isStillFresh(now));
        }
    }
}