All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.spdx.utility.DownloadCache Maven / Gradle / Ivy

/**
 * Copyright (c) 2023 Peter Monks
 * 

* SPDX-License-Identifier: Apache-2.0 *

* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.spdx.utility; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.Reader; import java.io.Writer; import java.net.HttpURLConnection; import java.net.URL; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.time.Instant; import java.time.ZoneOffset; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.time.temporal.ChronoUnit; import java.util.Arrays; import java.util.Base64; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Objects; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.spdx.Configuration; /** * This singleton class provides a flexible download cache for the rest of the library. If enabled, URLs that are * requested using this class will have their content automatically cached locally on disk (in a directory that adheres * to the XDG Base Directory Specification - ...), * and any subsequent requests will be served out of that cache. Cache entries will also be automatically checked every * so often for staleness using HTTP ETag requests (which are more efficient than full HTTP requests). The interval * between such checks is configurable (and can even be turned off, which makes every download request re-check the URL * for staleness). *

* The cache is configured via these Configuration options: * * org.spdx.storage.listedlicense.enableCache: * Controls whether the cache is enabled or not. Defaults to false i.e. the cache is disabled. * * org.spdx.storage.listedlicense.cacheCheckIntervalSecs: * How many seconds should the cache wait between issuing ETag requests to determine whether cached content is * stale? Defaults to 86,400 seconds (24 hours). */ public final class DownloadCache { private static final Logger logger = LoggerFactory.getLogger(DownloadCache.class); private static final int READ_TIMEOUT = 5000; private static final int IO_BUFFER_SIZE = 8192; private static final long DEFAULT_CACHE_CHECK_INTERVAL_SECS = 86400; // 24 hours, in seconds static final List WHITE_LIST = Collections.unmodifiableList(Arrays.asList( "spdx.org", "spdx.dev", "spdx.com", "spdx.info")); // Allowed host names for the SPDX listed licenses private static DownloadCache singleton; // See https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html private final String cacheDir = ((System.getenv("XDG_CACHE_HOME") == null || System.getenv("XDG_CACHE_HOME").trim().isEmpty()) ? System.getProperty("user.home") + File.separator + ".cache" : System.getenv("XDG_CACHE_HOME")) + File.separator + "Spdx-Java-Library"; private static final String CONFIG_PROPERTY_CACHE_ENABLED = "org.spdx.downloadCacheEnabled"; private static final String CONFIG_PROPERTY_CACHE_CHECK_INTERVAL_SECS = "org.spdx.downloadCacheCheckIntervalSecs"; private final boolean cacheEnabled; private final long cacheCheckIntervalSecs; private final DateTimeFormatter iso8601 = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.000'Z'").withZone(ZoneOffset.UTC); /** * This class is a singleton - use getInstance() to obtain the instance. */ private DownloadCache() { boolean tmpCacheEnabled = Boolean.parseBoolean(Configuration.getInstance().getProperty(CONFIG_PROPERTY_CACHE_ENABLED, "false")); if (tmpCacheEnabled) { try { final File cacheDirectory = new File(cacheDir); Files.createDirectories(cacheDirectory.toPath()); } catch (IOException ioe) { logger.warn("Unable to create cache directory '{}'; continuing with cache disabled.", cacheDir, ioe); tmpCacheEnabled = false; } } cacheEnabled = tmpCacheEnabled; long tmpCacheCheckIntervalSecs = DEFAULT_CACHE_CHECK_INTERVAL_SECS; try { tmpCacheCheckIntervalSecs = Long.parseLong(Configuration.getInstance().getProperty(CONFIG_PROPERTY_CACHE_CHECK_INTERVAL_SECS)); } catch(NumberFormatException nfe) { // Ignore parse failures - in this case we use the default value of 24 hours } cacheCheckIntervalSecs = tmpCacheCheckIntervalSecs; } /** * @return The singleton instance of the DownloadCache class. */ public static DownloadCache getInstance() { if (singleton == null) { singleton = new DownloadCache(); } return singleton; } /** * Recursively removes a directory. USE WITH CAUTION! * * @param dir The directory to delete. * @throws IOException on IO error */ private static void rmdir(final File dir) throws IOException { if (Objects.isNull(dir) || !dir.exists()) { return; } File[] contents = dir.listFiles(); if (Objects.nonNull(contents)) { for (final File f : contents) { rmdir(f); } } Files.delete(dir.toPath()); } /** * Resets (deletes) the local cache * @throws IOException on IO error */ public void resetCache() throws IOException { final File cacheDirectory = new File(cacheDir); rmdir(cacheDirectory); Files.createDirectories(cacheDirectory.toPath()); } /** * @param url The URL to get an input stream for. Note that redirects issued by this url are restricted to known * SPDX hosts. Redirects to other hosts will cause an IOException to be thrown. * @return An InputStream for url, or null if url is null. Note that this InputStream may be of different concrete * types, depending on whether the content is being served out of cache or not. * @throws IOException When an IO error of some kind occurs. */ public InputStream getUrlInputStream(final URL url) throws IOException { return getUrlInputStream(url, true); } /** * @param url The URL to get an input stream for. * @param restrictRedirects A flag that controls whether redirects returned by url are restricted to known SPDX * hosts or not. Defaults to true. USE EXTREME CAUTION WHEN TURNING THIS OFF! * @return An InputStream for url, or null if url is null. Note that this InputStream may be of different concrete * types, depending on whether the content is being served out of cache or not. * @throws IOException When an IO error of some kind occurs. */ public InputStream getUrlInputStream(final URL url, final boolean restrictRedirects) throws IOException { InputStream result = null; if (url != null) { if (cacheEnabled) { result = getUrlInputStreamThroughCache(url, restrictRedirects); } else { result = getUrlInputStreamDirect(url, restrictRedirects); } } return result; } /** * @param url The URL to get an input stream for, ignoring the local cache. * @param restrictRedirects A flag that controls whether redirects returned by url are restricted to known SPDX * hosts or not. Defaults to true. USE EXTREME CAUTION WHEN TURNING THIS OFF! * @return An InputStream for url, or null if url is null. * @throws IOException When an IO error of some kind occurs. */ private InputStream getUrlInputStreamDirect(URL url, boolean restrictRedirects) throws IOException { InputStream result; HttpURLConnection connection = (HttpURLConnection)url.openConnection(); connection.setReadTimeout(READ_TIMEOUT); final URL redirectUrl = processPossibleRedirect(connection, restrictRedirects); if (redirectUrl != null) { url = redirectUrl; connection = (HttpURLConnection)redirectUrl.openConnection(); connection.setReadTimeout(READ_TIMEOUT); } final int status = connection.getResponseCode(); if (status == HttpURLConnection.HTTP_OK) { result = connection.getInputStream(); } else { throw new IOException("Unexpected HTTP status code from " + url + ": " + status); } return result; } /** * @param url The URL to get an input stream for, leveraging the local cache. * @param restrictRedirects A flag that controls whether redirects returned by url are restricted to known SPDX * hosts or not. Defaults to true. USE EXTREME CAUTION WHEN TURNING THIS OFF! * @return An InputStream for url, or null if url is null. Note that this InputStream may be of different concrete * types, depending on whether the content is being served out of cache or not. * @throws IOException When an IO error of some kind occurs. */ private InputStream getUrlInputStreamThroughCache(final URL url, boolean restrictRedirects) throws IOException { final String cacheKey = base64Encode(url); final File cachedFile = new File(cacheDir, cacheKey); final File cachedMetadataFile = new File(cacheDir, cacheKey + ".metadata.json"); if (cachedFile.exists() && cachedMetadataFile.exists()) { try { checkCache(url, restrictRedirects); } catch (IOException ioe) { // We know we have a locally cached file here, so if we happen to get an exception we can safely ignore // it and fall back on the (possibly stale) cached content file. This makes the code more robust in the // presence of network errors when the cache has previously been populated. } } else { cacheMiss(url, restrictRedirects); } // At this point the cached file definitely exists return new BufferedInputStream(Files.newInputStream(cachedFile.toPath())); } /** * Checks the cache for content from the given url, and brings the cached content up to date if it's stale. * @param url The url to check. * @param restrictRedirects A flag that controls whether redirects returned by url are restricted to known SPDX * hosts or not. Defaults to true. USE EXTREME CAUTION WHEN TURNING THIS OFF! * @throws IOException When an IO error of some kind occurs. */ private void checkCache(final URL url, boolean restrictRedirects) throws IOException { final String cacheKey = base64Encode(url); final File cachedMetadataFile = new File(cacheDir, cacheKey + ".metadata.json"); final HashMap cachedMetadata = readMetadataFile(cachedMetadataFile); if (cachedMetadata != null) { final Instant lastChecked = parseISO8601String(cachedMetadata.get("lastChecked")); final long difference = lastChecked != null ? Math.abs(ChronoUnit.SECONDS.between(Instant.now(), lastChecked)) : Long.MAX_VALUE; if (difference > cacheCheckIntervalSecs) { // It's been a while since we checked the cached download of this URL for staleness, so make an ETag request logger.debug("Cache check interval exceeded; checking for updates to {}", url); final String eTag = cachedMetadata.get("eTag"); final HttpURLConnection connection = (HttpURLConnection) url.openConnection(); connection.setReadTimeout(READ_TIMEOUT); connection.setRequestProperty("If-None-Match", eTag); final int status = connection.getResponseCode(); if (status != HttpURLConnection.HTTP_NOT_MODIFIED) { // The content of the URL has changed, which we handle the same as a cache miss (i.e. we re-download // the content, and write a new metadata file from scratch) cacheMiss(url, connection, restrictRedirects); } else { // The content hasn't changed, so just update the lastChecked metadata but otherwise do nothing logger.debug("Cache hit for {}", url); cachedMetadata.put("lastChecked", iso8601.format(Instant.now())); writeMetadataFile(cachedMetadataFile, cachedMetadata); } } else { // We checked recently, so don't need to do anything - the cached content will be used logger.debug("Within cache check interval; skipping check of updates to {}", url); } } else { // Metadata doesn't exist - treat it as a cache miss cacheMiss(url, restrictRedirects); } } /** * Process a cache miss, which involves downloading the content from the given url, and writing out an associated * metadata file (in JSON format) containing sufficient information for the cache to check for staleness in the * future. * @param connection The open HTTP connection to download and cache. * @param restrictRedirects A flag that controls whether redirects returned by url are restricted to known SPDX * hosts or not. Defaults to true. USE EXTREME CAUTION WHEN TURNING THIS OFF! * @throws IOException When an IO error of some kind occurs. */ private void cacheMiss(URL url, HttpURLConnection connection, boolean restrictRedirects) throws IOException { logger.debug("Cache miss for {}", url); final URL redirectUrl = processPossibleRedirect(connection, restrictRedirects); if (redirectUrl != null) { url = redirectUrl; connection = (HttpURLConnection)redirectUrl.openConnection(); } final int status = connection.getResponseCode(); if (status == HttpURLConnection.HTTP_OK) { final String cacheKey = base64Encode(url); final File cachedFile = new File(cacheDir, cacheKey); writeContentFile(connection.getInputStream(), cachedFile); final File cachedMetadataFile = new File(cacheDir, cacheKey + ".metadata.json"); final HashMap metadata = new HashMap<>(); metadata.put("eTag", connection.getHeaderField("ETag")); metadata.put("downloadedAt", iso8601.format(Instant.now())); metadata.put("lastChecked", iso8601.format(Instant.now())); metadata.put("sourceUrl", url.toString()); writeMetadataFile(cachedMetadataFile, metadata); } else { throw new IOException("Unexpected HTTP status code from " + url.toString() + ": " + status); } } /** * Process a cache miss, which involves downloading the content from the given url, and writing out an associated * metadata file (in JSON format) containing sufficient information for the cache to check for staleness in the * future. * @param url The url to download and cache. * @param restrictRedirects A flag that controls whether redirects returned by url are restricted to known SPDX * hosts or not. Defaults to true. USE EXTREME CAUTION WHEN TURNING THIS OFF! * @throws IOException When an IO error of some kind occurs. */ private void cacheMiss(final URL url, boolean restrictRedirects) throws IOException { final HttpURLConnection connection = (HttpURLConnection)url.openConnection(); connection.setReadTimeout(READ_TIMEOUT); cacheMiss(url, connection, restrictRedirects); } /** * Processes an HTTP redirect (if any) returned by the given connection, returning the URL * @param connection The connection to check for a redirect. * @param restrictRedirects A flag that controls whether redirects returned by url are restricted to known SPDX * hosts or not. Defaults to true. USE EXTREME CAUTION WHEN TURNING THIS OFF! * @return The redirect URL, or null if there wasn't one. * @throws IOException When an IO error of some kind occurs. */ private URL processPossibleRedirect(final HttpURLConnection connection, final boolean restrictRedirects) throws IOException { URL result = null; final int status = connection.getResponseCode(); if (status == HttpURLConnection.HTTP_MOVED_TEMP || status == HttpURLConnection.HTTP_MOVED_PERM || status == HttpURLConnection.HTTP_SEE_OTHER) { // redirect final String redirectUrlStr = connection.getHeaderField("Location"); if (Objects.isNull(redirectUrlStr) || redirectUrlStr.isEmpty()) { throw new IOException("Empty redirect URL response"); } try { result = new URL(redirectUrlStr); } catch (Exception ex) { throw new IOException("Invalid redirect URL", ex); } if (!result.getProtocol().toLowerCase().startsWith("http")) { throw new IOException("Invalid redirect protocol"); } if (restrictRedirects && !WHITE_LIST.contains(result.getHost())) { throw new IOException("Invalid redirect host - not on the allowed 'white list'"); } } return result; } /** * Reads a metadata file out of local cache. * @param metadataFile The metadata file to read. * @return The metadata read from the file, or null if the file doesn't exist or there was an error while reading * it. */ private HashMap readMetadataFile(final File metadataFile) { HashMap result; try { final Reader r = new BufferedReader(new FileReader(metadataFile)); result = new Gson().fromJson(r, new TypeToken>(){}.getType()); } catch (IOException ioe) { result = null; // Treat metadata read errors as a cache miss } return result; } /** * Writes a metadata file to the local cache. * @param metadataFile The metadata file to write. Note: if it already exists it will be silently overwritten. * @param metadata The metadata to write to the file. * @throws IOException When an IO error of some kind occurs. */ private void writeMetadataFile(final File metadataFile, HashMap metadata) throws IOException { try (final Writer w = new BufferedWriter(new FileWriter(metadataFile))) { new Gson().toJson(metadata, new TypeToken>(){}.getType(), w); w.flush(); } } /** * Writes a content file to the local cache. * @param is The InputStream to read the content from. Note: this InputStream must be open at the time this * method is called, and will be fully consumed and closed by this method. * @param cachedFile The content file to write to. Note: if it already exists it will be silently overwritten. * @throws IOException When an IO error of some kind occurs. */ private void writeContentFile(final InputStream is, final File cachedFile) throws IOException { try (final OutputStream cacheFileOutputStream = new BufferedOutputStream(Files.newOutputStream(cachedFile.toPath()))) { byte[] ioBuffer = new byte[IO_BUFFER_SIZE]; int length; while ((length = is.read(ioBuffer)) != -1) { cacheFileOutputStream.write(ioBuffer, 0, length); } cacheFileOutputStream.flush(); } is.close(); } /** * Attempts to parse s as if it were an ISO8601 formatted String. * @param s The string to attempt to parse. * @return The Instant for that ISO8601 value if parsing succeeded, or null if it didn't. */ private Instant parseISO8601String(final String s) { Instant result = null; if (s != null) { try { result = Instant.parse(s); } catch (final DateTimeParseException dtpe) { //noinspection DataFlowIssue result = null; } } return result; } /** * @param s The String to BASE64 encode. * @return The BASE64 encoding of s (as UTF-8). */ private String base64Encode(final String s) { String result = null; if (s != null) { result = Base64.getEncoder().encodeToString(s.getBytes(StandardCharsets.UTF_8)); } return result; } /** * @param u The URL to BASE64 encode. * @return The BASE64 encoding of u (as a UTF-8 encoded String). */ private String base64Encode(final URL u) { String result = null; if (u != null) { result = base64Encode(u.toString()); } return result; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy