All Downloads are FREE. Search and download functionalities are using the official Maven repository.

javax0.jamal.tools.Cache Maven / Gradle / Ivy

There is a newer version: 2.8.1
Show newest version
package javax0.jamal.tools;

import javax0.jamal.api.EnvironmentVariables;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;

/**
 * A cache implementation that can store the strings downloaded from certain https addresses in files.
 * The cache elements never expire by default.
 * The assumption is that the resources downloaded from a web page are versioned, and the url contains the version number.
 * 

* URLs containing the literal {@code SNAPSHOT} are not cached. * In all other cases, we assume resources NEVER change. *

* The structure of the files and directories were influenced by the structure of the Maven local repository. *

* The cached files are stored under the directory {@code ~/.jamal/cache/}. * In this directory there are two subdirectories {@code https/} and {@code properties/}. * The directory {@code https/} contains the cached files. * The directory {@code properties/} contain properties files. * The properties contain certain information about the file in the cache. *

* You can set certain entries to expire adding a {@code ttl} value to the properties file. *

* When a ttl value is badly formatter, not a number, it is ignored. * The reason to ignore such an error instead of throwing an exception is that the cache is not a critical part of the system. * Processing should go on even when the handcrafted time-to-live values are erroneous. * */ public class Cache { /** * A cache entry. * It contains the content File and the properties File and the Properties object. * The properties are loaded when the content is requested by the caller. * There is no method to query the properties. *

* The properties are inherited from the containing directories. * The close to the actual entry overwrites the farther elements. * Properties contain time values, e.g., a cache element was written last time and also time to live value. */ public static class Entry { /** * The entry file. */ private final File file; /** * The properties file of the entry. */ private final File propertiesFile; /** * Contains all the properties from the properties file of the entry. * They are lazy loaded when the content is requested. */ private final Properties properties; /** * Contains all the properties from the properties file of the entry and also the properties from the * {@code .properties} files in the directories up to the cache root directory. */ private final Properties effectiveProperties; /** * {@code true} if the properties file is a flat file, {@code false} if the properties are to be inherited from * the directories up to the cache root directory. */ private final boolean flatProperties; /** * {@code true} if the properties are loaded. * Used to load the properties only once and only when they are needed. */ private boolean propertiesLoaded = false; private Entry(File file, File propertiesFile) { this(file, propertiesFile, false); } /** * Create a new entry for the cache. *

* NOTE: Flat is implemented, not tested and not used. * * @param file the file that contains the content * @param propertiesFile the file that contains the properties * @param flat {@code true} if the properties file is a flat file, {@code false} if the properties * are to be inherited from the directories up to the cache root directory. *

*/ public Entry(File file, File propertiesFile, boolean flat) { this.file = file; this.propertiesFile = propertiesFile; this.properties = new Properties(); this.effectiveProperties = new Properties(); this.flatProperties = flat; } /** * Check if the entry is a cache miss. *

* It is a miss if * *

    *
  • the file does not exist *
  • the file exists but expired *
* * @return {@code true} if the file is not in the cache. */ public boolean isMiss() { if (!file.exists()) { return true; } final var ttl = getProperty("ttl"); if (ttl == null) { return false; } final var write = getProperty("write"); return expiration(ttl, write) < System.currentTimeMillis(); } private long expiration(final String ttl, final String write) { final var ttlMillis = parseTtl(ttl); long writeMillis = 0L; try { writeMillis = Long.parseLong(write); } catch (NumberFormatException e) { // } return writeMillis + ttlMillis; } /** * Parse the ttl string and return the number of seconds that the ttl means. *

* A time to live string can contain numbers with units. * The units can be *

    *
  • {@code Y} meaning years
  • *
  • {@code M} meaning months, 31 days exactly
  • *
  • {@code w} meaning weeks
  • *
  • {@code d} meaning days
  • *
  • {@code h} meaning hours
  • *
  • {@code m} meaning minutes
  • *
  • {@code s} meaning seconds
  • *
* The units MUST be used in this order. * The number {@code 2Y3M5s} is correct, and it means 2 years, 3 months and 5 seconds. * On the other hand {@code 2Y5s3M} is not correct, and it will result zero TTL value. * * @param ttl the time to live string * @return the number of seconds the ttl string means */ public static long parseTtl(final String ttl) { final var sb = new StringBuilder(ttl); long seconds = 0L; try { seconds += chopSeconds(sb, "y", 365 * 24 * 60 * 60); seconds += chopSeconds(sb, "M", 31 * 24 * 60 * 60); seconds += chopSeconds(sb, "w", 7 * 24 * 60 * 60); seconds += chopSeconds(sb, "d", 24 * 60 * 60); seconds += chopSeconds(sb, "h", 60 * 60); seconds += chopSeconds(sb, "m", 60); seconds += chopSeconds(sb, "s", 1); final var value = sb.toString().trim(); if (!value.isEmpty()) { seconds += Long.parseLong(value); } } catch (NumberFormatException e) { return 0L; } return seconds; } /** * Find the unit in the string and take the number before it. * Calculate the number of seconds that number means with the given unit. *

* Delete the number and the unit from the string. * * @param sb string builder that contains the number and the unit * @param unit the unit to look for * @param seconds the number of seconds that the unit means * @return the number of seconds that the number and the unit means */ private static long chopSeconds(StringBuilder sb, String unit, long seconds) throws NumberFormatException { final var index = sb.indexOf(unit); if (index == -1) { return 0; } final var value = sb.substring(0, index).trim(); sb.delete(0, index + 1); return Long.parseLong(value) * seconds; } /** * @return the content of the cached file or {@code null} if the file is not in the cache or cannot be read. */ public StringBuilder getContent() { try { assertPropertiesAreLoaded(); if (file.exists()) { properties.put("read", "" + System.currentTimeMillis()); properties.put("read_formatted", now()); properties.put("count", "" + (Integer.parseInt(Optional.ofNullable((String) properties.get("count")).orElse("0")) + 1)); saveProperties(); return CachedHttpInput.readBufferedReader(getBufferedReader(file)); } else { return null; } } catch (IOException ioex) { return null; } } public byte[] getBinaryContent() { try { assertPropertiesAreLoaded(); if (file.exists()) { properties.put("read", "" + System.currentTimeMillis()); properties.put("read_formatted", now()); properties.put("count", "" + (Integer.parseInt(Optional.ofNullable((String) properties.get("count")).orElse("0")) + 1)); saveProperties(); final var buffer = new ByteArrayOutputStream(); try (final var is = new FileInputStream(file)) { is.transferTo(buffer); buffer.close(); return buffer.toByteArray(); } } else { return null; } } catch (IOException ioex) { return null; } } /** * Get the property value from the properties file of the entry. * If the properties were not read then they will be read and loaded. * The loading is done only once and only when the properties are needed. * The loading reads the properties file and also the {@code .properties} files in the directories up to the * cache root directory, unless the enty is flat. * * @param key the key of the property * @return the value of the property or {@code null} if the property is not found or the properties cannot be * read. */ public String getProperty(String key) { try { assertPropertiesAreLoaded(); return effectiveProperties.getProperty(key); } catch (IOException ignored) { return null; } } /** * Load the properties from the properties file. The properties are loaded only once and only when they are * needed. */ private void assertPropertiesAreLoaded() throws IOException { if (!propertiesLoaded) { if (propertiesFile.exists()) { properties.load(new FileInputStream(propertiesFile)); } propertiesLoaded = true; if (!flatProperties) { effectiveProperties.putAll(collectEffectiveProperties(propertiesFile.getParentFile())); } effectiveProperties.putAll(properties); } } /** * Collect the properties from the {@code .properties} files in the directories up to the cache root directory. * * @param directory the directory from where to start collecting the properties * @return the properties collected from the {@code .properties} files in the directories up to the cache root */ private Properties collectEffectiveProperties(final File directory) { final var properties = new Properties(); if (!directory.getParentFile().equals(CACHE_ROOT_DIRECTORY)) { properties.putAll(collectEffectiveProperties(directory.getParentFile())); } final var dotPropertiesFile = new File(directory, ".properties"); if (dotPropertiesFile.exists()) { try { final var localProperties = new Properties(); localProperties.load(new FileInputStream(dotPropertiesFile)); properties.putAll(localProperties); } catch (IOException e) { // } } return properties; } /** * @return the current date and time formatted. Used to record the time in the properties file human readable. */ private static String now() { SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS Z"); return format.format(new Date()); } /** * See the documentation of {@link #save(String, Map[])} * * @param content to be saved into the cache file * @return the content itself */ StringBuilder save(StringBuilder content) { save(content.toString()); return content; } /** * Save the given content into a cache file. The saving may fail. In that case the failure will be silent and * does not throw an exception. This is designed that way not to prevent operation in case of a wrongly * configured cache. In that case, the file will be downloaded each time instead of using the cache, but Jamal * will still work. * * @param content to be saved into the cache file * @param maps contains the key value pairs that will be saved into the cache properties file */ @SafeVarargs public final void save(String content, Map... maps) { save(content.getBytes(StandardCharsets.UTF_8), maps); } public final byte[] save(byte[] content, Map... maps) { if (cacheExists()) { try { assertPropertiesAreLoaded(); properties.put("write", "" + System.currentTimeMillis()); properties.put("write_formatted", now()); for (final var map : maps) { properties.putAll(map); } saveProperties(); //noinspection ResultOfMethodCallIgnored file.getParentFile().mkdirs(); try (final var fos = new FileOutputStream(file)) { fos.write(content); } } catch (IOException ignore) { } } return content; } /** * Save the properties. In case there is an error then silently ignore it. For reasoning see {@link * #save(StringBuilder)}. Do not call this method. It is called from {@link #save(StringBuilder)}. (It is * private after all.) */ private void saveProperties() { //noinspection ResultOfMethodCallIgnored propertiesFile.getParentFile().mkdirs(); try { properties.store(new FileOutputStream(propertiesFile), " cache parameters of the entry " + file.getAbsolutePath()); } catch (IOException ignore) { } } } private static final String DEFAULT_CACHE_ROOT = "~/.jamal/cache/"; private static final String SNAPSHOT = "SNAPSHOT"; static final Entry NO_CACHE = new Entry(NonexistentFile.INSTANCE, NonexistentFile.INSTANCE); private /*non-final for test*/ static File CACHE_ROOT_DIRECTORY = new File( EnvironmentVariables.getenv(EnvironmentVariables.JAMAL_HTTPS_CACHE_ENV) .or(() -> Optional.of(DEFAULT_CACHE_ROOT)).map(FileTools::adjustedFileName).get()); /** * Get a cache entry for the given URL. * * @param url is the URL that the entry represents. This is the URL from where the original content was downloaded. * @return an entry. If there is no entry configured then it returns a pseudo entry that says that he entry is not * found. This same entry is returned in case the url contains the string {@code SNAPSHOT} all upper case. */ public static Entry getEntry(URL url) { if (!Cache.cacheExists() || url.toString().contains(SNAPSHOT)) { return Cache.NO_CACHE; } final var fn = convertUrl2FN(url); final var propfile = new StringBuilder(fn); fn.insert(0, "/https/"); fn.insert(0, CACHE_ROOT_DIRECTORY.getAbsolutePath()); propfile.insert(0, "/properties/"); propfile.insert(0, CACHE_ROOT_DIRECTORY.getAbsolutePath()); return new Entry(new File(fn.toString()), new File(propfile.toString())); } /** * Convert a URL to a file name. This file name will be used in the cache as a structured directory path and file * name. * * @param url the url to convert to file name * @return a directory structure and file name with the {@code /} separator. */ private static StringBuilder convertUrl2FN(URL url) { final var fn = new StringBuilder(); final var host = url.getHost(); final var path = url.getPath(); for (final var s : host.split("\\.", -1)) { fn.insert(0, s + "/"); } fn.append(path.substring(1)); return fn; } /** * Create a buffered reader that reads the file using the UTF-8 character set. * * @param file is going to be read by the returned reader. * @return the reader * @throws IOException if the file cannot be opened. */ private static BufferedReader getBufferedReader(File file) throws IOException { return new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)); } /** * @return {@code true} is the cache is properly configured and exists. The cache is used if the cache directory * exists. It has to be created manually. The directories under it are recursively created when a file is cached, * but the cache directory has to be manually created. That way the cache can be configured not to be used simply * deleting the cache directory. (Not simply emptying!) */ static boolean cacheExists() { return CACHE_ROOT_DIRECTORY.exists() && CACHE_ROOT_DIRECTORY.isDirectory(); } /** * A file object that says the file does not exist. On anything else it returns what is returned by {@code new * File("")}. This is a singleton, use the {@code INSTANCE} field. */ private static class NonexistentFile extends File { static final File INSTANCE = new NonexistentFile(""); NonexistentFile(String pathname) { super(pathname); } @Override public boolean exists() { return false; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy