/*
 * org.openstreetmap.atlas.utilities.caching.strategies.NamespaceCachingStrategy
 * Part of the "atlas" artifact (Maven / Gradle / Ivy):
 * "Library to load OSM data into an Atlas format"
 */
package org.openstreetmap.atlas.utilities.caching.strategies;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Optional;
import java.util.UUID;
import java.util.function.Function;
import org.openstreetmap.atlas.exception.CoreException;
import org.openstreetmap.atlas.streaming.compression.Decompressor;
import org.openstreetmap.atlas.streaming.resource.AbstractResource;
import org.openstreetmap.atlas.streaming.resource.File;
import org.openstreetmap.atlas.streaming.resource.Resource;
import org.openstreetmap.atlas.utilities.caching.ConcurrentResourceCache;
import org.openstreetmap.atlas.utilities.runtime.Retry;
import org.openstreetmap.atlas.utilities.scalars.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Caching strategy that attempts to cache a {@link Resource} within a user-defined namespace at the
* standard system temporary location. It should be noted that this strategy has no inherent
* concurrency safety. Since the namespaces are implemented as directories in the underlying
* filesystem, two {@link NamespaceCachingStrategy} objects with the same namespace can possibly
* step on each other's toes if used improperly. It is up to the users of the strategy to prevent
* concurrent access to {@link NamespaceCachingStrategy} objects that share a namespace. One way to
* ensure concurrency safety is to carefully associate a given namespace (and its
* {@link NamespaceCachingStrategy}) with exactly one {@link ConcurrentResourceCache} object
* throughout your code, and stick to this restriction consistently.
*
* @author lcram
*/
public class NamespaceCachingStrategy extends AbstractCachingStrategy
{
private static final Logger logger = LoggerFactory.getLogger(NamespaceCachingStrategy.class);
private static final String FILE_EXTENSION_DOT = ".";
private static final String PROPERTY_LOCAL_TEMPORARY_DIRECTORY = "java.io.tmpdir";
private static final String TEMPORARY_DIRECTORY_STRING = System
.getProperty(PROPERTY_LOCAL_TEMPORARY_DIRECTORY);
private static final int RETRY_NUMBER = 5;
private static final Retry RETRY = new Retry(RETRY_NUMBER, Duration.ONE_SECOND)
.withQuadratic(true);
private final String namespace;
private boolean preserveFileExtension;
private final FileSystem fileSystem;
public NamespaceCachingStrategy(final String namespace)
{
this(namespace, FileSystems.getDefault());
}
public NamespaceCachingStrategy(final String namespace, final FileSystem fileSystem)
{
super();
if (namespace.contains("/") || namespace.contains("\\"))
{
throw new IllegalArgumentException(
"The namespace cannot contain characters '\\' or '/'");
}
this.namespace = this.getName() + "_" + namespace + "_"
+ UUID.nameUUIDFromBytes(namespace.getBytes()).toString();
this.preserveFileExtension = true;
this.fileSystem = fileSystem;
}
@Override
public Optional attemptFetch(final URI resourceURI,
final Function> defaultFetcher)
{
if (TEMPORARY_DIRECTORY_STRING == null)
{
logger.error("StrategyID {}: failed to read property {}, skipping cache fetch...",
this.getStrategyID(), PROPERTY_LOCAL_TEMPORARY_DIRECTORY);
return Optional.empty();
}
if (resourceURI == null)
{
logger.warn("StrategyID {}: resourceURI was null, skipping cache fetch...",
this.getStrategyID());
return Optional.empty();
}
final File cachedFile = getCachedFile(resourceURI);
attemptToCacheFileLocally(cachedFile, defaultFetcher, resourceURI);
if (cachedFile.exists())
{
logger.trace("StrategyID {}: returning local copy of resource {}", this.getStrategyID(),
resourceURI);
return Optional.of(cachedFile);
}
// If we got here, something went wrong in attemptToCacheFileLocally().
logger.warn("StrategyID {}: could not find local copy of resource {}", this.getStrategyID(),
resourceURI);
return Optional.empty();
}
@Override
public String getName()
{
return "NamespaceCachingStrategy";
}
@Override
public void invalidate()
{
final Path storageDirectory = this.getStorageDirectory();
try
{
new File(storageDirectory.toString(), this.fileSystem).deleteRecursively();
}
catch (final Exception exception)
{
logger.warn("StrategyID {}: invalidate failed due to {}", this.getStrategyID(),
exception.getClass().getName(), exception);
}
}
@Override
public void invalidate(final URI resourceURI)
{
try
{
getCachedFile(resourceURI).delete();
}
catch (final Exception exception)
{
logger.warn("StrategyID {}: invalidate of resource {} failed due to {}",
this.getStrategyID(), resourceURI, exception.getClass().getName(), exception);
}
}
/**
* Preserve the file extension of the cached URI when saving it as a file to the temporary
* location. For example, if the URI of the resource was "hdfs://foo/bar/baz.txt", then after
* computing the hash of the URI, {@link NamespaceCachingStrategy} will append a '.txt'
* extension to the filename. This is useful for e.g. in cases where resource loading code may
* be looking for specific file extensions in order to decide between various load strategies.
*
* @param preserveFileExtension
* if true, preserve the original extension
* @return this instance for chaining
*/
public NamespaceCachingStrategy withFileExtensionPreservation(
final boolean preserveFileExtension)
{
this.preserveFileExtension = preserveFileExtension;
return this;
}
protected void validateLocalFile(final File localFile)
{
// Do nothing here, leave to extensions to decide.
}
/*
* Package-private for unit testing
*/
Path getStorageDirectory()
{
return this.fileSystem.getPath(TEMPORARY_DIRECTORY_STRING, this.namespace);
}
private void attemptToCacheFileLocally(final File cachedFile,
final Function> defaultFetcher, final URI resourceURI)
{
if (!cachedFile.exists())
{
logger.trace("StrategyID {}: attempting to cache resource {} in temporary file {}",
this.getStrategyID(), resourceURI, cachedFile);
final Optional resourceFromDefaultFetcher = defaultFetcher.apply(resourceURI);
if (resourceFromDefaultFetcher.isEmpty())
{
logger.warn(
"StrategyID {}: application of default fetcher for {} returned empty Optional!",
this.getStrategyID(), resourceURI);
return;
}
final File temporaryLocalFile = File.temporary(this.fileSystem);
RETRY.run(() ->
{
try
{
/*
* We have to explicitly set the decompressor here. Why? Because if the resource
* ends with a '.gz' extension, the 'copyTo' method will apply GZIP
* decompression to it. The problem? When the user goes to fetch the contents of
* the cached copy, it will still have the '.gz' extension but it will now be
* decompressed. So our automatic decompression code will run on an uncompressed
* file! This will cause the contents fetch to fail since Java's GZIPInputStream
* won't be able to find the GZIP magic number!
*/
final AbstractResource abstractResource = (AbstractResource) resourceFromDefaultFetcher
.get();
abstractResource.setDecompressor(Decompressor.NONE);
abstractResource.copyTo(temporaryLocalFile);
validateLocalFile(temporaryLocalFile);
}
catch (final Exception exception)
{
throw new CoreException(
"StrategyID {}: something went wrong copying {} to temporary local file {}",
this.getStrategyID(), resourceFromDefaultFetcher, temporaryLocalFile,
exception);
}
});
// now that we have pulled down the file to a unique temporary location, attempt to
// atomically move it to the cache after re-checking for existence
if (!cachedFile.exists())
{
try
{
final Path temporaryLocalFilePath = this.fileSystem
.getPath(temporaryLocalFile.getPathString());
final Path cachedFilePath = this.fileSystem.getPath(cachedFile.getPathString());
Files.move(temporaryLocalFilePath, cachedFilePath,
StandardCopyOption.ATOMIC_MOVE);
validateLocalFile(cachedFile);
}
catch (final FileAlreadyExistsException exception)
{
logger.trace("StrategyID {}: file {} is already cached", this.getStrategyID(),
cachedFile);
}
catch (final Exception exception)
{
throw new CoreException("StrategyID {}: something went wrong moving {} to {}",
this.getStrategyID(), temporaryLocalFile, cachedFile, exception);
}
}
}
}
private File getCachedFile(final URI resourceURI)
{
final Path storageDirectory = getStorageDirectory();
final Optional resourceExtensionOptional = getFileExtensionFromURI(resourceURI);
final String cachedFileName;
cachedFileName = resourceExtensionOptional
.map(extension -> this.getUUIDForResourceURI(resourceURI).toString()
+ FILE_EXTENSION_DOT + extension)
.orElseGet(() -> this.getUUIDForResourceURI(resourceURI).toString());
final Path cachedFilePath = this.fileSystem.getPath(storageDirectory.toString(),
cachedFileName);
return new File(cachedFilePath.toString(), this.fileSystem);
}
private Optional getFileExtensionFromURI(final URI resourceURI)
{
if (!this.preserveFileExtension)
{
return Optional.empty();
}
final String asciiString = resourceURI.toASCIIString();
final int lastIndexOfDot = asciiString.lastIndexOf(FILE_EXTENSION_DOT);
if (lastIndexOfDot < 0)
{
return Optional.empty();
}
final String extension = asciiString.substring(lastIndexOfDot + 1);
if (extension.isEmpty())
{
return Optional.empty();
}
else
{
return Optional.of(extension);
}
}
}