loci.common.Location Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ome-common Show documentation
Show all versions of ome-common Show documentation
Contains common I/O, date parsing, and XML processing classes.
The newest version!
/*
* #%L
* Common package for I/O and related utilities
* %%
* Copyright (C) 2005 - 2016 Open Microscopy Environment:
* - Board of Regents of the University of Wisconsin-Madison
* - Glencoe Software, Inc.
* - University of Dundee
* %%
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* #L%
*/
package loci.common;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.MapMaker;
/**
* Pseudo-extension of {@link java.io.File} that supports reading over HTTP
* (among other things).
* It is strongly recommended to use this instead of java.io.File.
*/
public class Location {
// -- Constants --
/** SLF4J logger shared by all Location instances and static helpers. */
private static final Logger LOGGER = LoggerFactory.getLogger(Location.class);
/** True when the "os.name" system property starts with "Windows". */
private static final boolean IS_WINDOWS =
System.getProperty("os.name").startsWith("Windows");
// -- Enumerations --
/**
* Kind of URL a Location refers to. The (String, String) constructor selects
* S3 when S3Handle.canHandleScheme accepts the URI and GENERIC otherwise.
*/
protected enum UrlType {
/** A URL that S3Handle does not claim; a java.net.URL is built for it. */
GENERIC,
/** A URL whose scheme S3Handle reports it can handle. */
S3
};
// -- Static fields --
/** Map from given filenames to actual filenames. */
private static ThreadLocal> idMap =
new ThreadLocal>() {
@Override
protected HashMap initialValue() {
return new HashMap();
}
};
/** Whether list(boolean) consults the directory listings cache; off by default. */
private static volatile boolean cacheListings = false;
// By default, cache for one hour.
/** Cache timeout in nanoseconds (60 * 60 seconds * 10^9 ns/s). */
private static volatile long cacheNanos = 60L * 60L * 1000L * 1000L * 1000L;
/**
 * Immutable pairing of a directory listing with a timestamp.
 * {@link #cleanStaleCacheEntries()} compares the timestamp against
 * {@link System#nanoTime()} to decide when the entry has expired.
 */
protected class ListingsResult {
  /** The names contained in the listing. */
  public final String[] listing;

  /** Timestamp associated with the listing, in nanoseconds. */
  public final long time;

  /**
   * @param names the names contained in the listing
   * @param timestamp the time to associate with the listing
   */
  ListingsResult(String[] names, long timestamp) {
    listing = names;
    time = timestamp;
  }
}
/**
 * Cache of directory listings shared by all threads. list(boolean) looks
 * entries up under a key made of the absolute path followed by the
 * noHiddenFiles flag.
 */
private static final Map<String, ListingsResult> fileListings =
  new MapMaker().makeMap(); // like Java's ConcurrentHashMap
/**
* Pattern to match child URLs: one or more alphanumeric scheme characters,
* optionally followed by "+" and a second alphanumeric part, then "://" and
* any remainder.
*/
private static final Pattern URL_MATCHER = Pattern.compile(
"\\p{Alnum}+(\\+\\p{Alnum}+)?://.*");
/**
* Pattern to detect when getParent has gone past the parent of a URL: a
* scheme (same shape as in URL_MATCHER) followed by ":/" at the end of the
* input.
*/
private static final Pattern URL_ABOVE_PARENT = Pattern.compile(
"\\p{Alnum}+(\\+\\p{Alnum}+)?:/$");
// -- Fields --
/** True only when the constructor successfully parsed the path as a URI. */
private boolean isURL = false;
/** S3 or GENERIC for URL locations; null otherwise. */
private UrlType urlType;
/** java.net.URL form of the URI; set only for GENERIC URL locations. */
private URL url;
/** Parsed URI of a URL location; null otherwise. */
private URI uri;
/** Backing file; set only for non-URL locations. */
private File file;
/**
 * Snapshot of whether a URL location exists and how long its content is.
 * Both values are probed once, in the constructor, through a handle obtained
 * from {@link Location#getHandle(String)}; I/O failures are logged at trace
 * level and leave the defaults (not existing, length 0) in place.
 */
class URLLocationProperties {
  /** Content length reported by the handle, or 0 if it could not be read. */
  public final long length;

  /** Whether the handle reported that the content exists. */
  public final boolean exists;

  /**
   * Probes the given URL location.
   *
   * @param loc the location to probe; must be a URL
   * @throws IllegalArgumentException if loc is not a URL
   */
  public URLLocationProperties(Location loc) {
    LOGGER.trace("Getting LocationProperties");
    boolean bexists = false;
    long llength = 0;
    if (!loc.isURL) {
      throw new IllegalArgumentException("Location must be a URL");
    }
    try {
      // Probe the URI of the location that was validated above, not the
      // URI of whichever enclosing instance created this object.
      IRandomAccess handle = Location.getHandle(loc.uri.toString());
      try {
        try {
          bexists = handle.exists();
        }
        catch (IOException e) {
          LOGGER.trace("Failed to retrieve content from URL", e);
        }
        if (bexists) {
          try {
            llength = handle.length();
          }
          catch (IOException e) {
            LOGGER.trace("Could not determine URL's content length", e);
          }
        }
      }
      finally {
        // Release the handle even if an unchecked exception escapes above.
        handle.close();
      }
    }
    catch (IOException e) {
      LOGGER.trace("Failed to retrieve content from URL", e);
    }
    this.exists = bexists;
    this.length = llength;
    LOGGER.trace("exists:{} length:{}", bexists, llength);
  }
}
/**
* Existence/length snapshot for this Location when it is a URL.
* NOTE(review): presumably filled lazily by the URL-related accessors
* further down the file - confirm.
*/
private URLLocationProperties cachedProperties;
// -- Constructors --
/**
* Construct a Location using the given path.
* Delegates to {@link #Location(String, String)} with a null parent.
*
* @param pathname a URL, a file on disk, or a mapped name
* @see #getMappedId(String)
* @see #getMappedFile(String)
*/
public Location(String pathname) {
// The cast selects the (String, String) overload over (Location, String).
this((String) null, pathname);
}
/**
 * Creates a non-URL Location that wraps the given file on disk. No id
 * mapping is applied to the file.
 *
 * @param file a file on disk
 */
public Location(File file) {
  LOGGER.trace("Location({})", file);
  this.file = file;
  this.isURL = false;
}
/**
 * Creates a Location from a directory and a relative path.
 *
 * If the child looks like a URL (scheme://...), it is resolved through
 * {@link #getMappedId(String)} and parsed as a URI on its own; the parent is
 * not used. If that parsing fails, the Location falls back to a plain file
 * built from the mapped child. Otherwise parent and child are joined with
 * the file separator (or the child is used alone when parent is null), and
 * the result is resolved through {@link #getMappedId(String)}.
 *
 * @param parent the directory path name
 * @param child the relative path name
 * @see #Location(String)
 */
public Location(String parent, String child) {
  LOGGER.trace("Location({}, {})", parent, child);
  String resolved = null;
  String pathname = null;

  // Only attempt URI parsing when the child has a URL shape; this avoids
  // expensive exception handling for paths that are obviously not URLs.
  final boolean looksLikeUrl =
    child != null && URL_MATCHER.matcher(child).matches();
  if (looksLikeUrl) {
    try {
      resolved = getMappedId(child);
      pathname = child;
      uri = new URI(resolved);
      isURL = true;
      final boolean s3 = S3Handle.canHandleScheme(uri.toString());
      urlType = s3 ? UrlType.S3 : UrlType.GENERIC;
      url = s3 ? null : uri.toURL();
    }
    catch (URISyntaxException | MalformedURLException e) {
      // Readers such as FilePatternReader may pass invalid URI paths
      // containing <> so don't throw, instead treat as a non-URL
      LOGGER.debug("Invalid URL: {} {}", child, e);
      uri = null;
      url = null;
      urlType = null;
      isURL = false;
    }
  }

  // Not URL-shaped: resolve relative vs. absolute paths.
  if (pathname == null) {
    // TODO: in some cases child here may be null
    pathname = (parent == null) ? child : parent + File.separator + child;
    resolved = getMappedId(pathname);
  }

  if (!isURL) {
    file = new File(resolved);
  }
}
/**
* Construct a Location using the given directory and relative path.
* The parent's absolute path (or null when parent is null) is passed to
* {@link #Location(String, String)}.
*
* @param parent a Location representing the directory name
* @param child the relative path name
* @see #Location(String, String)
*/
public Location(Location parent, String child) {
this(parent == null ? (String) null : parent.getAbsolutePath(), child);
}
// -- Location API methods --
/**
 * Restores the caching configuration to its defaults (listings cache off,
 * one-hour timeout), empties the directory listings cache, and empties the
 * calling thread's id map.
 */
public static void reset() {
  cacheListings = false;
  cacheNanos = 3_600_000_000_000L; // one hour, in nanoseconds
  fileListings.clear();
  getIdMap().clear();
}
/**
* Turn caching of directory listings on or off.
* Caching is turned off by default.
*
* Reasons to cache - directory listings over network shares
* can be very expensive, especially in HCS experiments with thousands
* of files in the same directory. Technically, if you use a directory
* listing and then go and access the file, you are using stale information.
* Unlike a database, there's no transactional integrity to file system
* operations, so the directory could change by the time you access the file.
*
* Reasons not to cache - the contents of the directories might change
* during the program invocation.
*
* @param cache - true to turn caching on, false to leave it off.
*/
public static void cacheDirectoryListings(boolean cache) {
cacheListings = cache;
}
/**
* Cache directory listings for this many seconds before relisting.
* The value is converted to nanoseconds and truncated to a long, so
* fractional seconds are accepted.
*
* @param sec - use the cache if a directory list was done within this many
* seconds.
*/
public static void setCacheDirectoryTimeout(double sec) {
cacheNanos = (long) (sec * 1000. * 1000. * 1000.);
}
/**
* Clear the directory listings cache.
* The caching flag and timeout are left unchanged.
*
* Do this if directory contents might have changed in a significant way.
*/
public static void clearDirectoryListingsCache() {
fileListings.clear();
}
/**
 * Remove any cached directory listings that have expired, i.e. whose age
 * exceeds the configured cache timeout.
 */
public static void cleanStaleCacheEntries() {
  final long now = System.nanoTime();
  final long maxAge = cacheNanos;
  final Iterator<ListingsResult> cacheValues =
    fileListings.values().iterator();
  while (cacheValues.hasNext()) {
    // Compare elapsed time, not absolute timestamps: nanoTime values have
    // an arbitrary origin and may wrap, so only differences are meaningful.
    if (now - cacheValues.next().time > maxAge) {
      cacheValues.remove();
    }
  }
}
/**
 * Maps the given id to an actual filename on disk. Typically actual
 * filenames are used for ids, making this step unnecessary, but in some
 * cases it is useful; e.g., if the file has been renamed to conform to a
 * standard naming scheme and the original file extension is lost, then
 * using the original filename as the id assists format handlers with type
 * identification and pattern matching, and the id can be mapped to the
 * actual filename for reading the file's contents.
 *
 * The mapping is stored in the calling thread's id map. A null id is
 * ignored.
 *
 * @param id the mapped name
 * @param filename the actual filename on disk.
 *   If null, any existing mapping for {@code id} will be cleared.
 * @see #getMappedId(String)
 */
public static void mapId(String id, String filename) {
  if (id != null) {
    if (filename != null) {
      getIdMap().put(id, filename);
    }
    else {
      getIdMap().remove(id);
    }
    LOGGER.debug("Location.mapId: {} -> {}", id, filename);
  }
}
/**
 * Maps the given id to the given IRandomAccess object in the calling
 * thread's id map. A null id is ignored.
 *
 * @param id the mapped name
 * @param ira the IRandomAccess object that will be referenced by
 *   {@code id}. If null, any existing mapping for {@code id} will be
 *   cleared.
 * @see #getMappedFile(String)
 */
public static void mapFile(String id, IRandomAccess ira) {
  if (id != null) {
    if (ira != null) {
      getIdMap().put(id, ira);
    }
    else {
      getIdMap().remove(id);
    }
    LOGGER.debug("Location.mapFile: {} -> {}", id, ira);
  }
}
/**
 * Gets the actual filename on disk for the given id. Typically the id itself
 * is the filename, but in some cases may not be; e.g., if OMEIS has renamed
 * a file from its original name to a standard location such as Files/101,
 * the original filename is useful for checking the file extension and doing
 * pattern matching, but the renamed filename is required to read its
 * contents.
 *
 * @param id the mapped name
 * @return the filename mapped to the id, or the id itself when the id is
 *   null, has no mapping, or is mapped to something other than a String
 * @see #mapId(String, String)
 */
public static String getMappedId(String id) {
  final Map<?, ?> ids = getIdMap();
  if (ids == null || id == null) {
    return id;
  }
  final Object target = ids.get(id);
  return (target instanceof String) ? (String) target : id;
}
/**
 * Gets the random access handle mapped to the given id in the calling
 * thread's id map.
 *
 * @param id the mapped name
 * @return the corresponding IRandomAccess, or null if the id is null, has no
 *   mapping, or is mapped to something other than an IRandomAccess
 * @see #mapFile(String, IRandomAccess)
 */
public static IRandomAccess getMappedFile(String id) {
  final Map<?, ?> ids = getIdMap();
  if (ids == null || id == null) {
    return null;
  }
  final Object target = ids.get(id);
  return (target instanceof IRandomAccess) ? (IRandomAccess) target : null;
}
/**
 * Return the calling thread's id mapping. The returned map is the live map,
 * not a copy, so changes to it affect later lookups on the same thread.
 *
 * @return the map from names to filesystem paths and IRandomAccess objects
 * @see #mapId(String, String)
 * @see #mapFile(String, IRandomAccess)
 */
public static HashMap<String, Object> getIdMap() {
  return idMap.get();
}
/**
 * Set the calling thread's id mapping using the given HashMap. The map is
 * stored as-is, not copied.
 *
 * @param map the new mapping from names to filesystem paths
 *   and IRandomAccess objects
 * @throws IllegalArgumentException if the given HashMap is null.
 */
public static void setIdMap(HashMap<String, Object> map) {
  if (map == null) {
    throw new IllegalArgumentException("map cannot be null");
  }
  idMap.set(map);
}
/**
* Gets an IRandomAccess object that can read from the given file.
* Equivalent to {@code getHandle(id, false)}, i.e. a read-only handle.
*
* @param id the name for which to locate an IRandomAccess
* @return a previously mapped IRandomAccess, or a new IRandomAccess
* according to the name's type (URL, filesystem path, etc.)
* @throws IOException if a valid IRandomAccess cannot be created
* @see #getHandle(String, boolean, boolean, int)
* @see IRandomAccess
*/
public static IRandomAccess getHandle(String id) throws IOException {
return getHandle(id, false);
}
/**
* Gets an IRandomAccess object that can read from or write to the given file.
* Equivalent to {@code getHandle(id, writable, true)}, i.e. archive handle
* checks are enabled.
*
* @param id the name for which to locate an IRandomAccess
* @param writable true if the returned IRandomAccess should have write permission
* @return a previously mapped IRandomAccess, or a new IRandomAccess
* according to the name's type (URL, filesystem path, etc.)
* @throws IOException if a valid IRandomAccess cannot be created
* @see #getHandle(String, boolean, boolean, int)
* @see IRandomAccess
*/
public static IRandomAccess getHandle(String id, boolean writable)
throws IOException
{
return getHandle(id, writable, true);
}
/**
* Gets an IRandomAccess object that can read from or write to the given file.
* Equivalent to {@code getHandle(id, writable, allowArchiveHandles, 0)};
* a buffer size of 0 is non-positive and therefore ignored.
*
* @param id the name for which to locate an IRandomAccess
* @param writable true if the returned IRandomAccess should have write permission
* @param allowArchiveHandles true if checks for compressed/archive file types
* (e.g. Zip, GZip, BZip2) should be enabled
* @return a previously mapped IRandomAccess, or a new IRandomAccess
* according to the name's type (URL, filesystem path, etc.)
* @throws IOException if a valid IRandomAccess cannot be created
* @see #getHandle(String, boolean, boolean, int)
* @see IRandomAccess
*/
public static IRandomAccess getHandle(String id, boolean writable,
boolean allowArchiveHandles) throws IOException
{
return getHandle(id, writable, allowArchiveHandles, 0);
}
/**
 * Gets an IRandomAccess object that can read from or write to the given file.
 *
 * A handle previously registered for the id with
 * {@link #mapFile(String, IRandomAccess)} is returned as-is. Otherwise a new
 * handle is opened on the mapped name, chosen in this order: S3 (through a
 * local cached copy when a remote cache root directory is configured),
 * http/https, then Zip, GZip and BZip2 when archive handles are allowed,
 * and finally a plain NIO file handle. The S3 cached copy is always opened
 * read-only. The scheme checks look at the given id, not the mapped name.
 *
 * @param id the name for which to locate an IRandomAccess
 * @param writable true if the returned IRandomAccess should have write permission
 * @param allowArchiveHandles true if checks for compressed/archive file types
 *   (e.g. Zip, GZip, BZip2) should be enabled
 * @param bufferSize the buffer size to use when constructing a NIOFileHandle.
 *   Ignored when non-positive.
 * @return a previously mapped IRandomAccess, or a new IRandomAccess
 *   according to the name's type (URL, filesystem path, etc.)
 * @throws IOException if a valid IRandomAccess cannot be created
 * @see IRandomAccess
 */
public static IRandomAccess getHandle(String id, boolean writable,
  boolean allowArchiveHandles, int bufferSize) throws IOException
{
  LOGGER.trace("getHandle(id = {}, writable = {})", id, writable);
  IRandomAccess handle = getMappedFile(id);
  if (handle == null) {
    LOGGER.trace("no handle was mapped for this ID");
    final String target = getMappedId(id);
    final boolean buffered = bufferSize > 0;

    if (S3Handle.canHandleScheme(id)) {
      final StreamHandle.Settings settings = new StreamHandle.Settings();
      if (settings.getRemoteCacheRootDir() == null) {
        handle = new S3Handle(target);
      }
      else {
        final String cachedFile = S3Handle.cacheObject(target, settings);
        handle = buffered
          ? new NIOFileHandle(new File(cachedFile), "r", bufferSize)
          : new NIOFileHandle(cachedFile, "r");
      }
    }
    else if (id.startsWith("http://") || id.startsWith("https://")) {
      handle = new URLHandle(target);
    }
    else if (allowArchiveHandles && ZipHandle.isZipFile(target)) {
      handle = new ZipHandle(target);
    }
    else if (allowArchiveHandles && GZipHandle.isGZipFile(target)) {
      handle = new GZipHandle(target);
    }
    else if (allowArchiveHandles && BZip2Handle.isBZip2File(target)) {
      handle = new BZip2Handle(target);
    }
    else {
      final String mode = writable ? "rw" : "r";
      handle = buffered
        ? new NIOFileHandle(new File(target), mode, bufferSize)
        : new NIOFileHandle(target, mode);
    }
    LOGGER.trace("Created new handle {} -> {}", id, handle);
    // TODO: We should cache the handle, but we can't prevent callers from
    // closing it, which would make the cached handle useless to future
    // fetches.
  }
  LOGGER.trace("Location.getHandle: {} -> {}", id, handle);
  return handle;
}
/**
 * Checks that the given id points at a valid data stream, either because an
 * IRandomAccess is already mapped to it or because a handle can be opened
 * for it.
 *
 * @param id
 *          The id string to validate.
 * @throws IOException
 *           if the id is not valid.
 */
public static void checkValidId(String id) throws IOException {
  // NB: An id that maps directly to an IRandomAccess handle is valid as-is.
  // Do not destroy an existing mapped handle by closing it.
  final boolean alreadyMapped = getMappedFile(id) != null;
  if (!alreadyMapped) {
    // NB: Actually open a handle to make sure the id is valid, and close it
    // right away so it is not left dangling. Opening throws IOException if
    // something goes wrong.
    Location.getHandle(id).close();
  }
}
/**
* Return a list of all of the files in this directory. If 'noHiddenFiles' is
* set to true, then hidden files are omitted.
*
* @param noHiddenFiles true if hidden files should be omitted
* @return an unsorted list of all relative files in the directory represented
* by this Location
* @see java.io.File#list()
*/
public String[] list(boolean noHiddenFiles) {
LOGGER.trace("list({})", noHiddenFiles);
String key = getAbsolutePath() + Boolean.toString(noHiddenFiles);
String [] result = null;
if (cacheListings) {
cleanStaleCacheEntries();
ListingsResult listingsResult = fileListings.get(key);
if (listingsResult != null) {
return listingsResult.listing;
}
}
final List files = new ArrayList();
if (isURL) {
try {
if (urlType == UrlType.S3) {
if (isDirectory()) {
// TODO: This is complicated, not sure what to do here
// See comment in isDirectory()
LOGGER.trace("list s3 {}: Returning []", uri);
return new String[0];
}
else {
LOGGER.trace("list s3 {}: Returning null", uri);
return null;
}
}
URLConnection c = url.openConnection();
InputStream is = c.getInputStream();
boolean foundEnd = false;
BufferedReader br = new BufferedReader(
new InputStreamReader(is, Constants.ENCODING));
String input;
StringBuffer buffer = new StringBuffer();
while ((input = br.readLine()) != null){
buffer.append(input);
}
br.close();
String s = buffer.toString();
while (!foundEnd) {
if (s.toLowerCase().indexOf("