thredds.crawlabledataset.CrawlableDatasetDods
The NetCDF-Java Library is a Java interface to NetCDF files,
as well as to many other types of scientific data formats.
/*
* Copyright 1998-2009 University Corporation for Atmospheric Research/Unidata
*
* Portions of this software were developed by the Unidata Program at the
* University Corporation for Atmospheric Research.
*
* Access and use of this software shall impose the following obligations
* and understandings on the user. The user is granted the right, without
* any fee or cost, to use, copy, modify, alter, enhance and distribute
* this software, and any derivative works thereof, and its supporting
* documentation for any purpose whatsoever, provided that this entire
* notice appears in all copies of the software, derivative works and
* supporting documentation. Further, UCAR requests that the user credit
* UCAR/Unidata in any publications that result from the use of this
* software or in any product that includes this software. The names UCAR
* and/or Unidata, however, may not be used in any advertising or publicity
* to endorse or promote any products or commercial entity unless specific
* written permission is obtained from UCAR/Unidata. The user also
* understands that UCAR/Unidata is not obligated to provide the user with
* any support, consulting, training or assistance of any kind with regard
* to the use, operation and performance of this software nor to provide
* the user with any updates, revisions, new versions or "bug fixes."
*
* THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE.
*/
package thredds.crawlabledataset;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import thredds.util.DodsURLExtractor;
/**
 * A CrawlableDataset implementation for datasets served by an OPeNDAP (DODS) server and
 * identified by an "http:" URL. Child datasets are discovered by extracting links from the
 * server's HTML directory listing with {@link thredds.util.DodsURLExtractor}.
 *
 * @author Ethan Davis
 * @author Bas Retsios
 * @since Jun 8, 2005 15:34:04 -0600
*/
public class CrawlableDatasetDods implements CrawlableDataset {
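// A minimal usage sketch (not part of the original source): the server URL below is
// hypothetical and exception handling is omitted. It assumes instantiation through
// CrawlableDatasetFactory by class name, mirroring how child datasets are created in listDatasets():
//
//   CrawlableDataset top = CrawlableDatasetFactory.createCrawlableDataset(
//       "http://example.server/opendap/data/", // hypothetical OPeNDAP directory URL
//       "thredds.crawlabledataset.CrawlableDatasetDods", null);
//   for (Iterator it = top.listDatasets().iterator(); it.hasNext();) {
//     CrawlableDataset child = (CrawlableDataset) it.next();
//     System.out.println(child.getPath());
//   }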
static private org.slf4j.Logger log = org.slf4j.LoggerFactory
.getLogger(CrawlableDatasetDods.class);
private static DodsURLExtractor urlExtractor = null;
private static Map listDatasetsMap = null; // maintain an in-memory copy for performance reasons .. TODO: add a version-check
private String path;
private URLConnection pathUrlConnection = null; // store this, for performance reasons
private String name;
private Object configObj = null;
protected CrawlableDatasetDods() {
}
protected CrawlableDatasetDods(String path, Object configObj)
{
if (urlExtractor == null)
urlExtractor = new DodsURLExtractor();
if (listDatasetsMap == null) // for performance
listDatasetsMap = new HashMap();
if (configObj != null) {
log.debug("CrawlableDatasetDods(): config object not null, it will be ignored <"
+ configObj.toString() + ">.");
this.configObj = configObj;
}
if (path.startsWith("http:")) {
this.path = path;
try {
new URI(path); // check syntax; throws URISyntaxException if it is not a valid URI
name = getName(path);
} catch (URISyntaxException e) {
String tmpMsg = "Bad URI syntax for path <" + path + ">: " + e.getMessage();
log.debug( "CrawlableDatasetDods(): " + tmpMsg);
throw new IllegalArgumentException( tmpMsg);
}
// Check whether this accessPoint URL is an OPeNDAP server URL.
// Commented out for now because it takes far too long when expanding a directory:
// every link would be tested, since a CrawlableDataset is constructed for each link when its parent is expanded.
/*
String apVersionString = path + (path.endsWith("/") ? "version" : "/version");
String apVersionResultContent = null;
try
{
apVersionResultContent = urlExtractor.getTextContent( apVersionString);
}
catch (java.io.IOException e)
{
String tmpMsg = "The accessPoint URL is not an OPeNDAP server URL (no version info) <" + apVersionString + ">";
log.error( "CrawlableDatasetDods(): " + tmpMsg, e);
}
if ( apVersionResultContent == null ||
(apVersionResultContent.indexOf( "DODS") == -1 &&
apVersionResultContent.indexOf( "OPeNDAP") == -1 &&
apVersionResultContent.indexOf( "DAP") == -1))
{
String tmpMsg = "The accessPoint URL version info is not valid <" + apVersionResultContent + ">";
log.error( "CrawlableDatasetDods(): " + tmpMsg);
}
*/
} else {
String tmpMsg = "Invalid url <" + path + ">.";
log.debug("CrawlableDatasetDods(): " + tmpMsg);
throw new IllegalArgumentException(tmpMsg);
}
}
private CrawlableDatasetDods(CrawlableDatasetDods parent, String childPath)
{
String normalChildPath = childPath.startsWith("/")?childPath.substring(1):childPath;
this.path = parent.getPath();
this.path += this.path.endsWith("/") ? normalChildPath : "/" + normalChildPath;
this.name = getName(path);
this.configObj = null;
}
private String getName(String path) {
// Attempt to return the last name in the path name sequence.
if (!path.equals("/")) {
String tmpName = path.endsWith("/")
? path.substring(0, path.length() - 1) : path;
int index = tmpName.lastIndexOf("/");
if (index != -1)
tmpName = tmpName.substring(index + 1);
return tmpName;
} else
return path;
}
/**
* Provide access to the java.net.URI that this CrawlableDataset represents.
*
* @return the java.net.URI that this CrawlableDataset represents.
*/
public URI getUri()
{
try
{
return new URI( this.path);
}
catch ( URISyntaxException e )
{
return null;
}
}
public Object getConfigObject() {
return configObj;
}
public String getPath() {
return (this.path);
}
public String getName() {
return (this.name);
}
public boolean isCollection() {
return isCollection(path);
}
public CrawlableDataset getDescendant( String relativePath )
{
if ( relativePath.startsWith( "/"))
throw new IllegalArgumentException( "Path must be relative <" + relativePath + ">.");
return new CrawlableDatasetDods(this, relativePath);
}
// How do we determine whether a URL is a collection?
// We can't count on a trailing slash, as that is removed by CrawlableDatasetFactory.
// For now, assume collection unless a known file extension is encountered.
private static String [] knownFileExtensions = {".hdf", ".xml", ".nc", ".bz2", ".cdp", ".jpg"};
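// Illustrative examples (hypothetical URLs, not from the original source):
//   "http://example.server/opendap/dir/"        -> collection (no known file extension)
//   "http://example.server/opendap/file.nc"     -> not a collection (known file extension)
//   "http://example.server/opendap/file.nc.das" -> not a collection (DODS service suffix, i.e. a dataset)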
private static boolean isCollection(String path)
{
String testPath = path.toLowerCase(); // otherwise our matches may fail
if (isDodsDataset(testPath))
return false;
else
{
int i = 0;
while ((i < (knownFileExtensions.length)) && !testPath.endsWith(knownFileExtensions[i]))
++i;
return (i >= knownFileExtensions.length); // if i < length, a known file extension matched, so this is not a collection
}
}
private static String [] dodsExtensions = {".html", ".htm", ".das", ".dds", ".info"};
private static String getDodsExtension(String path)
{
String extension = "";
String testPath = path.toLowerCase(); // otherwise our matches may fail
int i = 0;
while ((i < (dodsExtensions.length)) && !testPath.endsWith(dodsExtensions[i]))
++i;
if (i < dodsExtensions.length)
extension = dodsExtensions[i];
return extension;
}
private static boolean isDodsDataset(String path)
{
return getDodsExtension(path).length() > 0;
}
private static String removeDodsExtension(String path)
{
String dodsExtension = getDodsExtension(path);
if (dodsExtension.length() > 0)
path = path.substring(0, path.length() - dodsExtension.length());
return path;
}
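// For example (hypothetical URL, not from the original source):
//   removeDodsExtension("http://example.server/opendap/file.nc.html") would return "http://example.server/opendap/file.nc".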
// This function shouldn't be here!
// It is a workaround for OPeNDAP servers that drop part of their URL path (the /opendap-bin/nph-dods/ part).
// Example of a server with this problem (2-Nov-2006): http://acdisc.sci.gsfc.nasa.gov/opendap-bin/nph-dods/OPENDAP/Giovanni/
private String forceChild(String url)
{
String prefix = path;
if (prefix.endsWith("/"))
prefix = path.substring(0, path.length() - 1); // because the url also contains a '/' that we will use
int j = url.substring(0, url.length() - 1).lastIndexOf('/'); // url.length() - 1 was intentional .. if the last char is a '/', we're interested in the previous one.
if (j >= 0)
{
String ret = prefix + url.substring(j);
return ret;
}
else // relative paths .. leave intact
return url;
}
public List listDatasets() throws IOException {
if (!this.isCollection()) {
String tmpMsg = "This dataset <" + this.getPath()
+ "> is not a collection dataset.";
log.error("listDatasets(): " + tmpMsg);
throw new IllegalStateException(tmpMsg);
}
if (listDatasetsMap.containsKey(path)) // shortcut .. for performance
return (List)listDatasetsMap.get(path);
else
{
List list = new ArrayList();
List pathList = new ArrayList(); // only for detecting duplicates (after removing the extension, sometimes we end up with duplicates)
// Get list of possible datasets from current URL.
List possibleDsList = null;
try {
String openPath = path;
if (!openPath.endsWith("/")) // without the trailing slash, relative URLs don't resolve (extract() fails, in particular at "URL u = new URL(baseURL, value)")
openPath += "/";
possibleDsList = urlExtractor.extract(openPath);
} catch (java.io.IOException e) {
log.warn("listDatasets(): IOException while extracting dataset info from given OPeNDAP directory <"
+ path + ">, return empty list: " + e.getMessage());
return (list);
}
// Handle each link in the current access path.
String curDsUrlString = null;
for (Iterator it = possibleDsList.iterator(); it.hasNext(); ) {
curDsUrlString = (String) it.next();
// Perform some tests on curDsUrlString
// Skip datasets that aren't OPeNDAP datasets (".html") or
// collection datasets ("/").
if ((!isDodsDataset(curDsUrlString)) && (!isCollection(curDsUrlString))) {
log.warn("expandThisLevel(): Dataset isn't an OPeNDAP dataset or collection dataset, skip <"
+ path + ">.");
continue;
}
curDsUrlString = removeDodsExtension(curDsUrlString);
// This function goes a bit too far trying to recover from servers that drop part of URL path.
// However, it also converts URLs that point to external servers to be subdirectories of this CrDS.
//curDsUrlString = forceChild(curDsUrlString);
// Skip any URLs that aren't children of this CrDs
if ( !curDsUrlString.startsWith( path ) )
{
log.debug( "listDatasets(): skipping URL <" + curDsUrlString + ">, not child of this CrDs <" + path + ">." );
continue;
}
if (pathList.contains(curDsUrlString))
continue; // duplicate
else
pathList.add(curDsUrlString);
// Avoid links back up the path hierarchy (i.e., parent directory links).
// Note: this check was taken over from CrawlableDatasetFile; with forceChild() disabled it is redundant with the child-URL check above.
if (!curDsUrlString.startsWith(path)) {
log.debug("listDatasets(): current path <" + curDsUrlString
+ "> not child of given" + " location <" + path
+ ">, skip.");
continue;
}
try {
new URI(curDsUrlString); // syntax check
} catch (URISyntaxException e) {
log.error("listDatasets(): Skipping dataset <"
+ curDsUrlString + "> due to URISyntaxException: "
+ e.getMessage());
continue;
}
log.debug("listDatasets(): handle dataset (" + curDsUrlString
+ ")");
// So far so good .. curDsUrlString passed all tests, thus add it to the list
try {
list.add(CrawlableDatasetFactory.createCrawlableDataset(
curDsUrlString, this.getClass().getName(), null));
} catch (ClassNotFoundException e) {
log.warn("listDatasets(): Can't make CrawlableDataset for child url <"
+ curDsUrlString + ">: " + e.getMessage());
} catch (NoSuchMethodException e) {
log.warn("listDatasets(): Can't make CrawlableDataset for child url <"
+ curDsUrlString + ">: " + e.getMessage());
} catch (IllegalAccessException e) {
log.warn("listDatasets(): Can't make CrawlableDataset for child url <"
+ curDsUrlString + ">: " + e.getMessage());
} catch (InvocationTargetException e) {
log.warn("listDatasets(): Can't make CrawlableDataset for child url <"
+ curDsUrlString + ">: " + e.getMessage());
} catch (InstantiationException e) {
log.warn("listDatasets(): Can't make CrawlableDataset for child url <"
+ curDsUrlString + ">: " + e.getMessage());
}
}
listDatasetsMap.put(path, list); // cache the result for next time, for performance
return list;
}
}
public List listDatasets(CrawlableDatasetFilter filter) throws IOException {
List list = this.listDatasets();
if (filter == null)
return list;
List retList = new ArrayList();
for (Iterator it = list.iterator(); it.hasNext();) {
CrawlableDataset curDs = (CrawlableDataset) it.next();
if (filter.accept(curDs)) {
retList.add(curDs);
}
}
return (retList);
}
public CrawlableDataset getParentDataset() {
if (!path.equals("/")) {
String parentPath = path;
int index = parentPath.lastIndexOf( "/", parentPath.endsWith( "/") ? parentPath.length() - 2 : parentPath.length() - 1 );
if ( index != -1 )
parentPath = parentPath.substring( 0, index + 1 );
return new CrawlableDatasetDods( parentPath, null);
} else
return null;
}
public boolean exists() {
if (pathUrlConnection == null)
try {
URL u = new URL(path);
pathUrlConnection = u.openConnection();
} catch (MalformedURLException e) {
// bad URL: fall through and treat the dataset as non-existent
} catch (IOException e) {
// connection failure: fall through and treat the dataset as non-existent
}
if ( pathUrlConnection != null )
try {
int responseCode = ((HttpURLConnection)pathUrlConnection).getResponseCode();
if (responseCode >= 200 && responseCode < 300) // Successful
return true;
} catch (IOException e) {
// could not get a response code: treat the dataset as non-existent
}
return false;
}
public long length() {
if (this.isCollection())
return (0);
if (pathUrlConnection == null)
{
try {
URL u = new URL(path);
pathUrlConnection = u.openConnection();
} catch (MalformedURLException e) {
} catch (IOException e) {
}
}
if (pathUrlConnection != null)
return pathUrlConnection.getContentLength();
else
return (-1);
}
public Date lastModified() {
if (pathUrlConnection == null)
{
try {
URL u = new URL(path);
pathUrlConnection = u.openConnection();
} catch (MalformedURLException e) {
} catch (IOException e) {
}
}
if (pathUrlConnection != null)
{
long lastModified = pathUrlConnection.getLastModified();
if (lastModified != 0)
{
Calendar cal = Calendar.getInstance();
cal.clear();
cal.setTimeInMillis(lastModified);
return (cal.getTime());
}
else
return null;
}
else
return null;
}
public String toString()
{
return this.path;
}
}