/*
* Copyright (c) 1998-2020 John Caron and University Corporation for Atmospheric Research/Unidata
*/
package ucar.nc2.internal.ncml;
import org.jdom2.Element;
import thredds.inventory.DateExtractor;
import thredds.inventory.DateExtractorFromName;
import thredds.inventory.MFile;
import thredds.inventory.MFileCollectionManager;
import ucar.nc2.Group;
import ucar.nc2.NetcdfFile;
import ucar.nc2.Variable;
import ucar.nc2.dataset.NetcdfDataset;
import ucar.nc2.units.DateFormatter;
import ucar.nc2.util.CancelTask;
import ucar.nc2.util.DiskCache2;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.Executor;
/**
* Superclass for NcML Aggregation Builder.
* An Aggregation acts as a ProxyReader for VariableDS. That is, it must implement:
*
* <pre>
* public Array read(Variable mainv);
*
* public Array read(Variable mainv, Section section);
* </pre>
*
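* A typical lifecycle, in sketch form (AggregationExisting is one concrete subclass in this
* package; the constructor and argument values here are illustrative, not prescriptive):
*
* <pre>{@code
* Aggregation agg = new AggregationExisting(dsBuilder, "time", null);
* agg.addDatasetScan(null, "/data/grids", ".nc", null, null, null, "true", null);
* agg.build(null); // scan, sort, and populate the NetcdfDataset.Builder
* }</pre>
*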
* @author caron
*/
public abstract class Aggregation implements ucar.nc2.ncml.AggregationIF {
protected enum Type {
forecastModelRunCollection, forecastModelRunSingleCollection, joinExisting, joinExistingOne, joinNew, tiled, union
}
protected enum TypicalDataset {
FIRST, RANDOM, LATEST, PENULTIMATE
}
protected static TypicalDataset typicalDatasetMode = TypicalDataset.FIRST;
protected static org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(Aggregation.class);
protected static DiskCache2 diskCache2;
// this is where persist() reads/writes files
public static void setPersistenceCache(DiskCache2 dc) {
diskCache2 = dc;
if (diskCache2 != null)
diskCache2.setAlwaysUseCache(true); // the persistence cache file has same name as the ncml - must put it into the
// cache else clobber ncml 7/31/2014
}
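// A minimal sketch of wiring up the persistence cache; the cache root below is
// hypothetical. DiskCache2(root, relativeToHome, persistMinutes, scourEveryMinutes):
//
// Aggregation.setPersistenceCache(new DiskCache2("/tmp/aggCache/", false, 60 * 24, 60));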
// experimental multithreading
protected static Executor executor;
public static void setExecutor(Executor exec) {
executor = exec;
}
public static void setTypicalDatasetMode(String mode) {
if (mode.equalsIgnoreCase("random"))
typicalDatasetMode = TypicalDataset.RANDOM;
else if (mode.equalsIgnoreCase("latest"))
typicalDatasetMode = TypicalDataset.LATEST;
else if (mode.equalsIgnoreCase("penultimate"))
typicalDatasetMode = TypicalDataset.PENULTIMATE;
else if (mode.equalsIgnoreCase("first"))
typicalDatasetMode = TypicalDataset.FIRST;
else
logger.error("Unknown setTypicalDatasetMode= " + mode);
}
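// Example: prefer the next-to-last dataset as the template, which can sidestep a
// latest file that is still being written:
//
// Aggregation.setTypicalDatasetMode("penultimate");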
protected static boolean debug, debugOpenFile, debugSyncDetail, debugProxy, debugRead, debugDateParse, debugConvert;
//////////////////////////////////////////////////////////////////////////////////////////
protected NetcdfDataset.Builder<?> ncDataset; // the aggregation belongs to this dataset
protected Type type; // the aggregation type
protected Object spiObject = null; // not implemented in nested or
protected List<AggDataset> explicitDatasets = new ArrayList<>(); // explicitly created Dataset objects from
// netcdf elements
protected List<AggDataset> datasets = new ArrayList<>(); // all : explicit and scanned
protected MFileCollectionManager datasetManager; // manages scanning
protected boolean cacheDirty = true; // aggCache persist file needs updating
protected String dimName; // the aggregation dimension name
Element ncmlElem;
// experimental
protected String dateFormatMark;
// protected EnumSet<NetcdfDataset.Enhance> enhance = null; // default no enhancement
protected boolean isDate;
protected DateFormatter dateFormatter = new DateFormatter();
/**
* Create an Aggregation for the given NetcdfDataset.
* The following addXXXX methods are called, then build(), before the object is ready for use.
*
* @param ncd Aggregation belongs to this NetcdfDataset
* @param dimName the aggregation dimension name
* @param type the Aggregation.Type
* @param recheckS how often to check if files have changed
*/
protected Aggregation(NetcdfDataset.Builder<?> ncd, String dimName, Type type, String recheckS) {
this.ncDataset = ncd;
this.dimName = dimName;
this.type = type;
String name = ncd.location;
if (name == null)
name = "Agg-" + ncd.hashCode();
datasetManager = MFileCollectionManager.openWithRecheck(name, recheckS);
}
/**
* Add a nested dataset, specified by an explicit netcdf element.
* enhance is handled by the reader, so it's always false here.
*
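* In NcML this corresponds to an explicit nested netcdf element, for example:
*
* <pre>{@code
* <aggregation dimName="time" type="joinExisting">
*   <netcdf location="file1.nc" ncoords="1" coordValue="0"/>
* </aggregation>
* }</pre>
*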
* @param cacheName a unique name to use for caching
* @param location attribute "location" on the netcdf element
* @param id attribute "id" on the netcdf element
* @param ncoordS attribute "ncoords" on the netcdf element
* @param coordValueS attribute "coordValue" on the netcdf element
* @param sectionSpec attribute "section" on the netcdf element
* @param reader factory for reading this netcdf dataset
*/
public void addExplicitDataset(String cacheName, String location, String id, String ncoordS, String coordValueS,
String sectionSpec, ucar.nc2.util.cache.FileFactory reader) {
AggDataset nested = makeDataset(cacheName, location, id, ncoordS, coordValueS, sectionSpec, null, reader);
explicitDatasets.add(nested);
}
public void addDataset(AggDataset nested) {
explicitDatasets.add(nested);
}
/**
* Add a dataset scan
*
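* In NcML this corresponds to a scan element, for example (attribute values illustrative):
*
* <pre>{@code
* <aggregation dimName="time" type="joinExisting">
*   <scan location="/data/sst/" suffix=".nc" dateFormatMark="sst#yyyyMMdd" subdirs="false"/>
* </aggregation>
* }</pre>
*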
* @param crawlableDatasetElement defines a CrawlableDataset, or null
* @param dirName scan this directory
* @param suffix filter on this suffix (may be null)
* @param regexpPatternString include if full name matches this regular expression (may be null)
* @param dateFormatMark create dates from the filename (may be null)
* @param enhanceMode how should files be enhanced
* @param subdirs equals "false" if the scan should not descend into subdirectories
* @param olderThan files must be older than this time (now - lastModified >= olderThan); must be a time unit, may be
* null
*/
public void addDatasetScan(Element crawlableDatasetElement, String dirName, String suffix, String regexpPatternString,
String dateFormatMark, Set<NetcdfDataset.Enhance> enhanceMode, String subdirs, String olderThan) {
datasetManager.addDirectoryScan(dirName, suffix, regexpPatternString, subdirs, olderThan, enhanceMode);
this.dateFormatMark = dateFormatMark;
if (dateFormatMark != null) {
isDate = true;
if (type == Type.joinExisting)
type = Type.joinExistingOne; // tricky
DateExtractor dateExtractor = new DateExtractorFromName(dateFormatMark, true);
datasetManager.setDateExtractor(dateExtractor);
}
}
// experimental
public void addCollection(String spec, String olderThan) {
datasetManager = MFileCollectionManager.open(spec, spec, olderThan, new Formatter());
}
public void setModifications(Element ncmlMods) {
this.ncmlElem = ncmlMods;
}
/**
* Get type of aggregation
*
* @return type of aggregation
*/
public Type getType() {
return type;
}
/**
* Get dimension name to join on
*
* @return dimension name or null if type union/tiled
*/
public String getDimensionName() {
return dimName;
}
protected String getLocation() {
return ncDataset.location;
}
/////////////////////////////////////////////////////////////////////
public void close() throws IOException {
persistWrite();
}
/**
* Check to see if it's time to rescan the directory, and if so, rescan and extend the dataset if needed.
* Note that this just calls sync(), so structural metadata may be modified (!!)
*
* @return true if directory was rescanned and dataset may have been updated
* @throws IOException on io error
*/
@Override
public synchronized boolean syncExtend() throws IOException {
return false; // LOOK datasetManager.isScanNeeded() && _sync();
}
// public synchronized boolean sync() throws IOException {
// return datasetManager.isScanNeeded() && _sync();
// }
// LOOK could also use syncExtend()
@Override
public long getLastModified() {
try {
datasetManager.scanIfNeeded();
} catch (IOException e) {
logger.error("Aggregation scan failed, e");
}
return datasetManager.getLastChanged();
}
/*
* LOOK
* private boolean _sync() throws IOException {
* if (!datasetManager.scan(true))
* return false; // nothing changed LOOK what about grib extension ??
* cacheDirty = true;
* makeDatasets(null);
*
* // rebuild the metadata
* rebuildDataset();
* ncDataset.finish();
* if (ncDataset.getEnhanceMode().contains(NetcdfDataset.Enhance.CoordSystems)) { // force recreation of the
* coordinate
* // systems
* ncDataset.clearCoordinateSystems();
* ncDataset.enhance(ncDataset.getEnhanceMode());
* ncDataset.finish();
* }
*
* return true;
* }
*/
@Override
public String getFileTypeId() { // LOOK - should cache ??
AggDataset ds = null;
NetcdfFile ncfile = null;
try {
ds = getTypicalDataset();
ncfile = ds.acquireFile(null);
return ncfile.getFileTypeId();
} catch (Exception e) {
logger.error("failed to open " + ds, e);
} finally {
if (ds != null)
try {
ds.close(ncfile);
} catch (IOException e) {
logger.error("failed to close " + ds);
}
}
return "N/A";
}
@Override
public String getFileTypeDescription() { // LOOK - should cache ??
AggDataset ds = null;
NetcdfFile ncfile = null;
try {
ds = getTypicalDataset();
ncfile = ds.acquireFile(null);
return ncfile.getFileTypeDescription();
} catch (Exception e) {
logger.error("failed to open " + ds, e);
} finally {
if (ds != null)
try {
ds.close(ncfile);
} catch (IOException e) {
logger.error("failed to close " + ds);
}
}
return "N/A";
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// stuff for subclasses to override
/**
* Call this to build the dataset objects in the NetcdfDataset
*
* @param cancelTask maybe cancel
* @throws IOException on read error
*/
protected abstract void buildNetcdfDataset(CancelTask cancelTask) throws IOException;
/**
* Allow information to be made persistent. Overridden in AggregationExisting
*
* @throws IOException on error
*/
@Override
public void persistWrite() throws IOException {}
/**
* read info from the persistent XML file, if it exists; overridden in AggregationExisting
*/
protected void persistRead() {}
@Override
public void getDetailInfo(Formatter f) {
f.format(" Type=%s%n", type);
f.format(" dimName=%s%n", dimName);
f.format(" Datasets (%d) %n", datasets.size());
for (AggDataset ds : datasets)
ds.show(f);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// all elements are processed, finish construction
public void build(CancelTask cancelTask) throws IOException {
datasetManager.scan(true); // Make the list of Datasets, by scanning if needed.
cacheDirty = true;
makeDatasets(cancelTask);
buildNetcdfDataset(cancelTask);
}
public List<AggDataset> getDatasets() {
return datasets;
}
/**
* Make the list of Datasets, from explicit and scans.
*
* @param cancelTask user can cancel
* @throws IOException on i/o error
*/
protected void makeDatasets(CancelTask cancelTask) throws IOException {
// here's where the results will go
datasets = new ArrayList<>();
for (MFile cd : datasetManager.getFilesSorted()) {
datasets.add(makeDataset(cd));
}
// sort using Dataset as Comparator.
// Sort by date if it exists, else sort by filename.
Collections.sort(datasets);
// add the explicit datasets - these need to be kept in order
// LOOK - should they be before or after scanned? Does it make sense to mix scan and explicit?
// AggFmrcSingle sets explicit datasets - the scan is empty
datasets.addAll(explicitDatasets);
// Remove unreadable files (i.e. due to permissions) from the aggregation.
// LOOK: Is this logic we should install "upstream", perhaps in MFileCollectionManager?
// It would affect other collections than just NcML aggregation in that case.
for (Iterator<AggDataset> datasetsIter = datasets.iterator(); datasetsIter.hasNext();) {
AggDataset dataset = datasetsIter.next();
MFile mFile = dataset.getMFile();
if ((mFile != null) && (!mFile.isReadable())) {
logger.warn("Aggregation member isn't readable (permissions issue?). Skipping: " + mFile.getPath());
datasetsIter.remove();
}
}
// check for duplicate location
Set<String> dset = new HashSet<>(2 * datasets.size());
for (AggDataset dataset : datasets) {
if (dset.contains(dataset.cacheLocation))
logger.warn("Duplicate dataset in aggregation = " + dataset.cacheLocation);
dset.add(dataset.cacheLocation);
}
if (datasets.isEmpty()) {
throw new IllegalStateException("There are no datasets in the aggregation " + datasetManager);
}
}
/**
* Open one of the nested datasets as a template for the aggregation dataset.
*
* @return a typical Dataset
* @throws IOException if there are no datasets
*/
protected AggDataset getTypicalDataset() throws IOException {
List<AggDataset> nestedDatasets = getDatasets();
int n = nestedDatasets.size();
if (n == 0)
throw new FileNotFoundException("No datasets in this aggregation");
int select;
if (typicalDatasetMode == TypicalDataset.LATEST)
select = n - 1;
else if (typicalDatasetMode == TypicalDataset.PENULTIMATE)
select = (n < 2) ? 0 : n - 2;
else if (typicalDatasetMode == TypicalDataset.FIRST)
select = 0;
else { // random is default
if (r == null)
r = new Random();
select = (n < 2) ? 0 : r.nextInt(n);
}
return nestedDatasets.get(select);
}
private Random r;
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Dataset factory, so subclasses can override
*
* @param cacheName a unique name to use for caching
* @param location attribute "location" on the netcdf element
* @param id attribute "id" on the netcdf element
* @param ncoordS attribute "ncoords" on the netcdf element
* @param coordValueS attribute "coordValue" on the netcdf element
* @param sectionSpec attribute "section" on the netcdf element
* @param enhance open dataset in enhance mode NOT USED
* @param reader factory for reading this netcdf dataset
* @return a Dataset
*/
protected AggDataset makeDataset(String cacheName, String location, String id, String ncoordS, String coordValueS,
String sectionSpec, EnumSet<NetcdfDataset.Enhance> enhance, ucar.nc2.util.cache.FileFactory reader) {
return new AggDataset(cacheName, location, id, enhance, reader, spiObject, ncmlElem); // overridden in OuterDim,
// tiled
}
protected AggDataset makeDataset(MFile dset) {
return new AggDataset(dset, spiObject, ncmlElem);
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* All non-agg variables use a proxy to acquire the file before reading.
* If the variable is caching, read data into cache now.
* If not caching, VariableEnhanced.setProxyReader() is called.
*
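* At read time the proxy behaves roughly like this (a sketch of AggProxyReader; error
* handling omitted, mainV being the aggregation variable):
*
* <pre>{@code
* NetcdfFile ncfile = dset.acquireFile(cancelTask); // lock the member file
* try {
*   Variable proxyV = ncfile.findVariable(mainV.getFullNameEscaped());
*   return proxyV.read();
* } finally {
*   dset.close(ncfile); // release / return to the file cache
* }
* }</pre>
*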
* @param typicalDataset read from a "typical dataset"
* @param newds containing dataset
* @throws IOException on i/o error
*/
void setDatasetAcquireProxy(AggDataset typicalDataset, NetcdfDataset.Builder<?> newds) throws IOException {
AggProxyReader proxy = new AggProxyReader(typicalDataset);
setDatasetAcquireProxy(proxy, newds.rootGroup);
}
private void setDatasetAcquireProxy(AggProxyReader proxy, Group.Builder g) throws IOException {
// all normal (non agg) variables must use a proxy to lock the file
for (Variable.Builder<?> v : g.vbuilders) {
if (v.proxyReader != v && v.proxyReader != null) {
if (debugProxy)
System.out.println(" debugProxy: hasProxyReader " + v.shortName);
continue; // don't mess with agg variables
}
/*
* LOOK no caching
* if (v.isCaching()) { // cache the small ones
* v.setCachedData(v.read()); // cache the variableDS directly
*
* } else { // put proxy on the rest
*/
v.setProxyReader(proxy);
if (debugProxy)
System.out.println(" debugProxy: set proxy on " + v.shortName);
}
// recurse
for (Group.Builder nested : g.gbuilders) {
setDatasetAcquireProxy(proxy, nested);
}
}
}