ucar.nc2.ncml.Aggregation Maven / Gradle / Ivy
The newest version!
/*
* Copyright (c) 1998-2020 John Caron and University Corporation for Atmospheric Research/Unidata
* See LICENSE.txt for license information.
*/
package ucar.nc2.ncml;
import org.jdom2.Element;
import thredds.filesystem.MFileOS;
import thredds.filesystem.MFileOS7;
import thredds.inventory.DateExtractor;
import thredds.inventory.DateExtractorFromName;
import thredds.inventory.MFile;
import thredds.inventory.MFileCollectionManager;
import ucar.ma2.Array;
import ucar.ma2.InvalidRangeException;
import ucar.ma2.Range;
import ucar.ma2.Section;
import ucar.nc2.Group;
import ucar.nc2.NetcdfFile;
import ucar.nc2.ProxyReader;
import ucar.nc2.Variable;
import ucar.nc2.dataset.DatasetUrl;
import ucar.nc2.dataset.NetcdfDataset;
import ucar.nc2.dataset.VariableEnhanced;
import ucar.nc2.units.DateFormatter;
import ucar.nc2.util.CancelTask;
import ucar.nc2.util.DiskCache2;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.Executor;
/**
* Superclass for NcML Aggregation.
*
* An Aggregation acts as a ProxyReader for VariableDS. That is, it must implement:
*
*
* public Array read(Variable mainv);
*
* public Array read(Variable mainv, Section section);
*
*
* @author caron
*/
/*
* May be out of date
* Implementation Notes
* Caching
*
* - Case 1. Explicit list / Scan static directories (recheck=null)
*
* - A. AggCaching - keep track of ncoords, coordValues for joinExisting. Read on open, write on close.
* Could skip scan if cache exists.
*
- B. NetcdfFileCache - write on close if changed (only first time). On sync, recheck = null means wont be reread.
*
* - Case 2. Scan dynamic directories (recheck non-null)
*
* - A. AggCaching - keep track of ncoords, coordValues for joinExisting. Read on open, write on close.
* Could skip scan if cache exists, and recheck time not expired.
*
- B. NetcdfFileCache - write on close if changed. On sync, if recheck time, then rescan.
*
*
* Aggregation Coordinate Variable (aggCoord) Processing
* Construction:
*
* - The aggregation element is processed first.
*
- agg.finish() is called.
*
- If the user has defined the aggCoord in the NcML, it is then processed, overriding whatever the aggregation has
* constructed.
* If values are defined, they are cached in the new variable.
*
* Data Reading:
*
* - If values are cached, agg.read() is never called.
*
- Each Dataset may have a coordinate value(s) defined in the NcML coordValue attribute.
*
- If not, the coordinate value(s) is cached when the dataset is opened.
*
- agg.read() uses those if they exist, else reads and caches.
*
*
* @deprecated do not use
*/
@Deprecated
public abstract class Aggregation implements AggregationIF {
/**
 * The kinds of aggregation.
 * A joinExisting aggregation that is given a DateFormatMark is switched to joinExistingOne (see addDatasetScan()).
 */
protected enum Type {
  // joinExisting with a DateFormatMark makes it into a joinExistingOne - must have only one coord / file
  forecastModelRunCollection, forecastModelRunSingleCollection, joinExisting, joinExistingOne,
  joinNew, tiled, union
}
/**
 * Which nested dataset getTypicalDataset() picks from the dataset list:
 * the first, a random one, the last, or the one before the last.
 */
protected enum TypicalDataset {
  FIRST, RANDOM, LATEST, PENULTIMATE
}
// selection mode used by getTypicalDataset(); FIRST unless changed through setTypicalDatasetMode()
protected static TypicalDataset typicalDatasetMode = TypicalDataset.FIRST;
protected static org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(Aggregation.class);
// assigned by setPersistenceCache(); stays null until then
protected static DiskCache2 diskCache2;
/**
 * Set the disk cache where persist() reads and writes its files.
 * A non-null cache is forced to always use the cache directory: the persistence cache file has the same name as
 * the ncml, so it must be put into the cache or it would clobber the ncml (7/31/2014).
 *
 * @param dc the disk cache to use, may be null
 */
public static void setPersistenceCache(DiskCache2 dc) {
  diskCache2 = dc;
  if (dc == null) {
    return;
  }
  dc.setAlwaysUseCache(true);
}
// experimental multithreading
// shared by all Aggregation instances (static); null until setExecutor() is called
protected static Executor executor;

/**
 * Set the Executor stored in the static executor field.
 *
 * @param exec the executor to store; not validated here
 */
public static void setExecutor(Executor exec) {
  executor = exec;
}
/**
 * Choose which nested dataset getTypicalDataset() returns.
 *
 * @param mode one of "random", "latest", "penultimate" or "first", compared ignoring case. Any other value,
 *        including null, is logged as an error and leaves the current mode unchanged.
 */
public static void setTypicalDatasetMode(String mode) {
  // literal-first comparisons so that a null mode is reported as unknown instead of throwing NullPointerException
  if ("random".equalsIgnoreCase(mode)) {
    typicalDatasetMode = TypicalDataset.RANDOM;
  } else if ("latest".equalsIgnoreCase(mode)) {
    typicalDatasetMode = TypicalDataset.LATEST;
  } else if ("penultimate".equalsIgnoreCase(mode)) {
    typicalDatasetMode = TypicalDataset.PENULTIMATE;
  } else if ("first".equalsIgnoreCase(mode)) {
    typicalDatasetMode = TypicalDataset.FIRST;
  } else {
    logger.error("Unknown setTypicalDatasetMode= {}", mode);
  }
}
// debug switches; when set, the corresponding code paths print diagnostics to System.out
protected static boolean debug, debugOpenFile, debugSyncDetail, debugProxy, debugRead, debugDateParse, debugConvert;
//////////////////////////////////////////////////////////////////////////////////////////
protected NetcdfDataset ncDataset; // the aggregation belongs to this dataset
protected Type type; // the aggregation type
protected Object spiObject; // pass to NetcdfFile.open()
// NOTE(review): the two lists below hold Dataset objects but are declared as raw List - the type argument looks
// lost; confirm against the original source
protected List explicitDatasets = new ArrayList<>(); // explicitly created Dataset objects from
// netcdf elements
protected List datasets = new ArrayList<>(); // all : explicit and scanned
protected MFileCollectionManager datasetManager; // manages scanning
protected boolean cacheDirty = true; // aggCache persist file needs updating
protected String dimName; // the aggregation dimension name
private Element mergeNcml; // set by setModifications(); merged into each nested file in Dataset.acquireFile()
// experimental
protected String dateFormatMark; // set by addDatasetScan(), may be null
// protected EnumSet enhance = null; // default no enhancement
protected boolean isDate; // set to true by addDatasetScan() when a dateFormatMark is given
protected DateFormatter dateFormatter = new DateFormatter();
/**
 * Create an Aggregation for the given NetcdfDataset.
 * Callers then use the addXXXX methods and finally finish(); only after that is the object ready for use.
 * The collection manager is named after the dataset location, or "Agg-" plus the dataset's hash code when the
 * dataset has no location.
 *
 * @param ncd Aggregation belongs to this NetcdfDataset
 * @param dimName the aggregation dimension name
 * @param type the Aggregation.Type
 * @param recheckS how often to check if files have changes
 */
protected Aggregation(NetcdfDataset ncd, String dimName, Type type, String recheckS) {
  this.ncDataset = ncd;
  this.dimName = dimName;
  this.type = type;

  String location = ncd.getLocation();
  String collectionName = (location != null) ? location : "Agg-" + ncd.hashCode();
  this.datasetManager = MFileCollectionManager.openWithRecheck(collectionName, recheckS);
}
/**
 * Add a nested dataset that is described by an explicit netcdf element.
 * The dataset is created through the makeDataset() factory with a null enhance argument, because enhancement is
 * handled by the reader.
 *
 * @param cacheName a unique name to use for caching
 * @param location attribute "location" on the netcdf element
 * @param id attribute "id" on the netcdf element
 * @param ncoordS attribute "ncoords" on the netcdf element
 * @param coordValueS attribute "coordValue" on the netcdf element
 * @param sectionSpec attribute "section" on the netcdf element
 * @param reader factory for reading this netcdf dataset
 */
public void addExplicitDataset(String cacheName, String location, String id, String ncoordS, String coordValueS,
    String sectionSpec, ucar.nc2.util.cache.FileFactory reader) {
  explicitDatasets.add(makeDataset(cacheName, location, id, ncoordS, coordValueS, sectionSpec, null, reader));
}
/**
 * Add an already constructed nested dataset to the list of explicit datasets.
 *
 * @param nested the dataset to add; not validated here
 */
public void addDataset(Dataset nested) {
  explicitDatasets.add(nested);
}
/**
 * Add a dataset scan.
 * When a dateFormatMark is given, the aggregation is flagged as date based, a joinExisting aggregation becomes a
 * joinExistingOne, and a date extractor built from the mark is installed on the dataset manager.
 *
 * @param crawlableDatasetElement defines a CrawlableDataset, or null (not used by this method)
 * @param dirName scan this directory
 * @param suffix filter on this suffix (may be null)
 * @param regexpPatternString include if full name matches this regular expression (may be null)
 * @param dateFormatMark create dates from the filename (may be null)
 * @param enhanceMode how should files be enhanced
 * @param subdirs equals "false" if should not descend into subdirectories
 * @param olderThan files must be older than this time (now - lastModified >= olderThan); must be a time unit,
 *        may be null
 */
public void addDatasetScan(Element crawlableDatasetElement, String dirName, String suffix, String regexpPatternString,
    String dateFormatMark, Set enhanceMode, String subdirs, String olderThan) {
  datasetManager.addDirectoryScan(dirName, suffix, regexpPatternString, subdirs, olderThan, enhanceMode);
  this.dateFormatMark = dateFormatMark;

  if (dateFormatMark == null) {
    return; // no dates in the file names, nothing more to set up
  }

  isDate = true;
  if (type == Type.joinExisting) {
    type = Type.joinExistingOne; // tricky
  }
  datasetManager.setDateExtractor(new DateExtractorFromName(dateFormatMark, true));
}
// experimental
/**
 * Replace the dataset manager with one opened from a collection specification.
 * The manager created in the constructor is discarded.
 *
 * @param spec passed as both the first and the second argument of MFileCollectionManager.open()
 *        (presumably collection name and collection spec - TODO confirm)
 * @param olderThan passed through to MFileCollectionManager.open()
 */
public void addCollection(String spec, String olderThan) {
  datasetManager = MFileCollectionManager.open(spec, spec, olderThan, new Formatter());
}
/**
 * Set the NcML element that Dataset.acquireFile() merges into each nested file it opens.
 *
 * @param ncmlMods the NcML modifications; null means nothing is merged
 */
public void setModifications(Element ncmlMods) {
  this.mergeNcml = ncmlMods;
}
/**
 * Get type of aggregation.
 * Note that addDatasetScan() may change a joinExisting type to joinExistingOne.
 *
 * @return type of aggregation
 */
public Type getType() {
  return type;
}
/**
 * Get dimension name to join on, as passed to the constructor.
 *
 * @return dimension name or null if type union/tiled
 */
public String getDimensionName() {
  return dimName;
}
/**
 * Get the location of the NetcdfDataset this aggregation belongs to.
 *
 * @return ncDataset.getLocation(); the constructor treats a null value as possible
 */
protected String getLocation() {
  return ncDataset.getLocation();
}
/////////////////////////////////////////////////////////////////////
/**
 * Close the aggregation. The only action taken here is persistWrite().
 *
 * @throws IOException if persistWrite() fails
 */
@Override
public void close() throws IOException {
  persistWrite();
}
/**
 * Rescan the directory if the dataset manager says a scan is due, extending the dataset if needed.
 * The actual work is done by _sync(), so structural metadata may be modified (!!)
 *
 * @return true if directory was rescanned and dataset may have been updated
 * @throws IOException on io error
 */
@Override
public synchronized boolean syncExtend() throws IOException {
  if (!datasetManager.isScanNeeded()) {
    return false;
  }
  return _sync();
}
// public synchronized boolean sync() throws IOException {
// return datasetManager.isScanNeeded() && _sync();
// }
// LOOK could also use syncExtend()
/**
 * Get the time the collection last changed, rescanning first if the dataset manager says a scan is due.
 * A failed scan is logged and does not propagate; the manager's current value is returned in that case.
 *
 * @return datasetManager.getLastChanged()
 */
@Override
public long getLastModified() {
  try {
    datasetManager.scanIfNeeded();
  } catch (IOException e) {
    // the exception must be the throwable argument, not part of the message text, or the cause is never logged
    logger.error("Aggregation scan failed", e);
  }
  return datasetManager.getLastChanged();
}
/**
 * Force a scan and, if it reports a change, rebuild the aggregation from the new list of datasets.
 * The statement order matters: the dataset list is remade before the metadata is rebuilt, and the dataset is
 * finished before its enhance mode is inspected.
 *
 * @return false if datasetManager.scan(true) reports no change, otherwise true after the rebuild
 * @throws IOException on io error
 */
private boolean _sync() throws IOException {
  if (!datasetManager.scan(true))
    return false; // nothing changed LOOK what about grib extention ??
  cacheDirty = true; // the persisted aggregation cache no longer matches the datasets
  makeDatasets(null);
  // rebuild the metadata
  rebuildDataset();
  ncDataset.finish();
  if (ncDataset.getEnhanceMode().contains(NetcdfDataset.Enhance.CoordSystems)) { // force recreation of the coordinate
    // systems
    ncDataset.clearCoordinateSystems();
    ncDataset.enhance(ncDataset.getEnhanceMode());
    ncDataset.finish();
  }
  return true;
}
/**
 * Get the file type id of the typical nested dataset.
 * The dataset is acquired for the query and closed again before returning. Failures are logged together with
 * their cause and do not propagate.
 *
 * @return the file type id reported by the typical dataset, or "N/A" if it could not be opened
 */
@Override
public String getFileTypeId() { // LOOK - should cache ??
  Dataset ds = null;
  NetcdfFile ncfile = null;
  try {
    ds = getTypicalDataset();
    ncfile = ds.acquireFile(null);
    return ncfile.getFileTypeId();
  } catch (Exception e) {
    // pass the exception along so the reason for the failure is not lost
    logger.error("failed to open " + ds, e);
  } finally {
    if (ds != null) {
      try {
        ds.close(ncfile);
      } catch (IOException e) {
        logger.error("failed to close " + ds, e);
      }
    }
  }
  return "N/A";
}
/**
 * Get the file type description of the typical nested dataset.
 * The dataset is acquired for the query and closed again before returning. Failures are logged together with
 * their cause and do not propagate.
 *
 * @return the file type description reported by the typical dataset, or "N/A" if it could not be opened
 */
@Override
public String getFileTypeDescription() { // LOOK - should cache ??
  Dataset ds = null;
  NetcdfFile ncfile = null;
  try {
    ds = getTypicalDataset();
    ncfile = ds.acquireFile(null);
    return ncfile.getFileTypeDescription();
  } catch (Exception e) {
    // pass the exception along so the reason for the failure is not lost
    logger.error("failed to open " + ds, e);
  } finally {
    if (ds != null) {
      try {
        ds.close(ncfile);
      } catch (IOException e) {
        logger.error("failed to close " + ds, e);
      }
    }
  }
  return "N/A";
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// stuff for subclasses to override
/**
 * Call this to build the dataset objects in the NetcdfDataset.
 * finish() calls it after the list of nested datasets has been made.
 *
 * @param cancelTask maybe cancel
 * @throws IOException on read error
 */
protected abstract void buildNetcdfDataset(CancelTask cancelTask) throws IOException;
/**
 * Call this when rescan has found changed datasets.
 * _sync() calls it after the list of nested datasets has been remade.
 *
 * @throws IOException on read error
 */
protected abstract void rebuildDataset() throws IOException;
/**
 * Allow information to be made persistent. Does nothing here; overridden in AggregationExisting.
 * Called from close().
 *
 * @throws IOException on error
 */
@Override
public void persistWrite() throws IOException {}
/**
 * Read info from the persistent XML file, if it exists. Does nothing here; overridden in AggregationExisting.
 */
protected void persistRead() {}
/**
 * Write a summary of this aggregation: its type, its dimension name, the number of nested datasets, and then
 * one entry per nested dataset as produced by Dataset.show().
 *
 * @param f append the information to this Formatter
 */
@Override
public void getDetailInfo(Formatter f) {
  f.format(" Type=%s%n", type)
      .format(" dimName=%s%n", dimName)
      .format(" Datasets (%d) %n", datasets.size());
  for (Dataset nested : datasets) {
    nested.show(f);
  }
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * All elements are processed, finish construction.
 * Scans, marks the aggregation cache dirty, makes the list of nested datasets and then lets the subclass build
 * the NetcdfDataset. The return value of the scan is not used.
 *
 * @param cancelTask user can cancel, may be null
 * @throws IOException on i/o error
 */
public void finish(CancelTask cancelTask) throws IOException {
  datasetManager.scan(true); // Make the list of Datasets, by scanning if needed.
  cacheDirty = true;
  makeDatasets(cancelTask);
  buildNetcdfDataset(cancelTask);
}
/**
 * Get the nested datasets, explicit and scanned.
 * The internal list itself is returned, not a copy; makeDatasets() replaces it with a new list.
 *
 * @return the list of nested datasets
 */
public List getDatasets() {
  return datasets;
}
/**
 * Make the list of Datasets, from explicit and scans.
 * Scanned datasets come first, sorted by Dataset's natural order; explicit datasets are appended in the order
 * they were added. Members backed by a local file that is not readable are dropped with a warning, and duplicate
 * cache locations are reported with a warning but kept.
 *
 * @param cancelTask user can cancel (not used by this implementation)
 * @throws IOException on i/o error
 * @throws IllegalStateException if no dataset is left in the aggregation
 */
protected void makeDatasets(CancelTask cancelTask) throws IOException {
  // heres where the results will go
  datasets = new ArrayList<>();
  for (MFile cd : datasetManager.getFilesSorted()) {
    datasets.add(makeDataset(cd));
  }

  // sort using Aggregation.Dataset as Comparator.
  Collections.sort(datasets);

  // add the explicit datasets - these need to be kept in order
  // LOOK - should they be before or after scanned? Does it make sense to mix scan and explicit?
  // AggFmrcSingle sets explicit datasets - the scan is empty
  datasets.addAll(explicitDatasets);

  // Remove unreadable files (i.e. due to permissions) from the aggregation.
  // LOOK: Is this logic we should install "upstream", perhaps in MFileCollectionManager?
  // It would affect other collections than just NcML aggregation in that case.
  for (Iterator<Dataset> datasetsIter = datasets.iterator(); datasetsIter.hasNext();) {
    Dataset dataset = datasetsIter.next();
    MFile mfile = dataset.getMFile();

    Path datasetPath;
    if (mfile instanceof MFileOS) {
      datasetPath = ((MFileOS) mfile).getFile().toPath();
    } else if (mfile instanceof MFileOS7) {
      datasetPath = ((MFileOS7) mfile).getNioPath();
    } else {
      continue; // not a local file (or no MFile at all): readability cannot be checked here
    }

    if (!Files.isReadable(datasetPath)) { // File.canRead() is broken on Windows, but the JDK7 methods work.
      logger.warn("Aggregation member isn't readable (permissions issue?). Skipping: {}", datasetPath);
      datasetsIter.remove();
    }
  }

  // check for duplicate location
  Set<String> seenLocations = new HashSet<>(2 * datasets.size());
  for (Aggregation.Dataset dataset : datasets) {
    // Set.add() returns false when the location was already present
    if (!seenLocations.add(dataset.cacheLocation)) {
      logger.warn("Duplicate dataset in aggregation = {}", dataset.cacheLocation);
    }
  }

  if (datasets.isEmpty()) {
    throw new IllegalStateException("There are no datasets in the aggregation " + datasetManager);
  }
}
/**
 * Pick one of the nested datasets as a template for the aggregation dataset.
 * Which one is picked depends on typicalDatasetMode: the last (LATEST), the one before the last (PENULTIMATE,
 * or the only one if there is just one), the first (FIRST), or otherwise a random one.
 *
 * @return a typical Dataset
 * @throws IOException if there are no datasets
 */
protected Dataset getTypicalDataset() throws IOException {
  List<Dataset> nestedDatasets = getDatasets();
  int n = nestedDatasets.size();
  if (n == 0) {
    throw new FileNotFoundException("No datasets in this aggregation");
  }

  int select;
  if (typicalDatasetMode == TypicalDataset.LATEST) {
    select = n - 1;
  } else if (typicalDatasetMode == TypicalDataset.PENULTIMATE) {
    select = (n < 2) ? 0 : n - 2;
  } else if (typicalDatasetMode == TypicalDataset.FIRST) {
    select = 0;
  } else { // RANDOM, and the fallback for any other value
    if (r == null) {
      r = new Random();
    }
    select = (n < 2) ? 0 : r.nextInt(n);
  }
  return nestedDatasets.get(select);
}

// created on first use by getTypicalDataset() when a random pick is needed
private Random r;
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Dataset factory, so subclasses can override.
 * This base implementation ignores ncoordS, coordValueS and sectionSpec.
 *
 * @param cacheName a unique name to use for caching
 * @param location attribute "location" on the netcdf element
 * @param id attribute "id" on the netcdf element
 * @param ncoordS attribute "ncoords" on the netcdf element
 * @param coordValueS attribute "coordValue" on the netcdf element
 * @param sectionSpec attribute "sectionSpec" on the netcdf element
 * @param enhance open dataset in enhance mode NOT USED
 * @param reader factory for reading this netcdf dataset
 * @return a Aggregation.Dataset
 */
protected Dataset makeDataset(String cacheName, String location, String id, String ncoordS, String coordValueS,
    String sectionSpec, EnumSet enhance, ucar.nc2.util.cache.FileFactory reader) {
  return new Dataset(cacheName, location, id, enhance, reader); // overridden in OuterDim, tiled
}
/**
 * Dataset factory for a scanned file, so subclasses can override.
 *
 * @param dset the file found by the scan
 * @return a Aggregation.Dataset wrapping the file
 */
protected Dataset makeDataset(MFile dset) {
  return new Dataset(dset);
}
/**
* Encapsolates a NetcdfFile that is a component of the aggregation.
*/
public class Dataset implements Comparable {
MFile mfile;
protected String id; // id attribute on the netcdf element
// deferred opening
protected String cacheLocation;
protected ucar.nc2.util.cache.FileFactory reader;
protected Set enhance; // used by Fmrc to read enhanced datasets
protected DatasetUrl durl;
/*
* For subclasses.
*
* @param location location attribute on the netcdf element
*
* protected Dataset(String location) {
* this.location = (location == null) ? null : StringUtil2.substitute(location, "\\", "/");
* }
*/
protected Dataset(MFile mfile) {
this.mfile = mfile;
this.cacheLocation = mfile.getPath();
this.enhance = (Set) mfile.getAuxInfo();
}
/**
* Dataset constructor.
* With this constructor, the actual opening of the dataset is deferred, and done by the reader.
* Used with explicit netcdf elements, and scanned files.
*
* @param cacheLocation a unique name to use for caching
* @param location attribute "location" on the netcdf element
* @param id attribute "id" on the netcdf element
* @param enhance open dataset in enhance mode, may be null NOT USED
* @param reader factory for reading this netcdf dataset; if null, use NetcdfDataset.open( location)
*/
protected Dataset(String cacheLocation, String location, String id, EnumSet enhance,
ucar.nc2.util.cache.FileFactory reader) {
this.mfile = MFileOS.getExistingFile(location);
this.cacheLocation = cacheLocation;
this.id = id;
// this.enhance = enhance; // LOOK why ??
this.reader = reader;
}
/**
* Get the location of this Dataset
*
* @return the location of this Dataset
*/
public String getLocation() {
return (mfile == null) ? cacheLocation : mfile.getPath();
}
/**
*
* @return MFile or null
*/
public MFile getMFile() {
return mfile;
}
public String getCacheLocation() {
return cacheLocation;
}
public String getId() {
if (id != null)
return id;
if (mfile != null)
return mfile.getPath();
return Integer.toString(this.hashCode());
}
public NetcdfFile acquireFile(CancelTask cancelTask) throws IOException {
if (debugOpenFile)
System.out.println(" try to acquire " + cacheLocation);
long start = System.currentTimeMillis();
if (durl == null)
durl = DatasetUrl.findDatasetUrl(cacheLocation); // cache the ServiceType so we dont have to keep figuring it
// out
NetcdfFile ncfile = NetcdfDataset.acquireFile(reader, null, durl, -1, cancelTask, spiObject);
// must merge NcML before enhancing
if (mergeNcml != null)
ncfile = NcMLReader.mergeNcML(ncfile, mergeNcml); // create new dataset
if (enhance == null || enhance.isEmpty()) {
if (debugOpenFile)
System.out
.println(" acquire (no enhance) " + cacheLocation + " took " + (System.currentTimeMillis() - start));
return ncfile;
}
// must enhance
NetcdfDataset ds;
if (ncfile instanceof NetcdfDataset) {
ds = (NetcdfDataset) ncfile;
ds.enhance(enhance); // enhance "in place", ie modify the NetcdfDataset
} else {
ds = new NetcdfDataset(ncfile, enhance); // enhance when wrapping
}
if (debugOpenFile)
System.out.println(" acquire (enhance) " + cacheLocation + " took " + (System.currentTimeMillis() - start));
return ds;
}
protected void close(NetcdfFile ncfile) throws IOException {
if (ncfile == null)
return;
cacheVariables(ncfile);
ncfile.close();
}
// overridden in DatasetOuterDimension
protected void cacheVariables(NetcdfFile ncfile) throws IOException {}
public void show(Formatter f) {
f.format(" %s%n", mfile.getPath());
}
protected Array read(Variable mainv, CancelTask cancelTask) throws IOException {
NetcdfFile ncd = null;
try {
ncd = acquireFile(cancelTask);
if ((cancelTask != null) && cancelTask.isCancel())
return null;
Variable v = findVariable(ncd, mainv);
if (debugRead)
System.out.printf("Agg.read %s from %s in %s%n", mainv.getNameAndDimensions(), v.getNameAndDimensions(),
getLocation());
return v.read();
} finally {
close(ncd);
}
}
/**
* Read a section of the local Variable.
*
* @param mainv aggregated Variable
* @param cancelTask let user cancel
* @param section relative to the local Variable
* @return the complete Array for mainv
* @throws IOException on I/O error
* @throws InvalidRangeException on section error
*/
protected Array read(Variable mainv, CancelTask cancelTask, List section)
throws IOException, InvalidRangeException {
NetcdfFile ncd = null;
try {
ncd = acquireFile(cancelTask);
if ((cancelTask != null) && cancelTask.isCancel())
return null;
Variable v = findVariable(ncd, mainv);
if (debugRead) {
Section want = new Section(section);
System.out.printf("Agg.read(%s) %s from %s in %s%n", want, mainv.getNameAndDimensions(),
v.getNameAndDimensions(), getLocation());
}
return v.read(section);
} finally {
close(ncd);
}
}
protected Variable findVariable(NetcdfFile ncfile, Variable mainV) {
Variable v = ncfile.findVariable(mainV.getFullNameEscaped());
if (v == null) { // might be renamed
VariableEnhanced ve = (VariableEnhanced) mainV;
v = ncfile.findVariable(ve.getOriginalName()); // LOOK not escaped
}
return v;
}
// Datasets with the same locations are equal
public boolean equals(Object oo) {
if (this == oo)
return true;
if (!(oo instanceof Dataset))
return false;
Dataset other = (Dataset) oo;
return getLocation().equals(other.getLocation());
}
public int hashCode() {
return getLocation().hashCode();
}
@Override
public int compareTo(Dataset o) {
return getLocation().compareTo(o.getLocation());
}
} // class Dataset
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * Make all non-agg variables of a dataset read through a proxy that acquires the file before reading.
 * A variable that is caching gets its data read into the cache right away; every other variable gets the proxy
 * as its ProxyReader. Variables in nested groups are handled as well.
 *
 * @param typicalDataset read from a "typical dataset"
 * @param newds containing dataset
 * @throws IOException on i/o error
 */
protected void setDatasetAcquireProxy(Dataset typicalDataset, NetcdfDataset newds) throws IOException {
  setDatasetAcquireProxy(new DatasetProxyReader(typicalDataset), newds.getRootGroup());
}
/**
 * Install the proxy on the variables of a group and, recursively, of its nested groups.
 * Variables that already have a ProxyReader other than themselves are agg variables and are left alone.
 *
 * @param proxy the proxy to install
 * @param g process the variables of this group
 * @throws IOException on i/o error while reading a caching variable
 */
protected void setDatasetAcquireProxy(DatasetProxyReader proxy, Group g) throws IOException {
  // all normal (non agg) variables must use a proxy to lock the file
  for (Variable variable : g.getVariables()) {
    boolean alreadyProxied = variable.getProxyReader() != variable;

    if (alreadyProxied) { // dont mess with agg variables
      if (debugProxy) {
        System.out.println(" debugProxy: hasProxyReader " + variable.getFullName());
      }
    } else if (variable.isCaching()) { // cache the small ones
      variable.setCachedData(variable.read()); // cache the variableDS directly
    } else { // put proxy on the rest
      variable.setProxyReader(proxy);
      if (debugProxy) {
        System.out.println(" debugProxy: set proxy on " + variable.getFullName());
      }
    }
  }

  // recurse
  for (Group child : g.getGroups()) {
    setDatasetAcquireProxy(proxy, child);
  }
}
/**
 * ProxyReader that reads a variable from one nested Dataset.
 * The file is acquired before each read and closed again afterwards.
 */
protected class DatasetProxyReader implements ProxyReader {
  Dataset dataset;

  DatasetProxyReader(Dataset dataset) {
    this.dataset = dataset;
  }

  /**
   * Read all the data of the variable in the nested dataset that corresponds to mainV.
   *
   * @param mainV the variable of the aggregation dataset
   * @param cancelTask user can cancel, may be null
   * @return the data, or null if the user cancelled
   * @throws IOException on i/o error, or if the variable cannot be found in the nested dataset
   */
  @Override
  public Array reallyRead(Variable mainV, CancelTask cancelTask) throws IOException {
    NetcdfFile ncfile = null;
    try {
      ncfile = dataset.acquireFile(cancelTask);
      if ((cancelTask != null) && cancelTask.isCancel())
        return null;
      return findProxyVariable(ncfile, mainV).read();
    } finally {
      dataset.close(ncfile);
    }
  }

  /**
   * Read a section of the variable in the nested dataset that corresponds to mainV.
   *
   * @param mainV the variable of the aggregation dataset
   * @param section the section to read
   * @param cancelTask user can cancel, may be null
   * @return the data, or null if the user cancelled
   * @throws IOException on i/o error, or if the variable cannot be found in the nested dataset
   * @throws InvalidRangeException on section error
   */
  @Override
  public Array reallyRead(Variable mainV, Section section, CancelTask cancelTask)
      throws IOException, InvalidRangeException {
    NetcdfFile ncfile = null;
    try {
      ncfile = dataset.acquireFile(cancelTask);
      // check for cancellation before looking up the variable, the same way the unsectioned read does
      if ((cancelTask != null) && cancelTask.isCancel())
        return null;
      return findProxyVariable(ncfile, mainV).read(section);
    } finally {
      dataset.close(ncfile);
    }
  }

  // Look up the variable in the acquired file; a missing variable is reported with a descriptive IOException
  // instead of surfacing as a NullPointerException.
  private Variable findProxyVariable(NetcdfFile ncfile, Variable mainV) throws IOException {
    Variable proxyV = findVariable(ncfile, mainV);
    if (proxyV == null)
      throw new IOException("Cannot find variable " + mainV.getFullName() + " in " + dataset.getLocation());
    return proxyV;
  }
}
/**
 * Find the variable in a nested file that corresponds to a variable of the aggregation dataset.
 * The escaped full name is tried first; if that finds nothing the variable might be renamed, so its original
 * name is tried.
 *
 * @param ncfile look in this file
 * @param mainV the variable of the aggregation dataset
 * @return the corresponding variable, or null if not found
 */
protected Variable findVariable(NetcdfFile ncfile, Variable mainV) {
  Variable found = ncfile.findVariable(mainV.getFullNameEscaped());
  if (found != null) {
    return found;
  }
  // might be renamed
  String originalName = ((VariableEnhanced) mainV).getOriginalName();
  return ncfile.findVariable(originalName); // LOOK not escaped
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy