All Downloads are FREE. Search and download functionalities are using the official Maven repository.

thredds.inventory.MFileCollectionManager Maven / Gradle / Ivy

Go to download

The NetCDF-Java Library is a Java interface to NetCDF files, as well as to many other types of scientific data formats.

The newest version!
/*
 * Copyright (c) 1998 - 2011. University Corporation for Atmospheric Research/Unidata
 * Portions of this software were developed by the Unidata Program at the
 * University Corporation for Atmospheric Research.
 *
 * Access and use of this software shall impose the following obligations
 * and understandings on the user. The user is granted the right, without
 * any fee or cost, to use, copy, modify, alter, enhance and distribute
 * this software, and any derivative works thereof, and its supporting
 * documentation for any purpose whatsoever, provided that this entire
 * notice appears in all copies of the software, derivative works and
 * supporting documentation.  Further, UCAR requests that the user credit
 * UCAR/Unidata in any publications that result from the use of this
 * software or in any product that includes this software. The names UCAR
 * and/or Unidata, however, may not be used in any advertising or publicity
 * to endorse or promote any products or commercial entity unless specific
 * written permission is obtained from UCAR/Unidata. The user also
 * understands that UCAR/Unidata is not obligated to provide the user with
 * any support, consulting, training or assistance of any kind with regard
 * to the use, operation and performance of this software nor to provide
 * the user with any updates, revisions, new versions or "bug fixes."
 *
 * THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY SPECIAL,
 * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
 * WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE.
 */

package thredds.inventory;

import net.jcip.annotations.ThreadSafe;
import thredds.featurecollection.FeatureCollectionConfig;
import ucar.nc2.time.CalendarDate;
import ucar.nc2.units.TimeDuration;

import java.util.*;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;

import thredds.inventory.filter.*;

/**
 * Manage Collections of MFiles.
 * Used in:
 * 
    *
  • replaces older version in ncml.Aggregation *
  • ucar.nc2.ft.point.collection.TimedCollectionImpl (ucar.nc2.ft.point.collection.CompositeDatasetFactory) *
  • ucar.nc2.ft.fmrc.Fmrc *
  • ucar.nc2.grib.GribCollection *
*

* we need to be thread safe, for InvDatasetFeatureCollection * * @author caron * @since Jul 8, 2009 */ @ThreadSafe public class MFileCollectionManager extends CollectionManagerAbstract { static public final String CATALOG = "catalog:"; static private MController controller; /** * Set the MController used by scan. Defaults to thredds.filesystem.ControllerOS() if not set. * * @param _controller use this MController */ static public void setController(MController _controller) { controller = _controller; } static public MController getController() { if (null == controller) controller = new thredds.filesystem.ControllerOS(); // default return controller; } // called from Aggregation, Fmrc, FeatureDatasetFactoryManager static public MFileCollectionManager open(String collectionName, String olderThan, Formatter errlog) throws IOException { if (collectionName.startsWith(CATALOG)) return new CatalogCollectionManager(collectionName); else return new MFileCollectionManager(collectionName, olderThan, errlog); } // retrofit to Aggregation static public MFileCollectionManager openWithRecheck(String collectionName, String recheckS) { return new MFileCollectionManager(collectionName, recheckS); } //////////////////////////////////////////////////////////////////// // these actually dont change, but are not set in the constructor protected DateExtractor dateExtractor; protected CalendarDate startPartition; // these are final private final List scanList = new ArrayList(); // an MCollection is a collection of managed files private final long olderThanInMsecs; private final String rootDir; protected FeatureCollectionConfig config; // this can change = keep under lock private Map map; // current map of MFile in the collection private long lastScanned; // last time scanned private AtomicLong lastChanged = new AtomicLong(); // last time the set of files changed // simplified version called from DatasetCollectionManager.open() private MFileCollectionManager(String collectionSpec, String olderThan, Formatter errlog) { super(collectionSpec, null); CollectionSpecParser sp = new CollectionSpecParser(collectionSpec, errlog); this.recheck = null; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; // default this.rootDir = sp.getRootDir(); List filters = new ArrayList(2); if (null != sp.getFilter()) filters.add(new WildcardMatchOnName(sp.getFilter())); olderThanInMsecs = parseOlderThanFilter(olderThan); dateExtractor = (sp.getDateFormatMark() == null) ? new DateExtractorNone() : new DateExtractorFromName(sp.getDateFormatMark(), true); scanList.add(new MCollection(sp.getRootDir(), sp.getRootDir(), sp.wantSubdirs(), filters, null)); } // this is the full featured constructor, using FeatureCollectionConfig for config. public MFileCollectionManager(FeatureCollectionConfig config, Formatter errlog, org.slf4j.Logger logger) { super(config.name != null ? config.name : config.spec, logger); this.config = config; CollectionSpecParser sp = new CollectionSpecParser(config.spec, errlog); this.rootDir = sp.getRootDir(); List filters = new ArrayList(3); if (null != sp.getFilter()) filters.add(new WildcardMatchOnName(sp.getFilter())); olderThanInMsecs = parseOlderThanFilter(config.olderThan); if (config.dateFormatMark != null) dateExtractor = new DateExtractorFromName(config.dateFormatMark, false); else if (sp.getDateFormatMark() != null) dateExtractor = new DateExtractorFromName(sp.getDateFormatMark(), true); else dateExtractor = new DateExtractorNone(); scanList.add(new MCollection(sp.getRootDir(), sp.getRootDir(), sp.wantSubdirs(), filters, null)); this.recheck = makeRecheck(config.updateConfig.recheckAfter); protoChoice = config.protoConfig.choice; // static means never rescan on checkState; let it be externally triggered. if ((config.updateConfig.recheckAfter == null) && (config.updateConfig.rescan == null) && (config.updateConfig.deleteAfter == null)) setStatic(true); } private long parseOlderThanFilter(String olderThan) { if (olderThan != null) { try { TimeDuration tu = new TimeDuration(olderThan); return (long) (1000 * tu.getValueInSeconds()); } catch (Exception e) { logger.error(collectionName + ": Invalid time unit for olderThan = {}", olderThan); } } return -1; } private TimeDuration makeRecheck(String recheckS) { if (recheckS != null) { try { return new TimeDuration(recheckS); } catch (Exception e) { logger.error(collectionName + ": Invalid time unit for recheckEvery = {}", recheckS); } } return null; } // for subclasses protected MFileCollectionManager(String name, org.slf4j.Logger logger) { super(name, logger); this.recheck = null; this.olderThanInMsecs = -1; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; // default this.rootDir = null; } //////////////////////////////////////////////////////////////////////////////// public MFileCollectionManager(String name, String spec, Formatter errlog, org.slf4j.Logger logger) { super(name, logger); CollectionSpecParser sp = new CollectionSpecParser(spec, errlog); this.rootDir = sp.getRootDir(); List filters = new ArrayList(3); if (null != sp.getFilter()) filters.add(new WildcardMatchOnName(sp.getFilter())); dateExtractor = (sp.getDateFormatMark() == null) ? new DateExtractorNone() : new DateExtractorFromName(sp.getDateFormatMark(), true); scanList.add(new MCollection(sp.getRootDir(), sp.getRootDir(), sp.wantSubdirs(), filters, null)); this.recheck = null; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; // default this.olderThanInMsecs = -1; } public MFileCollectionManager(String name, MCollection mc, CalendarDate startPartition, org.slf4j.Logger logger) { super(name, logger); this.startPartition = startPartition; this.scanList.add(mc); this.rootDir = mc.getDirectoryName(); this.recheck = null; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; // default this.olderThanInMsecs = -1; } @Override public CalendarDate getStartCollection() { return startPartition; } //////////////////////////////////////////////////////////////////// // Aggregation retrofit /** * For retrofitting to Aggregation * Must also call addDirectoryScan one or more times * * @param recheckS a undunit time unit, specifying how often to rscan */ private MFileCollectionManager(String collectionName, String recheckS) { super(collectionName, null); this.recheck = makeRecheck(recheckS); this.olderThanInMsecs = -1; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; this.rootDir = null; } public void setDateExtractor(DateExtractor dateExtractor) { this.dateExtractor = dateExtractor; } /** * Add a directory scan to the collection * * @param dirName scan this directory * @param suffix require this suffix (overriddden by regexp), may be null * @param regexpPatternString if present, use this reqular expression to filter files , may be null * @param subdirsS if "true", descend into subdirectories, may be null * @param olderS udunit time unit - files must be older than this amount of time (now - lastModified > olderTime), may be null * // * @param dateFormatString dateFormatMark string, may be null * @param auxInfo attach this object to any MFile found by this scan */ public void addDirectoryScan(String dirName, String suffix, String regexpPatternString, String subdirsS, String olderS, Object auxInfo) { List filters = new ArrayList(3); if (null != regexpPatternString) filters.add(new RegExpMatchOnName(regexpPatternString)); else if (suffix != null) filters.add(new WildcardMatchOnPath("*" + suffix + "$")); if (olderS != null) { try { TimeDuration tu = new TimeDuration(olderS); filters.add(new LastModifiedLimit((long) (1000 * tu.getValueInSeconds()))); } catch (Exception e) { logger.error(collectionName + ": Invalid time unit for olderThan = {}", olderS); } } boolean wantSubdirs = true; if ((subdirsS != null) && subdirsS.equalsIgnoreCase("false")) wantSubdirs = false; MFileFilter filter = (filters.size() == 0) ? null : ((filters.size() == 1) ? filters.get(0) : new Composite(filters)); MCollection mc = new thredds.inventory.MCollection(dirName, dirName, wantSubdirs, filter, auxInfo); // create name StringBuilder sb = new StringBuilder(dirName); if (wantSubdirs) sb.append("**/"); if (null != regexpPatternString) sb.append(regexpPatternString); else if (suffix != null) sb.append(suffix); else sb.append("noFilter"); collectionName = sb.toString(); scanList.add(mc); } //////////////////////////////////////////////////////////////////// @Override public String getRoot() { return rootDir; } @Override public long getOlderThanFilterInMSecs() { return olderThanInMsecs; } @Override public long getLastScanned() { return lastScanned; } @Override public long getLastChanged() { return lastChanged.get(); } @Override public boolean scanIfNeeded() throws IOException { if (map == null && !isStatic()) return true; return isScanNeeded() && scan(true); } protected boolean hasScans() { return !scanList.isEmpty(); } /** * Compute if synchronous scan is needed. * True if recheck is true and enough time has elapsed. * @return true if rescan is needed */ @Override public boolean isScanNeeded() { // see if we need to recheck if (recheck == null) { logger.debug("{}: scan not needed, recheck null", collectionName); return false; } if (!hasScans()) { logger.debug("{}: scan not needed, no scanners", collectionName); return false; } if (map == null && !isStatic()) { logger.debug("{}: scan needed, never scanned", collectionName); return true; } Date now = new Date(); Date lastCheckedDate = new Date(lastScanned); Date need = recheck.add(lastCheckedDate); if (now.before(need)) { logger.debug("{}: scan not needed, last scanned={}, now={}", collectionName, lastCheckedDate, now); return false; } return true; } /** * Do not use * @throws IOException */ public void scanDebug(Formatter f) throws IOException { getController(); // make sure a controller is instantiated // run through all scanners and collect MFile instances into the Map for (MCollection mc : scanList) { // lOOK: are there any circumstances where we dont need to recheck against OS, ie always use cached values? Iterator iter = (mc.wantSubdirs()) ? controller.getInventoryAll(mc, true) : controller.getInventoryTop(mc, true); /// NCDC wants subdir /global/nomads/nexus/gfsanl/**/gfsanl_3_.*\.grb$ if (iter == null) { logger.error(collectionName + ": Invalid collection= " + mc); continue; } int count = 0; while (iter.hasNext()) { MFile mfile = iter.next(); mfile.setAuxInfo(mc.getAuxInfo()); map.put(mfile.getPath(), mfile); count++; } logger.debug("{} : was scanned nfiles= {} ", collectionName, count); } } @Override public boolean scan(boolean sendEvent) throws IOException { if (map == null) { boolean changed = scanFirstTime(); if (changed && sendEvent) sendEvent(new TriggerEvent(this, TriggerType.update)); // watch out for infinite loop return changed; } long olderThan = (olderThanInMsecs <= 0) ? -1 : System.currentTimeMillis() - olderThanInMsecs; // new files must be older than this. // rescan Map oldMap = map; Map newMap = new HashMap(); reallyScan(newMap); // replace with previous datasets if they exist int nnew = 0; int nchange = 0; Iterator iter = newMap.values().iterator(); // need iterator so we can remove() while (iter.hasNext()) { MFile newFile = iter.next(); String path = newFile.getPath(); MFile oldFile = oldMap.get(path); if (oldFile != null) { if (newFile.getLastModified() > oldFile.getLastModified()) { // the file has changed since last time nchange++; logger.debug("{}: scan found Dataset changed= {}", collectionName, path); } else if (changeChecker != null && changeChecker.hasntChangedSince(newFile, oldFile.getLastModified())) { // the ancilliary file hasnt changed nchange++; logger.debug("{}: scan changeChecker found Dataset changed= {}", collectionName, path); } } else { // oldFile doesnt exist if (olderThan > 0 && newFile.getLastModified() > olderThan) { // the file is too new iter.remove(); logger.debug("{}: scan found new Dataset but its too recently modified = {}", collectionName, path); } else { nnew++; logger.debug("{}: scan found new Dataset= {} ", collectionName, path); } } } // check for deletions int ndelete = 0; for (MFile oldDataset : oldMap.values()) { String path = oldDataset.getPath(); MFile newDataset = newMap.get(path); if (newDataset == null) { ndelete++; logger.debug("{}: scan found deleted Dataset={}", collectionName, path); } } boolean changed = (nnew > 0) || (ndelete > 0) || (nchange > 0); if (changed) { if (logger.isInfoEnabled()) logger.info("{}: scan found changes {}: nnew={}, nchange={}, ndelete={}", collectionName, new Date(), nnew, nchange, ndelete); synchronized (this) { map = newMap; this.lastScanned = System.currentTimeMillis(); this.lastChanged.set(this.lastScanned); } } else { synchronized (this) { this.lastScanned = System.currentTimeMillis(); } } if (changed && sendEvent) { // event is processed on this thread sendEvent(new TriggerEvent(this, TriggerType.update)); // watch out for infinite loop } return changed; } @Override public void setFiles(Iterable files) { Map newMap = new HashMap(); for (MFile file : files) newMap.put(file.getPath(), file); synchronized (this) { map = newMap; this.lastScanned = System.currentTimeMillis(); this.lastChanged.set(this.lastScanned); } } @Override public Iterable getFiles() { if (map == null) try { scanFirstTime(); // never scanned } catch (IOException e) { e.printStackTrace(); return Collections.emptyList(); } List result = new ArrayList(map.values()); if (hasDateExtractor()) { Collections.sort(result, new DateSorter()); } else { Collections.sort(result); } return result; } private class DateSorter implements Comparator { public int compare(MFile m1, MFile m2) { return extractRunDateWithError(m1).compareTo(extractRunDateWithError(m2)); } } @Override public CalendarDate extractRunDate(MFile mfile) { return (dateExtractor == null) ? null : dateExtractor.getCalendarDate(mfile); } private CalendarDate extractRunDateWithError(MFile mfile) { CalendarDate result = extractRunDate(mfile); if (result == null) logger.error("Failed to extract date from file {} with Extractor {}", mfile.getPath(), dateExtractor); return result; } @Override public boolean hasDateExtractor() { return (dateExtractor != null) && !(dateExtractor instanceof DateExtractorNone); } private boolean scanFirstTime() throws IOException { Map newMap = new HashMap(); if (!hasScans()) { map = newMap; return false; } reallyScan(newMap); // deleteOld(newMap); // ?? hmmmmm LOOK this seems wrong; maintainence in background ?? generally collection doesnt exist // implement olderThan if (olderThanInMsecs > 0) { long olderThan = System.currentTimeMillis() - olderThanInMsecs; // new files must be older than this. Iterator iter = newMap.values().iterator(); // need iterator so we can remove() while (iter.hasNext()) { MFile newFile = iter.next(); String path = newFile.getPath(); if (newFile.getLastModified() > olderThan) { // the file is too new iter.remove(); logger.debug("{}: scan found new Dataset but its too recently modified = {}", collectionName, path); } } } synchronized (this) { map = newMap; this.lastScanned = System.currentTimeMillis(); this.lastChanged.set(this.lastScanned); } logger.debug("{} : initial scan found n datasets = {} ", collectionName, map.keySet().size()); return map.keySet().size() > 0; } protected void reallyScan(java.util.Map map) throws IOException { getController(); // make sure a controller is instantiated // run through all scanners and collect MFile instances into the Map for (MCollection mc : scanList) { // lOOK: are there any circumstances where we dont need to recheck against OS, ie always use cached values? Iterator iter = (mc.wantSubdirs()) ? controller.getInventoryAll(mc, true) : controller.getInventoryTop(mc, true); /// NCDC wants subdir /global/nomads/nexus/gfsanl/**/gfsanl_3_.*\.grb$ if (iter == null) { logger.error(collectionName + ": Invalid collection= " + mc); continue; } int count = 0; while (iter.hasNext()) { MFile mfile = iter.next(); mfile.setAuxInfo(mc.getAuxInfo()); map.put(mfile.getPath(), mfile); count++; } logger.debug("{} : was scanned nfiles= {} ", collectionName, count); } if (map.size() == 0) { logger.warn("MFileCollectionManager: No files found for {}", collectionName); } } @Override public String toString() { Formatter f = new Formatter(); f.format("DatasetCollectionManager{ collectionName='%s' recheck=%s ", collectionName, recheck); for (MCollection mc : scanList) { f.format("%n dir=%s filter=%s", mc.getDirectoryName(), mc.getFileFilter()); } return f.toString(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy