All Downloads are FREE. Search and download functionalities are using the official Maven repository.

thredds.inventory.MFileCollectionManager Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 1998-2020 University Corporation for Atmospheric Research/Unidata
 * See LICENSE.txt for license information.
 */

package thredds.inventory;

import thredds.featurecollection.FeatureCollectionConfig;
import thredds.filesystem.ControllerOS;
import thredds.inventory.filter.CompositeMFileFilter;
import thredds.inventory.filter.LastModifiedLimit;
import thredds.inventory.filter.RegExpMatchOnName;
import thredds.inventory.filter.WildcardMatchOnName;
import thredds.inventory.filter.WildcardMatchOnPath;
import ucar.nc2.time.CalendarDate;
import ucar.nc2.units.TimeDuration;
import javax.annotation.concurrent.GuardedBy;
import javax.annotation.concurrent.ThreadSafe;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;

/**
 * Manage Collections of MFiles.
 * Used in:
 * 
    *
  • thredds.inventory *
  • ucar.nc2.ft.fmrc.Fmrc *
  • ucar.nc2.ncml.Aggregation *
*

* we need to be thread safe, for InvDatasetFeatureCollection * * @author caron * @since Jul 8, 2009 */ @ThreadSafe public class MFileCollectionManager extends CollectionManagerAbstract { private MController controller; /** * Set the MController used by scan. Defaults to thredds.filesystem.ControllerOS() if not set. * * @param _controller use this MController */ public void setController(MController _controller) { controller = _controller; } public MController getController() { if (null == controller) { if (!scanList.isEmpty()) { CollectionConfig mc = scanList.get(0); controller = MControllers.create(mc.getDirectoryName()); } else { controller = new ControllerOS(); } } return controller; } // called from Aggregation, Fmrc, FeatureDatasetFactoryManager public static MFileCollectionManager open(String collectionName, String collectionSpec, String olderThan, Formatter errlog) { return new MFileCollectionManager(collectionName, collectionSpec, olderThan, errlog); } // retrofit to Aggregation public static MFileCollectionManager openWithRecheck(String collectionName, String recheckS) { return new MFileCollectionManager(collectionName, recheckS); } //////////////////////////////////////////////////////////////////// // these are final private final List scanList = new ArrayList<>(); // an MCollection is a collection of managed files private final long olderThanInMsecs; // LOOK why not use LastModifiedLimit filter ? // protected String rootDir; protected FeatureCollectionConfig config; @GuardedBy("this") private Map map; // current map of MFile in the collection. this can change = keep under lock @GuardedBy("this") private long lastScanned; // last time scanned @GuardedBy("this") private AtomicLong lastChanged = new AtomicLong(); // last time the set of files changed @GuardedBy("this") private Map filesRunDateMap = new HashMap<>(); private MFileCollectionManager(String collectionName, String collectionSpec, String olderThan, Formatter errlog) { super(collectionName, null); CollectionSpecParserAbstract sp = CollectionSpecParsers.create(collectionSpec, errlog); this.recheck = null; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; // default this.root = sp.getRootDir(); CompositeMFileFilter filters = new CompositeMFileFilter(); if (null != sp.getFilter()) filters.addIncludeFilter(new WildcardMatchOnName(sp.getFilter())); olderThanInMsecs = parseOlderThanFilter(olderThan); dateExtractor = (sp.getDateFormatMark() == null) ? new DateExtractorNone() : new DateExtractorFromName(sp.getDateFormatMark(), true); scanList.add(new CollectionConfig(sp.getRootDir(), getScanRootDir(sp), sp.wantSubdirs(), filters, null)); } // this is the full featured constructor, using FeatureCollectionConfig for config. public MFileCollectionManager(FeatureCollectionConfig config, Formatter errlog, org.slf4j.Logger logger) { super(config.collectionName != null ? config.collectionName : config.spec, logger); this.config = config; CollectionSpecParserAbstract sp = config.getCollectionSpecParserAbstract(errlog); this.root = sp.getRootDir(); CompositeMFileFilter filters = new CompositeMFileFilter(); if (null != sp.getFilter()) filters.addIncludeFilter(new WildcardMatchOnName(sp.getFilter())); olderThanInMsecs = parseOlderThanFilter(config.olderThan); if (config.dateFormatMark != null) dateExtractor = new DateExtractorFromName(config.dateFormatMark, false); else if (sp.getDateFormatMark() != null) dateExtractor = new DateExtractorFromName(sp.getDateFormatMark(), true); else dateExtractor = new DateExtractorNone(); scanList.add(new CollectionConfig(sp.getRootDir(), getScanRootDir(sp), sp.wantSubdirs(), filters, null)); if (config.protoConfig != null) protoChoice = config.protoConfig.choice; if (config.updateConfig != null) { this.recheck = makeRecheck(config.updateConfig.recheckAfter); // static means never rescan on checkState; let it be externally triggered. if ((config.updateConfig.recheckAfter == null) && (config.updateConfig.rescan == null) && (config.updateConfig.deleteAfter == null)) setStatic(true); } if (this.auxInfo == null) this.auxInfo = new HashMap<>(10); this.auxInfo.put(FeatureCollectionConfig.AUX_CONFIG, config); } private long parseOlderThanFilter(String olderThan) { if (olderThan != null) { try { TimeDuration tu = new TimeDuration(olderThan); return (long) (1000 * tu.getValueInSeconds()); } catch (Exception e) { logger.error(collectionName + ": Invalid time unit for olderThan = {}", olderThan); } } return -1; } private TimeDuration makeRecheck(String recheckS) { if (recheckS != null) { try { return new TimeDuration(recheckS); } catch (Exception e) { logger.error(collectionName + ": Invalid time unit for recheckEvery = {}", recheckS); } } return null; } // for subclasses protected MFileCollectionManager(String name, org.slf4j.Logger logger) { super(name, logger); this.recheck = null; this.olderThanInMsecs = -1; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; // default } //////////////////////////////////////////////////////////////////////////////// public MFileCollectionManager(String name, String spec, Formatter errlog, org.slf4j.Logger logger) { super(name, logger); CollectionSpecParserAbstract sp = CollectionSpecParsers.create(spec, errlog); this.root = sp.getRootDir(); CompositeMFileFilter filters = new CompositeMFileFilter(); if (null != sp.getFilter()) filters.addIncludeFilter(new WildcardMatchOnName(sp.getFilter())); dateExtractor = (sp.getDateFormatMark() == null) ? new DateExtractorNone() : new DateExtractorFromName(sp.getDateFormatMark(), true); scanList.add(new CollectionConfig(sp.getRootDir(), getScanRootDir(sp), sp.wantSubdirs(), filters, null)); this.recheck = null; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; // default this.olderThanInMsecs = -1; } private static String getScanRootDir(CollectionSpecParserAbstract specParser) { return specParser.getRootDir() + specParser.getFragment(); } public MFileCollectionManager(String name, CollectionConfig mc, CalendarDate startPartition, org.slf4j.Logger logger) { super(name, logger); this.startCollection = startPartition; this.scanList.add(mc); this.root = mc.getDirectoryName(); this.recheck = null; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; // default this.olderThanInMsecs = -1; } //////////////////////////////////////////////////////////////////// // Aggregation retrofit /** * For retrofitting to Aggregation * Must also call addDirectoryScan one or more times * * @param recheckS a undunit time unit, specifying how often to rscan */ private MFileCollectionManager(String collectionName, String recheckS) { super(collectionName, null); this.recheck = makeRecheck(recheckS); this.olderThanInMsecs = -1; this.protoChoice = FeatureCollectionConfig.ProtoChoice.Penultimate; } /** * Add a directory scan to the collection * * @param dirName scan this directory * @param suffix require this suffix (overriddden by regexp), may be null * @param regexpPatternString if present, use this reqular expression to filter files , may be null * @param subdirsS if "true", descend into subdirectories, may be null * @param olderS udunit time unit - files must be older than this amount of time (now - lastModified > olderTime), may * be null * // * @param dateFormatString dateFormatMark string, may be null * @param auxInfo attach this object to any MFile found by this scan */ public void addDirectoryScan(String dirName, String suffix, String regexpPatternString, String subdirsS, String olderS, Object auxInfo) { CompositeMFileFilter filters = new CompositeMFileFilter(); if (null != regexpPatternString) filters.addIncludeFilter(new RegExpMatchOnName(regexpPatternString)); else if (suffix != null) filters.addIncludeFilter(new WildcardMatchOnPath("*" + suffix + "$")); if (olderS != null) { try { TimeDuration tu = new TimeDuration(olderS); filters.addAndFilter(new LastModifiedLimit((long) (1000 * tu.getValueInSeconds()))); } catch (Exception e) { logger.error(collectionName + ": Invalid time unit for olderThan = {}", olderS); } } boolean wantSubdirs = true; if ("false".equalsIgnoreCase(subdirsS)) wantSubdirs = false; CollectionConfig mc = new CollectionConfig(dirName, dirName, wantSubdirs, filters, auxInfo); // create name StringBuilder sb = new StringBuilder(dirName); if (wantSubdirs) sb.append("**/"); if (null != regexpPatternString) sb.append(regexpPatternString); else if (suffix != null) sb.append(suffix); else sb.append("noFilter"); collectionName = sb.toString(); scanList.add(mc); } //////////////////////////////////////////////////////////////////// public long getOlderThanFilterInMSecs() { return olderThanInMsecs; } @Override public synchronized long getLastScanned() { return lastScanned; } @Override public synchronized long getLastChanged() { return lastChanged.get(); } protected boolean hasScans() { return !scanList.isEmpty(); } /** * Compute if synchronous scan is needed. * True if recheck is true and enough time has elapsed. * * @return true if rescan is needed */ @Override public boolean isScanNeeded() { // see if we need to recheck if (recheck == null) { logger.debug("{}: scan not needed, recheck null", collectionName); return false; } if (!hasScans()) { logger.debug("{}: scan not needed, no scanners", collectionName); return false; } synchronized (this) { if (map == null && !isStatic()) { logger.debug("{}: scan needed, never scanned", collectionName); return true; } } Date now = new Date(); Date lastCheckedDate = new Date(getLastScanned()); Date need = recheck.add(lastCheckedDate); if (now.before(need)) { logger.debug("{}: scan not needed, last scanned={}, now={}", collectionName, lastCheckedDate, now); return false; } return true; } //////////////////////// // experimental protected ChangeChecker changeChecker; public synchronized void setChangeChecker(ChangeChecker strat) { this.changeChecker = strat; } @Override public synchronized boolean scan(boolean sendEvent) throws IOException { if (map == null) { boolean changed = scanFirstTime(); if (changed && sendEvent) sendEvent(new TriggerEvent(this, CollectionUpdateType.always)); // watch out for infinite loop return changed; } long olderThan = (olderThanInMsecs <= 0) ? -1 : System.currentTimeMillis() - olderThanInMsecs; // new files must be // older than this. // rescan Map oldMap = map; Map newMap = new HashMap<>(); if ((!hasScans())) { // if no directory scans, the map of files should not change // but we should still make a new map to see if the files // have been updated since the last recheck for (String file : oldMap.keySet()) { newMap.put(file, MFiles.create(file)); } } else { // we have a directory scan, so scan it reallyScan(newMap); } // replace with previous datasets if they exist int nnew = 0; int nchange = 0; Iterator iter = newMap.values().iterator(); // need iterator so we can remove() while (iter.hasNext()) { MFile newFile = iter.next(); String path = newFile.getPath(); MFile oldFile = oldMap.get(path); if (oldFile != null) { if (newFile.getLastModified() > oldFile.getLastModified()) { // the file has changed since last time nchange++; logger.debug("{}: scan found Dataset changed= {}", collectionName, path); } else if (changeChecker != null && changeChecker.hasntChangedSince(newFile, oldFile.getLastModified())) { // the // ancillary // file // hasn't // changed nchange++; logger.debug("{}: scan changeChecker found Dataset changed= {}", collectionName, path); } } else { // oldFile doesnt exist if (olderThan > 0 && newFile.getLastModified() > olderThan) { // the file is too new iter.remove(); logger.debug("{}: scan found new Dataset but its too recently modified = {}", collectionName, path); } else { nnew++; logger.debug("{}: scan found new Dataset= {} ", collectionName, path); } } } // check for deletions int ndelete = 0; for (MFile oldDataset : oldMap.values()) { String path = oldDataset.getPath(); MFile newDataset = newMap.get(path); if (newDataset == null) { ndelete++; logger.debug("{}: scan found deleted Dataset={}", collectionName, path); } } boolean changed = (nnew > 0) || (ndelete > 0) || (nchange > 0); if (changed) { if (logger.isInfoEnabled()) logger.info("{}: scan found changes {}: nnew={}, nchange={}, ndelete={}", collectionName, new Date(), nnew, nchange, ndelete); map = newMap; this.lastScanned = System.currentTimeMillis(); this.lastChanged.set(this.lastScanned); } else { this.lastScanned = System.currentTimeMillis(); } if (changed && sendEvent) { // event is processed on this thread sendEvent(new TriggerEvent(this, CollectionUpdateType.always)); // watch out for infinite loop } return changed; } public void setFiles(Iterable files) { Map newMap = new HashMap<>(); for (MFile file : files) newMap.put(file.getPath(), file); synchronized (this) { map = newMap; this.lastScanned = System.currentTimeMillis(); this.lastChanged.set(this.lastScanned); } } public void setFilesAndRunDate(Map filesRunDateMap) { // a simple mapping between file names and coordValue dates for // aggregations in which files are explicitly defined. this.filesRunDateMap = filesRunDateMap; // update MFileCollection map of files List files = new ArrayList<>(filesRunDateMap.size()); for (String file : filesRunDateMap.keySet()) { files.add(MFiles.create(file)); } setFiles(files); } @Override public synchronized Iterable getFilesSorted() { if (map == null) try { scanFirstTime(); // never scanned } catch (IOException e) { e.printStackTrace(); return Collections.emptyList(); } List result = new ArrayList<>(map.values()); if (hasDateExtractor()) { result.sort(new DateSorter()); } else { Collections.sort(result); } return result; } @Override public boolean hasDateExtractor() { return (dateExtractor != null) && !(dateExtractor instanceof DateExtractorNone); } public synchronized Map getFilesRunDateMap() { return filesRunDateMap; } // only called from synch methods private boolean scanFirstTime() throws IOException { Map newMap = new HashMap<>(); if (!hasScans()) { map = newMap; return false; } reallyScan(newMap); // deleteOld(newMap); // ?? hmmmmm LOOK this seems wrong; maintainence in background ?? generally collection doesnt // exist // implement olderThan if (olderThanInMsecs > 0) { long olderThan = System.currentTimeMillis() - olderThanInMsecs; // new files must be older than this. Iterator iter = newMap.values().iterator(); // need iterator so we can remove() while (iter.hasNext()) { MFile newFile = iter.next(); String path = newFile.getPath(); if (newFile.getLastModified() > olderThan) { // the file is too new iter.remove(); logger.debug("{}: scan found new Dataset but its too recently modified = {}", collectionName, path); } } } map = newMap; this.lastScanned = System.currentTimeMillis(); this.lastChanged.set(this.lastScanned); logger.debug("{} : initial scan found n datasets = {} ", collectionName, map.keySet().size()); return !map.keySet().isEmpty(); } @Override CalendarDate extractRunDateWithError(MFile mfile) { CalendarDate result = super.extractRunDateWithError(mfile); // if there isn't a DateExtractor, see if a mapping exists between // filenames and runtimes as defied by the coordValue attribute // in explicitly defined file aggregations (i.e. not a directory scan) if (result == null) if (!this.filesRunDateMap.isEmpty()) { String dateString = filesRunDateMap.get(mfile.getPath()); result = CalendarDate.parseISOformat(null, dateString); } if (result == null) logger.error("Failed to find a run date associated with file {}", mfile.getPath()); return result; } protected void reallyScan(java.util.Map map) throws IOException { getController(); // make sure a controller is instantiated // run through all scanners and collect MFile instances into the Map int count = 0; for (CollectionConfig mc : scanList) { long start = System.currentTimeMillis(); // lOOK: are there any circumstances where we dont need to recheck against OS, ie always use cached values? Iterator iter = (mc.wantSubdirs()) ? controller.getInventoryAll(mc, true) : controller.getInventoryTop(mc, true); /// NCDC /// wants /// subdir /// /global/nomads/nexus/gfsanl/**/gfsanl_3_.*\.grb$ if (iter == null) { logger.error(collectionName + ": Invalid collection= " + mc); continue; } while (iter.hasNext()) { MFile mfile = iter.next(); mfile.setAuxInfo(mc.getAuxInfo()); map.put(mfile.getPath(), mfile); count++; } if (logger.isDebugEnabled()) { long took2 = (System.currentTimeMillis() - start) / 1000; logger.debug("{} : was scanned nfiles= {} took={} secs", collectionName, count, took2); } } if (map.isEmpty()) { if (hasScans()) { // only warn if a directory scan comes up with no files found logger.warn("MFileCollectionManager: No files found for {}", collectionName); } } } @Override public String toString() { Formatter f = new Formatter(); f.format("DatasetCollectionManager{ collectionName='%s' recheck=%s ", collectionName, recheck); for (CollectionConfig mc : scanList) { f.format("%n dir=%s filter=%s", mc.getDirectoryName(), mc.getFileFilter()); } return f.toString(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy