thredds.inventory.CollectionSpecParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cdm Show documentation
Show all versions of cdm Show documentation
The NetCDF-Java Library is a Java interface to NetCDF files,
as well as to many other types of scientific data formats.
The newest version!
/*
* Copyright (c) 1998 - 2011. University Corporation for Atmospheric Research/Unidata
* Portions of this software were developed by the Unidata Program at the
* University Corporation for Atmospheric Research.
*
* Access and use of this software shall impose the following obligations
* and understandings on the user. The user is granted the right, without
* any fee or cost, to use, copy, modify, alter, enhance and distribute
* this software, and any derivative works thereof, and its supporting
* documentation for any purpose whatsoever, provided that this entire
* notice appears in all copies of the software, derivative works and
* supporting documentation. Further, UCAR requests that the user credit
* UCAR/Unidata in any publications that result from the use of this
* software or in any product that includes this software. The names UCAR
* and/or Unidata, however, may not be used in any advertising or publicity
* to endorse or promote any products or commercial entity unless specific
* written permission is obtained from UCAR/Unidata. The user also
* understands that UCAR/Unidata is not obligated to provide the user with
* any support, consulting, training or assistance of any kind with regard
* to the use, operation and performance of this software nor to provide
* the user with any updates, revisions, new versions or "bug fixes."
*
* THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE.
*/
package thredds.inventory;
import java.util.*;
import java.util.regex.Pattern;
import java.io.File;
import net.jcip.annotations.ThreadSafe;
import ucar.unidata.util.StringUtil2;
/**
* Parses the collection specification string.
* the idea is that one copies the full path of an example dataset, then edits it
* Example: "/data/ldm/pub/native/grid/NCEP/GFS/Alaska_191km/** /GFS_Alaska_191km_#yyyyMMdd_HHmm#\.grib1$"
*
* - rootDir ="/data/ldm/pub/native/grid/NCEP/GFS/Alaska_191km"/
* - subdirs=true (because ** is present)
* - dateFormatMark="GFS_Alaska_191km_#yyyyMMdd_HHmm"
* - regExp='GFS_Alaska_191km_.............\.grib1$
*
* Example: "Q:/grid/grib/grib1/data/agg/.*\.grb"
*
* - rootDir ="Q:/grid/grib/grib1/data/agg/"/
* - subdirs=false
* - dateFormatMark=null
* - useName=yes
* - regexp= ".*\.grb" (anything ending with .grb)
*
*
* @see "http://www.unidata.ucar.edu/projects/THREDDS/tech/tds4.2/reference/collections/CollectionSpecification.html"
* @author caron
* @since Jul 7, 2009
*/
@ThreadSafe
public class CollectionSpecParser {
private final String spec;
private final String rootDir;
private final boolean subdirs; // recurse into subdirectories under the root dir
private final java.util.regex.Pattern filter; // regexp filter
private final String dateFormatMark;
//private final boolean useName; // true = use name, false = use path for dateFormatMark
/**
* Single spec : "/topdir/** /#dateFormatMark#regExp"
* This only allows the dateFormatMark to be in the file name, not anywhere else in the filename path,
* and you cant use any part of the dateFormat to filter on.
* @param collectionSpec the collection Spec
* @param errlog put error messages here
*/
public CollectionSpecParser(String collectionSpec, Formatter errlog) {
this.spec = collectionSpec.trim();
int posFilter;
int posGlob = collectionSpec.indexOf("/**/");
if (posGlob > 0) {
rootDir = collectionSpec.substring(0, posGlob);
posFilter = posGlob + 3;
subdirs = true;
} else {
subdirs = false;
posFilter = collectionSpec.lastIndexOf('/');
if (posFilter > 0)
rootDir = collectionSpec.substring(0, posFilter);
else
rootDir = System.getProperty("user.dir"); // working directory
}
File locFile = new File(rootDir);
if (!locFile.exists()) {
errlog.format(" Directory %s does not exist %n", rootDir);
}
// optional filter
String filter = null;
if (posFilter < collectionSpec.length() - 2)
filter = collectionSpec.substring(posFilter + 1); // remove topDir
if (filter != null) {
// optional dateFormatMark
int posFormat = filter.indexOf('#');
if (posFormat >= 0) {
// check for two hash marks
int posFormat2 = filter.lastIndexOf('#');
if (posFormat != posFormat2) { // two hash
dateFormatMark = filter.substring(0, posFormat2); // everything up to the second hash
filter = StringUtil2.remove(filter, '#'); // remove hashes, replace with .
StringBuilder sb = new StringBuilder(filter);
for (int i = posFormat; i < posFormat2 - 1; i++)
sb.setCharAt(i, '.');
String regExp = sb.toString();
this.filter = java.util.regex.Pattern.compile(regExp);
} else { // one hash
dateFormatMark = filter; // everything
String regExp = filter.substring(0, posFormat) + "*";
this.filter = java.util.regex.Pattern.compile(regExp);
}
} else { // no hash (dateFormatMark)
dateFormatMark = null;
this.filter = java.util.regex.Pattern.compile(filter);
}
} else {
dateFormatMark = null;
this.filter = null;
}
//useName = true;
}
/*
* Seperate the spec, with no dateMatcher, and a seperate string with a dateMatcher
* This only allows the dateFormatMark to be in the file name, not anywhere else in the filename path
* @param collectionSpec the collection Spec, no dateMatcher
* @param dateMatcher regexp the dateMatcher regular expression
* @param errlog put error messages here
*
public CollectionSpecParser(String collectionSpec, String dateMatcher, Formatter errlog) {
this.spec = collectionSpec.trim();
int posGlob = collectionSpec.indexOf("/** /");
if (posGlob > 0) {
rootDir = collectionSpec.substring(0, posGlob);
int posFilter = posGlob + 3;
subdirs = true;
String regexp = collectionSpec.substring(posFilter+1);
this.filter = java.util.regex.Pattern.compile(regexp);
} else {
int posFilter = collectionSpec.lastIndexOf('/');
rootDir = collectionSpec.substring(0, posFilter);
subdirs = false;
String regexp = collectionSpec.substring(posFilter+1);
this.filter = java.util.regex.Pattern.compile(regexp);
}
File locFile = new File(rootDir);
if (!locFile.exists()) {
errlog.format(" Directory %s does not exist %n", rootDir);
}
this.dateFormatMark = dateMatcher;
/* int hashPos = -1;
if ((hashPos = dateMatcher.indexOf('#', hashPos+1)) >= 0) {
// check for two hash marks
hashPos = dateMatcher.indexOf('#', hashPos+1);
int secondHash = hashPos;
if (secondHash > 0) { // two hashes
dateFormatMark = dateMatcher.substring(0, secondHash+1); // everything up to the second hash
} else { // one hash
dateFormatMark = dateMatcher; // everything
}
} else { // no hashes
errlog.format(" No DateMatcher specified in '%s'%n", dateMatcher);
dateFormatMark = null;
}
useName = false;
} */
public String getSpec() {
return spec;
}
public String getRootDir() {
return rootDir;
}
public boolean wantSubdirs() {
return subdirs;
}
//public boolean useName() {
// return true;
//}
public Pattern getFilter() {
return filter;
}
public String getDateFormatMark() {
return dateFormatMark;
}
@Override
public String toString() {
return "CollectionSpecParser{" +
"\n topDir='" + rootDir + '\'' +
"\n subdirs=" + subdirs +
"\n regExp='" + filter + '\'' +
"\n dateFormatMark='" + dateFormatMark + '\'' +
// "\n useName=" + useName +
"\n}";
}
/////////////////////////////////////////////////////////
// debugging
/* private static void doit2(String spec, String timePart, Formatter errlog) {
CollectionSpecParser specp = new CollectionSpecParser(spec, timePart, errlog);
System.out.printf("spec= %s timePart=%s%n%s%n", spec, timePart, specp);
String err = errlog.toString();
if (err.length() > 0)
System.out.printf("%s%n", err);
System.out.printf("-----------------------------------%n");
}
public static void main(String arg[]) {
doit2("G:/nomads/cfsr/timeseries/** /.*grb2$", "G:/nomads/cfsr/#timeseries/#yyyyMM", new Formatter());
//doit("C:/data/formats/gempak/surface/#yyyyMMdd#_sao\\.gem", new Formatter());
// doit("Q:/station/ldm/metar/Surface_METAR_#yyyyMMdd_HHmm#.nc", new Formatter());
} */
private static void doit(String spec, Formatter errlog) {
CollectionSpecParser specp = new CollectionSpecParser(spec, errlog);
System.out.printf("spec= %s%n%s%n", spec, specp);
String err = errlog.toString();
if (err.length() > 0)
System.out.printf("%s%n", err);
System.out.printf("-----------------------------------%n");
}
public static void main(String arg[]) {
doit("/data/ldm/pub/native/grid/NCEP/GFS/Alaska_191km/**/GFS_Alaska_191km_#yyyyMMdd_HHmm#\\.grib1$", new Formatter());
doit("Q:/grid/grib/grib1/data/agg/.*\\.grb", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/**/Surface_METAR_#yyyyMMdd_HHmm#\\.nc", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/**/Surface_METAR_#yyyyMMdd_HHmm#.nc", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/**/Surface_METAR_#yyyyMMdd_HHmm", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/Surface_METAR_#yyyyMMdd_HHmm", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/Surface_METAR_#yyyyMMdd_HHmm#.nc", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/Surface_METAR_yyyyMMdd_HHmm.nc", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/**/", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/**/*", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/*", new Formatter());
doit("/data/ldm/pub/decoded/netcdf/surface/metar/T*.T", new Formatter());
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy