All Downloads are FREE. Search and download functionalities are using the official Maven repository.

thredds.inventory.CollectionGlob Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 1998-2018 University Corporation for Atmospheric Research/Unidata
 * See LICENSE for license information.
 */

package thredds.inventory;

import org.slf4j.Logger;
import thredds.filesystem.MFileOS7;
import ucar.nc2.util.CloseableIterator;
import java.io.IOException;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.*;

/**
 * A MCollection defined by a glob filter. Experimental.
 *
 * From http://blog.eyallupu.com/2011/11/java-7-working-with-directories.html
 *
 * The 'glob' Syntax
 * The glob (stands for globbing) syntax is a 'simplified' form of regular expressions with awareness to path
 * components (directories), the syntax is composed of the following syntactic tokens:
 * 1) The '*' character matches zero or more characters from the path elements without crossing directory boundaries
 * (unlike regular expression this is not a Kleene star and it has nothing to do with the preceding part of the
 * expression)
 * 2) The '**' characters match zero or more characters crossing directory boundaries
 * 3) The '?' character matches exactly one character of a name component
 * 4) '[' and ']' can be used to match a single character in the path name from a set of characters
 * '-' (hyphen) can be used to specify a range of characters. If hyphen has to be included in the characters set it must
 * be the first in the set
 * '!' as the first character in the set can be used as a negation expression
 * 5) '{' and '}' can group sub patterns, the group matches if any of the sub patterns matches (comma is used to
 * separate between the groups)
 * 6 ) The dot '.' character represents a dot (unlike regular expressions in which a dot is a replacement for any
 * character)
 * 7) and finally: special characters escaping is done using backslash
 * 
 * The '**' expression is the only one to cross directory boundaries, all other expressions are bound within a single
 * element
 * (either a directory or a filename), below is a sample usage of PathMatcher followed by few pattern examples:
 *
 *
 *
 * 
 * @author caron
 * @since 5/19/14
 */
public class CollectionGlob extends CollectionAbstract {
  PathMatcher matcher;
  boolean debug;
  int depth;

  public CollectionGlob(String collectionName, String glob, Logger logger) {
    super(collectionName, logger);

    matcher = FileSystems.getDefault().getPathMatcher("glob:" + glob);

    // lets suppose the first "*" indicates the top dir
    int pos = glob.indexOf("*");
    this.root = glob.substring(0, pos);
    String match = glob.substring(pos);

    // count how far to recurse. LAME!!! why doesnt java provide the right thing !!!!
    pos = glob.indexOf("**");
    if (pos > 0)
      depth = Integer.MAX_VALUE;
    else {
      // count the "/" !!
      for (char c : match.toCharArray())
        if (c == '/')
          depth++;
    }

    if (debug)
      System.out.printf(" CollectionGlob.MFileIterator topPath='%s' depth=%d%n", this.root, this.depth);

  }

  @Override
  public void close() {

  }

  @Override
  public Iterable getFilesSorted() throws IOException {
    return makeFileListSorted();
  }

  @Override
  public CloseableIterator getFileIterator() throws IOException {
    return new MyFileIterator(this.root);
  }

  // from http://blog.eyallupu.com/2011/11/java-7-working-with-directories.html
  public static DirectoryStream newDirectoryStream(Path dir, String glob) throws IOException {
    FileSystem fs = dir.getFileSystem();
    PathMatcher matcher = fs.getPathMatcher("glob:" + glob);
    DirectoryStream.Filter filter = entry -> matcher.matches(entry.getFileName());
    return fs.provider().newDirectoryStream(dir, filter);
  }

  private class MyFileIterator implements CloseableIterator {
    DirectoryStream dirStream;
    Iterator dirStreamIterator;
    MFile nextMFile;
    int count, total;
    Stack subdirs = new Stack<>();
    int currDepth;

    MyFileIterator(String topDir) throws IOException {
      Path topPath = Paths.get(topDir);
      dirStream = Files.newDirectoryStream(topPath);
      dirStreamIterator = dirStream.iterator();
    }

    public boolean hasNext() {
      while (true) {

        try {
          while (!dirStreamIterator.hasNext()) {
            dirStream.close();
            if (subdirs.isEmpty()) {
              nextMFile = null;
              return false;
            }
            currDepth++; // LOOK wrong
            Path nextSubdir = subdirs.pop();
            dirStream = Files.newDirectoryStream(nextSubdir);
            dirStreamIterator = dirStream.iterator();
          }

          total++;
          Path nextPath = dirStreamIterator.next();
          BasicFileAttributes attr = Files.readAttributes(nextPath, BasicFileAttributes.class);
          if (attr.isDirectory()) {
            if (currDepth < depth)
              subdirs.push(nextPath);
            continue;
          }

          if (!matcher.matches(nextPath)) {
            continue;
          }

          nextMFile = new MFileOS7(nextPath, attr);
          return true;

        } catch (IOException e) {
          throw new RuntimeException(e);
        }
        // if (filter == null || filter.accept(nextMFile)) return true;
      }
    }

    public MFile next() {
      if (nextMFile == null)
        throw new NoSuchElementException();
      count++;
      return nextMFile;
    }

    public void remove() {
      throw new UnsupportedOperationException();
    }

    // better alternative is for caller to send in callback (Visitor pattern)
    // then we could use the try-with-resource
    public void close() throws IOException {
      if (debug)
        System.out.printf("  OK=%d total=%d%n ", count, total);
      dirStream.close();
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy