All downloads are free. Search and download functionality uses the official Maven repository.

edu.stanford.nlp.io.NumberRangesFileFilter Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.io;

import edu.stanford.nlp.util.Pair;

import java.io.File;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Implements a file filter that examines a number in a filename to
 * determine acceptance.  This is useful for wanting to process ranges
 * of numbered files in collections where each file has some name, part
 * of which is alphabetic and constant, and part of which is numeric.
 * The test is evaluated based on the rightmost natural number found in
 * the filename string.  (It only looks in the final filename, not in other
 * components of the path.)  Number ranges are inclusive.
 * <p>
 * This filter can select multiple discontinuous ranges based on a format
 * similar to page selection ranges in various formatting software, such as
 * "34,52-65,67,93-95".  The constructor takes a String of this sort and
 * deconstructs it into a list of ranges.  The accepted syntax is:
 * <pre>
 * ranges = range
 * ranges = range "," ranges
 * range = integer
 * range = integer "-" integer
 * </pre>
 * Whitespace around each integer will be ignored.  If the filter constructor
 * is passed anything that is not a list of numeric ranges of this sort,
 * including being passed an empty String, then an
 * <code>IllegalArgumentException</code> will be thrown.
 *
 * @author Christopher Manning
 * @version 2003/03/31
 */
public class NumberRangesFileFilter implements FileFilter {

  /** The inclusive ranges, in the order they appeared in the specification. */
  private final List<IntRange> ranges;

  /** Whether directories are accepted, so a recursive traversal descends into them. */
  private final boolean recursively;


  /**
   * Sets up a NumberRangesFileFilter by specifying the ranges of numbers
   * to accept, and whether to also traverse folders for recursive search.
   *
   * @param ranges  The ranges of numbers to accept (see class documentation)
   * @param recurse Whether to go into subfolders
   * @throws IllegalArgumentException If the String ranges does not
   *                                  contain a suitable ranges format
   *                                  (this includes <code>null</code>)
   */
  public NumberRangesFileFilter(String ranges, boolean recurse) {
    recursively = recurse;
    List<IntRange> parsed;
    try {
      parsed = parseRanges(ranges);
    } catch (RuntimeException e) {
      // Every parse failure (null input, bad integer, stray hyphen, no ranges)
      // is reported uniformly, with the underlying problem kept as the cause.
      throw new IllegalArgumentException("Constructor argument not valid list of number ranges: " + ranges, e);
    }
    this.ranges = parsed;
  }


  /**
   * Splits a range specification such as "34,52-65" into its inclusive ranges.
   *
   * @param spec The comma-separated range specification
   * @return The parsed ranges, never empty
   * @throws RuntimeException If spec is null or is not a well-formed,
   *                          non-empty list of ranges
   */
  private static List<IntRange> parseRanges(String spec) {
    List<IntRange> parsed = new ArrayList<>();
    for (String range : spec.split(",")) {
      String[] bounds = range.split("-");
      if (bounds.length > 2) {
        throw new IllegalArgumentException("Constructor argument not valid list of number ranges (too many hyphens): " + range);
      }
      int low = Integer.parseInt(bounds[0].trim());
      int high = (bounds.length == 2) ? Integer.parseInt(bounds[1].trim()) : low;
      parsed.add(new IntRange(low, high));
    }
    // String.split drops trailing empty strings, so input like "," produces
    // no pieces at all; without this check it would build a filter that
    // silently rejects everything instead of reporting the bad argument.
    if (parsed.isEmpty()) {
      throw new IllegalArgumentException("No number ranges given");
    }
    return parsed;
  }


  /**
   * Checks whether a file satisfies the number range selection filter.
   * The test is evaluated based on the rightmost natural number found in
   * the filename string (proper, not including directories in a path).
   * A directory is accepted exactly when this filter was constructed to
   * recurse, regardless of its name.
   *
   * @param file The file
   * @return true If the file is within the ranges filtered for
   */
  @Override
  public boolean accept(File file) {
    if (file.isDirectory()) {
      return recursively;
    }
    return accept(file.getName());
  }


  /**
   * Checks whether a String satisfies the number range selection filter.
   * The test is evaluated based on the rightmost natural number found in
   * the String.  Note that this is just evaluated on the String as given.
   * It is not trying to interpret it as a filename and to decide whether
   * the file exists, is a directory or anything like that.
   *
   * @param str The String to check for a number in
   * @return true If the String is within the ranges filtered for; false
   *         if it is not, if the String contains no digits (including the
   *         empty String), or if its rightmost number does not fit in an int
   */
  public boolean accept(String str) {
    // Scan back from the end to just past the last digit.
    int end = str.length();
    while (end > 0 && !Character.isDigit(str.charAt(end - 1))) {
      end--;
    }
    if (end == 0) {
      return false;  // no digit anywhere (also covers the empty String)
    }
    // Extend left over the contiguous run of digits.
    int start = end;
    while (start > 0 && Character.isDigit(str.charAt(start - 1))) {
      start--;
    }

    int number;
    try {
      number = Integer.parseInt(str.substring(start, end));
    } catch (NumberFormatException e) {
      // The substring is all digits, so this can only be int overflow;
      // such a number cannot lie inside any int-bounded range.
      return false;
    }

    for (IntRange range : ranges) {
      if (range.contains(number)) {
        return true;
      }
    }
    return false;
  }


  /**
   * Returns the ranges in the syntax accepted by the constructor, prefixed
   * by "recursively " if the filter accepts directories.
   *
   * @return A String representation of this filter
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    if (recursively) {
      sb.append("recursively ");
    }
    String separator = "";
    for (IntRange range : ranges) {
      sb.append(separator).append(range.low);
      if (range.low != range.high) {
        sb.append('-').append(range.high);
      }
      separator = ",";
    }
    return sb.toString();
  }


  /** An immutable inclusive range of ints. */
  private static final class IntRange {

    final int low;
    final int high;

    IntRange(int low, int high) {
      this.low = low;
      this.high = high;
    }

    /** Returns whether n lies within this range, both ends included. */
    boolean contains(int n) {
      return n >= low && n <= high;
    }

  }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy