All downloads are free. The search and download features use the official Maven repository.

edu.stanford.nlp.util.StringUtils Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.util;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasOffset;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.math.SloppyMath;
import edu.stanford.nlp.util.logging.Redwood;

import java.io.*;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.text.Normalizer;
import java.util.*;
import java.util.Map.Entry;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/**
 * StringUtils is a class for random String things, including output formatting and command line argument parsing.
 * 

 * <p>
 * Many of these methods will be familiar to Perl users: {@link #join(Iterable)}, {@link #split(String, String)},
 * {@link #trim(String, int)}, {@link #find(String, String)}, {@link #lookingAt(String, String)}, and
 * {@link #matches(String, String)}.
 * <p>
 * There are also useful methods for padding Strings/Objects with spaces on the right or left for printing
 * even-width table columns: {@link #padLeft(int, int)}, {@link #pad(String, int)}.
 *

 * <p>
 * Example: print a comma-separated list of numbers:
 * <p>
 * {@code System.out.println(StringUtils.join(nums, ", "));}
 * <p>
 * Example: print a 2D array of numbers with 8-char cells:
 * <pre>{@code
 * for (int i = 0; i < nums.length; i++) {
 *   for (int j = 0; j < nums[i].length; j++) {
 *     System.out.print(StringUtils.padLeft(nums[i][j], 8));
 *   }
 *   System.out.println();
 * }
 * }</pre>
 *
* * @author Dan Klein * @author Christopher Manning * @author Tim Grow ([email protected]) * @author Chris Cox * @version 2006/02/03 */ public class StringUtils { // todo [cdm 2016]: Remove CoreMap/CoreLabel methods from this class // todo [cdm 2016]: Write a really good join method for this class, like William's Ruby one /** A logger for this class */ private static final Redwood.RedwoodChannels log = Redwood.channels(StringUtils.class); /** * Don't let anyone instantiate this class. */ private StringUtils() {} public static final String[] EMPTY_STRING_ARRAY = new String[0]; private static final String PROP = "prop"; private static final String PROPS = "props"; private static final String PROPERTIES = "properties"; private static final String ARGS = "args"; private static final String ARGUMENTS = "arguments"; /** * Say whether this regular expression can be found inside * this String. This method provides one of the two "missing" * convenience methods for regular expressions in the String class * in JDK1.4. This is the one you'll want to use all the time if * you're used to Perl. What were they smoking? * * @param str String to search for match in * @param regex String to compile as the regular expression * @return Whether the regex can be found in str */ public static boolean find(String str, String regex) { return Pattern.compile(regex).matcher(str).find(); } /** * Convenience method: a case-insensitive variant of Collection.contains * @param c Collection<String> * @param s String * @return true if s case-insensitively matches a string in c */ public static boolean containsIgnoreCase(Collection c, String s) { for (String squote: c) { if (squote.equalsIgnoreCase(s)) return true; } return false; } /** * Say whether this regular expression can be found at the beginning of * this String. This method provides one of the two "missing" * convenience methods for regular expressions in the String class * in JDK1.4. 
* * @param str String to search for match at start of * @param regex String to compile as the regular expression * @return Whether the regex can be found at the start of str */ public static boolean lookingAt(String str, String regex) { return Pattern.compile(regex).matcher(str).lookingAt(); } /** * Takes a string of the form "x1=y1,x2=y2,..." such * that each y is an integer and each x is a key. A * String[] s is returned such that s[yn]=xn. * * @param map A string of the form "x1=y1,x2=y2,..." such * that each y is an integer and each x is a key. * @return A String[] s is returned such that s[yn]=xn */ public static String[] mapStringToArray(String map) { String[] m = map.split("[,;]"); int maxIndex = 0; String[] keys = new String[m.length]; int[] indices = new int[m.length]; for (int i = 0; i < m.length; i++) { int index = m[i].lastIndexOf('='); keys[i] = m[i].substring(0, index); indices[i] = Integer.parseInt(m[i].substring(index + 1)); if (indices[i] > maxIndex) { maxIndex = indices[i]; } } String[] mapArr = new String[maxIndex + 1]; // Arrays.fill(mapArr, null); // not needed; Java arrays zero initialized for (int i = 0; i < m.length; i++) { mapArr[indices[i]] = keys[i]; } return mapArr; } /** * Takes a string of the form "x1=y1,x2=y2,..." and returns Map. * * @param map A string of the form "x1=y1,x2=y2,..." * @return A Map m is returned such that m.get(xn) = yn */ public static Map mapStringToMap(String map) { String[] m = map.split("[,;]"); Map res = Generics.newHashMap(); for (String str : m) { int index = str.lastIndexOf('='); String key = str.substring(0, index); String val = str.substring(index + 1); res.put(key.trim(), val.trim()); } return res; } public static List regexesToPatterns(Iterable regexes) { List patterns = new ArrayList<>(); for (String regex:regexes) { patterns.add(Pattern.compile(regex)); } return patterns; } /** * Given a pattern and a string, returns a list with the values of the * captured groups in the pattern. 
If the pattern does not match, returns * null. Note that this uses Matcher.find() rather than Matcher.matches(). * If str is null, returns null. */ public static List regexGroups(Pattern regex, String str) { if (str == null) { return null; } Matcher matcher = regex.matcher(str); if (!matcher.find()) { return null; } List groups = new ArrayList<>(); for (int index = 1; index <= matcher.groupCount(); index++) { groups.add(matcher.group(index)); } return groups; } /** * Say whether this regular expression matches * this String. This method is the same as the String.matches() method, * and is included just to give a call that is parallel to the other * static regex methods in this class. * * @param str String to search for match at start of * @param regex String to compile as the regular expression * @return Whether the regex matches the whole of this str */ public static boolean matches(String str, String regex) { return Pattern.compile(regex).matcher(str).matches(); } public static Set stringToSet(String str, String delimiter) { Set ret = null; if (str != null) { String[] fields = str.split(delimiter); ret = Generics.newHashSet(fields.length); for (String field:fields) { field = field.trim(); ret.add(field); } } return ret; } public static String joinWords(Iterable l, String glue) { StringBuilder sb = new StringBuilder(l instanceof Collection ? ((Collection) l).size() : 64); boolean first = true; for (HasWord o : l) { if ( ! first) { sb.append(glue); } else { first = false; } sb.append(o.word()); } return sb.toString(); } public static String join(List l, String glue, Function toStringFunc, int start, int end) { StringBuilder sb = new StringBuilder(); boolean first = true; start = Math.max(start, 0); end = Math.min(end, l.size()); for (int i = start; i < end; i++) { if ( ! 
first) { sb.append(glue); } else { first = false; } sb.append(toStringFunc.apply(l.get(i))); } return sb.toString(); } public static String joinWords(List l, String glue, int start, int end) { return join(l, glue, HasWord::word, start, end); } private static final Function DEFAULT_TOSTRING = Object::toString; public static String joinFields(List l, final Class field, final String defaultFieldValue, String glue, int start, int end, final Function toStringFunc) { return join(l, glue, new Function() { public String apply(CoreMap in) { Object val = in.get(field); return (val != null)? toStringFunc.apply(val):defaultFieldValue; } }, start, end); } public static String joinFields(List l, final Class field, final String defaultFieldValue, String glue, int start, int end) { return joinFields(l, field, defaultFieldValue, glue, start, end, DEFAULT_TOSTRING); } public static String joinFields(List l, final Class field, final Function toStringFunc) { return joinFields(l, field, "-", " ", 0, l.size(), toStringFunc); } public static String joinFields(List l, final Class field) { return joinFields(l, field, "-", " ", 0, l.size()); } public static String joinMultipleFields(List l, final Class[] fields, final String defaultFieldValue, final String fieldGlue, String glue, int start, int end, final Function toStringFunc) { return join(l, glue, new Function() { @Override public String apply(CoreMap in) { StringBuilder sb = new StringBuilder(); for (Class field: fields) { if (sb.length() > 0) { sb.append(fieldGlue); } Object val = in.get(field); String str = (val != null)? 
toStringFunc.apply(val):defaultFieldValue; sb.append(str); } return sb.toString(); } }, start, end); } public static String joinMultipleFields(List l, final Class[] fields, final Function toStringFunc) { return joinMultipleFields(l, fields, "-", "/", " ", 0, l.size(), toStringFunc); } public static String joinMultipleFields(List l, final Class[] fields, final String defaultFieldValue, final String fieldGlue, String glue, int start, int end) { return joinMultipleFields(l, fields, defaultFieldValue, fieldGlue, glue, start, end, DEFAULT_TOSTRING); } public static String joinMultipleFields(List l, final Class[] fields) { return joinMultipleFields(l, fields, "-", "/", " ", 0, l.size()); } /** * Joins all the tokens together (more or less) according to their original whitespace. * It assumes all whitespace was " " * @param tokens list of tokens which implement {@link HasOffset} and {@link HasWord} * @return a string of the tokens with the appropriate amount of spacing */ public static String joinWithOriginalWhiteSpace(List tokens) { if (tokens.isEmpty()) { return ""; } CoreLabel lastToken = tokens.get(0); StringBuilder buffer = new StringBuilder(lastToken.word()); for (int i = 1; i < tokens.size(); i++) { CoreLabel currentToken = tokens.get(i); int numSpaces = currentToken.beginPosition() - lastToken.endPosition(); if (numSpaces < 0) { numSpaces = 0; } buffer.append(repeat(' ', numSpaces)).append(currentToken.word()); lastToken = currentToken; } return buffer.toString(); } /** * Joins each elem in the {@link Iterable} with the given glue. * For example, given a list of {@code Integers}, you can create * a comma-separated list by calling {@code join(numbers, ", ")}. * * @see StringUtils#join(Stream, String) */ public static String join(Iterable l, String glue) { StringBuilder sb = new StringBuilder(); boolean first = true; for (X o : l) { if ( ! 
first) { sb.append(glue); } else { first = false; } sb.append(o); } return sb.toString(); } /** * Joins each elem in the {@link Stream} with the given glue. * For example, given a list of {@code Integers}, you can create * a comma-separated list by calling {@code join(numbers, ", ")}. * * @see StringUtils#join(Iterable, String) */ public static String join(Stream l, String glue) { StringBuilder sb = new StringBuilder(); boolean first = true; Iterator iter = l.iterator(); while (iter.hasNext()) { if ( ! first) { sb.append(glue); } else { first = false; } sb.append(iter.next()); } return sb.toString(); } /** * Joins each elem in the array with the given glue. For example, given a * list of ints, you can create a comma-separated list by calling * {@code join(numbers, ", ")}. */ public static String join(Object[] elements, String glue) { return (join(Arrays.asList(elements), glue)); } /** * Joins an array of elements in a given span. * @param elements The elements to join. * @param start The start index to join from. * @param end The end (non-inclusive) to join until. * @param glue The glue to hold together the elements. * @return The string form of the sub-array, joined on the given glue. */ public static String join(Object[] elements, int start, int end, String glue) { StringBuilder b = new StringBuilder(127); boolean isFirst = true; for (int i = start; i < end; ++i) { if (isFirst) { b.append(elements[i].toString()); isFirst = false; } else { b.append(glue).append(elements[i].toString()); } } return b.toString(); } /** * Joins each element in the given array with the given glue. For example, * given an array of Integers, you can create a comma-separated list by calling * {@code join(numbers, ", ")}. */ public static String join(String[] items, String glue) { return join(Arrays.asList(items), glue); } /** * Joins elems with a space. */ public static String join(Iterable l) { return join(l, " "); } /** * Joins elements with a space. 
*/ public static String join(Object[] elements) { return (join(elements, " ")); } /** * Splits on whitespace (\\s+). * @param s String to split * @return List of split strings */ public static List split(String s) { return split(s, "\\s+"); } /** * Splits the given string using the given regex as delimiters. * This method is the same as the String.split() method (except it throws * the results in a List), * and is included just to give a call that is parallel to the other * static regex methods in this class. * * @param str String to split up * @param regex String to compile as the regular expression * @return List of Strings resulting from splitting on the regex */ public static List split(String str, String regex) { return (Arrays.asList(str.split(regex))); } /** * Split a string on a given single character. * This method is often faster than the regular split() method. * @param input The input to split. * @param delimiter The character to split on. * @return An array of Strings corresponding to the original input split on the delimiter character. */ public static String[] splitOnChar(String input, char delimiter) { // State String[] out = new String[input.length() + 1]; int nextIndex = 0; int lastDelimiterIndex = -1; char[] chars = input.toCharArray(); // Split for ( int i = 0; i <= chars.length; ++i ) { if (i >= chars.length || chars[i] == delimiter) { char[] tokenChars = new char[i - (lastDelimiterIndex + 1)]; System.arraycopy(chars, lastDelimiterIndex + 1, tokenChars, 0, tokenChars.length); out[nextIndex] = new String(tokenChars); nextIndex += 1; lastDelimiterIndex = i; } } // Clean Result String[] trimmedOut = new String[nextIndex]; System.arraycopy(out, 0, trimmedOut, 0, trimmedOut.length); return trimmedOut; } /** * Splits a string into whitespace tokenized fields based on a delimiter and then whitespace. * For example, "aa bb | bb cc | ccc ddd" would be split into "[aa,bb],[bb,cc],[ccc,ddd]" based on * the delimiter "|". 
This method uses the old StringTokenizer class, which is up to * 3x faster than the regex-based "split()" methods. * * @param delimiter String to split on * @return List of lists of strings. */ public static List> splitFieldsFast(String str, String delimiter) { List> fields = Generics.newArrayList(); StringTokenizer tokenizer = new StringTokenizer(str.trim()); List currentField = Generics.newArrayList(); while(tokenizer.hasMoreTokens()) { String token = tokenizer.nextToken(); if (token.equals(delimiter)) { fields.add(currentField); currentField = Generics.newArrayList(); } else { currentField.add(token.trim()); } } if (currentField.size() > 0) { fields.add(currentField); } return fields; } /** * Split on a given character, filling out the fields in the output array. * This is suitable for, e.g., splitting a TSV file of known column count. * @param out The output array to fill * @param input The input to split * @param delimiter The delimiter to split on. */ public static void splitOnChar(String[] out, String input, char delimiter) { int lastSplit = 0; int outI = 0; char[] chars = input.toCharArray(); for (int i = 0; i < chars.length; ++i) { if (chars[i] == delimiter) { out[outI] = new String(chars, lastSplit, i - lastSplit); outI += 1; lastSplit = i + 1; } } if (outI < out.length) { out[outI] = input.substring(lastSplit); } } /** Split a string into tokens. Because there is a tokenRegex as well as a * separatorRegex (unlike for the conventional split), you can do things * like correctly split quoted strings or parenthesized arguments. * However, it doesn't do the unquoting of quoted Strings for you. * An empty String argument is returned at the beginning, if valueRegex * accepts the empty String and str begins with separatorRegex. * But str can end with either valueRegex or separatorRegex and this does * not generate an empty String at the end (indeed, valueRegex need not * even accept the empty String in this case. 
However, if it does accept * the empty String and there are multiple trailing separators, then * empty values will be returned. * * @param str The String to split * @param valueRegex Must match a token. You may wish to let it match the empty String * @param separatorRegex Must match a separator * @return The List of tokens * @throws IllegalArgumentException if str cannot be tokenized by the two regex */ public static List valueSplit(String str, String valueRegex, String separatorRegex) { Pattern vPat = Pattern.compile(valueRegex); Pattern sPat = Pattern.compile(separatorRegex); List ret = new ArrayList<>(); while (str.length() > 0) { Matcher vm = vPat.matcher(str); if (vm.lookingAt()) { ret.add(vm.group()); str = str.substring(vm.end()); // String got = vm.group(); // log.info("vmatched " + got + "; now str is " + str); } else { throw new IllegalArgumentException("valueSplit: " + valueRegex + " doesn't match " + str); } if (str.length() > 0) { Matcher sm = sPat.matcher(str); if (sm.lookingAt()) { str = str.substring(sm.end()); // String got = sm.group(); // log.info("smatched " + got + "; now str is " + str); } else { throw new IllegalArgumentException("valueSplit: " + separatorRegex + " doesn't match " + str); } } } // end while return ret; } /** * Return a String of length a minimum of totalChars characters by * padding the input String str at the right end with spaces. * If str is already longer * than totalChars, it is returned unchanged. */ public static String pad(String str, int totalChars) { return pad(str, totalChars, ' '); } /** * Return a String of length a minimum of totalChars characters by * padding the input String str at the right end with spaces. * If str is already longer * than totalChars, it is returned unchanged. 
*/ public static String pad(String str, int totalChars, char pad) { if (str == null) { str = "null"; } int slen = str.length(); StringBuilder sb = new StringBuilder(str); for (int i = 0; i < totalChars - slen; i++) { sb.append(pad); } return sb.toString(); } /** * Pads the toString value of the given Object. */ public static String pad(Object obj, int totalChars) { return pad(obj.toString(), totalChars); } /** * Pad or trim so as to produce a string of exactly a certain length. * * @param str The String to be padded or truncated * @param num The desired length */ public static String padOrTrim(String str, int num) { if (str == null) { str = "null"; } int leng = str.length(); if (leng < num) { StringBuilder sb = new StringBuilder(str); for (int i = 0; i < num - leng; i++) { sb.append(' '); } return sb.toString(); } else if (leng > num) { return str.substring(0, num); } else { return str; } } /** * Pad or trim so as to produce a string of exactly a certain length. * * @param str The String to be padded or truncated * @param num The desired length */ public static String padLeftOrTrim(String str, int num) { if (str == null) { str = "null"; } int leng = str.length(); if (leng < num) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < num - leng; i++) { sb.append(' '); } sb.append(str); return sb.toString(); } else if (leng > num) { return str.substring(str.length() - num); } else { return str; } } /** * Pad or trim the toString value of the given Object. */ public static String padOrTrim(Object obj, int totalChars) { return padOrTrim(obj.toString(), totalChars); } /** * Pads the given String to the left with the given character ch to ensure that * it's at least totalChars long. 
*/
  public static String padLeft(String str, int totalChars, char ch) {
    String text = (str == null) ? "null" : str;  // a null str is treated as "null"
    StringBuilder sb = new StringBuilder();
    for (int i = text.length(); i < totalChars; i++) {
      sb.append(ch);
    }
    sb.append(text);
    return sb.toString();
  }

  /**
   * Pads the given String to the left with spaces to ensure that it's
   * at least totalChars long.
   */
  public static String padLeft(String str, int totalChars) {
    return padLeft(str, totalChars, ' ');
  }

  /** Left-pads the toString value of the given Object with spaces. */
  public static String padLeft(Object obj, int totalChars) {
    return padLeft(obj.toString(), totalChars);
  }

  /** Left-pads the decimal form of the given int with spaces. */
  public static String padLeft(int i, int totalChars) {
    return padLeft(Integer.toString(i), totalChars);
  }

  /** Left-pads the {@link Double#toString(double)} form of the given double with spaces. */
  public static String padLeft(double d, int totalChars) {
    // Double.toString gives the same text as the deprecated new Double(d).toString()
    return padLeft(Double.toString(d), totalChars);
  }

  /**
   * Returns s if it's at most maxWidth chars, otherwise chops right side to fit.
   */
  public static String trim(String s, int maxWidth) {
    if (s.length() <= maxWidth) {
      return s;
    }
    return s.substring(0, maxWidth);
  }

  /** Trims the toString value of the given Object to at most maxWidth chars. */
  public static String trim(Object obj, int maxWidth) {
    return trim(obj.toString(), maxWidth);
  }

  /**
   * Returns s if it's at most width chars; otherwise shortens it to exactly
   * width chars, the last three of which are "...".  When width is less than 3
   * there is no room for the ellipsis, so s is simply cut to width chars
   * (a negative width gives the empty String).
   */
  public static String trimWithEllipsis(String s, int width) {
    if (s.length() <= width) {
      return s;
    }
    if (width < 3) {
      return s.substring(0, Math.max(width, 0));
    }
    return s.substring(0, width - 3) + "...";
  }

  /** Applies {@link #trimWithEllipsis(String, int)} to the toString value of the given Object. */
  public static String trimWithEllipsis(Object o, int width) {
    return trimWithEllipsis(o.toString(), width);
  }

  /**
   * Returns s concatenated with itself the given number of times.
   * A count of zero or less gives the empty String.
   */
  public static String repeat(String s, int times) {
    if (times <= 0) {
      return "";
    }
    StringBuilder sb = new StringBuilder(times * s.length());
    for (int i = 0; i < times; i++) {
      sb.append(s);
    }
    return sb.toString();
  }

  /**
   * Returns a String made of ch repeated the given number of times.
   * A count of zero or less gives the empty String.
   */
  public static String repeat(char ch, int times) {
    if (times <= 0) {
      return "";
    }
    StringBuilder sb = new StringBuilder(times);
    for (int i = 0; i < times; i++) {
      sb.append(ch);
    }
    return sb.toString();
  }

  /**
   * Returns a "clean" version of the given filename in which spaces and
   * dashes have been converted to underscores and every other character
   * outside [A-Za-z0-9_] is replaced by 'x', its decimal character code, and 'x'.
*/ public static String fileNameClean(String s) { char[] chars = s.toCharArray(); StringBuilder sb = new StringBuilder(); for (char c : chars) { if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c == '_')) { sb.append(c); } else { if (c == ' ' || c == '-') { sb.append('_'); } else { sb.append('x').append((int) c).append('x'); } } } return sb.toString(); } /** * Returns the index of the nth occurrence of ch in s, or -1 * if there are less than n occurrences of ch. */ public static int nthIndex(String s, char ch, int n) { int index = 0; for (int i = 0; i < n; i++) { // if we're already at the end of the string, // and we need to find another ch, return -1 if (index == s.length() - 1) { return -1; } index = s.indexOf(ch, index + 1); if (index == -1) { return (-1); } } return index; } /** * This returns a string from decimal digit smallestDigit to decimal digit * biggest digit. Smallest digit is labeled 1, and the limits are * inclusive. */ public static String truncate(int n, int smallestDigit, int biggestDigit) { int numDigits = biggestDigit - smallestDigit + 1; char[] result = new char[numDigits]; for (int j = 1; j < smallestDigit; j++) { n = n / 10; } for (int j = numDigits - 1; j >= 0; j--) { result[j] = Character.forDigit(n % 10, 10); n = n / 10; } return new String(result); } /** * Parses command line arguments into a Map. Arguments of the form *

* {@code -flag1 arg1a arg1b ... arg1m -flag2 -flag3 arg3a ... arg3n} *

* will be parsed so that the flag is a key in the Map (including * the hyphen) and its value will be a {@link String}[] containing * the optional arguments (if present). The non-flag values not * captured as flag arguments are collected into a String[] array * and returned as the value of {@code null} in the Map. In * this invocation, flags cannot take arguments, so all the {@link * String} array values other than the value for {@code null} * will be zero-length. * * @param args A command-line arguments array * @return a {@link Map} of flag names to flag argument {@link * String} arrays. */ public static Map argsToMap(String[] args) { return argsToMap(args, Collections.emptyMap()); } /** * Parses command line arguments into a Map. Arguments of the form *

* {@code -flag1 arg1a arg1b ... arg1m -flag2 -flag3 arg3a ... arg3n} *

* will be parsed so that the flag is a key in the Map (including * the hyphen) and its value will be a {@link String}[] containing * the optional arguments (if present). The non-flag values not * captured as flag arguments are collected into a String[] array * and returned as the value of {@code null} in the Map. In * this invocation, the maximum number of arguments for each flag * can be specified as an {@link Integer} value of the appropriate * flag key in the {@code flagsToNumArgs} {@link Map} * argument. (By default, flags cannot take arguments.) *

* Example of usage: *

* * Map flagsToNumArgs = new HashMap(); * flagsToNumArgs.put("-x",new Integer(2)); * flagsToNumArgs.put("-d",new Integer(1)); * Map result = argsToMap(args,flagsToNumArgs); * *

* If a given flag appears more than once, the extra args are appended to * the String[] value for that flag. * * @param args the argument array to be parsed * @param flagsToNumArgs a {@link Map} of flag names to {@link Integer} * values specifying the number of arguments * for that flag (default min 0, max 1). * @return a {@link Map} of flag names to flag argument {@link String} */ public static Map argsToMap(String[] args, Map flagsToNumArgs) { Map result = Generics.newHashMap(); List remainingArgs = new ArrayList<>(); for (int i = 0; i < args.length; i++) { String key = args[i]; if (key.charAt(0) == '-') { // found a flag Integer numFlagArgs = flagsToNumArgs.get(key); int max = numFlagArgs == null ? 1 : numFlagArgs.intValue(); int min = numFlagArgs == null ? 0 : numFlagArgs.intValue(); List flagArgs = new ArrayList<>(); for (int j = 0; j < max && i + 1 < args.length && (j < min || args[i + 1].length() == 0 || args[i + 1].charAt(0) != '-'); i++, j++) { flagArgs.add(args[i + 1]); } if (result.containsKey(key)) { // append the second specification into the args. String[] newFlagArg = new String[result.get(key).length + flagsToNumArgs.get(key)]; int oldNumArgs = result.get(key).length; System.arraycopy(result.get(key), 0, newFlagArg, 0, oldNumArgs); for (int j = 0; j < flagArgs.size(); j++) { newFlagArg[j + oldNumArgs] = flagArgs.get(j); } result.put(key, newFlagArg); } else { result.put(key, flagArgs.toArray(new String[flagArgs.size()])); } } else { remainingArgs.add(args[i]); } } result.put(null, remainingArgs.toArray(new String[remainingArgs.size()])); return result; } /** * In this version each flag has zero or one argument. It has one argument * if there is a thing following a flag that does not begin with '-'. See * {@link #argsToProperties(String[], Map)} for full documentation. * * @param args Command line arguments * @return A Properties object representing the arguments. */ public static Properties argsToProperties(String... 
args) { return argsToProperties(args, Collections.emptyMap()); } /** * Analogous to {@link #argsToMap}. However, there are several key differences between this method and {@link #argsToMap}: *

    *
  • Hyphens are stripped from flag names
  • *
  • Since Properties objects are String to String mappings, the default number of arguments to a flag is * assumed to be 1 and not 0.
  • *
  • Furthermore, the list of arguments not bound to a flag is mapped to the "" property, not null
  • *
  • The special flags "-prop", "-props", "-properties", "-args", or "-arguments" will load the property file * specified by its argument.
  • *
  • The value for flags without arguments is set to "true"
  • *
  • If a flag has multiple arguments, the value of the property is all * of the arguments joined together with a space (" ") character between them.
  • *
  • The value strings are trimmed so trailing spaces do not stop you from loading a file.
  • *
* Properties are read from left to right, and later properties will override earlier ones with the same name. * Properties loaded from a Properties file with the special args are defaults that can be overriden by command line * flags (or earlier Properties files if there is nested usage of the special args. * * @param args Command line arguments * @param flagsToNumArgs Map of how many arguments flags should have. The keys are without the minus signs. * @return A Properties object representing the arguments. */ public static Properties argsToProperties(String[] args, Map flagsToNumArgs) { Properties result = new Properties(); List remainingArgs = new ArrayList<>(); for (int i = 0; i < args.length; i++) { String key = args[i]; if ( ! key.isEmpty() && key.charAt(0) == '-') { // found a flag if (key.length() > 1 && key.charAt(1) == '-') { key = key.substring(2); // strip off 2 hyphens } else { key = key.substring(1); // strip off the hyphen } Integer maxFlagArgs = flagsToNumArgs.get(key); int max = maxFlagArgs == null ? 1 : maxFlagArgs; int min = maxFlagArgs == null ? 0 : maxFlagArgs; if (maxFlagArgs != null && maxFlagArgs == 0 && i < args.length - 1 && ("true".equalsIgnoreCase(args[i + 1]) || "false".equalsIgnoreCase(args[i + 1]))) { max = 1; // case: we're reading a boolean flag. TODO(gabor) there's gotta be a better way... } List flagArgs = new ArrayList<>(); // cdm oct 2007: add length check to allow for empty string argument! 
for (int j = 0; j < max && i + 1 < args.length && (j < min || args[i + 1].isEmpty() || args[i + 1].charAt(0) != '-'); i++, j++) { flagArgs.add(args[i + 1]); } String value; if (flagArgs.isEmpty()) { value = "true"; } else { value = join(flagArgs, " "); } if (key.equalsIgnoreCase(PROP) || key.equalsIgnoreCase(PROPS) || key.equalsIgnoreCase(PROPERTIES) || key.equalsIgnoreCase(ARGUMENTS) || key.equalsIgnoreCase(ARGS)) { result.setProperty(PROPERTIES, value); } else { result.setProperty(key, value); } } else { remainingArgs.add(args[i]); } } if ( ! remainingArgs.isEmpty()) { result.setProperty("", join(remainingArgs, " ")); } /* Processing in reverse order, add properties that aren't present only. Thus, later ones override earlier ones. */ while (result.containsKey(PROPERTIES)) { String file = result.getProperty(PROPERTIES); result.remove(PROPERTIES); Properties toAdd = new Properties(); BufferedReader reader = null; try { reader = IOUtils.readerFromString(file); toAdd.load(reader); // trim all values for (String propKey : toAdd.stringPropertyNames()) { String newVal = toAdd.getProperty(propKey); toAdd.setProperty(propKey, newVal.trim()); } } catch (IOException e) { String msg = "argsToProperties could not read properties file: " + file; throw new RuntimeIOException(msg, e); } finally { IOUtils.closeIgnoringExceptions(reader); } for (String key : toAdd.stringPropertyNames()) { String val = toAdd.getProperty(key); if ( ! result.containsKey(key)) { result.setProperty(key, val); } } } return result; } /** * This method reads in properties listed in a file in the format prop=value, one property per line. * Although {@code Properties.load(InputStream)} exists, I implemented this method to trim the lines, * something not implemented in the {@code load()} method. 
* * @param filename A properties file to read * @return The corresponding Properties object */ public static Properties propFileToProperties(String filename) { Properties result = new Properties(); try { InputStream is = new BufferedInputStream(new FileInputStream(filename)); result.load(is); // trim all values for (String propKey : result.stringPropertyNames()){ String newVal = result.getProperty(propKey); result.setProperty(propKey,newVal.trim()); } is.close(); return result; } catch (IOException e) { throw new RuntimeIOException("propFileToProperties could not read properties file: " + filename, e); } } /** * This method converts a comma-separated String (with whitespace * optionally allowed after the comma) representing properties * to a Properties object. Each property is "property=value". The value * for properties without an explicitly given value is set to "true". This can be used for a 2nd level * of properties, for example, when you have a commandline argument like "-outputOptions style=xml,tags". */ public static Properties stringToProperties(String str) { Properties result = new Properties(); return stringToProperties(str, result); } /** * This method updates a Properties object based on * a comma-separated String (with whitespace * optionally allowed after the comma) representing properties * to a Properties object. Each property is "property=value". The value * for properties without an explicitly given value is set to "true". */ public static Properties stringToProperties(String str, Properties props) { String[] propsStr = str.trim().split(",\\s*"); for (String term : propsStr) { int divLoc = term.indexOf('='); String key; String value; if (divLoc >= 0) { key = term.substring(0, divLoc).trim(); value = term.substring(divLoc + 1).trim(); } else { key = term.trim(); value = "true"; } props.setProperty(key, value); } return props; } /** * If any of the given list of properties are not found, returns the * name of that property. Otherwise, returns null. 
*/ public static String checkRequiredProperties(Properties props, String ... requiredProps) { for (String required : requiredProps) { if (props.getProperty(required) == null) { return required; } } return null; } /** * Prints to a file. If the file already exists, appends if * {@code append=true}, and overwrites if {@code append=false}. */ public static void printToFile(File file, String message, boolean append, boolean printLn, String encoding) { PrintWriter pw = null; try { Writer fw; if (encoding != null) { fw = new OutputStreamWriter(new FileOutputStream(file, append), encoding); } else { fw = new FileWriter(file, append); } pw = new PrintWriter(fw); if (printLn) { pw.println(message); } else { pw.print(message); } } catch (Exception e) { log.info("Exception: in printToFile " + file.getAbsolutePath()); e.printStackTrace(); } finally { if (pw != null) { pw.flush(); pw.close(); } } } /** * Prints to a file. If the file already exists, appends if * {@code append=true}, and overwrites if {@code append=false}. */ public static void printToFileLn(File file, String message, boolean append) { PrintWriter pw = null; try { Writer fw = new FileWriter(file, append); pw = new PrintWriter(fw); pw.println(message); } catch (Exception e) { log.info("Exception: in printToFileLn " + file.getAbsolutePath() + ' ' + message); e.printStackTrace(); } finally { if (pw != null) { pw.flush(); pw.close(); } } } /** * Prints to a file. If the file already exists, appends if * {@code append=true}, and overwrites if {@code append=false}. */ public static void printToFile(File file, String message, boolean append) { PrintWriter pw = null; try { Writer fw = new FileWriter(file, append); pw = new PrintWriter(fw); pw.print(message); } catch (Exception e) { throw new RuntimeIOException("Exception in printToFile " + file.getAbsolutePath(), e); } finally { IOUtils.closeIgnoringExceptions(pw); } } /** * Prints to a file. If the file does not exist, rewrites the file; * does not append. 
*/
  public static void printToFile(File file, String message) {
    printToFile(file, message, false);
  }

  /**
   * Prints to a file.  If the file already exists, appends if
   * {@code append=true}, and overwrites if {@code append=false}.
   */
  public static void printToFile(String filename, String message, boolean append) {
    printToFile(new File(filename), message, append);
  }

  /**
   * Prints to a file, followed by a line separator.  If the file already exists, appends if
   * {@code append=true}, and overwrites if {@code append=false}.
   */
  public static void printToFileLn(String filename, String message, boolean append) {
    printToFileLn(new File(filename), message, append);
  }

  /**
   * Prints to a file.  If the file already exists it is overwritten;
   * this variant never appends.
   */
  public static void printToFile(String filename, String message) {
    printToFile(new File(filename), message, false);
  }

  /**
   * A simpler form of command line argument parsing.
   * Dan thinks this is highly superior to the overly complexified code that
   * comes before it.
   * Parses command line arguments into a Map. Arguments of the form
   * {@code -flag1 arg1 -flag2 -flag3 arg3}
   * will be parsed so that the flag is a key in the Map (including the hyphen)
   * and the
   * optional argument will be its value (if present).
   *
   * @return A Map from keys to possible values (String or null)
   */
  @SuppressWarnings("unchecked")
  public static Map<String, String> parseCommandLineArguments(String[] args) {
    // With parseNumbers == false every value is a String (or null), so the raw cast is safe
    return (Map) parseCommandLineArguments(args, false);
  }

  /**
   * A simpler form of command line argument parsing.
   * Dan thinks this is highly superior to the overly complexified code that
   * comes before it.
   * Parses command line arguments into a Map. Arguments of the form
   * {@code -flag1 arg1 -flag2 -flag3 arg3}
   * will be parsed so that the flag is a key in the Map (including the hyphen)
   * and the
   * optional argument will be its value (if present).
   * In this version, if the argument is numeric, it will be a Double value
   * in the map, not a String.
* * @return A Map from keys to possible values (String or null) */ public static Map parseCommandLineArguments(String[] args, boolean parseNumbers) { Map result = Generics.newHashMap(); for (int i = 0; i < args.length; i++) { String key = args[i]; if (key.charAt(0) == '-') { if (i + 1 < args.length) { String value = args[i + 1]; if (value.charAt(0) != '-') { if (parseNumbers) { Object numericValue = value; try { numericValue = Double.parseDouble(value); } catch (NumberFormatException e2) { // ignore } result.put(key, numericValue); } else { result.put(key, value); } i++; } else { result.put(key, null); } } else { result.put(key, null); } } } return result; } public static String stripNonAlphaNumerics(String orig) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < orig.length(); i++) { char c = orig.charAt(i); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) { sb.append(c); } } return sb.toString(); } public static String stripSGML(String orig) { Pattern sgmlPattern = Pattern.compile("<.*?>", Pattern.DOTALL); Matcher sgmlMatcher = sgmlPattern.matcher(orig); return sgmlMatcher.replaceAll(""); } public static void printStringOneCharPerLine(String s) { for (int i = 0; i < s.length(); i++) { int c = s.charAt(i); System.out.println(c + " \'" + (char) c + "\' "); } } public static String escapeString(String s, char[] charsToEscape, char escapeChar) { StringBuilder result = new StringBuilder(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c == escapeChar) { result.append(escapeChar); } else { for (char charToEscape : charsToEscape) { if (c == charToEscape) { result.append(escapeChar); break; } } } result.append(c); } return result.toString(); } /** * This function splits the String s into multiple Strings using the * splitChar. However, it provides a quoting facility: it is possible to * quote strings with the quoteChar. 
* If the quoteChar occurs within the quotedExpression, it must be prefaced * by the escapeChar. * This routine can be useful for processing a line of a CSV file. * * @param s The String to split into fields. Cannot be null. * @param splitChar The character to split on * @param quoteChar The character to quote items with * @param escapeChar The character to escape the quoteChar with * @return An array of Strings that s is split into */ public static String[] splitOnCharWithQuoting(String s, char splitChar, char quoteChar, char escapeChar) { List result = new ArrayList<>(); int i = 0; int length = s.length(); StringBuilder b = new StringBuilder(); while (i < length) { char curr = s.charAt(i); if (curr == splitChar) { // add last buffer // cdm 2014: Do this even if the field is empty! // if (b.length() > 0) { result.add(b.toString()); b = new StringBuilder(); // } i++; } else if (curr == quoteChar) { // find next instance of quoteChar i++; while (i < length) { curr = s.charAt(i); // mrsmith: changed this condition from // if (curr == escapeChar) { if ((curr == escapeChar) && (i+1 < length) && (s.charAt(i+1) == quoteChar)) { b.append(s.charAt(i + 1)); i += 2; } else if (curr == quoteChar) { i++; break; // break this loop } else { b.append(s.charAt(i)); i++; } } } else { b.append(curr); i++; } } // RFC 4180 disallows final comma. At any rate, don't produce a field after it unless non-empty if (b.length() > 0) { result.add(b.toString()); } return result.toArray(new String[result.size()]); } /** * Computes the longest common substring of s and t. * The longest common substring of a and b is the longest run of * characters that appear in order inside both a and b. Both a and b * may have other extraneous characters along the way. This is like * edit distance but with no substitution and a higher number means * more similar. For example, the LCS of "abcD" and "aXbc" is 3 (abc). 
*/ public static int longestCommonSubstring(String s, String t) { int[][] d; // matrix int n; // length of s int m; // length of t int i; // iterates through s int j; // iterates through t // int cost; // cost // Step 1 n = s.length(); m = t.length(); if (n == 0) { return 0; } if (m == 0) { return 0; } d = new int[n + 1][m + 1]; // Step 2 for (i = 0; i <= n; i++) { d[i][0] = 0; } for (j = 0; j <= m; j++) { d[0][j] = 0; } // Step 3 for (i = 1; i <= n; i++) { char s_i = s.charAt(i - 1); // ith character of s // Step 4 for (j = 1; j <= m; j++) { char t_j = t.charAt(j - 1); // jth character of t // Step 5 // js: if the chars match, you can get an extra point // otherwise you have to skip an insertion or deletion (no subs) if (s_i == t_j) { d[i][j] = SloppyMath.max(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1] + 1); } else { d[i][j] = Math.max(d[i - 1][j], d[i][j - 1]); } } } /* ---- // num chars needed to display longest num int numChars = (int) Math.ceil(Math.log(d[n][m]) / Math.log(10)); for (i = 0; i < numChars + 3; i++) { log.info(' '); } for (j = 0; j < m; j++) { log.info(t.charAt(j) + " "); } log.info(); for (i = 0; i <= n; i++) { log.info((i == 0 ? ' ' : s.charAt(i - 1)) + " "); for (j = 0; j <= m; j++) { log.info(d[i][j] + " "); } log.info(); } ---- */ // Step 7 return d[n][m]; } /** * Computes the longest common contiguous substring of s and t. * The LCCS is the longest run of characters that appear consecutively in * both s and t. For instance, the LCCS of "color" and "colour" is 4, because * of "colo". 
*/ public static int longestCommonContiguousSubstring(String s, String t) { if (s.isEmpty() || t.isEmpty()) { return 0; } int M = s.length(); int N = t.length(); int[][] d = new int[M + 1][N + 1]; for (int j = 0; j <= N; j++) { d[0][j] = 0; } for (int i = 0; i <= M; i++) { d[i][0] = 0; } int max = 0; for (int i = 1; i <= M; i++) { for (int j = 1; j <= N; j++) { if (s.charAt(i - 1) == t.charAt(j - 1)) { d[i][j] = d[i - 1][j - 1] + 1; } else { d[i][j] = 0; } if (d[i][j] > max) { max = d[i][j]; } } } // log.info("LCCS(" + s + "," + t + ") = " + max); return max; } /** * Computes the Levenshtein (edit) distance of the two given Strings. * This method doesn't allow transposition, so one character transposed between two strings has a cost of 2 (one insertion, one deletion). * The EditDistance class also implements the Levenshtein distance, but does allow transposition. */ public static int editDistance(String s, String t) { // Step 1 int n = s.length(); // length of s int m = t.length(); // length of t if (n == 0) { return m; } if (m == 0) { return n; } int[][] d = new int[n + 1][m + 1]; // matrix // Step 2 for (int i = 0; i <= n; i++) { d[i][0] = i; } for (int j = 0; j <= m; j++) { d[0][j] = j; } // Step 3 for (int i = 1; i <= n; i++) { char s_i = s.charAt(i - 1); // ith character of s // Step 4 for (int j = 1; j <= m; j++) { char t_j = t.charAt(j - 1); // jth character of t // Step 5 int cost; // cost if (s_i == t_j) { cost = 0; } else { cost = 1; } // Step 6 d[i][j] = SloppyMath.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost); } } // Step 7 return d[n][m]; } /** * Computes the WordNet 2.0 POS tag corresponding to the PTB POS tag s. * * @param s a Penn TreeBank POS tag. 
*/
  public static String pennPOSToWordnetPOS(String s) {
    switch (s) {
      case "NN": case "NNP": case "NNS": case "NNPS":
        return "noun";
      case "VB": case "VBD": case "VBG": case "VBN": case "VBZ": case "VBP": case "MD":
        return "verb";
      case "JJ": case "JJR": case "JJS": case "CD":
        return "adjective";
      case "RB": case "RBR": case "RBS": case "RP": case "WRB":
        return "adverb";
      default:
        return null;
    }
  }

  /**
   * Returns a short class name for an object.
   * This is the class name stripped of any package name.
   *
   * @return The name of the class minus a package name, for example
   *         {@code ArrayList}
   */
  public static String getShortClassName(Object o) {
    if (o == null) {
      return "null";
    }
    String name = o.getClass().getName();
    int index = name.lastIndexOf('.');
    if (index >= 0) {
      name = name.substring(index + 1);
    }
    return name;
  }

  /**
   * Converts a tab delimited string into an object with given fields
   * Requires the object has setXxx functions for the specified fields
   *
   * @param objClass Class of object to be created
   * @param str string to convert
   * @param delimiterRegex delimiter regular expression
   * @param fieldNames fieldnames
   * @param <T> type to return
   * @return Object created from string
   */
  public static <T> T columnStringToObject(Class<T> objClass, String str, String delimiterRegex, String[] fieldNames)
          throws InstantiationException, IllegalAccessException, NoSuchFieldException, NoSuchMethodException, InvocationTargetException {
    Pattern delimiterPattern = Pattern.compile(delimiterRegex);
    return StringUtils.columnStringToObject(objClass, str, delimiterPattern, fieldNames);
  }

  /**
   * Converts a tab delimited string into an object with given fields
   * Requires the object has public access for the specified fields
   *
   * @param objClass Class of object to be created
   * @param str string to convert
   * @param delimiterPattern delimiter
   * @param fieldNames fieldnames
   * @param <T> type to return
   * @return Object created from string
   */
  public static <T> T columnStringToObject(Class<T> objClass, String str, Pattern delimiterPattern, String[] fieldNames)
          throws InstantiationException, IllegalAccessException, NoSuchMethodException, NoSuchFieldException, InvocationTargetException {
    String[] fields = delimiterPattern.split(str);
    T item = ErasureUtils.uncheckedCast(objClass.newInstance());
    for (int i = 0; i < fields.length; i++) {
      try {
        Field field = objClass.getDeclaredField(fieldNames[i]);
        field.set(item, fields[i]);
      } catch (IllegalAccessException ex) {
        // the field is not accessible from here: fall back to its setXxx(String) method
        Method method = objClass.getDeclaredMethod("set" + StringUtils.capitalize(fieldNames[i]), String.class);
        method.invoke(item, fields[i]);
      }
    }
    return item;
  }

  /**
   * Converts an object into a tab delimited string with given fields
   * Requires the object has public access for the specified fields
   *
   * @param object Object to convert
   * @param delimiter delimiter
   * @param fieldNames fieldnames
   * @return String representing object
   */
  public static String objectToColumnString(Object object, String delimiter, String[] fieldNames)
          throws IllegalAccessException, NoSuchFieldException, NoSuchMethodException, InvocationTargetException {
    StringBuilder sb = new StringBuilder();
    for (String fieldName : fieldNames) {
      if (sb.length() > 0) {
        sb.append(delimiter);
      }
      try {
        Field field = object.getClass().getDeclaredField(fieldName);
        sb.append(field.get(object));
      } catch (IllegalAccessException ex) {
        // the field is not accessible from here: fall back to its getXxx() method
        Method method = object.getClass().getDeclaredMethod("get" + StringUtils.capitalize(fieldName));
        sb.append(method.invoke(object));
      }
    }
    return sb.toString();
  }

  /**
   * Uppercases the first character of a string.
   *
   * @param s a string to capitalize
   * @return a capitalized version of the string; the empty string is returned unchanged
   */
  public static String capitalize(String s) {
    // guard: charAt(0) on an empty string would throw
    if ( ! s.isEmpty() && Character.isLowerCase(s.charAt(0))) {
      return Character.toUpperCase(s.charAt(0)) + s.substring(1);
    } else {
      return s;
    }
  }

  /**
   * Check if a string begins with an uppercase.
* * @param s a string * @return true if the string is capitalized * false otherwise */ public static boolean isCapitalized(String s) { return (Character.isUpperCase(s.charAt(0))); } public static String searchAndReplace(String text, String from, String to) { from = escapeString(from, new char[]{'.', '[', ']', '\\'}, '\\'); // special chars in regex Pattern p = Pattern.compile(from); Matcher m = p.matcher(text); return m.replaceAll(to); } /** * Returns an HTML table containing the matrix of Strings passed in. * The first dimension of the matrix should represent the rows, and the * second dimension the columns. */ public static String makeHTMLTable(String[][] table, String[] rowLabels, String[] colLabels) { StringBuilder buff = new StringBuilder(); buff.append("\n"); // top row buff.append("\n"); buff.append("\n"); // the top left cell for (int j = 0; j < table[0].length; j++) { // assume table is a rectangular matrix buff.append("\n"); } buff.append("\n"); // all other rows for (int i = 0; i < table.length; i++) { // one row buff.append("\n"); buff.append("\n"); for (int j = 0; j < table[i].length; j++) { buff.append("\n"); } buff.append("\n"); } buff.append("
").append(colLabels[j]).append("
").append(rowLabels[i]).append(""); buff.append(((table[i][j] != null) ? table[i][j] : "")); buff.append("
"); return buff.toString(); } /** * Returns a text table containing the matrix of objects passed in. * The first dimension of the matrix should represent the rows, and the * second dimension the columns. Each object is printed in a cell with toString(). * The printing may be padded with spaces on the left and then on the right to * ensure that the String form is of length at least padLeft or padRight. * If tsv is true, a tab is put between columns. * * @return A String form of the table */ public static String makeTextTable(Object[][] table, Object[] rowLabels, Object[] colLabels, int padLeft, int padRight, boolean tsv) { StringBuilder buff = new StringBuilder(); if (colLabels != null) { // top row buff.append(makeAsciiTableCell("", padLeft, padRight, tsv)); // the top left cell for (int j = 0; j < table[0].length; j++) { // assume table is a rectangular matrix buff.append(makeAsciiTableCell(colLabels[j], padLeft, padRight, (j != table[0].length - 1) && tsv)); } buff.append('\n'); } // all other rows for (int i = 0; i < table.length; i++) { // one row if (rowLabels != null) { buff.append(makeAsciiTableCell(rowLabels[i], padLeft, padRight, tsv)); } for (int j = 0; j < table[i].length; j++) { buff.append(makeAsciiTableCell(table[i][j], padLeft, padRight, (j != table[0].length - 1) && tsv)); } buff.append('\n'); } return buff.toString(); } /** The cell String is the string representation of the object. * If padLeft is greater than 0, it is padded. Ditto right * */ private static String makeAsciiTableCell(Object obj, int padLeft, int padRight, boolean tsv) { String result = obj.toString(); if (padLeft > 0) { result = padLeft(result, padLeft); } if (padRight > 0) { result = pad(result, padRight); } if (tsv) { result = result + '\t'; } return result; } /** * Tests the string edit distance function. 
*/ public static void main(String[] args) { String[] s = {"there once was a man", "this one is a manic", "hey there", "there once was a mane", "once in a manger.", "where is one match?", "Jo3seph Smarr!", "Joseph R Smarr"}; for (int i = 0; i < 8; i++) { for (int j = 0; j < 8; j++) { System.out.println("s1: " + s[i]); System.out.println("s2: " + s[j]); System.out.println("edit distance: " + editDistance(s[i], s[j])); System.out.println("LCS: " + longestCommonSubstring(s[i], s[j])); System.out.println("LCCS: " + longestCommonContiguousSubstring(s[i], s[j])); System.out.println(); } } } public static String toAscii(String s) { StringBuilder b = new StringBuilder(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); if (c > 127) { String result = "?"; if (c >= 0x00c0 && c <= 0x00c5) { result = "A"; } else if (c == 0x00c6) { result = "AE"; } else if (c == 0x00c7) { result = "C"; } else if (c >= 0x00c8 && c <= 0x00cb) { result = "E"; } else if (c >= 0x00cc && c <= 0x00cf) { result = "F"; } else if (c == 0x00d0) { result = "D"; } else if (c == 0x00d1) { result = "N"; } else if (c >= 0x00d2 && c <= 0x00d6) { result = "O"; } else if (c == 0x00d7) { result = "x"; } else if (c == 0x00d8) { result = "O"; } else if (c >= 0x00d9 && c <= 0x00dc) { result = "U"; } else if (c == 0x00dd) { result = "Y"; } else if (c >= 0x00e0 && c <= 0x00e5) { result = "a"; } else if (c == 0x00e6) { result = "ae"; } else if (c == 0x00e7) { result = "c"; } else if (c >= 0x00e8 && c <= 0x00eb) { result = "e"; } else if (c >= 0x00ec && c <= 0x00ef) { result = "i"; } else if (c == 0x00f1) { result = "n"; } else if (c >= 0x00f2 && c <= 0x00f8) { result = "o"; } else if (c >= 0x00f9 && c <= 0x00fc) { result = "u"; } else if (c >= 0x00fd && c <= 0x00ff) { result = "y"; } else if (c >= 0x2018 && c <= 0x2019) { result = "\'"; } else if (c >= 0x201c && c <= 0x201e) { result = "\""; } else if (c >= 0x0213 && c <= 0x2014) { result = "-"; } else if (c >= 0x00A2 && c <= 0x00A5) { result = "$"; } else if 
(c == 0x2026) { result = "."; } b.append(result); } else { b.append(c); } } return b.toString(); } public static String toCSVString(String[] fields) { StringBuilder b = new StringBuilder(); for (String fld : fields) { if (b.length() > 0) { b.append(','); } String field = escapeString(fld, new char[]{'\"'}, '\"'); // escape quotes with double quotes b.append('\"').append(field).append('\"'); } return b.toString(); } /** * Swap any occurrences of any characters in the from String in the input String with * the corresponding character from the to String. As Perl tr, for example, * tr("chris", "irs", "mop").equals("chomp"), except it does not * support regular expression character ranges. *

* Note: This is now optimized to not allocate any objects if the
   * input is returned unchanged.
   */
  public static String tr(String input, String from, String to) {
    assert from.length() == to.length();
    char[] swapped = null;  // only allocated once the first replacement is needed
    for (int i = 0, len = input.length(); i < len; i++) {
      int pos = from.indexOf(input.charAt(i));
      if (pos < 0) {
        continue;
      }
      if (swapped == null) {
        swapped = input.toCharArray();
      }
      swapped[i] = to.charAt(pos);
    }
    return (swapped == null) ? input : new String(swapped);
  }

  /**
   * Returns the supplied string with any trailing '\n' or '\r\n' removed.
   * A null argument is returned as null.
   */
  public static String chomp(String s) {
    if (s == null) {
      return null;
    }
    if (s.endsWith("\r\n")) {
      return s.substring(0, s.length() - 2);
    }
    if (s.endsWith("\n")) {
      return s.substring(0, s.length() - 1);
    }
    return s;
  }

  /**
   * Returns the result of calling toString() on the supplied Object, but with
   * any trailing '\n' or '\r\n' removed.
   */
  public static String chomp(Object o) {
    String text = o.toString();
    return chomp(text);
  }

  /**
   * Strip directory from filename.  Like Unix 'basename'.

* * Example: {@code getBaseName("/u/wcmac/foo.txt") ==> "foo.txt"} */ public static String getBaseName(String fileName) { return getBaseName(fileName, ""); } /** * Strip directory and suffix from filename. Like Unix 'basename'. * * Example: {@code getBaseName("/u/wcmac/foo.txt", "") ==> "foo.txt"}
* Example: {@code getBaseName("/u/wcmac/foo.txt", ".txt") ==> "foo"}
* Example: {@code getBaseName("/u/wcmac/foo.txt", ".pdf") ==> "foo.txt"}
*/ public static String getBaseName(String fileName, String suffix) { return getBaseName(fileName, suffix, "/"); } /** * Strip directory and suffix from the given name. Like Unix 'basename'. * * Example: {@code getBaseName("/tmp/foo/bar/foo", "", "/") ==> "foo"}
* Example: {@code getBaseName("edu.stanford.nlp", "", "\\.") ==> "nlp"}
*/ public static String getBaseName(String fileName, String suffix, String sep) { String[] elts = fileName.split(sep); if (elts.length == 0) return ""; String lastElt = elts[elts.length - 1]; if (lastElt.endsWith(suffix)) { lastElt = lastElt.substring(0, lastElt.length() - suffix.length()); } return lastElt; } /** * Given a String the method uses Regex to check if the String only contains alphabet characters * * @param s a String to check using regex * @return true if the String is valid */ public static boolean isAlpha(String s){ Pattern p = Pattern.compile("^[\\p{Alpha}\\s]+$"); Matcher m = p.matcher(s); return m.matches(); } /** * Given a String the method uses Regex to check if the String only contains numeric characters * * @param s a String to check using regex * @return true if the String is valid */ public static boolean isNumeric(String s){ Pattern p = Pattern.compile("^[\\p{Digit}\\s\\.]+$"); Matcher m = p.matcher(s); return m.matches(); } /** * Given a String the method uses Regex to check if the String only contains alphanumeric characters * * @param s a String to check using regex * @return true if the String is valid */ public static boolean isAlphanumeric(String s){ Pattern p = Pattern.compile("^[\\p{Alnum}\\s\\.]+$"); Matcher m = p.matcher(s); return m.matches(); } /** * Given a String the method uses Regex to check if the String only contains punctuation characters * * @param s a String to check using regex * @return true if the String is valid */ public static boolean isPunct(String s){ Pattern p = Pattern.compile("^[\\p{Punct}]+$"); Matcher m = p.matcher(s); return m.matches(); } /** * Given a String the method uses Regex to check if the String looks like an acronym * * @param s a String to check using regex * @return true if the String is valid */ public static boolean isAcronym(String s){ Pattern p = Pattern.compile("^[\\p{Upper}]+$"); Matcher m = p.matcher(s); return m.matches(); } public static String getNotNullString(String s) { if (s == 
null) return ""; else return s; } /** Returns whether a String is either null or empty. * (Copies the Guava method for this.) * * @param str The String to test * @return Whether the String is either null or empty */ public static boolean isNullOrEmpty(String str) { return str == null || str.equals(""); } /** * Resolve variable. If it is the props file, then substitute that variable with * the value mentioned in the props file, otherwise look for the variable in the environment variables. * If the variable is not found then substitute it for empty string. */ public static String resolveVars(String str, Map props) { if (str == null) return null; // ${VAR_NAME} or $VAR_NAME Pattern p = Pattern.compile("\\$\\{(\\w+)\\}"); Matcher m = p.matcher(str); StringBuffer sb = new StringBuffer(); while (m.find()) { String varName = null == m.group(1) ? m.group(2) : m.group(1); String vrValue; //either in the props file if (props.containsKey(varName)) { vrValue = ((String) props.get(varName)); } else { //or as the environment variable vrValue = System.getenv(varName); } m.appendReplacement(sb, null == vrValue ? "" : vrValue); } m.appendTail(sb); return sb.toString(); } /** * convert args to properties with variable names resolved. for each value * having a ${VAR} or $VAR, its value is first resolved using the variables * listed in the props file, and if not found then using the environment * variables. 
if the variable is not found then substitute it for empty string */ public static Properties argsToPropertiesWithResolve(String[] args) { LinkedHashMap result = new LinkedHashMap<>(); Map existingArgs = new LinkedHashMap<>(); for (int i = 0; i < args.length; i++) { String key = args[i]; if (key.length() > 0 && key.charAt(0) == '-') { // found a flag if (key.length() > 1 && key.charAt(1) == '-') key = key.substring(2); // strip off 2 hyphens else key = key.substring(1); // strip off the hyphen int max = 1; int min = 0; List flagArgs = new ArrayList<>(); // cdm oct 2007: add length check to allow for empty string argument! for (int j = 0; j < max && i + 1 < args.length && (j < min || args[i + 1].length() == 0 || args[i + 1].charAt(0) != '-'); i++, j++) { flagArgs.add(args[i + 1]); } if (flagArgs.isEmpty()) { existingArgs.put(key, "true"); } else { if (key.equalsIgnoreCase(PROP) || key.equalsIgnoreCase(PROPS) || key.equalsIgnoreCase(PROPERTIES) || key.equalsIgnoreCase(ARGUMENTS) || key.equalsIgnoreCase(ARGS)) { for(String flagArg: flagArgs) result.putAll(propFileToLinkedHashMap(flagArg, existingArgs)); existingArgs.clear(); } else existingArgs.put(key, join(flagArgs, " ")); } } } result.putAll(existingArgs); for (Entry o : result.entrySet()) { String val = resolveVars(o.getValue(), result); result.put(o.getKey(), val); } Properties props = new Properties(); props.putAll(result); return props; } /** * This method reads in properties listed in a file in the format prop=value, * one property per line. and reads them into a LinkedHashMap (insertion order preserving) * Flags not having any arguments is set to "true". 
* * @param filename A properties file to read * @return The corresponding LinkedHashMap where the ordering is the same as in the * props file */ public static LinkedHashMap propFileToLinkedHashMap(String filename, Map existingArgs) { LinkedHashMap result = new LinkedHashMap<>(existingArgs); for (String l : IOUtils.readLines(filename)) { l = l.trim(); if (l.isEmpty() || l.startsWith("#")) continue; int index = l.indexOf('='); if (index == -1) result.put(l, "true"); else result.put(l.substring(0, index).trim(), l.substring(index + 1).trim()); } return result; } /** * n grams for already splitted string. the ngrams are joined with a single space */ public static Collection getNgrams(List words, int minSize, int maxSize){ List> ng = CollectionUtils.getNGrams(words, minSize, maxSize); Collection ngrams = new ArrayList<>(); for(List n: ng) ngrams.add(StringUtils.join(n," ")); return ngrams; } /** * n grams for already splitted string. the ngrams are joined with a single space */ public static Collection getNgramsFromTokens(List words, int minSize, int maxSize){ List wordsStr = new ArrayList<>(); for(CoreLabel l : words) wordsStr.add(l.word()); List> ng = CollectionUtils.getNGrams(wordsStr, minSize, maxSize); Collection ngrams = new ArrayList<>(); for(List n: ng) ngrams.add(StringUtils.join(n," ")); return ngrams; } /** * The string is split on whitespace and the ngrams are joined with a single space */ public static Collection getNgramsString(String s, int minSize, int maxSize){ return getNgrams(Arrays.asList(s.split("\\s+")), minSize, maxSize); } /** * Build a list of character-based ngrams from the given string. 
*/ public static Collection getCharacterNgrams(String s, int minSize, int maxSize) { Collection ngrams = new ArrayList<>(); int len = s.length(); for (int i = 0; i < len; i++) { for (int ngramSize = minSize; ngramSize > 0 && ngramSize <= maxSize && i + ngramSize <= len; ngramSize++) { ngrams.add(s.substring(i, i + ngramSize)); } } return ngrams; } private static Pattern diacriticalMarksPattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}"); public static String normalize(String s) { // Normalizes string and strips diacritics (map to ascii) by // 1. taking the NFKD (compatibility decomposition - // in compatibility equivalence, formatting such as subscripting is lost - // see http://unicode.org/reports/tr15/) // 2. Removing diacriticals // 3. Recombining into NFKC form (compatibility composition) // This process may be slow. // // The main purpose of the function is to remove diacritics for asciis, // but it may normalize other stuff as well. // A more conservative approach is to do explicit folding just for ascii character // (see RuleBasedNameMatcher.normalize) String d = Normalizer.normalize(s, Normalizer.Form.NFKD); d = diacriticalMarksPattern.matcher(d).replaceAll(""); return Normalizer.normalize(d, Normalizer.Form.NFKC); } /** * Convert a list of labels into a string, by simply joining them with spaces. * @param words The words to join. * @return A string representation of the sentence, tokenized by a single space. */ public static String toString(List words) { return join(words.stream().map(CoreLabel::word), " "); } /** * Convert a CoreMap representing a sentence into a string, by simply joining them with spaces. * @param sentence The sentence to stringify. * @return A string representation of the sentence, tokenized by a single space. 
*/
  public static String toString(CoreMap sentence) {
    return toString(sentence.get(CoreAnnotations.TokensAnnotation.class));
  }

  /**
   * Computes the case-insensitive Levenshtein (edit) distance between two strings.
   * Both strings are lower-cased with {@link Locale#ROOT} first, so the result does not depend
   * on the JVM's default locale.
   * <p>
   * I shamefully stole this from: http://rosettacode.org/wiki/Levenshtein_distance#Java --Gabor
   *
   * @param s1 The first string
   * @param s2 The second string
   * @return The number of single-character insertions, deletions and substitutions needed to
   *         turn one lower-cased string into the other
   */
  public static int levenshteinDistance(String s1, String s2) {
    // the default-locale toLowerCase() maps 'I' to a dotless i under e.g. a Turkish locale
    s1 = s1.toLowerCase(Locale.ROOT);
    s2 = s2.toLowerCase(Locale.ROOT);

    // single rolling row of the edit-distance table; costs[j] is the distance to s2[0..j)
    int[] costs = new int[s2.length() + 1];
    for (int j = 0; j <= s2.length(); j++) {
      costs[j] = j;
    }
    for (int i = 1; i <= s1.length(); i++) {
      int lastValue = i;
      for (int j = 1; j <= s2.length(); j++) {
        int newValue = costs[j - 1];
        if (s1.charAt(i - 1) != s2.charAt(j - 1)) {
          newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1;
        }
        costs[j - 1] = lastValue;
        lastValue = newValue;
      }
      costs[s2.length()] = lastValue;
    }
    return costs[s2.length()];
  }

  /**
   * Computes the Levenshtein (edit) distance between two arrays, comparing elements with
   * {@link Objects#equals(Object, Object)}, so {@code null} elements are allowed and only equal
   * each other.
   * <p>
   * I shamefully stole this from: http://rosettacode.org/wiki/Levenshtein_distance#Java --Gabor
   *
   * @param s1 The first sequence
   * @param s2 The second sequence
   * @param <E> The element type
   * @return The number of single-element insertions, deletions and substitutions needed to
   *         turn one sequence into the other
   */
  public static <E> int levenshteinDistance(E[] s1, E[] s2) {
    // single rolling row of the edit-distance table; costs[j] is the distance to s2[0..j)
    int[] costs = new int[s2.length + 1];
    for (int j = 0; j <= s2.length; j++) {
      costs[j] = j;
    }
    for (int i = 1; i <= s1.length; i++) {
      int lastValue = i;
      for (int j = 1; j <= s2.length; j++) {
        int newValue = costs[j - 1];
        if (!Objects.equals(s1[i - 1], s2[j - 1])) {
          newValue = Math.min(Math.min(newValue, lastValue), costs[j]) + 1;
        }
        costs[j - 1] = lastValue;
        lastValue = newValue;
      }
      costs[s2.length] = lastValue;
    }
    return costs[s2.length];
  }

  /**
   * Unescape an HTML string.
* Taken from: http://stackoverflow.com/questions/994331/java-how-to-decode-html-character-entities-in-java-like-httputility-htmldecode * @param input The string to unescape * @return The unescaped String */ public static String unescapeHtml3(final String input) { StringWriter writer = null; int len = input.length(); int i = 1; int st = 0; while (true) { // look for '&' while (i < len && input.charAt(i-1) != '&') i++; if (i >= len) break; // found '&', look for ';' int j = i; while (j < len && j < i + 6 + 1 && input.charAt(j) != ';') j++; if (j == len || j < i + 2 || j == i + 6 + 1) { i++; continue; } // found escape if (input.charAt(i) == '#') { // numeric escape int k = i + 1; int radix = 10; final char firstChar = input.charAt(k); if (firstChar == 'x' || firstChar == 'X') { k++; radix = 16; } try { int entityValue = Integer.parseInt(input.substring(k, j), radix); if (writer == null) writer = new StringWriter(input.length()); writer.append(input.substring(st, i - 1)); if (entityValue > 0xFFFF) { final char[] chrs = Character.toChars(entityValue); writer.write(chrs[0]); writer.write(chrs[1]); } else { writer.write(entityValue); } } catch (NumberFormatException ex) { i++; continue; } } else { // named escape CharSequence value = htmlUnescapeLookupMap.get(input.substring(i, j)); if (value == null) { i++; continue; } if (writer == null) writer = new StringWriter(input.length()); writer.append(input.substring(st, i - 1)); writer.append(value); } // skip escape st = j + 1; i = st; } if (writer != null) { writer.append(input.substring(st, len)); return writer.toString(); } return input; } private static final String[][] HTML_ESCAPES = { {"\"", "quot"}, // " - double-quote {"&", "amp"}, // & - ampersand {"<", "lt"}, // < - less-than {">", "gt"}, // > - greater-than {"-", "ndash"}, // - - dash // Mapping to escape ISO-8859-1 characters to their named HTML 3.x equivalents. 
{"\u00A0", "nbsp"}, // non-breaking space {"\u00A1", "iexcl"}, // inverted exclamation mark {"\u00A2", "cent"}, // cent sign {"\u00A3", "pound"}, // pound sign {"\u00A4", "curren"}, // currency sign {"\u00A5", "yen"}, // yen sign = yuan sign {"\u00A6", "brvbar"}, // broken bar = broken vertical bar {"\u00A7", "sect"}, // section sign {"\u00A8", "uml"}, // diaeresis = spacing diaeresis {"\u00A9", "copy"}, // © - copyright sign {"\u00AA", "ordf"}, // feminine ordinal indicator {"\u00AB", "laquo"}, // left-pointing double angle quotation mark = left pointing guillemet {"\u00AC", "not"}, // not sign {"\u00AD", "shy"}, // soft hyphen = discretionary hyphen {"\u00AE", "reg"}, // ® - registered trademark sign {"\u00AF", "macr"}, // macron = spacing macron = overline = APL overbar {"\u00B0", "deg"}, // degree sign {"\u00B1", "plusmn"}, // plus-minus sign = plus-or-minus sign {"\u00B2", "sup2"}, // superscript two = superscript digit two = squared {"\u00B3", "sup3"}, // superscript three = superscript digit three = cubed {"\u00B4", "acute"}, // acute accent = spacing acute {"\u00B5", "micro"}, // micro sign {"\u00B6", "para"}, // pilcrow sign = paragraph sign {"\u00B7", "middot"}, // middle dot = Georgian comma = Greek middle dot {"\u00B8", "cedil"}, // cedilla = spacing cedilla {"\u00B9", "sup1"}, // superscript one = superscript digit one {"\u00BA", "ordm"}, // masculine ordinal indicator {"\u00BB", "raquo"}, // right-pointing double angle quotation mark = right pointing guillemet {"\u00BC", "frac14"}, // vulgar fraction one quarter = fraction one quarter {"\u00BD", "frac12"}, // vulgar fraction one half = fraction one half {"\u00BE", "frac34"}, // vulgar fraction three quarters = fraction three quarters {"\u00BF", "iquest"}, // inverted question mark = turned question mark {"\u00C0", "Agrave"}, // А - uppercase A, grave accent {"\u00C1", "Aacute"}, // Б - uppercase A, acute accent {"\u00C2", "Acirc"}, // В - uppercase A, circumflex accent {"\u00C3", "Atilde"}, // Г - 
uppercase A, tilde {"\u00C4", "Auml"}, // Д - uppercase A, umlaut {"\u00C5", "Aring"}, // Е - uppercase A, ring {"\u00C6", "AElig"}, // Ж - uppercase AE {"\u00C7", "Ccedil"}, // З - uppercase C, cedilla {"\u00C8", "Egrave"}, // И - uppercase E, grave accent {"\u00C9", "Eacute"}, // Й - uppercase E, acute accent {"\u00CA", "Ecirc"}, // К - uppercase E, circumflex accent {"\u00CB", "Euml"}, // Л - uppercase E, umlaut {"\u00CC", "Igrave"}, // М - uppercase I, grave accent {"\u00CD", "Iacute"}, // Н - uppercase I, acute accent {"\u00CE", "Icirc"}, // О - uppercase I, circumflex accent {"\u00CF", "Iuml"}, // П - uppercase I, umlaut {"\u00D0", "ETH"}, // Р - uppercase Eth, Icelandic {"\u00D1", "Ntilde"}, // С - uppercase N, tilde {"\u00D2", "Ograve"}, // Т - uppercase O, grave accent {"\u00D3", "Oacute"}, // У - uppercase O, acute accent {"\u00D4", "Ocirc"}, // Ф - uppercase O, circumflex accent {"\u00D5", "Otilde"}, // Х - uppercase O, tilde {"\u00D6", "Ouml"}, // Ц - uppercase O, umlaut {"\u00D7", "times"}, // multiplication sign {"\u00D8", "Oslash"}, // Ш - uppercase O, slash {"\u00D9", "Ugrave"}, // Щ - uppercase U, grave accent {"\u00DA", "Uacute"}, // Ъ - uppercase U, acute accent {"\u00DB", "Ucirc"}, // Ы - uppercase U, circumflex accent {"\u00DC", "Uuml"}, // Ь - uppercase U, umlaut {"\u00DD", "Yacute"}, // Э - uppercase Y, acute accent {"\u00DE", "THORN"}, // Ю - uppercase THORN, Icelandic {"\u00DF", "szlig"}, // Я - lowercase sharps, German {"\u00E0", "agrave"}, // а - lowercase a, grave accent {"\u00E1", "aacute"}, // б - lowercase a, acute accent {"\u00E2", "acirc"}, // в - lowercase a, circumflex accent {"\u00E3", "atilde"}, // г - lowercase a, tilde {"\u00E4", "auml"}, // д - lowercase a, umlaut {"\u00E5", "aring"}, // е - lowercase a, ring {"\u00E6", "aelig"}, // ж - lowercase ae {"\u00E7", "ccedil"}, // з - lowercase c, cedilla {"\u00E8", "egrave"}, // и - lowercase e, grave accent {"\u00E9", "eacute"}, // й - lowercase e, acute accent {"\u00EA", 
"ecirc"}, // к - lowercase e, circumflex accent {"\u00EB", "euml"}, // л - lowercase e, umlaut {"\u00EC", "igrave"}, // м - lowercase i, grave accent {"\u00ED", "iacute"}, // н - lowercase i, acute accent {"\u00EE", "icirc"}, // о - lowercase i, circumflex accent {"\u00EF", "iuml"}, // п - lowercase i, umlaut {"\u00F0", "eth"}, // р - lowercase eth, Icelandic {"\u00F1", "ntilde"}, // с - lowercase n, tilde {"\u00F2", "ograve"}, // т - lowercase o, grave accent {"\u00F3", "oacute"}, // у - lowercase o, acute accent {"\u00F4", "ocirc"}, // ф - lowercase o, circumflex accent {"\u00F5", "otilde"}, // х - lowercase o, tilde {"\u00F6", "ouml"}, // ц - lowercase o, umlaut {"\u00F7", "divide"}, // division sign {"\u00F8", "oslash"}, // ш - lowercase o, slash {"\u00F9", "ugrave"}, // щ - lowercase u, grave accent {"\u00FA", "uacute"}, // ъ - lowercase u, acute accent {"\u00FB", "ucirc"}, // ы - lowercase u, circumflex accent {"\u00FC", "uuml"}, // ь - lowercase u, umlaut {"\u00FD", "yacute"}, // э - lowercase y, acute accent {"\u00FE", "thorn"}, // ю - lowercase thorn, Icelandic {"\u00FF", "yuml"}, // я - lowercase y, umlaut }; private static final HashMap htmlUnescapeLookupMap; static { htmlUnescapeLookupMap = new HashMap<>(); for (final CharSequence[] seq : HTML_ESCAPES) htmlUnescapeLookupMap.put(seq[1].toString(), seq[0]); } /** * Decode an array encoded as a String. This entails a comma separated value enclosed in brackets * or parentheses. 
*
   * An empty input yields the shared empty array. After trimming, one leading '(', '[' or '{' is
   * dropped together with the last character, which has to be the matching closing character.
   * Every returned element is trimmed.
   * <p>
   * NOTE(review): in this copy of the file the text between the loop variable of the parsing
   * loop and the "0) {" after it is missing, as are the type arguments of the collections
   * (the text looks like it lost everything between a '&lt;' and the next '&gt;'). The rules
   * for quoting, escaping and term separators therefore cannot be read off this copy; restore
   * the method from the upstream source before relying on it.
   * <p>
   * NOTE(review): only the untrimmed input is checked for emptiness, so whitespace-only input
   * reaches {@code chars[0]} with an empty array and fails with an
   * ArrayIndexOutOfBoundsException - confirm whether that is intended.
   *
   * @param encoded The String encoded array
   * @return A String array corresponding to the encoded array
   * @throws IllegalArgumentException If an opening paren/bracket/brace is not matched by the last character
   */
  public static String[] decodeArray(String encoded){
    if (encoded.isEmpty()) return EMPTY_STRING_ARRAY;
    char[] chars = encoded.trim().toCharArray();

    //--Parse the String
    // (state)
    char quoteCloseChar = (char) 0;
    List terms = new LinkedList<>();
    StringBuilder current = new StringBuilder();
    //(start/stop overhead)
    // strip one enclosing pair; 'end' then points at the closing character, which is excluded
    int start = 0; int end = chars.length;
    if(chars[0] == '('){ start += 1; end -= 1; if(chars[end] != ')') throw new IllegalArgumentException("Unclosed paren in encoded array: " + encoded); }
    if(chars[0] == '['){ start += 1; end -= 1; if(chars[end] != ']') throw new IllegalArgumentException("Unclosed bracket in encoded array: " + encoded); }
    if(chars[0] == '{'){ start += 1; end -= 1; if(chars[end] != '}') throw new IllegalArgumentException("Unclosed bracket in encoded array: " + encoded); }
    // (finite state automaton)
    // NOTE(review): the loop header and the first part of the loop body are truncated here
    for (int i=start; i 0) {
            terms.add(current);
          }
          current = new StringBuilder();
        }else{
          current.append(chars[i]);
        }
      }
    }

    //--Return
    // flush the term that was still being collected when the input ended
    if(current.length() > 0) terms.add(current);
    String[] rtn = new String[terms.size()];
    int i=0;
    for(StringBuilder b : terms){
      rtn[i] = b.toString().trim();
      i += 1;
    }
    return rtn;
  }

  /**
   * Decode a map encoded as a string.
   * <p>
   * An empty input yields an empty map. After trimming, one leading '(', '[' or '{' is dropped
   * together with the last character, which has to be the matching closing character. A key is
   * ended by "-&gt;", "=&gt;" or ':'. Keys and values are trimmed.
   * <p>
   * NOTE(review): in this copy of the file the text between the loop variable of the parsing
   * loop and the "0) {" after it is missing, as are the type arguments of the maps. What ends
   * a value, and how quoting and escaping work, cannot be read off this copy; restore the
   * method from the upstream source before relying on it.
   * <p>
   * NOTE(review): as in decodeArray, whitespace-only input reaches {@code chars[0]} with an
   * empty array - confirm whether that is intended.
   *
   * @param encoded The String encoded map
   * @return A String map corresponding to the encoded map
   * @throws IllegalArgumentException If an opening paren/bracket/brace is not matched by the last
   *         character, or if a key separator is found while a value is being read
   */
  public static Map decodeMap(String encoded){
    if (encoded.isEmpty()) return new HashMap<>();
    char[] chars = encoded.trim().toCharArray();

    //--Parse the String
    //(state)
    char quoteCloseChar = (char) 0;
    Map map = new HashMap<>();
    String key = "";
    String value = "";
    boolean onKey = true;  // true while the characters being collected belong to a key
    StringBuilder current = new StringBuilder();
    //(start/stop overhead)
    // strip one enclosing pair; 'end' then points at the closing character, which is excluded
    int start = 0; int end = chars.length;
    if(chars[0] == '('){ start += 1; end -= 1; if(chars[end] != ')') throw new IllegalArgumentException("Unclosed paren in encoded map: " + encoded); }
    if(chars[0] == '['){ start += 1; end -= 1; if(chars[end] != ']') throw new IllegalArgumentException("Unclosed bracket in encoded map: " + encoded); }
    if(chars[0] == '{'){ start += 1; end -= 1; if(chars[end] != '}') throw new IllegalArgumentException("Unclosed bracket in encoded map: " + encoded); }
    //(finite state automata)
    // NOTE(review): the loop header and the first part of the loop body are truncated here
    for(int i=start; i 0) {
            value = current.toString().trim();
          }
          current = new StringBuilder();
          onKey = true;
          map.put(key, value);  // <- add value
        } else if((chars[i] == '-' || chars[i] == '=') && (i < chars.length - 1 && chars[i + 1] == '>')) {
          // case: end a key
          if (!onKey) {
            throw new IllegalArgumentException("Encountered a value without a key");
          }
          if (current.length() > 0) {
            key = current.toString().trim();
          }
          current = new StringBuilder();
          onKey = false;
          i += 1; // skip '>' character
        } else if (chars[i] == ':') {
          // case: end a key
          if (!onKey) {
            throw new IllegalArgumentException("Encountered a value without a key");
          }
          if (current.length() > 0) {
            key = current.toString().trim();
          }
          current = new StringBuilder();
          onKey = false;
        } else {
          current.append(chars[i]);
        }
      }
    }

    //--Return
    // flush a value that was still being collected when the input ended
    if (current.toString().trim().length() > 0 && !onKey) {
      map.put(key.trim(), current.toString().trim());
    }
    return map;
  }

  /**
   * Takes an input String, and replaces any bash-style variables (e.g., $VAR_NAME)
   * with its actual environment variable from the passed environment specification.
   *
   * @param raw The raw String to replace variables in.
   * @param env The environment specification; e.g., {@link System#getenv()}.
   * @return The input String, but with all variables replaced.
*         Both the {@code $NAME} and the {@code ${NAME}} form are recognised. A variable that
   *         is missing from {@code env} is replaced with the empty string. Values are inserted
   *         literally: backslashes and dollar signs in a value are kept as they are, and a
   *         value is not scanned for further variables.
   */
  public static String expandEnvironmentVariables(String raw, Map<String, String> env) {
    Matcher matcher = Pattern.compile("\\$\\{?([a-zA-Z_]+[a-zA-Z0-9_]*)\\}?").matcher(raw);
    // One left-to-right pass: each reference is replaced exactly where it was matched, so
    // expanding $FOO can no longer clobber the prefix of a later $FOOBAR.
    // (StringBuffer because the StringBuilder overload of appendReplacement needs Java 9.)
    StringBuffer text = new StringBuffer(raw.length());
    while (matcher.find()) {
      String envValue = env.get(matcher.group(1));
      // quoteReplacement keeps '$' and '\' in the value from being read as group references
      matcher.appendReplacement(text, envValue == null ? "" : Matcher.quoteReplacement(envValue));
    }
    matcher.appendTail(text);
    return text.toString();
  }

  /**
   * Takes an input String, and replaces any bash-style variables (e.g., $VAR_NAME)
   * with its actual environment variable from {@link System#getenv()}.
   *
   * @param raw The raw String to replace variables in.
   * @return The input String, but with all variables replaced.
   */
  public static String expandEnvironmentVariables(String raw) {
    return expandEnvironmentVariables(raw, System.getenv());
  }

  /**
   * Logs the command line arguments to Redwood on the given channels.
   * The logger should be a RedwoodChannels of a single channel: the main class.
   *
   * @param logger The redwood logger to log to.
   * @param args The command-line arguments to log.
   */
  public static void logInvocationString(Redwood.RedwoodChannels logger, String[] args) {
    StringBuilder sb = new StringBuilder("Invoked on ");
    sb.append(new Date());
    sb.append(" with arguments:");
    for (String arg : args) {
      sb.append(' ').append(arg);
    }
    logger.info(sb.toString());
  }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy