![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.util.StrUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
package edu.berkeley.nlp.util;
import java.util.*;
import java.util.regex.*;
import java.lang.reflect.*;
/**
 * Static helpers for splitting, parsing, joining and formatting strings.
 *
 * <p>All methods are stateless. Unless stated otherwise, the {@code split}
 * family treats a null or empty input as "no tokens", and the {@code join}
 * family treats a null input as the empty string.
 */
public class StrUtils {
  // ------------------------------------------------------------------
  // Splitting
  // ------------------------------------------------------------------

  /**
   * Splits {@code s} on single spaces.
   * Differs from {@link String#split(String)} in that it returns an empty
   * array for {@code ""} (and for null).
   */
  public static String[] split(String s) {
    return split(s, " ");
  }

  /**
   * Splits {@code s} with {@link String#split(String)}, so {@code delim} is
   * interpreted as a regular expression.
   *
   * @return the tokens, or an empty array when {@code s} is null or empty
   */
  public static String[] split(String s, String delim) {
    return isEmpty(s) ? new String[0] : s.split(delim);
  }

  /**
   * Splits {@code s} on every occurrence of {@code delim}, where
   * {@code delim} is treated as a literal string rather than a list of
   * delimiter characters or a regular expression.
   *
   * @return the tokens (trailing empty tokens are kept); an empty list when
   *         {@code s} is null or empty
   * @throws IllegalArgumentException if {@code delim} is empty and {@code s}
   *         is not (an empty delimiter would never advance the scan)
   */
  public static List<String> splitByStr(String s, String delim) {
    if (isEmpty(s)) return Collections.emptyList();
    if (delim.length() == 0) {
      throw new IllegalArgumentException("delim must be a non-empty string");
    }
    List<String> tokens = new ArrayList<String>();
    int from = 0;
    while (from < s.length()) {
      int hit = s.indexOf(delim, from);
      if (hit == -1) break;
      tokens.add(s.substring(from, hit));
      from = hit + delim.length();
    }
    // Whatever follows the last delimiter (possibly "") is the final token.
    tokens.add(s.substring(from));
    return tokens;
  }

  /**
   * Returns the index of the first occurrence of {@code c} in {@code s} that
   * is not preceded by an escaping backslash, or -1 if there is none.
   */
  public static int indexOfIgnoreEscaped(String s, char c) {
    return indexOfIgnoreEscaped(s, c, 0);
  }

  /**
   * Same as {@link #indexOfIgnoreEscaped(String, char)} but starts scanning
   * at position {@code i}.
   */
  public static int indexOfIgnoreEscaped(String s, char c, int i) {
    return indexOfIgnoreEscaped(s, "" + c, i);
  }

  /**
   * Finds the first non-escaped occurrence of any character of {@code cs}.
   *
   * @param s  the string to scan
   * @param cs the set of characters to look for
   * @param i  the position at which scanning starts
   * @return the index of the match, or -1 if no unescaped match exists
   */
  public static int indexOfIgnoreEscaped(String s, String cs, int i) {
    boolean escape = false;
    for (; i < s.length(); i++) {
      char ch = s.charAt(i);
      if (escape) {
        // The previous character was a backslash: skip this one.
        escape = false;
      } else if (ch == '\\') {
        escape = true;
      } else if (cs.indexOf(ch) != -1) {
        return i;
      }
    }
    return -1;
  }

  /**
   * Splits like {@link #split(String, String)}, but re-attaches tokens that
   * were separated at an escaped delimiter. The backslash is kept.
   * Example: {@code "a\ b c" => "a\ b", "c"}.
   */
  public static List<String> splitIgnoreEscaped(String line, String delim) {
    String[] tokens = StrUtils.split(line, delim);
    // Piece together the tokens that were cut at an escaped delimiter by
    // pushing the current token (plus delimiter) onto the front of the next.
    List<String> newTokens = new ArrayList<String>();
    for (int i = 0; i < tokens.length; i++) {
      String tok = tokens[i];
      boolean hasNext = i + 1 < tokens.length;
      if (tok.endsWith("\\") && hasNext) {
        tokens[i + 1] = tok.substring(0, tok.length() - 1) + "\\" + delim + tokens[i + 1];
      } else {
        newTokens.add(tok);
      }
    }
    return newTokens;
  }

  /**
   * Splits {@code s} on {@code delim} and parses every token as a double.
   *
   * @throws NumberFormatException if a token is not a valid double
   */
  public static double[] doubleSplit(String s, String delim) {
    String[] tokens = split(s, delim);
    double[] data = new double[tokens.length];
    for (int i = 0; i < tokens.length; i++) {
      data[i] = Double.parseDouble(tokens[i]);
    }
    return data;
  }

  /** Space-delimited variant of {@link #doubleSplit(String, String)}. */
  public static double[] doubleSplit(String s) { return doubleSplit(s, " "); }

  /**
   * Splits {@code s} on {@code delim} and parses every token as an int.
   *
   * @throws NumberFormatException if a token is not a valid int
   */
  public static int[] intSplit(String s, String delim) {
    String[] tokens = split(s, delim);
    int[] data = new int[tokens.length];
    for (int i = 0; i < tokens.length; i++) {
      data[i] = Integer.parseInt(tokens[i]);
    }
    return data;
  }

  /** Space-delimited variant of {@link #intSplit(String, String)}. */
  public static int[] intSplit(String s) { return intSplit(s, " "); }

  /**
   * Splits {@code s} on {@code delim} and parses every token as a short.
   *
   * @throws NumberFormatException if a token is not a valid short
   */
  public static short[] shortSplit(String s, String delim) {
    String[] tokens = split(s, delim);
    short[] data = new short[tokens.length];
    for (int i = 0; i < tokens.length; i++) {
      data[i] = Short.parseShort(tokens[i]);
    }
    return data;
  }

  /** Space-delimited variant of {@link #shortSplit(String, String)}. */
  public static short[] shortSplit(String s) { return shortSplit(s, " "); }

  /**
   * Like {@link #intSplit(String, String)} but returns a mutable list of
   * boxed integers.
   */
  public static List<Integer> intSplitList(String s, String delim) {
    String[] tokens = split(s, delim);
    List<Integer> data = new ArrayList<Integer>(tokens.length);
    for (String token : tokens) {
      data.add(Integer.valueOf(token));
    }
    return data;
  }

  /** Space-delimited variant of {@link #intSplitList(String, String)}. */
  public static List<Integer> intSplitList(String s) {
    return intSplitList(s, " ");
  }

  // ------------------------------------------------------------------
  // Key/value parsing
  // ------------------------------------------------------------------

  /**
   * Parses space-separated key/value entries. Example: {@code a=3 b=4}
   * with {@code keyValueDelim = "="}.
   */
  public static Map<String, String> parseHashMap(String line, String keyValueDelim) {
    return parseHashMap(line, keyValueDelim, " ");
  }

  /**
   * Parses {@code line} into a map: entries are separated by
   * {@code entryDelim}, key and value by {@code keyValueDelim} (both are
   * regular expressions, as in {@link String#split(String)}).
   */
  public static Map<String, String> parseHashMap(String line, String keyValueDelim, String entryDelim) {
    return parseHashMap(Arrays.asList(StrUtils.split(line, entryDelim)), keyValueDelim);
  }

  /**
   * Builds a map from already-separated {@code key<delim>value} tokens.
   * Tokens that do not split into exactly two parts are silently skipped.
   */
  public static Map<String, String> parseHashMap(List<String> tokens, String keyValueDelim) {
    Map<String, String> map = new HashMap<String, String>();
    for (String token : tokens) {
      String[] kv = token.split(keyValueDelim);
      if (kv.length != 2) continue; // Be silent?
      map.put(kv[0], kv[1]);
    }
    return map;
  }

  // public static TDoubleMap parseTDoubleMap(String line, String keyValueDelim) {
  // return parseTDoubleMap(line, keyValueDelim, " ");
  // }
  // public static TDoubleMap parseTDoubleMap(String line, String keyValueDelim, String entryDelim) {
  // TDoubleMap map = new TDoubleMap();
  // String[] tokens = line.split(entryDelim);
  // for(String token : tokens) {
  // String[] kv = token.split(keyValueDelim);
  // if(kv.length != 2) continue; // Be silent?
  // map.put(kv[0], Double.parseDouble(kv[1]));
  // }
  // return map;
  // }

  // ------------------------------------------------------------------
  // Joining
  // ------------------------------------------------------------------

  /** Joins the values with single spaces; null yields {@code ""}. */
  public static String join(double[] list) {
    return join(list, " ");
  }

  /** Joins the values with {@code delim}; null yields {@code ""}. */
  public static String join(double[] list, String delim) {
    if (list == null) return "";
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < list.length; i++) {
      if (i > 0) sb.append(delim);
      sb.append(list[i]);
    }
    return sb.toString();
  }

  /** Joins the values with single spaces; null yields {@code ""}. */
  public static String join(int[] list) { return join(list, " "); }

  /** Joins the values with {@code delim}; null yields {@code ""}. */
  public static String join(int[] list, String delim) {
    if (list == null) return "";
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < list.length; i++) {
      if (i > 0) sb.append(delim);
      sb.append(list[i]);
    }
    return sb.toString();
  }

  /** Joins the values with single spaces; null yields {@code ""}. */
  public static String join(boolean[] list) { return join(list, " "); }

  /** Joins the values with {@code delim}; null yields {@code ""}. */
  public static String join(boolean[] list, String delim) {
    if (list == null) return "";
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < list.length; i++) {
      if (i > 0) sb.append(delim);
      sb.append(list[i]);
    }
    return sb.toString();
  }

  /** Joins the elements with single spaces; null yields {@code ""}. */
  public static <T> String join(T[] objs) {
    if (objs == null) return "";
    return join(Arrays.asList(objs), " ");
  }

  /**
   * Joins {@code objs[start..end)} with single spaces; null yields
   * {@code ""}.
   */
  public static <T> String join(T[] objs, int start, int end) {
    if (objs == null) return "";
    return join(Arrays.asList(objs), " ", start, end);
  }

  /** Joins the elements with single spaces; null yields {@code ""}. */
  public static <T> String join(List<T> objs) {
    return join(objs, " ");
  }

  /** Joins the elements with {@code delim}; null yields {@code ""}. */
  public static <T> String join(T[] objs, String delim) {
    if (objs == null) return "";
    return join(Arrays.asList(objs), delim);
  }

  /** Joins the elements with {@code delim}; null yields {@code ""}. */
  public static <T> String join(List<T> objs, String delim) {
    if (objs == null) return "";
    return join(objs, delim, 0, objs.size());
  }

  /**
   * Joins the elements at indices {@code start} (inclusive) to {@code end}
   * (exclusive) with {@code delim}.
   *
   * @return the joined string; {@code ""} when {@code objs} is null or the
   *         range is empty
   */
  public static <T> String join(List<T> objs, String delim, int start, int end) {
    if (objs == null) return "";
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (int i = start; i < end; i++) {
      if (!first) sb.append(delim);
      sb.append(objs.get(i));
      first = false;
    }
    return sb.toString();
  }

  /**
   * Joins the elements in iteration order with {@code delim}; null yields
   * {@code ""}.
   */
  public static <T> String join(Collection<T> objs, String delim) {
    if (objs == null) return "";
    StringBuilder sb = new StringBuilder();
    boolean first = true;
    for (T x : objs) {
      if (!first) sb.append(delim);
      sb.append(x);
      first = false;
    }
    return sb.toString();
  }

  /**
   * Joins with spaces the entries whose absolute value is at least
   * {@code magnitudeThreshold}.
   *
   * @param x                  the values; null yields {@code ""}
   * @param withIndices        if true each entry is written as
   *                           {@code index:value}
   * @param magnitudeThreshold entries with {@code |x[i]|} below this are
   *                           omitted
   */
  public static String join(int[] x, boolean withIndices, int magnitudeThreshold) {
    if (x == null) return "";
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < x.length; i++) {
      if (Math.abs(x[i]) < magnitudeThreshold) continue;
      // Separate on "something already written" rather than on i > 0, so a
      // filtered-out first element does not leave a leading space.
      if (sb.length() > 0) sb.append(' ');
      if (withIndices) sb.append(i).append(':');
      sb.append(x[i]);
    }
    return sb.toString();
  }

  /** Joins all entries as space-separated {@code index:value} pairs. */
  public static String joinWithIndices(int[] x) { return join(x, true, 0); }

  /**
   * Joins with spaces the entries whose absolute value is at least
   * {@code magnitudeThreshold}.
   *
   * @param x                  the values; null yields {@code ""}
   * @param withIndices        if true each entry is written as
   *                           {@code index:value}
   * @param magnitudeThreshold entries with {@code |x[i]|} below this are
   *                           omitted
   */
  public static String join(double[] x, boolean withIndices, double magnitudeThreshold) {
    if (x == null) return "";
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < x.length; i++) {
      if (Math.abs(x[i]) < magnitudeThreshold) continue;
      // See the int[] overload: avoid a leading space when x[0] is skipped.
      if (sb.length() > 0) sb.append(' ');
      if (withIndices) sb.append(i).append(':');
      sb.append(x[i]);
    }
    return sb.toString();
  }

  /** Joins all entries as space-separated {@code index:value} pairs. */
  public static String joinWithIndices(double[] x) { return join(x, true, 0); }

  /**
   * Formats a matrix as left-aligned, space-padded columns, one row per
   * line (each line ends with a newline).
   *
   * @param data     the cells; the column count is taken from the first row
   * @param rowNames optional labels written as an extra first column, or
   *                 null for none
   * @param colNames optional labels written as an extra first row, or null
   *                 for none
   */
  public static <T> String join(T[][] data, String[] rowNames, String[] colNames) {
    int rowOffset = (colNames == null ? 0 : 1); // header row present?
    int colOffset = (rowNames == null ? 0 : 1); // label column present?
    int numRows = data.length;
    int numCols;
    if (numRows > 0) {
      numCols = data[0].length;
    } else {
      // No data rows: size the (header-only) table from the column names.
      numCols = (colNames == null ? 0 : colNames.length);
    }

    // The label column widens the table by colOffset (not rowOffset).
    String[][] mat = new String[numRows + rowOffset][numCols + colOffset];
    for (int r = 0; r < numRows; r++) {
      for (int c = 0; c < numCols; c++) {
        mat[r + rowOffset][c + colOffset] = String.valueOf((Object) data[r][c]);
      }
    }
    if (rowNames != null) {
      for (int r = 0; r < numRows; r++) {
        mat[r + rowOffset][0] = rowNames[r];
      }
    }
    if (colNames != null) {
      for (int c = 0; c < numCols; c++) {
        mat[0][c + colOffset] = colNames[c];
      }
    }

    // Column width = longest cell; unset cells (e.g. the corner) become "".
    int[] widths = new int[numCols + colOffset];
    for (int c = 0; c < numCols + colOffset; c++) {
      for (int r = 0; r < numRows + rowOffset; r++) {
        if (mat[r][c] == null) mat[r][c] = "";
        widths[c] = Math.max(widths[c], mat[r][c].length());
      }
    }

    StringBuilder buf = new StringBuilder();
    for (int r = 0; r < numRows + rowOffset; r++) {
      for (int c = 0; c < numCols + colOffset; c++) {
        // One extra space of padding separates adjacent columns.
        String fmt = "%-" + (widths[c] + 1) + "s";
        buf.append(String.format(fmt, mat[r][c]));
      }
      buf.append("\n");
    }
    return buf.toString();
  }

  // ------------------------------------------------------------------
  // Miscellaneous
  // ------------------------------------------------------------------

  /** Null-safe {@code toString}: returns null (not "null") for null. */
  public static String toString(Object o) {
    return o == null ? null : o.toString();
  }

  /** Returns true when {@code s} is null or the empty string. */
  public static boolean isEmpty(String s) { return s == null || s.equals(""); }

  /**
   * Returns {@code s} concatenated {@code n} times ({@code ""} when
   * {@code n <= 0}).
   */
  public static String repeat(String s, int n) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < n; i++) {
      sb.append(s);
    }
    return sb.toString();
  }

  /**
   * Compiles {@code pattern} as a regular expression and returns a matcher
   * for it over {@code s}. The pattern is compiled on every call.
   */
  public static Matcher match(String pattern, String s) {
    return Pattern.compile(pattern).matcher(s);
  }

  /**
   * Applies {@code fmt} element-wise across parallel arrays: result
   * {@code i} is {@code String.format(fmt, is[0][i], is[1][i], ...)}.
   *
   * @param fmt a {@link String#format(String, Object...)} pattern
   * @param is  the arrays (primitive or object) supplying the arguments;
   *            the result length is the length of the first array
   * @return the formatted strings; an empty array when no arrays are given
   */
  public static String[] format(String fmt, Object... is) {
    if (is.length == 0) return new String[0];
    int n = Array.getLength(is[0]);
    String[] out = new String[n];
    for (int i = 0; i < n; i++) {
      Object[] args = new Object[is.length];
      for (int j = 0; j < is.length; j++) {
        args[j] = Array.get(is[j], i);
      }
      out[i] = String.format(fmt, args);
    }
    return out;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy