// edu.emory.mathcs.nlp.common.util.DSUtils (Maven / Gradle / Ivy)
// The newest version!
/**
* Copyright 2015, Emory University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.emory.mathcs.nlp.common.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Random;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import edu.emory.mathcs.nlp.common.collection.tuple.DoubleIntPair;
import edu.emory.mathcs.nlp.common.collection.tuple.Pair;
/**
* @author Jinho D. Choi ({@code [email protected]})
*/
public class DSUtils
{
/** Private constructor: this class cannot be instantiated from outside. */
private DSUtils() {}
/**
 * Creates a set of strings from the input stream using the default options.
 * Equivalent to {@code createStringHashSet(in, true, false)}: trimming is enabled and
 * {@code decap} is disabled.
 * @param in the input stream to read from.
 * @return the set produced by {@link #createStringHashSet(InputStream, boolean, boolean)}.
 */
static public Set<String> createStringHashSet(InputStream in)
{
    return createStringHashSet(in, true, false);
}
/**
 * Reads the input stream line by line and collects the lines into a hash set.
 * If an {@link IOException} occurs while reading, its stack trace is printed and the entries
 * collected so far are returned. The reader wrapping {@code in} is not closed by this method.
 * @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}.
 * The file that the input-stream is created from consists of one entry per line.
 * @param trim if {@code true}, each line is trimmed and lines that are empty after trimming are skipped.
 * @param decap if {@code true}, each line is converted by {@code StringUtils.toLowerCase} before it is added.
 * @return the set of lines read from the input stream.
 */
static public Set<String> createStringHashSet(InputStream in, boolean trim, boolean decap)
{
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    Set<String> set = new HashSet<>();
    String line;

    try
    {
        while ((line = reader.readLine()) != null)
        {
            if (trim)
            {
                line = line.trim();
                if (line.isEmpty()) continue;
            }

            if (decap)
                line = StringUtils.toLowerCase(line);

            set.add(line);
        }
    }
    catch (IOException e) {e.printStackTrace();}

    return set;
}
/**
 * Creates a string-to-string map from the input stream with trimming enabled.
 * Equivalent to {@code createStringHashMap(in, tokenizer, true)}.
 * @param in the input stream to read from.
 * @param tokenizer splits each line into its key and value.
 * @return the map produced by {@link #createStringHashMap(InputStream, CharTokenizer, boolean)}.
 */
static public Map<String,String> createStringHashMap(InputStream in, CharTokenizer tokenizer)
{
    return createStringHashMap(in, tokenizer, true);
}
/**
 * Reads the input stream line by line and stores one key/value pair per line in a hash map.
 * Each line is split by {@code tokenizer}; the first token becomes the key and the second token the value
 * (a later line with the same key overwrites the earlier value). A line that yields fewer than two tokens
 * fails on the access to the second token. If an {@link IOException} occurs while reading, its stack trace
 * is printed and the entries collected so far are returned. The reader wrapping {@code in} is not closed
 * by this method.
 * @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}.
 * The file that the input-stream is created from consists of one entry per line
 * (a key and a value, separated so that {@code tokenizer} splits them).
 * @param tokenizer splits each line into tokens.
 * @param trim if {@code true}, each line is trimmed and lines that are empty after trimming are skipped.
 * @return the map of key/value pairs read from the input stream.
 */
static public Map<String,String> createStringHashMap(InputStream in, CharTokenizer tokenizer, boolean trim)
{
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    Map<String,String> map = new HashMap<>();
    String[] t;
    String line;

    try
    {
        while ((line = reader.readLine()) != null)
        {
            if (trim)
            {
                line = line.trim();
                if (line.isEmpty()) continue;
            }

            t = tokenizer.tokenize(line);
            map.put(t[0], t[1]);
        }
    }
    catch (IOException e) {e.printStackTrace();}

    return map;
}
static public >void sortReverseOrder(List list)
{
Collections.sort(list, Collections.reverseOrder());
}
static public >void sortReverseOrder(T[] array)
{
Arrays.sort(array, Collections.reverseOrder());
}
/**
 * Checks whether the two collections share at least one element.
 * The smaller collection is iterated and each of its items is looked up in the larger one.
 * @return {@code true} if some item of one collection is contained in the other; otherwise, {@code false}.
 */
static public <T>boolean hasIntersection(Collection<T> col1, Collection<T> col2)
{
    // iterate over the smaller collection, probe the larger one
    if (col2.size() < col1.size())
    {
        Collection<T> tmp = col1;
        col1 = col2;
        col2 = tmp;
    }

    for (T item : col1)
    {
        if (col2.contains(item))
            return true;
    }

    return false;
}
/** @return a set containing all field values of this class. */
static public Set getFieldSet(Class> cs)
{
Set set = new HashSet<>();
try
{
for (Field f : cs.getFields())
set.add(f.get(cs).toString());
}
catch (IllegalArgumentException e) {e.printStackTrace();}
catch (IllegalAccessException e) {e.printStackTrace();}
return set;
}
/**
 * Bounds-checked access to a list element.
 * @param list the list to read from.
 * @param index the position of the requested item.
 * @return the index'th item if exists; otherwise, {@code null}.
 */
static public <T>T get(List<T> list, int index)
{
    return isRange(list, index) ? list.get(index) : null;
}
/**
 * Bounds-checked access to an array element.
 * @param array the array to read from.
 * @param index the position of the requested item.
 * @return the index'th item if exists; otherwise, {@code null}.
 */
static public <T>T get(T[] array, int index)
{
    return isRange(array, index) ? array[index] : null;
}
/**
 * @param list the list to read from.
 * @return the last item in the list if exists; otherwise, {@code null}.
 */
static public <T>T getLast(List<T> list)
{
    return list.isEmpty() ? null : list.get(list.size()-1);
}
/**
 * @param list the list whose size bounds the index.
 * @param index the index to check.
 * @return {@code true} if {@code 0 <= index < list.size()}.
 */
static public boolean isRange(List<?> list, int index)
{
    return 0 <= index && index < list.size();
}
/**
 * @param array the array whose length bounds the index.
 * @param index the index to check.
 * @return {@code true} if {@code 0 <= index < array.length}.
 */
static public <T>boolean isRange(T[] array, int index)
{
    return 0 <= index && index < array.length;
}
/**
 * Creates the sequence {@code beginIndex, beginIndex+gap, beginIndex+2*gap, ...} that stops before
 * reaching {@code endIndex}. The array length is {@code MathUtils.ceil(d)}, where {@code d} is the value
 * of {@code MathUtils.divide(endIndex-beginIndex, gap)}; an empty array is returned when {@code d} is negative,
 * i.e., when {@code gap} points away from {@code endIndex}.
 * @param beginIndex inclusive
 * @param endIndex exclusive
 * @param gap the step added between consecutive values.
 * @return the array holding the sequence.
 */
static public int[] range(int beginIndex, int endIndex, int gap)
{
    double d = MathUtils.divide(endIndex-beginIndex, gap);
    if (d < 0) return new int[0];

    int[] array = new int[MathUtils.ceil(d)];
    int i, j;

    if (beginIndex < endIndex)
    {
        // ascending sequence
        for (i=beginIndex,j=0; i<endIndex; i+=gap,j++)
            array[j] = i;
    }
    else
    {
        // descending sequence: d >= 0 with beginIndex >= endIndex, so gap is not positive here
        for (i=beginIndex,j=0; i>endIndex; i+=gap,j++)
            array[j] = i;
    }

    return array;
}
/**
 * Shorthand for {@code range(0, size, 1)}.
 * @param size the exclusive end index passed to {@link #range(int, int, int)}.
 * @return the array returned by {@link #range(int, int, int)}.
 */
static public int[] range(int size)
{
    final int begin = 0;
    final int step = 1;
    return range(begin, size, step);
}
/**
 * Exchanges the two elements of the array located at the given indices.
 * @param array the array to modify.
 * @param index0 the position of the first element.
 * @param index1 the position of the second element.
 */
static public void swap(int[] array, int index0, int index1)
{
    final int first = array[index0], second = array[index1];
    array[index0] = second;
    array[index1] = first;
}
/**
 * Exchanges the two elements of the list located at the given indices.
 * @param list the list to modify.
 * @param index0 the position of the first element.
 * @param index1 the position of the second element.
 */
static public <T>void swap(List<T> list, int index0, int index1)
{
    Collections.swap(list, index0, index1);
}
/** Calls {@link #shuffle(int[], Random, int)}, where {@code lastIndex = array.length}. */
static public void shuffle(int[] array, Random rand)
{
    final int lastIndex = array.length;
    shuffle(array, rand, lastIndex);
}
/**
 * Calls {@link #shuffle(List, Random, int)}, where {@code lastIndex = list.size()}.
 * @param list the list to shuffle.
 * @param rand the random number generator handed to the three-argument overload.
 */
static public <T>void shuffle(List<T> list, Random rand)
{
    shuffle(list, rand, list.size());
}
// NOTE(review): the text of this region looks damaged. Each "for (i=0; i" header below breaks off
// and runs straight into the signature of a following method, so the loop bodies of the two
// three-argument shuffle overloads and the name of the final method (which appends every item of
// a String[] to a list) are missing here. It appears that everything between a '<' and the next
// '>' was lost; restore this region from the original source before relying on it.
static public void shuffle(int[] array, Random rand, int lastIndex)
{
int i, j, size = lastIndex - 1;
for (i=0; ivoid shuffle(List list, Random rand, int lastIndex)
{
int i, j, size = lastIndex - 1;
for (i=0; i list, String[] array)
{
for (String item : array)
list.add(item);
}
/**
 * Removes the last item of the list; does nothing if the list is empty.
 * @param list the list to modify.
 */
static public void removeLast(List<?> list)
{
    if (!list.isEmpty()) list.remove(list.size()-1);
}
static public int max(int[] array)
{
int i, size = array.length;
int m = array[0];
for (i=1; iList>[] createEmptyListArray(int size)
{
List>[] array = new ArrayList>[size];
for (int i=0; i();
return array;
}
static public PriorityQueue>[] createEmptyPriorityQueueArray(int size, boolean ascending)
{
PriorityQueue>[] queue = new PriorityQueue>[size];
for (int i=0; i() : new PriorityQueue<>(Collections.reverseOrder());
return queue;
}
/**
 * @param items the items to collect, in order.
 * @return a new list containing the given items in the given order.
 */
@SafeVarargs
static public <T>List<T> toList(T... items)
{
    return Arrays.stream(items).collect(Collectors.toList());
}
/**
 * @param items the items to collect; duplicates are merged.
 * @return a new {@link HashSet} containing the given items.
 */
@SafeVarargs
static public <T>Set<T> toHashSet(T... items)
{
    // Collectors.toSet() makes no guarantee about the Set implementation; request a HashSet explicitly
    return Arrays.stream(items).collect(Collectors.toCollection(HashSet::new));
}
static public Set merge(List> sets)
{
Set merge = new HashSet<>();
for (Set set : sets) merge.addAll(set);
return merge;
}
/**
 * @param col the collection of strings to copy; may be {@code null}.
 * @return a new array holding the items of the collection in its iteration order,
 * or {@code null} if the collection is {@code null}.
 */
static public String[] toArray(Collection<String> col)
{
    if (col == null) return null;
    String[] array = new String[col.size()];
    col.toArray(array);
    return array;
}
/**
 * @param source the collection whose items are copied; it is not modified.
 * @param remove the items to leave out of the copy.
 * @return a new list containing the items of {@code source}, in its iteration order,
 * that are not contained in {@code remove}.
 */
static public <T>List<T> removeAll(Collection<? extends T> source, Collection<?> remove)
{
    List<T> list = new ArrayList<>(source);
    list.removeAll(remove);
    return list;
}
/**
 * @param s1 the collection that is expected to contain every item of {@code s2}.
 * @param s2 the candidate subset.
 * @return true if s2 is a subset of s1 (always true when {@code s2} is empty).
 */
static public <T>boolean isSubset(Collection<T> s1, Collection<T> s2)
{
    return s1.containsAll(s2);
}
// Starts a search for the two largest values of the array: fst/snd are seeded with the larger and
// the smaller of array[0] and array[1] (paired with their indices), so the array must hold at
// least two elements.
// NOTE(review): the text of the final loop looks damaged — "for (i=2; i" breaks off and runs into
// the tail "(fst, snd);" of what is presumably the return statement, so the loop body that scans
// the remaining elements is missing here. Restore it from the original source.
static public Pair top2(double[] array)
{
int i, size = array.length;
DoubleIntPair fst, snd;
if (array[0] < array[1])
{
fst = toDoubleIntPair(array, 1);
snd = toDoubleIntPair(array, 0);
}
else
{
fst = toDoubleIntPair(array, 0);
snd = toDoubleIntPair(array, 1);
}
for (i=2; i(fst, snd);
}
// Variant of top2 that only looks at the positions listed in include: fst/snd are seeded with the
// larger and the smaller of array[include[0]] and array[include[1]] (paired with those indices),
// so include must hold at least two entries.
// NOTE(review): the text of the final loop looks damaged — "for (j=2; j" breaks off and runs into
// the tail "(fst, snd);" of what is presumably the return statement, so the loop body that scans
// the remaining entries of include is missing here. Restore it from the original source.
static public Pair top2(double[] array, int[] include)
{
int i, j, size = include.length;
DoubleIntPair fst, snd;
if (array[include[0]] < array[include[1]])
{
fst = toDoubleIntPair(array, include[1]);
snd = toDoubleIntPair(array, include[0]);
}
else
{
fst = toDoubleIntPair(array, include[0]);
snd = toDoubleIntPair(array, include[1]);
}
for (j=2; j(fst, snd);
}
/**
 * @param array the array to read the value from.
 * @param index the position of the value.
 * @return a new {@code DoubleIntPair} built from {@code array[index]} and {@code index}.
 */
static public DoubleIntPair toDoubleIntPair(double[] array, int index)
{
    final double value = array[index];
    return new DoubleIntPair(value, index);
}
/**
 * Splits the string with the pattern and collects the distinct, non-empty pieces.
 * @param s the string to split.
 * @param splitter the pattern whose matches separate the pieces.
 * @return a new hash set holding every piece after trimming, excluding pieces that are empty once trimmed.
 */
static public Set<String> getBagOfWords(String s, Pattern splitter)
{
    Set<String> set = new HashSet<>();

    for (String t : splitter.split(s))
    {
        t = t.trim();
        if (!t.isEmpty()) set.add(t);
    }

    return set;
}
/**
 * Reads the input stream line by line and collects the pieces of every line, as produced by
 * {@link #getBagOfWords(String, Pattern)}, into one set.
 * If an {@link IOException} occurs while reading, its stack trace is printed and the pieces
 * collected so far are returned.
 * @param in the input stream, wrapped by {@code IOUtils.createBufferedReader(in)}.
 * @param splitter the pattern whose matches separate the pieces of each line.
 * @return a new hash set holding the pieces of all lines.
 */
static public Set<String> getBagOfWords(InputStream in, Pattern splitter)
{
    BufferedReader reader = IOUtils.createBufferedReader(in);
    Set<String> set = new HashSet<>();
    String line;

    try
    {
        while ((line = reader.readLine()) != null)
            set.addAll(getBagOfWords(line, splitter));
    }
    catch (IOException e) {e.printStackTrace();}

    return set;
}
// NOTE(review): the text of this region looks damaged. The inner "for (i=0; i" header of the
// n-gram variant of getBagOfWords breaks off and runs straight into the signature of
// createSet(T...), so the n-gram loop body, the end of getBagOfWords, and anything that originally
// sat between the two methods are missing here. The surviving tail of createSet adds every item
// of the given array to a new HashSet and returns it. Restore this region from the original source
// before relying on it.
static public Set getBagOfWords(String[] document, int ngram, String delim)
{
Set set = new HashSet<>();
int n, i, len = document.length;
for (n=0; n<=ngram; n++)
{
for (i=0; iSet createSet(T... array)
{
Set set = new HashSet<>();
for (T item : array) set.add(item);
return set;
}
static public void normalize01(float[] array)
{
float min = min(array);
float div = max(array) - min;
for (int i=0; i
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy