All downloads are free. The search and download functionality uses the official Maven repository.

edu.emory.mathcs.nlp.common.util.DSUtils Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2015, Emory University
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.emory.mathcs.nlp.common.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Random;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import edu.emory.mathcs.nlp.common.collection.tuple.DoubleIntPair;
import edu.emory.mathcs.nlp.common.collection.tuple.Pair;

/**
 * @author Jinho D. Choi ({@code [email protected]})
 */
public class DSUtils
{
	/** Private no-op constructor: prevents this class from being instantiated from outside. */
	private DSUtils() {}
	
	/**
	 * Reads a set of strings from the input stream, one entry per line.
	 * Delegates to {@link #createStringHashSet(InputStream, boolean, boolean)} with
	 * {@code trim = true} and {@code decap = false}.
	 * @param in the input stream to read entries from.
	 * @return the set of entries read from the stream.
	 */
	static public Set<String> createStringHashSet(InputStream in)
	{
		return createStringHashSet(in, true, false);
	}
	
	/**
	 * Reads a set of strings from the input stream, one entry per line.
	 * The stream is not closed by this method. An {@link IOException} raised while reading is
	 * printed via {@code printStackTrace()} and the entries collected so far are returned.
	 * @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}.
	 * The file that the input-stream is created from consists of one entry per line.
	 * @param trim if {@code true}, each line is trimmed and lines that are empty after trimming are skipped.
	 * @param decap if {@code true}, each line is passed through {@code StringUtils.toLowerCase} before it is added.
	 * @return the set of entries read from the stream.
	 */
	static public Set<String> createStringHashSet(InputStream in, boolean trim, boolean decap)
	{
		BufferedReader reader = new BufferedReader(new InputStreamReader(in));
		Set<String> set = new HashSet<>();
		String line;

		try
		{
			while ((line = reader.readLine()) != null)
			{
				if (trim)
				{
					line = line.trim();
					if (line.isEmpty()) continue;
				}
				
				if (decap)
					line = StringUtils.toLowerCase(line);
				
				set.add(line);
			}
		}
		// Best effort: report the failure and return whatever was read before it.
		catch (IOException e) {e.printStackTrace();}
		
		return set;
	}
	
	/**
	 * Reads a string-to-string map from the input stream, one key/value entry per line.
	 * Delegates to {@link #createStringHashMap(InputStream, CharTokenizer, boolean)} with {@code trim = true}.
	 * @param in the input stream to read entries from.
	 * @param tokenizer splits each line into its key and value.
	 * @return the map of entries read from the stream.
	 */
	static public Map<String,String> createStringHashMap(InputStream in, CharTokenizer tokenizer)
	{
		return createStringHashMap(in, tokenizer, true);
	}
	
	/**
	 * Reads a string-to-string map from the input stream, one key/value entry per line.
	 * Each line is split by the tokenizer; the first token becomes the key and the second token the value,
	 * so a line that yields fewer than two tokens raises an {@link ArrayIndexOutOfBoundsException}.
	 * The stream is not closed by this method. An {@link IOException} raised while reading is
	 * printed via {@code printStackTrace()} and the entries collected so far are returned.
	 * @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}.
	 * The file that the input-stream is created from consists of one entry per line
	 * (a key and a value that the tokenizer can separate).
	 * @param tokenizer splits each line into tokens.
	 * @param trim if {@code true}, each line is trimmed and lines that are empty after trimming are skipped.
	 * @return the map of entries read from the stream; a later line with the same key overwrites an earlier one.
	 */
	static public Map<String,String> createStringHashMap(InputStream in, CharTokenizer tokenizer, boolean trim)
	{
		BufferedReader reader = new BufferedReader(new InputStreamReader(in));
		Map<String,String> map = new HashMap<>();
		String[] t;
		String line;
		
		try
		{
			while ((line = reader.readLine()) != null)
			{
				if (trim)
				{
					line = line.trim();
					if (line.isEmpty()) continue;
				}
				
				t = tokenizer.tokenize(line);
				map.put(t[0], t[1]);
			}
		}
		// Best effort: report the failure and return whatever was read before it.
		catch (IOException e) {e.printStackTrace();}
		
		return map;
	}

	static public >void sortReverseOrder(List list)
	{
		Collections.sort(list, Collections.reverseOrder());
	}
	
	static public >void sortReverseOrder(T[] array)
	{
		Arrays.sort(array, Collections.reverseOrder());
	}

	/**
	 * Checks whether the two collections share at least one item.
	 * The smaller collection is iterated and the larger one is probed with {@code contains}.
	 * @param col1 the first collection.
	 * @param col2 the second collection.
	 * @return {@code true} if some item of one collection is contained in the other.
	 */
	static public <T>boolean hasIntersection(Collection<T> col1, Collection<T> col2)
	{
		Collection<T> smaller = col1;
		Collection<T> larger  = col2;
		
		if (col2.size() < col1.size())
		{
			smaller = col2;
			larger  = col1;
		}
		
		for (T item : smaller)
		{
			if (larger.contains(item))
				return true;
		}
		
		return false;
	}

	/**
	 * Collects the string forms of the values of the fields returned by {@code cs.getFields()}.
	 * Each value is read with the class object itself as the receiver. If reading a field raises an
	 * {@link IllegalArgumentException} or {@link IllegalAccessException}, the stack trace is printed,
	 * the remaining fields are not visited, and the values collected so far are returned.
	 * A field whose value is {@code null} raises a {@link NullPointerException}.
	 * @param cs the class whose fields are read.
	 * @return a set containing all field values of this class.
	 */
	static public Set<String> getFieldSet(Class<?> cs)
	{
		Set<String> set = new HashSet<>();
		
		try
		{
			for (Field f : cs.getFields())
				set.add(f.get(cs).toString());
		}
		catch (IllegalArgumentException | IllegalAccessException e) {e.printStackTrace();}
		
		return set;
	}
	
	/**
	 * Bounds-checked list access.
	 * @param list the list to read from.
	 * @param index the position of the item.
	 * @return the index'th item if exists; otherwise, {@code null}.
	 */
	static public <T>T get(List<T> list, int index)
	{
		return isRange(list, index) ? list.get(index) : null;
	}
	
	/**
	 * Bounds-checked array access.
	 * @param array the array to read from.
	 * @param index the position of the item.
	 * @return the index'th item if exists; otherwise, {@code null}.
	 */
	static public <T>T get(T[] array, int index)
	{
		return isRange(array, index) ? array[index] : null;
	}
	
	/**
	 * Returns the final element of the list without removing it.
	 * @param list the list to read from.
	 * @return the last item in the list if exists; otherwise, {@code null}.
	 */
	static public <T>T getLast(List<T> list)
	{
		return list.isEmpty() ? null : list.get(list.size()-1);
	}

	/**
	 * Checks whether the index addresses an existing position of the list.
	 * @param list the list whose size bounds the index.
	 * @param index the position to check.
	 * @return {@code true} if {@code 0 <= index < list.size()}.
	 */
	static public <T>boolean isRange(List<T> list, int index)
	{
		return 0 <= index && index < list.size();
	}
	
	/**
	 * Checks whether the index addresses an existing position of the array.
	 * @param array the array whose length bounds the index.
	 * @param index the position to check.
	 * @return {@code true} if {@code 0 <= index < array.length}.
	 */
	static public <T>boolean isRange(T[] array, int index)
	{
		return 0 <= index && index < array.length;
	}
	
	/**
	 * Generates the sequence {@code beginIndex, beginIndex+gap, beginIndex+2*gap, ...} that stops before {@code endIndex}.
	 * The sequence ascends when {@code beginIndex < endIndex} and descends otherwise.
	 * An empty array is returned when the quotient {@code (endIndex-beginIndex)/gap} is negative,
	 * i.e., when {@code gap} steps away from {@code endIndex}.
	 * NOTE(review): {@code gap == 0} is not guarded here; the outcome depends on how
	 * {@code MathUtils.divide} and {@code MathUtils.ceil} treat a zero divisor - confirm against those helpers.
	 * @param beginIndex inclusive
	 * @param endIndex exclusive
	 * @param gap the increment added to reach the next value (negative for a descending sequence).
	 * @return the generated sequence.
	 */
	static public int[] range(int beginIndex, int endIndex, int gap)
	{
		double d = MathUtils.divide(endIndex-beginIndex, gap);
		if (d < 0) return new int[0];
		
		// presumably ceil(d) is the number of steps that fit before endIndex - verify against MathUtils.
		int[] array = new int[MathUtils.ceil(d)];
		int i, j;
		
		if (beginIndex < endIndex)
		{
			for (i=beginIndex,j=0; i<endIndex; i+=gap,j++)
				array[j] = i;
		}
		else
		{
			for (i=beginIndex,j=0; i>endIndex; i+=gap,j++)
				array[j] = i;
		}
	
		return array;
	}

	/**
	 * Shorthand for {@code range(0, size, 1)}.
	 * @param size the exclusive end index passed to {@link #range(int, int, int)}.
	 * @return the array returned by {@code range(0, size, 1)}.
	 */
	static public int[] range(int size)
	{
		final int begin = 0;
		final int step  = 1;
		return range(begin, size, step);
	}
	
	/**
	 * Exchanges the two values stored at the given positions of the array.
	 * @param array the array to modify in place.
	 * @param index0 the position of the first value.
	 * @param index1 the position of the second value.
	 */
	static public void swap(int[] array, int index0, int index1)
	{
		final int first  = array[index0];
		final int second = array[index1];
		
		array[index0] = second;
		array[index1] = first;
	}
	
	/**
	 * Exchanges the two items stored at the given positions of the list.
	 * @param list the list to modify in place; it must support {@code set}.
	 * @param index0 the position of the first item.
	 * @param index1 the position of the second item.
	 * @throws IndexOutOfBoundsException if either index is out of range.
	 */
	static public <T>void swap(List<T> list, int index0, int index1)
	{
		Collections.swap(list, index0, index1);
	}
	
	/** Calls {@code shuffle(int[], Random, int)}, where {@code lastIndex = array.length}. */
	static public void shuffle(int[] array, Random rand)
	{
		final int lastIndex = array.length;
		shuffle(array, rand, lastIndex);
	}
	
	/**
	 * Calls {@link #shuffle(List, Random, int)}, where {@code lastIndex = list.size()}.
	 * @param list the list to shuffle in place.
	 * @param rand the source of randomness.
	 */
	static public <T>void shuffle(List<T> list, Random rand)
	{
		shuffle(list, rand, list.size());
	}
	
	// NOTE(review): this region appears damaged. Three definitions run into one another and both
	// loop headers stop right after "i"; the text that originally followed (apparently everything
	// from a '<' up to the next '>') is missing. The loop bodies and the name of the third method
	// cannot be recovered from this file - restore them from the upstream source.
	static public void shuffle(int[] array, Random rand, int lastIndex)
	{
		// size is one less than lastIndex; presumably the loop bound - TODO confirm against upstream.
		int i, j, size = lastIndex - 1;
		
		// NOTE(review): the remainder of this loop and the end of shuffle(int[], Random, int) are
		// missing; the line continues with what looks like the signature of the List overload.
		for (i=0; ivoid shuffle(List list, Random rand, int lastIndex)
	{
		int i, j, size = lastIndex - 1;
		
		// NOTE(review): the remainder of this loop and the end of the List overload are missing;
		// the line continues with the parameter list of a method whose name is not visible.
		for (i=0; i list, String[] array)
	{
		// Appends every element of the array to the list, in array order.
		for (String item : array)
			list.add(item);
	}
	
	/**
	 * Removes the final element of the list; does nothing if the list is empty.
	 * @param list the list to modify in place.
	 */
	static public <T>void removeLast(List<T> list)
	{
		if (!list.isEmpty()) list.remove(list.size()-1);
	}
	
	// NOTE(review): this region appears damaged. The loop header of max(int[]) stops right after
	// "i" and runs into the return type of createEmptyListArray(int); the text in between
	// (apparently everything from a '<' up to the next '>') is missing, so the loop body, the end
	// of max(int[]) and any definitions that sat between the two methods are not recoverable here.
	static public int max(int[] array)
	{
		int i, size = array.length;
		// Starts from the first element, so an empty array raises ArrayIndexOutOfBoundsException.
		int m = array[0];
		
		// NOTE(review): truncated loop; the line continues with the tail of another signature.
		for (i=1; iList[] createEmptyListArray(int size)
	{
		// Allocates an array with one slot per requested list.
		List[] array = new ArrayList[size];
		
		// NOTE(review): truncated loop; presumably it stored a new empty list in every slot - TODO confirm.
		for (int i=0; i();
		
		return array;
	}
	
	static public PriorityQueue[] createEmptyPriorityQueueArray(int size, boolean ascending)
	{
		PriorityQueue[] queue = new PriorityQueue[size];
		
		for (int i=0; i() : new PriorityQueue<>(Collections.reverseOrder());
		
		return queue;
	}
	
	/**
	 * Copies the given items into a new, modifiable {@link ArrayList}, preserving their order.
	 * @param items the items to copy.
	 * @return a new list containing the items.
	 */
	@SafeVarargs
	static public <T>List<T> toList(T... items)
	{
		return Arrays.stream(items).collect(Collectors.toCollection(ArrayList::new));
	}
	
	/**
	 * Copies the given items into a new {@link HashSet}; duplicates collapse into one entry.
	 * @param items the items to copy.
	 * @return a new hash set containing the items.
	 */
	@SafeVarargs
	static public <T>Set<T> toHashSet(T... items)
	{
		return Arrays.stream(items).collect(Collectors.toCollection(HashSet::new));
	}
	
	static public Set merge(List> sets)
	{
		Set merge = new HashSet<>();
		for (Set set : sets) merge.addAll(set);
		return merge;
	}
	
	/**
	 * Copies the strings of the collection into a new array, in the collection's iteration order.
	 * @param col the collection to copy; may be {@code null}.
	 * @return a new array holding the collection's items, or {@code null} if the collection is {@code null}.
	 */
	static public String[] toArray(Collection<String> col)
	{
		if (col == null) return null;
		return col.toArray(new String[col.size()]);
	}
	
	/**
	 * Returns the items of {@code source} that are not contained in {@code remove}.
	 * Neither input collection is modified.
	 * @param source the collection to copy items from.
	 * @param remove the items to leave out.
	 * @return a new list with the remaining items, in the iteration order of {@code source}.
	 */
	static public <T>List<T> removeAll(Collection<T> source, Collection<T> remove)
	{
		List<T> list = new ArrayList<>(source);
		list.removeAll(remove);
		return list;
	}
	
	/**
	 * Checks whether every item of {@code s2} is contained in {@code s1}.
	 * @param s1 the candidate superset.
	 * @param s2 the candidate subset.
	 * @return true if s2 is a subset of s1 (always true when s2 is empty).
	 */
	static public <T>boolean isSubset(Collection<T> s1, Collection<T> s2)
	{
		return s1.containsAll(s2);
	}

	// NOTE(review): the loop near the end of this method appears damaged - its header stops right
	// after "i" and runs into the constructor arguments of the returned pair. The loop body is not
	// recoverable from this file; restore it from the upstream source.
	static public Pair top2(double[] array)
	{
		int i, size = array.length;
		DoubleIntPair fst, snd;
		
		// Seed the two candidates from the first two elements: fst takes the larger one
		// (index 0 when they are equal), snd takes the other. Requires array.length >= 2.
		if (array[0] < array[1])
		{
			fst = toDoubleIntPair(array, 1);
			snd = toDoubleIntPair(array, 0);
		}
		else
		{
			fst = toDoubleIntPair(array, 0);
			snd = toDoubleIntPair(array, 1);			
		}
		
		// NOTE(review): truncated; presumably scans the remaining elements and then returns a pair of (fst, snd) - TODO confirm.
		for (i=2; i(fst, snd);
	}
	
	// NOTE(review): the loop near the end of this method appears damaged - its header stops right
	// after "j" and runs into the constructor arguments of the returned pair. The loop body is not
	// recoverable from this file; restore it from the upstream source.
	static public Pair top2(double[] array, int[] include)
	{
		int i, j, size = include.length;
		DoubleIntPair fst, snd;
		
		// Seed the two candidates from the array positions named by the first two entries of include:
		// fst takes the larger value (include[0] when they are equal), snd takes the other.
		// Requires include.length >= 2.
		if (array[include[0]] < array[include[1]])
		{
			fst = toDoubleIntPair(array, include[1]);
			snd = toDoubleIntPair(array, include[0]);
		}
		else
		{
			fst = toDoubleIntPair(array, include[0]);
			snd = toDoubleIntPair(array, include[1]);
		}
		
		// NOTE(review): truncated; presumably scans the remaining entries of include and then returns a pair of (fst, snd) - TODO confirm.
		for (j=2; j(fst, snd);
	}
	
	/**
	 * Wraps one array element together with its position.
	 * @param array the array to read the value from.
	 * @param index the position of the value.
	 * @return a new pair constructed from {@code array[index]} and {@code index}.
	 */
	static public DoubleIntPair toDoubleIntPair(double[] array, int index)
	{
		final double value = array[index];
		return new DoubleIntPair(value, index);
	}
	
	/**
	 * Splits the string with the given pattern and collects the distinct, trimmed, non-empty pieces.
	 * @param s the string to split.
	 * @param splitter the pattern whose matches separate the pieces.
	 * @return the set of trimmed, non-empty pieces.
	 */
	static public Set<String> getBagOfWords(String s, Pattern splitter)
	{
		Set<String> set = new HashSet<>();
		
		for (String t : splitter.split(s))
		{
			t = t.trim();
			if (!t.isEmpty()) set.add(t);
		}
		
		return set;
	}
	
	/**
	 * Reads the input stream line by line and collects the bag of words of every line
	 * using {@link #getBagOfWords(String, Pattern)}.
	 * The stream is not closed by this method. An {@link IOException} raised while reading is
	 * printed via {@code printStackTrace()} and the words collected so far are returned.
	 * @param in the input stream, wrapped by {@code IOUtils.createBufferedReader(in)}.
	 * @param splitter the pattern whose matches separate the words of a line.
	 * @return the union of the bags of words of all lines.
	 */
	static public Set<String> getBagOfWords(InputStream in, Pattern splitter)
	{
		BufferedReader reader = IOUtils.createBufferedReader(in);
		Set<String> set = new HashSet<>();
		String line;
		
		try
		{
			while ((line = reader.readLine()) != null) 
				set.addAll(getBagOfWords(line, splitter));
		}
		// Best effort: report the failure and return whatever was read before it.
		catch (IOException e) {e.printStackTrace();}
		
		return set;
	}
	
	// NOTE(review): this region appears damaged. The inner loop header stops right after "i" and
	// runs into the return type of createSet(T...); the text in between (apparently everything from
	// a '<' up to the next '>') is missing, so the inner loop body, the end of this method and the
	// start of the createSet declaration are not recoverable from this file.
	static public Set getBagOfWords(String[] document, int ngram, String delim)
	{
		Set set = new HashSet<>();
		int n, i, len = document.length;
		
		// n runs from 0 to ngram inclusive.
		for (n=0; n<=ngram; n++)
		{
			// NOTE(review): truncated loop; the line continues with the tail of another signature.
			for (i=0; iSet createSet(T... array)
	{
		// Adds every element of the varargs array to a new hash set.
		Set set = new HashSet<>();
		for (T item : array) set.add(item);
		return set;
	}
	
	static public void normalize01(float[] array)
	{
		float min = min(array);
		float div = max(array) - min;
		
		for (int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy