All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.fnlp.util.MyCollection Maven / Gradle / Ivy

/**
*  This file is part of FNLP (formerly FudanNLP).
*  
*  FNLP is free software: you can redistribute it and/or modify
*  it under the terms of the GNU Lesser General Public License as published by
*  the Free Software Foundation, either version 3 of the License, or
*  (at your option) any later version.
*  
*  FNLP is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU Lesser General Public License for more details.
*  
*  You should have received a copy of the GNU General Public License
*  along with FudanNLP.  If not, see <http://www.gnu.org/licenses/>.
*  
*  Copyright 2009-2014 www.fnlp.org. All rights reserved. 
*/

package org.fnlp.util;

import gnu.trove.iterator.TIntFloatIterator;
import gnu.trove.iterator.hash.TObjectHashIterator;
import gnu.trove.map.hash.TIntFloatHashMap;
import gnu.trove.map.hash.TObjectFloatHashMap;
import gnu.trove.set.hash.TCharHashSet;
import gnu.trove.set.hash.THashSet;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
/**
 * 常用集合操作
 * @author xpqiu
 *
 */
public class MyCollection {

	/**
	 * Returns the keys of a Trove int-to-float map ordered by the absolute value
	 * of their mapped floats, largest first.
	 *
	 * @param tmap map from integer id to weight; it is only read
	 * @return the keys of {@code tmap} sorted by descending {@code |value|};
	 *         an empty array when {@code tmap} is empty
	 */
	public static int[] sort(TIntFloatHashMap tmap) {
		// Box the magnitudes into a java.util map so the sort(Map) overload can order them.
		HashMap<Integer, Float> magnitudes = new HashMap<Integer, Float>();
		TIntFloatIterator it = tmap.iterator();
		while (it.hasNext()) {
			it.advance();
			magnitudes.put(it.key(), Math.abs(it.value()));
		}

		List sorted = sort(magnitudes);
		int[] idx = new int[sorted.size()];
		int i = 0;
		// Iterate as Object and cast: sort(Map) returns a raw List, so its elements
		// cannot be assigned to Entry without an explicit cast.
		for (Object o : sorted) {
			idx[i++] = (Integer) ((Entry) o).getKey();
		}
		return idx;
	}

	/**
	 * Sorts the entries of a map by value, largest first.
	 *
	 * @param map map whose values implement {@link Comparable} and are mutually
	 *            comparable; it is only read
	 * @return a new list holding the map's entries in descending order of value
	 * @throws ClassCastException if a value does not implement {@link Comparable}
	 */
	@SuppressWarnings({ "unchecked", "rawtypes" })
	public static List sort(Map map) {
		LinkedList<Entry> list = new LinkedList<Entry>(map.entrySet());

		Collections.sort(list, new Comparator<Entry>() {
			@Override
			public int compare(Entry o1, Entry o2) {
				// Swap the operands instead of negating the result: -Integer.MIN_VALUE
				// is still negative, so negation would misorder such a comparison.
				return ((Comparable) o2.getValue()).compareTo(o1.getValue());
			}
		});
		return list;
	}

	/**
	 * Appends every element of a Trove set to a list, in the set's iteration order.
	 *
	 * @param newset source set; each element is cast to {@link String}
	 * @param al     destination list, modified in place
	 * @throws ClassCastException if {@code newset} holds a non-String element
	 */
	@SuppressWarnings("unchecked")
	public static void TSet2List(THashSet newset, ArrayList al) {
		// Iterate as Object and cast explicitly: with the raw THashSet parameter the
		// iterator yields Object, so next() cannot be assigned straight to a String.
		for (Object element : newset) {
			al.add((String) element);
		}
	}
	/**
	 * Writes a list of map entries to a UTF-8 text file, one entry per line
	 * terminated by {@code "\n"}.
	 * <p>
	 * This is a best-effort operation: a failure is reported on {@code System.err}
	 * instead of being thrown, and the file may then be incomplete.
	 *
	 * @param entryList list whose elements are {@link Entry} instances
	 * @param file      path of the file to create or overwrite
	 * @param b         {@code true} to write {@code key<TAB>value};
	 *                  {@code false} to write the key only
	 */
	public static void write(List entryList, String file, boolean b) {
		BufferedWriter bout = null;
		try {
			bout = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(file), "UTF-8"));
			for (Object o : entryList) {
				Entry entry = (Entry) o;
				bout.write(entry.getKey().toString());
				if (b) {
					bout.write("\t");
					bout.write(entry.getValue().toString());
				}
				bout.write("\n");
			}
		} catch (Exception e) {
			// Keep the no-throw contract, but do not hide the failure from the operator.
			System.err.println("Failed to write entries to " + file + ": " + e);
		} finally {
			if (bout != null) {
				try {
					// close() also flushes, so a failure here means data may be missing.
					bout.close();
				} catch (IOException e) {
					System.err.println("Failed to close " + file + ": " + e);
				}
			}
		}
	}
	/**
	 * Writes a map to a UTF-8 text file, one {@code key<TAB>value} pair per line.
	 * Keys and values are rendered with {@code toString()} and each line ends with
	 * the platform line separator.
	 *
	 * @param map  entries to write; keys and values must be non-null
	 * @param file path of the file to create or overwrite
	 * @throws IOException if the file cannot be opened or written
	 */
	public static void write(Map map,String file) throws IOException {
		BufferedWriter bout = new BufferedWriter(new OutputStreamWriter(
				new FileOutputStream(file), "UTF-8"));
		try {
			for (Object o : map.entrySet()) {
				Map.Entry entry = (Map.Entry) o;
				// Render both sides before writing so a null never leaves a half-written line.
				String key = entry.getKey().toString();
				String v = entry.getValue().toString();
				bout.append(key);
				bout.append("\t");
				bout.append(v);
				bout.newLine();
			}
		} finally {
			// Close even when a write fails so the file handle is not leaked.
			bout.close();
		}
	}

	/**
	 * 每行为一个字符集合
	 * @param path
	 * @return
	 * @throws IOException
	 */
	public static ArrayList loadTCharHashSetArray(String path) throws IOException{


		BufferedReader bfr;
		try {
			bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8"));
		} catch (FileNotFoundException e) {
			System.out.print("没找到文件:"+path);
			return null;
		}
		ArrayList setArray= new ArrayList();
		String line = null;			
		int count=0;

		while ((line = bfr.readLine()) != null) {
			if(line.length()==0)
				continue;
			TCharHashSet set = new TCharHashSet();
			for(int i=0;i loadTSet(String path,boolean b) throws IOException{

		THashSet dict = new THashSet();
		BufferedReader bfr;
		try {
			bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8"));
		} catch (FileNotFoundException e) {
			System.out.print("没找到文件:"+path);
			return dict;
		}
		String line = null;		
		while ((line = bfr.readLine()) != null) {
			if(line.length()==0)
				continue;
			if(b)
				dict.add(line);
			else{
				String[] toks = line.split("\\s+");
				for(String tok:toks)
					dict.add(tok);
			}
		}
		bfr.close();
		return dict;
	}

	/**
	 * Loads a file into a Trove set, one element per line.
	 * Shorthand for {@code loadTSet(path, true)}.
	 *
	 * @param path file to read
	 * @return the set produced by {@code loadTSet(path, true)}
	 * @throws IOException if reading the file fails
	 */
	public static THashSet loadTSet(String path) throws IOException{
		final boolean wholeLineIsElement = true;
		return loadTSet(path, wholeLineIsElement);
	}
	/**
	 * Removes duplicate lines from a file in place: the file is loaded with
	 * {@code loadTSet(path, true)} and the resulting set is written back to the
	 * same path.
	 *
	 * @param path file to rewrite
	 * @throws IOException if loading the file fails
	 */
	public static void cleanSet(String path) throws IOException{
		write(loadTSet(path, true), path);
	}
	

	/**
	 * Loads a file into a {@link HashSet}, one element per line.
	 * Shorthand for {@code loadSet(path, true)}.
	 *
	 * @param path file to read
	 * @return the set produced by {@code loadSet(path, true)}
	 * @throws IOException if reading the file fails
	 */
	public static HashSet loadSet(String path) throws IOException{
		final boolean wholeLineIsElement = true;
		return loadSet(path, wholeLineIsElement);
	}
	/**
	 * Loads a UTF-8 file into a set of strings. Empty lines are skipped.
	 *
	 * @param path file to read
	 * @param b    {@code true} to add each line as a single element;
	 *             {@code false} to split each line on runs of whitespace and
	 *             add every token
	 * @return the loaded set; empty when the file cannot be found (a message is
	 *         printed to standard output in that case)
	 * @throws IOException if reading the file fails
	 */
	public static HashSet loadSet(String path,boolean b) throws IOException{
		HashSet<String> dict = new HashSet<String>();
		BufferedReader bfr;
		try {
			bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8"));
		} catch (FileNotFoundException e) {
			System.out.print("没找到文件:"+path);
			return dict;
		}
		try {
			String line;
			while ((line = bfr.readLine()) != null) {
				if(line.length()==0)
					continue;

				if(b)
					dict.add(line);
				else{
					String[] toks = line.split("\\s+");
					for(String tok:toks)
						dict.add(tok);
				}
			}
		} finally {
			// The reader was previously never closed; release the file handle on every path.
			bfr.close();
		}
		return dict;
	}
	
	/**
	 * Adds the contents of a UTF-8 file to an existing set of strings.
	 * Empty lines are skipped.
	 *
	 * @param dict set to add to; it is modified in place
	 * @param path file to read
	 * @param b    {@code true} to add each line as a single element;
	 *             {@code false} to split each line on runs of whitespace and
	 *             add every token
	 * @return {@code dict} itself; unchanged when the file cannot be found
	 *         (a message is printed to standard output in that case)
	 * @throws IOException if reading the file fails
	 */
	@SuppressWarnings("unchecked")
	public static Set loadSet(Set dict,String path,boolean b) throws IOException{

		BufferedReader bfr;
		try {
			bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8"));
		} catch (FileNotFoundException e) {
			System.out.print("没找到文件:"+path);
			return dict;
		}
		try {
			String line;
			while ((line = bfr.readLine()) != null) {
				if(line.length()==0)
					continue;

				if(b)
					dict.add(line);
				else{
					String[] toks = line.split("\\s+");
					for(String tok:toks)
						dict.add(tok);
				}
			}
		} finally {
			// The reader was previously never closed; release the file handle on every path.
			bfr.close();
		}
		return dict;
	}

	/**
	 * Loads a UTF-8 file of {@code key<TAB>float} lines into a Trove map.
	 * Each line is split at its last tab. Empty lines and lines without a tab
	 * are skipped.
	 *
	 * @param path file to read
	 * @return map from the text before the last tab to the parsed float after it
	 * @throws IOException           if the file cannot be opened or read
	 * @throws NumberFormatException if the text after the last tab is not a float
	 */
	public static TObjectFloatHashMap loadTStringFloatMap(String path) throws IOException {
		TObjectFloatHashMap<String> dict = new TObjectFloatHashMap<String>();
		BufferedReader 	bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf8"));
		try {
			String line;
			while ((line = bfr.readLine()) != null) {
				if (line.length() == 0)
					continue;
				int idx = line.lastIndexOf("\t");
				// A line without a tab would make substring(0, -1) throw; skip it,
				// as loadStringFloatMapInMultiFiles already does.
				if (idx == -1)
					continue;
				dict.put(line.substring(0, idx), Float.parseFloat(line.substring(idx + 1)));
			}
		} finally {
			// Close even when parsing fails so the file handle is not leaked.
			bfr.close();
		}
		return dict;
	}

	/**
	 * Loads a tab-separated file into a map without swapping the two columns.
	 * Shorthand for {@code loadStringStringMap(path, false)}.
	 *
	 * @param path file to read
	 * @return the map produced by {@code loadStringStringMap(path, false)}
	 * @throws IOException if reading the file fails
	 */
	public static HashMap loadStringStringMap(String path) throws IOException{
		final boolean swapColumns = false;
		return loadStringStringMap(path, swapColumns);
	}

	/**
	 * Loads a UTF-8 file of tab-separated pairs into a map of strings.
	 * Each line is split at its last tab. Empty lines and lines without a tab
	 * are skipped.
	 *
	 * @param path     file to read
	 * @param isRevert {@code false} to map the text before the last tab to the
	 *                 text after it; {@code true} to map the other way round
	 * @return the loaded map; empty when the file cannot be found
	 * @throws IOException if reading the file fails
	 */
	public static HashMap loadStringStringMap(String path,boolean isRevert) throws IOException{

		HashMap<String, String> dict = new HashMap<String, String>();
		BufferedReader bfr;
		try {
			bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8"));
		} catch (FileNotFoundException e) {
			return dict;
		}
		try {
			String line;
			while ((line = bfr.readLine()) != null) {
				if(line.length()==0)
					continue;
				int idx = line.lastIndexOf("\t");
				// A line without a tab would make substring(0, -1) throw; skip it,
				// as loadStringFloatMapInMultiFiles already does.
				if(idx==-1)
					continue;
				String left = line.substring(0,idx);
				String right = line.substring(idx+1);
				if(isRevert)
					dict.put(right, left);
				else
					dict.put(left, right);
			}
		} finally {
			// Close even when reading fails so the file handle is not leaked.
			bfr.close();
		}
		return dict;
	}


	/**
	 * Loads a UTF-8 file of {@code key<TAB>float} lines into a map.
	 * Each line is split at its last tab. Empty lines and lines without a tab
	 * are skipped.
	 *
	 * @param path file to read
	 * @return map from the text before the last tab to the parsed {@link Float}
	 *         after it; empty when the file cannot be found
	 * @throws IOException           if reading the file fails
	 * @throws NumberFormatException if the text after the last tab is not a float
	 */
	public static HashMap loadStringFloatMap(String path) throws IOException{

		HashMap<String, Float> dict = new HashMap<String, Float>();
		BufferedReader bfr;
		try {
			bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8"));
		} catch (FileNotFoundException e) {
			return dict;
		}
		try {
			String line;
			while ((line = bfr.readLine()) != null) {
				if(line.length()==0)
					continue;
				int idx = line.lastIndexOf("\t");
				// A line without a tab would make substring(0, -1) throw; skip it,
				// as loadStringFloatMapInMultiFiles already does.
				if(idx==-1)
					continue;
				String key = line.substring(0,idx);
				String v = line.substring(idx+1);
				dict.put(key, Float.parseFloat(v));
			}
		} finally {
			// The reader was previously never closed; release the file handle on every path.
			bfr.close();
		}
		return dict;
	}

	/**
	 * Loads {@code key<TAB>float} lines from several UTF-8 files into one map,
	 * summing the values of keys that occur more than once.
	 * Each line is split at its last tab. Empty lines and lines without a tab
	 * are skipped.
	 *
	 * @param sfiles file paths separated by {@code ';'}
	 * @return map from key to the sum of all floats read for that key
	 * @throws NumberFormatException if the text after the last tab is not a float
	 * @throws IOException           if a file cannot be opened or read
	 */
	public static HashMap loadStringFloatMapInMultiFiles(String sfiles) throws NumberFormatException, IOException {
		HashMap<String, Float> map = new HashMap<String, Float>();

		String[] files = sfiles.split(";");
		for(String f:files){
			BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f),"utf8"));
			try {
				String line;
				while ((line = br.readLine()) != null) {
					if(line.length()==0)
						continue;
					int idx = line.lastIndexOf("\t");
					if(idx==-1)
						continue;
					String key = line.substring(0,idx);
					float v = Float.parseFloat(line.substring(idx+1));
					// Values stored here are never null, so a null lookup means "first occurrence".
					Float previous = map.get(key);
					if (previous != null)
						map.put(key, v + previous.floatValue());
					else
						map.put(key, v);
				}
			} finally {
				// Each file's reader was previously never closed; release it before the next file.
				br.close();
			}
		}
		return map;
	}

	/**
	 * Writes every element of a collection to a UTF-8 text file, one
	 * {@code toString()} rendering per line terminated by {@code "\n"}.
	 * <p>
	 * This is a best-effort operation: a failure is reported on {@code System.err}
	 * instead of being thrown, and the file may then be incomplete.
	 *
	 * @param set  elements to write, in iteration order; elements must be non-null
	 * @param file path of the file to create or overwrite
	 */
	public static void write(Iterable set, String file) {
		BufferedWriter bout = null;
		try {
			bout = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(file), "UTF-8"));
			for (Object element : set) {
				bout.write(element.toString());
				bout.write("\n");
			}
		} catch (Exception e) {
			// Keep the no-throw contract, but do not hide the failure from the operator.
			System.err.println("Failed to write elements to " + file + ": " + e);
		} finally {
			if (bout != null) {
				try {
					// close() also flushes, so a failure here means data may be missing.
					bout.close();
				} catch (IOException e) {
					System.err.println("Failed to close " + file + ": " + e);
				}
			}
		}
	}
	
	
	public static HashMap> loadMultiValueSetMap(String path) throws IOException {
		return loadMultiValueSetMap(new FileInputStream(path));
	}

	public static HashMap> loadMultiValueSetMap(InputStream is) throws IOException {
		HashMap> dict = new HashMap>();
		BufferedReader bfr;
		try {
			bfr = new BufferedReader(new InputStreamReader(is,"utf8"));
		} catch (Exception e) {
			return dict;
		}
		String line = null;
		while ((line = bfr.readLine()) != null) {
			if(line.length()==0)
				continue;
			String[] toks = line.split("\\s");
			HashSet v = dict.get(toks[0]);
			if(v==null){
				v = new HashSet();
			}
			for(int i=1;i loadMultiValueMap(String path) throws IOException {
		return loadMultiValueMap(new FileInputStream(path));
	}

	/**
	 * Reads a multi-value map from a UTF-8 stream. Each non-empty line is split
	 * on single whitespace characters: the first token is the key and the
	 * remaining tokens become its {@code String[]} value. A later line with the
	 * same key replaces the earlier value. The stream is not closed.
	 *
	 * @param is stream to read
	 * @return map from key to the array of tokens that followed it
	 * @throws IOException if reading the stream fails
	 */
	public static HashMap loadMultiValueMap(InputStream is) throws IOException {
		HashMap<String, String[]> dict = new HashMap<String, String[]>();
		BufferedReader bfr;
		try {
			bfr = new BufferedReader(new InputStreamReader(is,"utf8"));
		} catch (Exception e) {
			return dict;
		}
		String line = null;

		while ((line = bfr.readLine()) != null) {
			if(line.length()==0)
				continue;
			String[] toks = line.split("\\s");
			// split drops trailing empty strings, so a whitespace-only line yields no
			// tokens at all; copyOfRange(toks, 1, 0) would then throw. Skip such lines.
			if(toks.length==0)
				continue;
			String[] v = Arrays.copyOfRange(toks, 1, toks.length);
			dict.put(toks[0], v);
		}
		return dict;
	}
	
	
	/**
	 * Writes a multi-value map to a file with the key at the start of each line
	 * and a tab between values. Shorthand for
	 * {@code writeMultiValueMap(map, file, true, "\t")}.
	 *
	 * @param map  multi-value map to write
	 * @param file path of the file to create or overwrite
	 */
	public static void writeMultiValueMap(Map map,	String file) {
		final boolean withKey = true;
		final String valueDelimiter = "\t";
		writeMultiValueMap(map, file, withKey, valueDelimiter);
	}

	/**
	 * 写多值Map,Map结构为HashMap>
	 * @param map HashMap>
	 * @param file
	 */
	public static void writeMultiValueMap(Map> map,	String file,boolean hasKey,String delim) {

		try {
			BufferedWriter bout = new BufferedWriter(new OutputStreamWriter(
					new FileOutputStream(file), "UTF-8"));
			Iterator>> it1 = map.entrySet().iterator();
			while(it1.hasNext()){
				Entry> entry = it1.next();
				if(hasKey){
					bout.write(entry.getKey());
					bout.write("\t");
				}
				Collection val = entry.getValue();
				if(val==null){
					if(it1.hasNext())
						bout.write("\n");
					continue;
				}
				Iterator it = val.iterator();
				while (it.hasNext()) {
					String en = it.next();
					bout.write(en);
					if(it.hasNext())
						bout.write(delim);
				}
				if(it1.hasNext())
					bout.write("\n");
			}
			bout.close();

		} catch (Exception e) {
			System.err.println(e.toString());
			e.printStackTrace();
			
		}
	}

	/**
	 * 写多值Map,Map结构为HashMap>
	 * @param map HashMap>
	 * @param file
	 * @return 
	 * @throws IOException 
	 */
	public static HashSet> loadSetSet(String file) throws IOException {
		HashSet> dict = new HashSet> ();
		BufferedReader bfr;
		try {
			bfr = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf8"));
		} catch (Exception e) {
			return dict;
		}
		String line = null;			
		int count=0;

		while ((line = bfr.readLine()) != null) {			
			if(line.length()==0)
				continue;
			HashSet set = new HashSet();
			String[] toks = line.split("\\s");
			for(String t:toks){
				set.add(t);
			}
			dict.add(set);
		}
		return dict;
	}



	/**
	 * Counts how many elements of a list are contained in a Trove set.
	 * An element that occurs several times in the list is counted each time.
	 *
	 * @param set      set to look elements up in
	 * @param subwords elements to test
	 * @return the number of entries of {@code subwords} found in {@code set}
	 */
	public static int isContain(THashSet set,
			ArrayList subwords) {
		int hits = 0;
		// Iterate as Object: with the raw ArrayList parameter the elements cannot be
		// bound to a String loop variable without a cast.
		for (Object word : subwords) {
			if (set.contains(word))
				hits++;
		}
		return hits;
	}


	/**
	 * Returns the length of the longest string in a Trove set.
	 *
	 * @param set set whose elements are cast to {@link String}
	 * @return the largest {@code length()} among the elements, or 0 for an empty set
	 * @throws ClassCastException if {@code set} holds a non-String element
	 */
	public static int getLength(THashSet set) {
		int longest = 0;
		// Iterate as Object and cast explicitly: with the raw THashSet parameter the
		// iterator yields Object, so next() cannot be assigned straight to a String.
		for (Object element : set) {
			int len = ((String) element).length();
			if (len > longest)
				longest = len;
		}
		return longest;
	}
	/**
	 * Loads a UTF-8 file into a list of strings, in file order.
	 * Empty lines are skipped.
	 *
	 * @param file  file to read
	 * @param delim regular expression passed to {@link String#split(String)} to
	 *              break each line into elements, or {@code null} to add each
	 *              line as a single element
	 * @return the loaded elements
	 * @throws IOException if the file cannot be opened or read
	 */
	public static ArrayList loadList(String file,String delim) throws IOException {
		ArrayList<String> list = new ArrayList<String>();
		BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf8"));
		try {
			String line;
			while ((line = bfr.readLine()) != null) {
				if(line.length()==0)
					continue;
				if(delim!=null){
					String[] toks = line.split(delim);
					for(String t:toks){
						list.add(t);
					}
				}else{
					list.add(line);
				}
			}
		} finally {
			// Close even when reading fails so the file handle is not leaked.
			bfr.close();
		}
		return list;
	}

	// NOTE(review): this span is damaged. The source text is missing from the
	// middle of the for-loop header below up to the "c2e," parameter, so the rest
	// of asList and the opening of a second method (whose name and return type are
	// not visible here) have been lost. The trailing "String c2ePath) { }" belongs
	// to that second method, whose visible body is empty. Restore both definitions
	// from the upstream FNLP sources before compiling.
	// asList presumably copies strs into a new list — TODO confirm against upstream.
	public static List asList(String[] strs) {
		ArrayList list= new ArrayList();
		for(int i=0;i c2e,
			String c2ePath) {
		
		
	}



}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy