
// org.fnlp.util.MyCollection Maven / Gradle / Ivy
/**
* This file is part of FNLP (formerly FudanNLP).
*
* FNLP is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* FNLP is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with FudanNLP. If not, see <http://www.gnu.org/licenses/>.
*
* Copyright 2009-2014 www.fnlp.org. All rights reserved.
*/
package org.fnlp.util;
import gnu.trove.iterator.TIntFloatIterator;
import gnu.trove.iterator.hash.TObjectHashIterator;
import gnu.trove.map.hash.TIntFloatHashMap;
import gnu.trove.map.hash.TObjectFloatHashMap;
import gnu.trove.set.hash.TCharHashSet;
import gnu.trove.set.hash.THashSet;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
/**
* Common collection operations.
* @author xpqiu
*
*/
public class MyCollection {
/**
 * Ranks the keys of a sparse int-to-float map by the absolute value of
 * their associated values, largest first.
 *
 * @param tmap map to rank; it is only read
 * @return the keys of {@code tmap}, ordered by descending {@code |value|}
 */
public static int[] sort(TIntFloatHashMap tmap) {
    // Box the entries so the generic sort(Map) overload can order them.
    HashMap<Integer, Float> map = new HashMap<Integer, Float>();
    TIntFloatIterator it = tmap.iterator();
    while (it.hasNext()) {
        it.advance();
        map.put(it.key(), Math.abs(it.value()));
    }
    @SuppressWarnings("unchecked")
    List<Entry<Integer, Float>> list = sort(map);
    int[] idx = new int[list.size()];
    int i = 0;
    for (Entry<Integer, Float> entry : list) {
        idx[i++] = entry.getKey();
    }
    return idx;
}
/**
 * Sorts the entries of a map by value, largest first.
 * <p>
 * The values must be mutually {@link Comparable}; otherwise a
 * {@link ClassCastException} is thrown while sorting.
 *
 * @param map map whose entries are ranked; the map itself is not modified
 * @return a new list holding the map's {@link Entry} objects in descending value order
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public static List sort(Map map) {
    LinkedList<Entry> list = new LinkedList<Entry>(map.entrySet());
    Collections.sort(list, new Comparator<Entry>() {
        @Override
        public int compare(Entry o1, Entry o2) {
            // Swap the operands instead of negating the result:
            // -compareTo() is wrong when compareTo returns Integer.MIN_VALUE.
            return ((Comparable) o2.getValue()).compareTo(o1.getValue());
        }
    });
    return list;
}
/**
 * Appends every element of a Trove string set to a list.
 *
 * @param newset set whose elements are copied
 * @param al     list that receives the elements, in the set's iteration order
 */
public static void TSet2List(THashSet<String> newset, ArrayList<String> al) {
    for (String s : newset) {
        al.add(s);
    }
}
/**
 * Writes a list of map entries to a UTF-8 text file, one entry per line.
 * <p>
 * Each line holds the entry's key and, when {@code b} is true, a tab
 * followed by the entry's value. Failures are reported on standard error
 * instead of being propagated, and the writer is always closed.
 *
 * @param entryList list whose elements are {@link Entry} objects
 * @param file      path of the output file; an existing file is overwritten
 * @param b         whether the value is written after the key
 */
public static void write(List entryList, String file, boolean b) {
    BufferedWriter bout = null;
    try {
        bout = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(file), "UTF-8"));
        for (Object element : entryList) {
            Entry entry = (Entry) element;
            bout.write(entry.getKey().toString());
            if (b) {
                bout.write("\t");
                bout.write(entry.getValue().toString());
            }
            bout.write("\n");
        }
    } catch (Exception e) {
        // Still best-effort (the signature declares no exception), but no longer silent.
        System.err.println(e.toString());
        e.printStackTrace();
    } finally {
        if (bout != null) {
            try {
                bout.close();
            } catch (IOException e) {
                System.err.println(e.toString());
            }
        }
    }
}
/**
 * Writes a map to a UTF-8 text file, one {@code key<TAB>value} pair per line.
 * <p>
 * Keys and values are rendered with {@code toString()}; lines end with the
 * platform line separator. The writer is closed even when writing fails.
 *
 * @param map  map to write; neither keys nor values may be null
 * @param file path of the output file; an existing file is overwritten
 * @throws IOException if the file cannot be opened or written
 */
public static void write(Map map, String file) throws IOException {
    BufferedWriter bout = new BufferedWriter(new OutputStreamWriter(
            new FileOutputStream(file), "UTF-8"));
    try {
        for (Object element : map.entrySet()) {
            Map.Entry entry = (Map.Entry) element;
            String key = entry.getKey().toString();
            String v = entry.getValue().toString();
            bout.append(key);
            bout.append("\t");
            bout.append(v);
            bout.newLine();
        }
    } finally {
        bout.close();
    }
}
/**
* 每行为一个字符集合
* @param path
* @return
* @throws IOException
*/
public static ArrayList loadTCharHashSetArray(String path) throws IOException{
BufferedReader bfr;
try {
bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8"));
} catch (FileNotFoundException e) {
System.out.print("没找到文件:"+path);
return null;
}
ArrayList setArray= new ArrayList();
String line = null;
int count=0;
while ((line = bfr.readLine()) != null) {
if(line.length()==0)
continue;
TCharHashSet set = new TCharHashSet();
for(int i=0;i loadTSet(String path,boolean b) throws IOException{
THashSet dict = new THashSet();
BufferedReader bfr;
try {
bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path),"utf8"));
} catch (FileNotFoundException e) {
System.out.print("没找到文件:"+path);
return dict;
}
String line = null;
while ((line = bfr.readLine()) != null) {
if(line.length()==0)
continue;
if(b)
dict.add(line);
else{
String[] toks = line.split("\\s+");
for(String tok:toks)
dict.add(tok);
}
}
bfr.close();
return dict;
}
/**
 * Loads a string set from a text file, treating each line as one element.
 * <p>
 * Delegates to {@link #loadTSet(String, boolean)} with the flag set to {@code true}.
 *
 * @param path file to read
 * @return the loaded set
 * @throws IOException if the delegate fails while reading
 */
public static THashSet loadTSet(String path) throws IOException {
    final boolean wholeLinePerElement = true;
    return loadTSet(path, wholeLinePerElement);
}
/**
 * Removes duplicate lines from a file in place: the file is loaded as a set
 * (one element per line) and the set is written back to the same path.
 * The order of the rewritten lines follows the set's iteration order.
 *
 * @param path file to de-duplicate
 * @throws IOException if loading the file fails
 */
public static void cleanSet(String path) throws IOException {
    write(loadTSet(path, true), path);
}
/**
 * Loads a {@link HashSet} from a text file, treating each line as one element.
 * <p>
 * Delegates to {@link #loadSet(String, boolean)} with the flag set to {@code true}.
 *
 * @param path file to read
 * @return the loaded set
 * @throws IOException if the delegate fails while reading
 */
public static HashSet loadSet(String path) throws IOException {
    final boolean wholeLinePerElement = true;
    return loadSet(path, wholeLinePerElement);
}
/**
 * Loads a set of strings from a UTF-8 text file. Empty lines are skipped.
 * A missing file is reported on standard output and yields an empty set.
 * The reader is closed before returning.
 *
 * @param path file to read
 * @param b    {@code true}: each line is one element;
 *             {@code false}: each line is split on whitespace into several elements
 * @return the loaded set
 * @throws IOException if reading fails
 */
public static HashSet<String> loadSet(String path, boolean b) throws IOException {
    HashSet<String> dict = new HashSet<String>();
    BufferedReader bfr;
    try {
        bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf8"));
    } catch (FileNotFoundException e) {
        System.out.print("没找到文件:" + path);
        return dict;
    }
    try {
        String line;
        while ((line = bfr.readLine()) != null) {
            if (line.length() == 0) {
                continue;
            }
            if (b) {
                dict.add(line);
            } else {
                for (String tok : line.split("\\s+")) {
                    dict.add(tok);
                }
            }
        }
    } finally {
        bfr.close();
    }
    return dict;
}
/**
 * Adds the strings found in a UTF-8 text file to an existing set. Empty lines are skipped.
 * A missing file is reported on standard output and leaves the set unchanged.
 * The reader is closed before returning.
 *
 * @param dict set that receives the elements
 * @param path file to read
 * @param b    {@code true}: each line is one element;
 *             {@code false}: each line is split on whitespace into several elements
 * @return {@code dict}, for call chaining
 * @throws IOException if reading fails
 */
public static Set<String> loadSet(Set<String> dict, String path, boolean b) throws IOException {
    BufferedReader bfr;
    try {
        bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf8"));
    } catch (FileNotFoundException e) {
        System.out.print("没找到文件:" + path);
        return dict;
    }
    try {
        String line;
        while ((line = bfr.readLine()) != null) {
            if (line.length() == 0) {
                continue;
            }
            if (b) {
                dict.add(line);
            } else {
                for (String tok : line.split("\\s+")) {
                    dict.add(tok);
                }
            }
        }
    } finally {
        bfr.close();
    }
    return dict;
}
/**
 * Loads a string-to-float map from a UTF-8 text file of {@code key<TAB>value} lines.
 * <p>
 * The last tab on a line separates key from value. Empty lines and lines
 * without a tab are skipped. The reader is closed even when parsing fails.
 *
 * @param path file to read
 * @return the loaded map
 * @throws IOException           if the file cannot be opened or read
 * @throws NumberFormatException if a value is not a valid float
 */
public static TObjectFloatHashMap<String> loadTStringFloatMap(String path) throws IOException {
    TObjectFloatHashMap<String> dict = new TObjectFloatHashMap<String>();
    BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf8"));
    try {
        String line;
        while ((line = bfr.readLine()) != null) {
            if (line.length() == 0) {
                continue;
            }
            int idx = line.lastIndexOf("\t");
            if (idx == -1) {
                // No separator: substring(0, -1) would throw, so skip the line instead.
                continue;
            }
            dict.put(line.substring(0, idx), Float.parseFloat(line.substring(idx + 1)));
        }
    } finally {
        bfr.close();
    }
    return dict;
}
/**
 * Loads a string-to-string map from a file, keeping the key/value order found in the file.
 * <p>
 * Delegates to {@link #loadStringStringMap(String, boolean)} with the flag set to {@code false}.
 *
 * @param path file to read
 * @return the loaded map
 * @throws IOException if the delegate fails while reading
 */
public static HashMap loadStringStringMap(String path) throws IOException {
    final boolean swapKeyAndValue = false;
    return loadStringStringMap(path, swapKeyAndValue);
}
/**
 * Loads a string-to-string map from a UTF-8 text file of tab-separated pairs.
 * <p>
 * The last tab on a line splits it into a left and a right part. Empty lines
 * and lines without a tab are skipped. A missing file yields an empty map.
 * The reader is closed even when reading fails.
 *
 * @param path     file to read
 * @param isRevert {@code false}: left part is the key, right part the value;
 *                 {@code true}: right part is the key, left part the value
 * @return the loaded map
 * @throws IOException if reading fails
 */
public static HashMap<String, String> loadStringStringMap(String path, boolean isRevert) throws IOException {
    HashMap<String, String> dict = new HashMap<String, String>();
    BufferedReader bfr;
    try {
        bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf8"));
    } catch (FileNotFoundException e) {
        return dict;
    }
    try {
        String line;
        while ((line = bfr.readLine()) != null) {
            if (line.length() == 0) {
                continue;
            }
            int idx = line.lastIndexOf("\t");
            if (idx == -1) {
                // No separator: substring(0, -1) would throw, so skip the line instead.
                continue;
            }
            String left = line.substring(0, idx);
            String right = line.substring(idx + 1);
            if (isRevert) {
                dict.put(right, left);
            } else {
                dict.put(left, right);
            }
        }
    } finally {
        bfr.close();
    }
    return dict;
}
/**
 * Loads a string-to-float map from a UTF-8 text file of {@code key<TAB>value} lines.
 * <p>
 * The last tab on a line separates key from value. Empty lines and lines
 * without a tab are skipped. A missing file yields an empty map.
 * The reader is closed before returning, also when parsing fails.
 *
 * @param path file to read
 * @return the loaded map
 * @throws IOException           if reading fails
 * @throws NumberFormatException if a value is not a valid float
 */
public static HashMap<String, Float> loadStringFloatMap(String path) throws IOException {
    HashMap<String, Float> dict = new HashMap<String, Float>();
    BufferedReader bfr;
    try {
        bfr = new BufferedReader(new InputStreamReader(new FileInputStream(path), "utf8"));
    } catch (FileNotFoundException e) {
        return dict;
    }
    try {
        String line;
        while ((line = bfr.readLine()) != null) {
            if (line.length() == 0) {
                continue;
            }
            int idx = line.lastIndexOf("\t");
            if (idx == -1) {
                // No separator: substring(0, -1) would throw, so skip the line instead.
                continue;
            }
            String key = line.substring(0, idx);
            String v = line.substring(idx + 1);
            dict.put(key, Float.parseFloat(v));
        }
    } finally {
        bfr.close();
    }
    return dict;
}
/**
 * Loads one string-to-float map from several UTF-8 text files of
 * {@code key<TAB>value} lines, summing the values of keys that occur more than once.
 * <p>
 * Empty lines and lines without a tab are skipped. Each file's reader is
 * closed before the next file is opened.
 *
 * @param sfiles file paths joined with {@code ';'}
 * @return the merged map
 * @throws NumberFormatException if a value is not a valid float
 * @throws IOException           if a file cannot be opened or read
 */
public static HashMap<String, Float> loadStringFloatMapInMultiFiles(String sfiles) throws NumberFormatException, IOException {
    HashMap<String, Float> map = new HashMap<String, Float>();
    for (String f : sfiles.split(";")) {
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "utf8"));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.length() == 0) {
                    continue;
                }
                int idx = line.lastIndexOf("\t");
                if (idx == -1) {
                    continue;
                }
                String key = line.substring(0, idx);
                float v = Float.parseFloat(line.substring(idx + 1));
                Float previous = map.get(key);
                map.put(key, previous == null ? v : v + previous);
            }
        } finally {
            br.close();
        }
    }
    return map;
}
/**
 * Writes the elements of a collection to a UTF-8 text file, one
 * {@code toString()} rendering per line.
 * <p>
 * Failures are reported on standard error instead of being propagated,
 * and the writer is always closed.
 *
 * @param set  elements to write, in iteration order
 * @param file path of the output file; an existing file is overwritten
 */
public static void write(Iterable set, String file) {
    BufferedWriter bout = null;
    try {
        bout = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(file), "UTF-8"));
        for (Object element : set) {
            bout.write(element.toString());
            bout.write("\n");
        }
    } catch (Exception e) {
        // Still best-effort (the signature declares no exception), but no longer silent.
        System.err.println(e.toString());
        e.printStackTrace();
    } finally {
        if (bout != null) {
            try {
                bout.close();
            } catch (IOException e) {
                System.err.println(e.toString());
            }
        }
    }
}
public static HashMap> loadMultiValueSetMap(String path) throws IOException {
return loadMultiValueSetMap(new FileInputStream(path));
}
public static HashMap> loadMultiValueSetMap(InputStream is) throws IOException {
HashMap> dict = new HashMap>();
BufferedReader bfr;
try {
bfr = new BufferedReader(new InputStreamReader(is,"utf8"));
} catch (Exception e) {
return dict;
}
String line = null;
while ((line = bfr.readLine()) != null) {
if(line.length()==0)
continue;
String[] toks = line.split("\\s");
HashSet v = dict.get(toks[0]);
if(v==null){
v = new HashSet();
}
for(int i=1;i loadMultiValueMap(String path) throws IOException {
return loadMultiValueMap(new FileInputStream(path));
}
public static HashMap loadMultiValueMap(InputStream is) throws IOException {
HashMap dict = new HashMap();
BufferedReader bfr;
try {
bfr = new BufferedReader(new InputStreamReader(is,"utf8"));
} catch (Exception e) {
return dict;
}
String line = null;
int count=0;
while ((line = bfr.readLine()) != null) {
if(line.length()==0)
continue;
String[] toks = line.split("\\s");
String[] v = Arrays.copyOfRange(toks, 1, toks.length);
dict.put(toks[0], v);
}
return dict;
}
/**
 * Writes a multi-value map to a file with the default layout: the key is
 * written on each line and values are separated by a tab.
 * <p>
 * Delegates to {@link #writeMultiValueMap(Map, String, boolean, String)}.
 *
 * @param map  map from a key to a collection of values
 * @param file path of the output file
 */
public static void writeMultiValueMap(Map map, String file) {
    final boolean hasKey = true;
    final String delim = "\t";
    writeMultiValueMap(map, file, hasKey, delim);
}
/**
* 写多值Map,Map结构为HashMap>
* @param map HashMap>
* @param file
*/
public static void writeMultiValueMap(Map> map, String file,boolean hasKey,String delim) {
try {
BufferedWriter bout = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream(file), "UTF-8"));
Iterator>> it1 = map.entrySet().iterator();
while(it1.hasNext()){
Entry> entry = it1.next();
if(hasKey){
bout.write(entry.getKey());
bout.write("\t");
}
Collection val = entry.getValue();
if(val==null){
if(it1.hasNext())
bout.write("\n");
continue;
}
Iterator it = val.iterator();
while (it.hasNext()) {
String en = it.next();
bout.write(en);
if(it.hasNext())
bout.write(delim);
}
if(it1.hasNext())
bout.write("\n");
}
bout.close();
} catch (Exception e) {
System.err.println(e.toString());
e.printStackTrace();
}
}
/**
* 写多值Map,Map结构为HashMap>
* @param map HashMap>
* @param file
* @return
* @throws IOException
*/
public static HashSet> loadSetSet(String file) throws IOException {
HashSet> dict = new HashSet> ();
BufferedReader bfr;
try {
bfr = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf8"));
} catch (Exception e) {
return dict;
}
String line = null;
int count=0;
while ((line = bfr.readLine()) != null) {
if(line.length()==0)
continue;
HashSet set = new HashSet();
String[] toks = line.split("\\s");
for(String t:toks){
set.add(t);
}
dict.add(set);
}
return dict;
}
/**
 * Counts how many entries of a list are members of a set.
 * Despite the name, the result is a count, not a boolean; a string that
 * appears several times in {@code subwords} is counted each time.
 *
 * @param set      set to test membership against
 * @param subwords strings to look up
 * @return number of entries of {@code subwords} contained in {@code set}
 */
public static int isContain(THashSet<String> set,
        ArrayList<String> subwords) {
    int hits = 0;
    for (String s : subwords) {
        if (set.contains(s)) {
            hits++;
        }
    }
    return hits;
}
/**
 * Returns the length of the longest string in a set.
 *
 * @param set strings to inspect
 * @return the maximum {@link String#length()} over the set, or 0 for an empty set
 */
public static int getLength(THashSet<String> set) {
    int longest = 0;
    for (String s : set) {
        if (s.length() > longest) {
            longest = s.length();
        }
    }
    return longest;
}
/**
 * Reads a list of strings from a UTF-8 text file. Empty lines are skipped.
 * The reader is closed even when reading fails.
 *
 * @param file  file to read
 * @param delim regular expression passed to {@link String#split(String)} to break
 *              each line into several elements; {@code null} keeps each line as one element
 * @return the elements in file order
 * @throws IOException if the file cannot be opened or read
 */
public static ArrayList<String> loadList(String file, String delim) throws IOException {
    ArrayList<String> list = new ArrayList<String>();
    BufferedReader bfr = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf8"));
    try {
        String line;
        while ((line = bfr.readLine()) != null) {
            if (line.length() == 0) {
                continue;
            }
            if (delim != null) {
                for (String t : line.split(delim)) {
                    list.add(t);
                }
            } else {
                list.add(line);
            }
        }
    } finally {
        bfr.close();
    }
    return list;
}
// NOTE(review): the text below looks truncated by extraction damage: the loop header
// stops after "i" and runs straight into the parameter list "(... c2e, String c2ePath)"
// of a second method whose return type and name are no longer present. Presumably
// asList copied strs into the list, but that cannot be confirmed from this file.
// Restore both definitions from the upstream FNLP source before relying on them.
public static List asList(String[] strs) {
ArrayList list= new ArrayList();
for(int i=0;i c2e,
String c2ePath) {
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy