/*
 * org.ansj.library.UserDefineLibrary Maven / Gradle / Ivy
 * Go to download
 * Show more of this group. Show more artifacts with this name.
 * Show all versions of ansj_seg. Show documentation.
 * Show all versions of ansj_seg. Show documentation.
 * Best Java Chinese word segmentation!
 */
package org.ansj.library;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import love.cq.domain.Forest;
import love.cq.domain.Value;
import love.cq.library.Library;
import love.cq.util.IOUtil;
import love.cq.util.StringUtil;
import org.ansj.util.MyStaticValue;
import static org.ansj.util.MyStaticValue.LIBRARYLOG;
;
/**
* 用户自定义词典操作类
*
* @author ansj
*/
public class UserDefineLibrary {
public static final String DEFAULT_NATURE = "userDefine";
public static final Integer DEFAULT_FREQ = 1000;
public static final String DEFAULT_FREQ_STR = "1000";
public static Forest FOREST = null;
public static Forest ambiguityForest = null;
private static final HashMap userForestMap = new HashMap();
static {
initUserLibrary();
initAmbiguityLibrary();
}
/**
* 关键词增加
*
* @param keyWord
* 所要增加的关键词
* @param nature
* 关键词的词性
* @param freq
* 关键词的词频
*/
public static void insertWord(String keyword, String nature, int freq) {
String[] paramers = new String[2];
paramers[0] = nature;
paramers[1] = String.valueOf(freq);
Value value = new Value(keyword, paramers);
Library.insertWord(FOREST, value);
}
/**
* 加载纠正词典
*/
private static void initAmbiguityLibrary() {
// TODO Auto-generated method stub
String ambiguityLibrary = MyStaticValue.ambiguityLibrary;
if (StringUtil.isBlank(ambiguityLibrary)) {
LIBRARYLOG.warning("init ambiguity waring :" + ambiguityLibrary
+ " because : not find that file or can not to read !");
return;
}
try {
ambiguityLibrary = MyStaticValue.ambiguityLibrary;
} catch (Exception exception) {
LIBRARYLOG.warning("init ambiguity waring :" + ambiguityLibrary
+ " because : not find that file or can not to read !");
}
File file = new File(ambiguityLibrary);
if (file.isFile() && file.canRead()) {
try {
ambiguityForest = Library.makeForest(ambiguityLibrary);
} catch (Exception e) {
// TODO Auto-generated catch block
LIBRARYLOG.warning("init ambiguity error :" + ambiguityLibrary
+ " because : not find that file or can not to read !");
e.printStackTrace();
}
LIBRARYLOG.info("init ambiguityLibrary ok!");
} else {
LIBRARYLOG.warning("init ambiguity waring :" + ambiguityLibrary
+ " because : not find that file or can not to read !");
}
}
/**
* 加载用户自定义词典和补充词典
*/
private static void initUserLibrary() {
// TODO Auto-generated method stub
try {
FOREST = new Forest();
// 加载用户自定义词典
String userLibrary = MyStaticValue.userLibrary;
loadLibrary(FOREST, userLibrary);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
// 单个文件加载词典
public static void loadFile(Forest forest, File file) {
// TODO Auto-generated method stub
if (!file.canRead()) {
LIBRARYLOG.warning("file in path " + file.getAbsolutePath() + " can not to read!");
return;
}
String temp = null;
BufferedReader br = null;
String[] strs = null;
Value value = null;
try {
br = IOUtil.getReader(new FileInputStream(file), "UTF-8");
while ((temp = br.readLine()) != null) {
if (StringUtil.isBlank(temp)) {
continue;
} else {
strs = temp.split("\t");
if (strs.length != 3) {
value = new Value(strs[0], DEFAULT_NATURE, DEFAULT_FREQ_STR);
} else {
value = new Value(strs[0], strs[1], strs[2]);
}
Library.insertWord(forest, value);
}
}
LIBRARYLOG.info("init user userLibrary ok path is : " + file.getAbsolutePath());
} catch (UnsupportedEncodingException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} finally {
IOUtil.close(br);
br = null;
}
}
/**
* 加载词典,传入一本词典的路径.或者目录.词典后缀必须为.dic
*/
public static void loadLibrary(Forest forest, String path) {
// 加载用户自定义词典
File file = null;
if (path != null) {
file = new File(path);
if (!file.canRead() || file.isHidden()) {
LIBRARYLOG.warning("init userLibrary waring :" + path
+ " because : not find that file or can not to read !");
return;
}
if (file.isFile()) {
loadFile(forest, file);
} else if (file.isDirectory()) {
File[] files = file.listFiles();
for (int i = 0; i < files.length; i++) {
if (files[i].getName().trim().endsWith(".dic")) {
loadFile(forest, files[i]);
}
}
} else {
LIBRARYLOG.warning("init user library error :" + path
+ " because : not find that file !");
}
}
}
/**
* 删除关键词
*/
public static void removeWord(String word) {
Library.removeWord(FOREST, word);
}
/**
* 将用户自定义词典清空
*/
public static void clear() {
FOREST.clear();
}
public static HashMap getUserForestMap() {
return userForestMap;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy