// net.paoding.analysis.knife.PaodingMaker (Maven / Gradle / Ivy)
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package net.paoding.analysis.knife;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Method;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import net.paoding.analysis.Constants;
import net.paoding.analysis.analyzer.impl.MostWordsModeDictionariesCompiler;
import net.paoding.analysis.analyzer.impl.SortingDictionariesCompiler;
import net.paoding.analysis.dictionary.support.detection.Difference;
import net.paoding.analysis.dictionary.support.detection.DifferenceListener;
import net.paoding.analysis.exception.PaodingAnalysisException;
import net.paoding.analysis.ext.PaodingAnalyzerListener;
import org.apache.lucene.store.FSLockFactory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.NativeFSLockFactory;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
/**
*
* @author Zhiliang Wang [[email protected]]
*
* @since 2.0.0
*/
public class PaodingMaker {
/** Default configuration location; the "classpath:" prefix makes loadProperties resolve it through the class loader. */
public static final String DEFAULT_PROPERTIES_PATH = "classpath:paoding-analysis.properties";
/** Logger used by this class. */
private static final ESLogger log = Loggers.getLogger(PaodingMaker.class);
// Loaded Properties objects, stored and looked up by the path given to getProperties(String).
private static ObjectHolder propertiesHolder = new ObjectHolder();
// Paoding objects already built by implMake, keyed by the properties path or by the Properties object itself.
private static ObjectHolder paodingHolder = new ObjectHolder();
/** Listener handed to each Dictionaries object that implMake creates; null until setAnalyzerListener is called. */
public static PaodingAnalyzerListener listener = null;
// private static Dictionaries outDictionaries = null;
// Private constructor: prevents instantiation from outside this class.
private PaodingMaker() {
}
// ----------------获取Paoding对象的方法-----------------------
/**
 * Returns a Paoding configured from the paoding-analysis.properties file on the classpath
 * ({@link #DEFAULT_PROPERTIES_PATH}).
 *
 * <p>The first call reads the properties file and builds a new Paoding. Later calls return
 * that same Paoding as long as the properties file has not changed, instead of rebuilding it.
 *
 * <p>If only the dictionary files change (and the properties file does not), the same Paoding
 * is still returned; it is expected to pick up the dictionary changes by itself within the
 * configured detection interval.
 *
 * @return the Paoding for the default properties path
 */
public static Paoding make() {
    final String defaultPath = DEFAULT_PROPERTIES_PATH;
    return make(defaultPath);
}
/**
 * Returns a Paoding configured from the properties file at the given path. A file located on
 * the classpath must be addressed with the "classpath:" prefix.
 *
 * <p>The first call reads the properties file and builds a new Paoding. Later calls return
 * that same Paoding as long as the properties file has not changed, instead of rebuilding it.
 *
 * <p>If only the dictionary files change (and the properties file does not), the same Paoding
 * is still returned; it is expected to pick up the dictionary changes by itself within the
 * configured detection interval.
 *
 * @param propertiesPath location of the properties file; must not be null
 * @return the Paoding for that properties file
 */
public static Paoding make(String propertiesPath) {
    final Properties loaded = getProperties(propertiesPath);
    return make(loaded);
}
/**
 * Returns a Paoding for the given properties object.
 *
 * <p>The properties are first post-processed (dictionary home resolution) if that has not
 * happened yet, then handed to {@code implMake}, which reuses a cached Paoding when one exists.
 *
 * @param p the configuration to build the Paoding from
 * @return the Paoding for {@code p}
 */
public static Paoding make(Properties p) {
    postPropertiesLoaded(p);
    final Paoding result = implMake(p);
    return result;
}
// --------------------------------------------------
/**
 * Returns the properties loaded from {@link #DEFAULT_PROPERTIES_PATH}.
 *
 * @return the properties for the default path
 */
public static Properties getProperties() {
    final String defaultPath = DEFAULT_PROPERTIES_PATH;
    return getProperties(defaultPath);
}
/**
 * Replaces the static analyzer listener that is passed to dictionaries created afterwards.
 *
 * @param listener the listener to install; may be null
 */
public static void setAnalyzerListener(PaodingAnalyzerListener listener) {
    PaodingMaker.listener = listener;
}
/**
 * Returns the properties for the given path, loading them when they are not cached yet or when
 * one of the files they were loaded from has changed since.
 *
 * <p>On a (re)load the fresh properties replace the cached ones, the Paoding stored under the
 * same path is dropped from the Paoding cache, and the properties are post-processed.
 *
 * @param path location of the properties file; must not be null
 * @return the cached or freshly loaded properties
 * @throws NullPointerException if {@code path} is null
 * @throws PaodingAnalysisException if reading the properties fails with an IOException
 */
public static Properties getProperties(String path) {
    if (path == null) {
        throw new NullPointerException("path should not be null!");
    }
    try {
        final Properties cached = (Properties) propertiesHolder.get(path);
        final boolean upToDate = cached != null && !modified(cached);
        if (upToDate) {
            return cached;
        }
        // Cache miss or stale entry: reload and invalidate the Paoding built from the old one.
        final Properties fresh = loadProperties(new Properties(), path);
        propertiesHolder.set(path, fresh);
        paodingHolder.remove(path);
        postPropertiesLoaded(fresh);
        final String loadedFrom = fresh.getProperty("paoding.analysis.properties.files.absolutepaths");
        log.info("config paoding analysis from: " + loadedFrom);
        return fresh;
    } catch (IOException e) {
        throw new PaodingAnalysisException(e);
    }
}
// -------------------私有 或 辅助方法----------------------------------
/**
 * Tells whether any file recorded in the given properties has a different last-modified
 * stamp than the one recorded when it was loaded.
 *
 * <p>The recorded files and stamps are the ';'-separated lists stored under
 * {@code paoding.analysis.properties.files} and {@code paoding.analysis.properties.lastModifieds}.
 * Files that no longer exist are not reported as modified.
 *
 * @param p properties previously filled by loadProperties
 * @return true as soon as one existing file has a stamp different from the recorded one
 * @throws IOException if a stamp cannot be read
 */
private static boolean modified(Properties p) throws IOException {
    final String[] recordedStamps = p.getProperty("paoding.analysis.properties.lastModifieds").split(";");
    final String[] recordedFiles = p.getProperty("paoding.analysis.properties.files").split(";");
    int index = 0;
    while (index < recordedFiles.length) {
        final File candidate = getFile(recordedFiles[index]);
        if (candidate.exists()) {
            final String currentStamp = String.valueOf(getFileLastModified(candidate));
            if (!currentStamp.equals(recordedStamps[index])) {
                return true;
            }
        }
        index++;
    }
    return false;
}
/**
 * Loads the properties file at {@code path} into {@code p}, records where it came from, and
 * then recursively loads every file listed in its {@code paoding.imports} property.
 *
 * <p>Recognised path prefixes, checked in this order:
 * <ul>
 * <li>{@code ifexists:} - a missing file is silently skipped instead of being an error;</li>
 * <li>{@code classpath:} - the remainder is resolved through the class loader;</li>
 * <li>{@code dic-home:} - the remainder is resolved relative to the dictionary home.</li>
 * </ul>
 * Any other path is used as a plain file path.
 *
 * <p>Bookkeeping written into {@code p}: {@code paoding.dic.properties.path} (first file only)
 * and the ';'-separated lists {@code paoding.analysis.properties.lastModifieds},
 * {@code paoding.analysis.properties.files} and
 * {@code paoding.analysis.properties.files.absolutepaths}.
 *
 * @param p the properties object to fill; also the return value
 * @param path location of the file to load, optionally prefixed as described above
 * @return {@code p}
 * @throws FileNotFoundException if a classpath resource is missing and "ifexists:" was not given
 * @throws IOException if the file cannot be opened or read
 */
private static Properties loadProperties(Properties p, String path) throws IOException {
    // "ifexists:" means: read the file when it exists, ignore it otherwise.
    boolean skipWhenNotExists = false;
    if (path.startsWith("ifexists:")) {
        skipWhenNotExists = true;
        path = path.substring("ifexists:".length());
    }
    File file;
    InputStream in;
    if (path.startsWith("classpath:")) {
        path = path.substring("classpath:".length());
        URL url = getClassLoader().getResource(path);
        if (url == null) {
            if (skipWhenNotExists) {
                return p;
            }
            throw new FileNotFoundException("Not found " + path + " in classpath.");
        }
        // Fix issue 42: a bug in reading the configuration file.
        file = new File(getUrlPath(url));
        in = url.openStream();
    } else {
        if (path.startsWith("dic-home:")) {
            File dicHome = new File(getDicHome(p));
            path = path.substring("dic-home:".length());
            file = new File(dicHome, path);
        } else {
            file = new File(path);
        }
        if (skipWhenNotExists && !file.exists()) {
            return p;
        }
        in = new FileInputStream(file);
    }
    String absolutePath = file.getAbsolutePath();
    // Close the stream even when load() fails, so a malformed file does not leak a handle.
    try {
        p.load(in);
    } finally {
        in.close();
    }
    // NOTE(review): from here on 'path' no longer carries its "classpath:" / "dic-home:"
    // prefix; confirm that getFile(), which modified() applies to the recorded entries,
    // resolves the stripped form as intended.
    String lastModifieds = p.getProperty("paoding.analysis.properties.lastModifieds");
    String files = p.getProperty("paoding.analysis.properties.files");
    String absolutePaths = p.getProperty("paoding.analysis.properties.files.absolutepaths");
    if (lastModifieds == null) {
        // First file loaded into p: it identifies the whole properties object.
        p.setProperty("paoding.dic.properties.path", path);
        lastModifieds = String.valueOf(getFileLastModified(file));
        files = path;
        absolutePaths = absolutePath;
    } else {
        // Imported file: append to the ';'-separated bookkeeping lists.
        lastModifieds = lastModifieds + ";" + getFileLastModified(file);
        files = files + ";" + path;
        absolutePaths = absolutePaths + ";" + absolutePath;
    }
    p.setProperty("paoding.analysis.properties.lastModifieds", lastModifieds);
    p.setProperty("paoding.analysis.properties.files", files);
    p.setProperty("paoding.analysis.properties.files.absolutepaths", absolutePaths);
    // Remove the imports entry before recursing so an imported file's own list is not
    // mixed up with the one being processed here.
    String importsValue = p.getProperty("paoding.imports");
    if (importsValue != null) {
        p.remove("paoding.imports");
        String[] imports = importsValue.split(";");
        for (int i = 0; i < imports.length; i++) {
            loadProperties(p, imports[i]);
        }
    }
    return p;
}
/**
 * Returns the last-modified time of a file, which may also be an entry inside a jar.
 *
 * <p>A path containing {@code ".jar!"} is treated as {@code [protocol:]<jar path>!/<entry>}:
 * the jar is opened and the time stored for the entry is returned. Any other path is answered
 * with {@link File#lastModified()}.
 *
 * @param file the file or jar-entry pseudo file to inspect
 * @return the last-modified time in milliseconds
 * @throws FileNotFoundException if the jar does not contain the entry
 * @throws IOException if the jar cannot be opened
 */
private static long getFileLastModified(File file) throws IOException {
    String path = file.getPath();
    if (path.indexOf(".jar!") == -1) {
        return file.lastModified();
    }
    // Undo URL escaping of blanks and normalise separators before slicing the path.
    path = path.replaceAll("%20", " ").replaceAll("\\\\", "/");
    int jarIndex = path.indexOf(".jar!");
    int protocalIndex = path.indexOf(":");
    // Everything after the first ':' up to and including ".jar" is the jar location.
    String jarPath = path.substring(protocalIndex + ":".length(), jarIndex + ".jar".length());
    try {
        JarFile jarFile = new JarFile(new File(jarPath));
        try {
            String entryPath = path.substring(jarIndex + ".jar!/".length());
            JarEntry entry = jarFile.getJarEntry(entryPath);
            if (entry == null) {
                // getJarEntry returns null for an unknown name; report it as an I/O
                // problem instead of failing with a NullPointerException.
                throw new FileNotFoundException("Not found entry " + entryPath + " in " + jarPath);
            }
            return entry.getTime();
        } finally {
            // The jar was opened only to read one timestamp; always release it.
            jarFile.close();
        }
    } catch (IOException e) {
        System.err.println("error in handler path=" + path);
        System.err.println("error in handler jarPath=" + jarPath);
        throw e;
    }
}
/**
 * Returns the absolute dictionary home path, resolving and storing it in {@code p} first
 * when it is not recorded there yet.
 *
 * @param p the configuration to read from and update
 * @return the value of {@code paoding.dic.home.absolute.path}
 */
private static String getDicHome(Properties p) {
    setDicHomeProperties(p);
    final String absoluteDicHome = p.getProperty("paoding.dic.home.absolute.path");
    return absoluteDicHome;
}
/**
 * Runs the one-time post-processing of a properties object (dictionary home resolution) and
 * marks the object with {@code paoding.analysis.postPropertiesLoaded=done} so that later
 * calls are no-ops.
 *
 * @param p the configuration to post-process
 */
private static void postPropertiesLoaded(Properties p) {
    final boolean alreadyProcessed = "done".equals(p.getProperty("paoding.analysis.postPropertiesLoaded"));
    if (!alreadyProcessed) {
        setDicHomeProperties(p);
        p.setProperty("paoding.analysis.postPropertiesLoaded", "done");
    }
}
/**
 * Resolves the dictionary home directory and records it in {@code p}, unless
 * {@code paoding.dic.home.absolute.path} is already present.
 *
 * <p>Resolution order:
 * <ol>
 * <li>the environment variable named by {@code Constants.ENV_PAODING_DIC_HOME}, when set;</li>
 * <li>otherwise the {@code Constants.DIC_HOME} property;</li>
 * <li>but when the {@code Constants.DIC_HOME_CONFIG_FIRST} property equals "this"
 * (case-insensitive), the property wins and the environment variable is only used when the
 * property is absent;</li>
 * <li>if neither is configured, a "dic" directory in the current directory, then a "dic"
 * resource on the classpath.</li>
 * </ol>
 * The result is normalised to forward slashes with a trailing '/', written back to
 * {@code Constants.DIC_HOME}, and its absolute path is stored under
 * {@code paoding.dic.home.absolute.path}.
 *
 * @param p the configuration to read from and update
 * @throws PaodingAnalysisException if no dictionary home can be determined, or it does not
 *         exist, or it is not a directory
 */
private static void setDicHomeProperties(Properties p) {
    if (p.getProperty("paoding.dic.home.absolute.path") != null) {
        return;
    }
    String envDicHome = null;
    try {
        envDicHome = getSystemEnv(Constants.ENV_PAODING_DIC_HOME);
    } catch (Error e) {
        log.warn("System.getenv() is not supported in JDK1.4. ");
    }
    String dicHome = getProperty(p, Constants.DIC_HOME);
    if (envDicHome != null) {
        final String first = getProperty(p, Constants.DIC_HOME_CONFIG_FIRST);
        // The environment variable wins unless the properties file claims precedence
        // and actually provides a value.
        final boolean configFirst = "this".equalsIgnoreCase(first);
        if (!configFirst || dicHome == null) {
            dicHome = envDicHome;
        }
    }
    // Nothing configured anywhere: look for a "dic" directory in the current directory,
    // then on the classpath, and adopt it when found.
    if (dicHome == null) {
        if (new File("dic").exists()) {
            dicHome = "dic/";
        } else if (PaodingMaker.class.getClassLoader().getResource("dic") != null) {
            dicHome = "classpath:dic/";
        }
    }
    if (dicHome == null) {
        throw new PaodingAnalysisException(
                "please set a system env PAODING_DIC_HOME or Config paoding.dic.home in paoding-dic-home.properties point to the dictionaries!");
    }
    // Normalise and write the value back into the properties object.
    String normalized = dicHome.replace('\\', '/');
    if (!normalized.endsWith("/")) {
        normalized = normalized + "/";
    }
    p.setProperty(Constants.DIC_HOME, normalized);
    // Turn the dictionary home into an absolute path and record it as well.
    final File dicHomeFile = getFile(normalized);
    if (!dicHomeFile.exists()) {
        throw new PaodingAnalysisException("not found the dic home dirctory! " + dicHomeFile.getAbsolutePath());
    }
    if (!dicHomeFile.isDirectory()) {
        throw new PaodingAnalysisException("dic home should not be a file, but a directory!");
    }
    p.setProperty("paoding.dic.home.absolute.path", dicHomeFile.getAbsolutePath());
}
/**
 * Returns the Paoding for the given properties, taking it from the cache when present and
 * otherwise building it, loading its dictionaries and caching it.
 *
 * <p>Cache key: the value of {@code paoding.dic.properties.path} when the properties were
 * loaded from a file by loadProperties, otherwise the properties object itself.
 *
 * <p>Dictionary loading runs while holding the file lock "write.lock" in the dictionary home:
 * the dictionaries are compiled when the compiler says so, the compiled dictionaries are read
 * and attached to the Paoding, and, when the detector interval is positive, change detection
 * is started; a detected change stops the detection and repeats the whole procedure.
 *
 * @param p the post-processed configuration
 * @return the cached or newly built Paoding
 * @throws PaodingAnalysisException wrapping any exception raised while building
 */
private static Paoding implMake(final Properties p) {
    // Properties read from a file by PaodingMaker always carry paoding.dic.properties.path
    // (see loadProperties); use it as the key, else fall back to the object itself so the
    // same Properties instance always maps to the same Paoding.
    final String path = p.getProperty("paoding.dic.properties.path");
    final Object paodingKey = (path != null) ? (Object) path : p;
    // Explicit cast: the holder field is declared without type arguments.
    final Paoding cached = (Paoding) paodingHolder.get(paodingKey);
    if (cached != null) {
        return cached;
    }
    try {
        final Paoding paoding = createPaodingWithKnives(p);
        // Pick the dictionaries compiler: an explicitly configured class wins, otherwise
        // the analyzer mode decides.
        String compilerClassName = getProperty(p, Constants.ANALYZER_DICTIONARIES_COMPILER);
        Class<?> compilerClass = null;
        if (compilerClassName != null) {
            compilerClass = Class.forName(compilerClassName);
        }
        if (compilerClass == null) {
            String analyzerMode = getProperty(p, Constants.ANALYZER_MODE);
            if ("most-words".equalsIgnoreCase(analyzerMode) || "default".equalsIgnoreCase(analyzerMode)) {
                compilerClass = MostWordsModeDictionariesCompiler.class;
            } else {
                compilerClass = SortingDictionariesCompiler.class;
            }
        }
        final DictionariesCompiler compiler = (DictionariesCompiler) compilerClass.newInstance();
        new Function() {
            public void run() throws Exception {
                String LOCK_FILE = "write.lock";
                String dicHome = p.getProperty("paoding.dic.home.absolute.path");
                FSLockFactory FileLockFactory = new NativeFSLockFactory(dicHome);
                Lock lock = FileLockFactory.makeLock(LOCK_FILE);
                boolean obtained = false;
                try {
                    obtained = lock.obtain(90000);
                    if (obtained) {
                        // Compile the dictionaries when needed, i.e. preprocess them into
                        // the form the analyzer requires.
                        if (compiler.shouldCompile(p)) {
                            Dictionaries dictionaries = readUnCompiledDictionaries(p);
                            dictionaries.setAnalyzerListener(listener);
                            Paoding tempPaoding = createPaodingWithKnives(p);
                            setDictionaries(tempPaoding, dictionaries);
                            compiler.compile(dictionaries, tempPaoding, p);
                        }
                        // Use the compiled dictionaries.
                        final Dictionaries dictionaries = compiler.readCompliedDictionaries(p);
                        dictionaries.setAnalyzerListener(listener);
                        setDictionaries(paoding, dictionaries);
                        // Start the detector that reloads/unloads dictionaries on change.
                        // The interval is in seconds; zero or a negative value disables it.
                        String intervalStr = getProperty(p, Constants.DIC_DETECTOR_INTERVAL);
                        int interval = Integer.parseInt(intervalStr);
                        if (interval > 0) {
                            dictionaries.startDetecting(interval, new DifferenceListener() {
                                public void on(Difference diff) throws Exception {
                                    dictionaries.stopDetecting();
                                    // Re-run the enclosing procedure: when the *compiled*
                                    // dictionaries were changed/removed/added, recompile the
                                    // sources and start a fresh detection cycle.
                                    run();
                                }
                            });
                        }
                    }
                } catch (IOException ex) {
                    // Also covers LockObtainFailedException, which is an IOException.
                    log.error("Obtain " + LOCK_FILE + " in " + dicHome + " failed:" + ex.getMessage());
                    throw ex;
                } finally {
                    if (obtained) {
                        try {
                            lock.close(); // replaces lock.release(), which was removed
                        } catch (Exception ex) {
                            // Best effort: a failed release must not mask the outcome of the
                            // work above, but it should not go unnoticed either.
                            log.warn("Release " + LOCK_FILE + " in " + dicHome + " failed:" + ex.getMessage());
                        }
                    }
                }
            }
        }.run();
        // The Paoding was built successfully; cache it for subsequent calls.
        paodingHolder.set(paodingKey, paoding);
        return paoding;
    } catch (Exception e) {
        throw new PaodingAnalysisException("", e);
    }
}
private static Paoding createPaodingWithKnives(Properties p) throws Exception {
// 如果PaodingHolder中并没有缓存该属性文件或对象对应的Paoding对象,
// 则根据给定的属性创建一个新的Paoding对象,并在返回之前存入paodingHolder
Paoding paoding = new Paoding();
// 寻找传说中的Knife。。。。
final Map knifeMap = new HashMap();
final List knifeList = new LinkedList();
final List functions = new LinkedList();
Iterator> iter = p.entrySet().iterator();
while (iter.hasNext()) {
Map.Entry