All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.paoding.analysis.knife.PaodingMaker Maven / Gradle / Ivy

/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.paoding.analysis.knife;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Method;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;

import net.paoding.analysis.Constants;
import net.paoding.analysis.analyzer.impl.MostWordsModeDictionariesCompiler;
import net.paoding.analysis.analyzer.impl.SortingDictionariesCompiler;
import net.paoding.analysis.dictionary.support.detection.Difference;
import net.paoding.analysis.dictionary.support.detection.DifferenceListener;
import net.paoding.analysis.exception.PaodingAnalysisException;
import net.paoding.analysis.ext.PaodingAnalyzerListener;

import org.apache.lucene.store.FSLockFactory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.NativeFSLockFactory;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;

/**
 * 
 * @author Zhiliang Wang [[email protected]]
 * 
 * @since 2.0.0
 */
public class PaodingMaker {

	public static final String DEFAULT_PROPERTIES_PATH = "classpath:paoding-analysis.properties";

	private static final ESLogger log = Loggers.getLogger(PaodingMaker.class);

	private static ObjectHolder propertiesHolder = new ObjectHolder();

	private static ObjectHolder paodingHolder = new ObjectHolder();

	public static PaodingAnalyzerListener listener = null;

	//	private static Dictionaries outDictionaries = null;

	private PaodingMaker() {

	}

	// ----------------获取Paoding对象的方法-----------------------

	/**
	 * 
	 * 读取类路径下的paoding-analysis.properties文件,据之获取一个Paoding对象.
	 * 

* 第一次调用本方法时,从该属性文件中读取配置,并创建一个新的Paoding对象,之后,如果 * 属性文件没有变更过,则每次调用本方法都将返回先前创建的Paoding对象。而不重新构建 Paoding对象。 *

* * 如果配置文件没有变更,但词典文件有变更。仍然是返回同样的Paoding对象。而且是,只要 * 词典文件发生了变更,Paoding对象在一定时间内会收到更新的。所以返回的Paoding对象 一定是最新配置的。 * * * * @return */ public static Paoding make() { return make(DEFAULT_PROPERTIES_PATH); } /** * 读取类指定路径的配置文件(如果配置文件放置在类路径下,则应该加"classpath:"为前缀),据之获取一个新的Paoding对象. *

* * 第一次调用本方法时,从该属性文件中读取配置,并创建一个新的Paoding对象,之后,如果 * 属性文件没有变更过,则每次调用本方法都将返回先前创建的Paoding对象。而不重新构建 Paoding对象。 *

* * 如果配置文件没有变更,但词典文件有变更。仍然是返回同样的Paoding对象。而且是,只要 * 词典文件发生了变更,Paoding对象在一定时间内会收到更新的。所以返回的Paoding对象 一定是最新配置的。 * * @param propertiesPath * @return */ public static Paoding make(String propertiesPath) { return make(getProperties(propertiesPath)); } /** * 根据给定的属性对象获取一个Paoding对象. *

* * @param p * @return */ public static Paoding make(Properties p) { postPropertiesLoaded(p); return implMake(p); } // -------------------------------------------------- public static Properties getProperties() { return getProperties(DEFAULT_PROPERTIES_PATH); } public static void setAnalyzerListener(PaodingAnalyzerListener listener) { PaodingMaker.listener = listener; } public static Properties getProperties(String path) { if (path == null) { throw new NullPointerException("path should not be null!"); } try { // Properties p = (Properties) propertiesHolder.get(path); if (p == null || modified(p)) { p = loadProperties(new Properties(), path); propertiesHolder.set(path, p); paodingHolder.remove(path); postPropertiesLoaded(p); String absolutePaths = p.getProperty("paoding.analysis.properties.files.absolutepaths"); log.info("config paoding analysis from: " + absolutePaths); } return p; } catch (IOException e) { throw new PaodingAnalysisException(e); } } // -------------------私有 或 辅助方法---------------------------------- private static boolean modified(Properties p) throws IOException { String lastModifieds = p.getProperty("paoding.analysis.properties.lastModifieds"); String[] lastModifedsArray = lastModifieds.split(";"); String files = p.getProperty("paoding.analysis.properties.files"); String[] filesArray = files.split(";"); for (int i = 0; i < filesArray.length; i++) { File file = getFile(filesArray[i]); if (file.exists() && !String.valueOf(getFileLastModified(file)).equals(lastModifedsArray[i])) { return true; } } return false; } private static Properties loadProperties(Properties p, String path) throws IOException { URL url; File file; String absolutePath; InputStream in; // 若ifexists为真表示如果该文件存在则读取他的内容,不存在则忽略它 boolean skipWhenNotExists = false; if (path.startsWith("ifexists:")) { skipWhenNotExists = true; path = path.substring("ifexists:".length()); } if (path.startsWith("classpath:")) { path = path.substring("classpath:".length()); url = getClassLoader().getResource(path); if (url == null) { if (skipWhenNotExists) { return p; } throw new FileNotFoundException("Not found " + path + " in classpath."); } /* * Fix issue 42 : 读取配置文件的一个Bug */ file = new File(getUrlPath(url)); in = url.openStream(); } else { if (path.startsWith("dic-home:")) { File dicHome = new File(getDicHome(p)); path = path.substring("dic-home:".length()); file = new File(dicHome, path); } else { file = new File(path); } if (skipWhenNotExists && !file.exists()) { return p; } in = new FileInputStream(file); } absolutePath = file.getAbsolutePath(); p.load(in); in.close(); String lastModifieds = p.getProperty("paoding.analysis.properties.lastModifieds"); String files = p.getProperty("paoding.analysis.properties.files"); String absolutePaths = p.getProperty("paoding.analysis.properties.files.absolutepaths"); if (lastModifieds == null) { p.setProperty("paoding.dic.properties.path", path); lastModifieds = String.valueOf(getFileLastModified(file)); files = path; absolutePaths = absolutePath; } else { lastModifieds = lastModifieds + ";" + getFileLastModified(file); files = files + ";" + path; absolutePaths = absolutePaths + ";" + absolutePath; } p.setProperty("paoding.analysis.properties.lastModifieds", lastModifieds); p.setProperty("paoding.analysis.properties.files", files); p.setProperty("paoding.analysis.properties.files.absolutepaths", absolutePaths); String importsValue = p.getProperty("paoding.imports"); if (importsValue != null) { p.remove("paoding.imports"); String[] imports = importsValue.split(";"); for (int i = 0; i < imports.length; i++) { loadProperties(p, imports[i]); } } return p; } private static long getFileLastModified(File file) throws IOException { String path = file.getPath(); int jarIndex = path.indexOf(".jar!"); if (jarIndex == -1) { return file.lastModified(); } else { path = path.replaceAll("%20", " ").replaceAll("\\\\", "/"); jarIndex = path.indexOf(".jar!"); int protocalIndex = path.indexOf(":"); String jarPath = path.substring(protocalIndex + ":".length(), jarIndex + ".jar".length()); File jarPathFile = new File(jarPath); JarFile jarFile; try { jarFile = new JarFile(jarPathFile); String entryPath = path.substring(jarIndex + ".jar!/".length()); JarEntry entry = jarFile.getJarEntry(entryPath); return entry.getTime(); } catch (IOException e) { System.err.println("error in handler path=" + path); System.err.println("error in handler jarPath=" + jarPath); throw e; } } } private static String getDicHome(Properties p) { setDicHomeProperties(p); return p.getProperty("paoding.dic.home.absolute.path"); } private static void postPropertiesLoaded(Properties p) { if ("done".equals(p.getProperty("paoding.analysis.postPropertiesLoaded"))) { return; } setDicHomeProperties(p); p.setProperty("paoding.analysis.postPropertiesLoaded", "done"); } private static void setDicHomeProperties(Properties p) { String dicHomeAbsultePath = p.getProperty("paoding.dic.home.absolute.path"); if (dicHomeAbsultePath != null) { return; } // 获取词典安装目录配置: // 如配置了PAODING_DIC_HOME环境变量,则将其作为字典的安装主目录 // 否则使用属性文件的paoding.dic.home配置 // 但是如果属性文件中强制配置paoding.dic.home.config-first=this, // 则优先考虑属性文件的paoding.dic.home配置, // 此时只有当属性文件没有配置paoding.dic.home时才会采用环境变量的配置 String dicHomeBySystemEnv = null; try { dicHomeBySystemEnv = getSystemEnv(Constants.ENV_PAODING_DIC_HOME); } catch (Error e) { log.warn("System.getenv() is not supported in JDK1.4. "); } String dicHome = getProperty(p, Constants.DIC_HOME); if (dicHomeBySystemEnv != null) { String first = getProperty(p, Constants.DIC_HOME_CONFIG_FIRST); if (first != null && first.equalsIgnoreCase("this")) { if (dicHome == null) { dicHome = dicHomeBySystemEnv; } } else { dicHome = dicHomeBySystemEnv; } } // 如果环境变量和属性文件都没有配置词典安转目录 // 则尝试在当前目录和类路径下寻找是否有dic目录, // 若有,则采纳他为paoding.dic.home // 如果尝试后均失败,则抛出PaodingAnalysisException异常 if (dicHome == null) { File f = new File("dic"); if (f.exists()) { dicHome = "dic/"; } else { URL url = PaodingMaker.class.getClassLoader().getResource("dic"); if (url != null) { dicHome = "classpath:dic/"; } } } if (dicHome == null) { throw new PaodingAnalysisException( "please set a system env PAODING_DIC_HOME or Config paoding.dic.home in paoding-dic-home.properties point to the dictionaries!"); } // 规范化dicHome,并设置到属性文件对象中 dicHome = dicHome.replace('\\', '/'); if (!dicHome.endsWith("/")) { dicHome = dicHome + "/"; } p.setProperty(Constants.DIC_HOME, dicHome);// writer to the properites // object // 将dicHome转化为一个系统唯一的绝对路径,记录在属性对象中 File dicHomeFile = getFile(dicHome); if (!dicHomeFile.exists()) { throw new PaodingAnalysisException("not found the dic home dirctory! " + dicHomeFile.getAbsolutePath()); } if (!dicHomeFile.isDirectory()) { throw new PaodingAnalysisException("dic home should not be a file, but a directory!"); } p.setProperty("paoding.dic.home.absolute.path", dicHomeFile.getAbsolutePath()); } private static Paoding implMake(final Properties p) { // 将要返回的Paoding对象,它可能是新创建的,也可能使用paodingHolder中已有的Paoding对象 Paoding paoding; // 作为本次返回的Paoding对象在paodingHolder中的key,使之后同样的key不会重复创建Paoding对象 final Object paodingKey; // 如果该属性对象是通过PaodingMaker由文件读入的,则必然存在paoding.dic.properties.path属性 // 详细请参考loadProperties方法) String path = p.getProperty("paoding.dic.properties.path"); // 如果该属性由文件读入,则文件地址作为Paoding对象在paodingHolder中的key if (path != null) { paodingKey = path; // 否则以属性文件作为其key,之后只要进来的是同一个属性对象,都返回同一个Paoding对象 } else { paodingKey = p; } paoding = paodingHolder.get(paodingKey); if (paoding != null) { return paoding; } try { paoding = createPaodingWithKnives(p); final Paoding finalPaoding = paoding; // String compilerClassName = getProperty(p, Constants.ANALYZER_DICTIONARIES_COMPILER); Class compilerClass = null; if (compilerClassName != null) { compilerClass = Class.forName(compilerClassName); } if (compilerClass == null) { String analyzerMode = getProperty(p, Constants.ANALYZER_MODE); if ("most-words".equalsIgnoreCase(analyzerMode) || "default".equalsIgnoreCase(analyzerMode)) { compilerClass = MostWordsModeDictionariesCompiler.class; } else { compilerClass = SortingDictionariesCompiler.class; } } final DictionariesCompiler compiler = (DictionariesCompiler) compilerClass.newInstance(); new Function() { public void run() throws Exception { String LOCK_FILE = "write.lock"; String dicHome = p.getProperty("paoding.dic.home.absolute.path"); FSLockFactory FileLockFactory = new NativeFSLockFactory(dicHome); Lock lock = FileLockFactory.makeLock(LOCK_FILE); boolean obtained = false; try { obtained = lock.obtain(90000); if (obtained) { // 编译词典-对词典进行可能的处理,以符合分词器的要求 if (compiler.shouldCompile(p)) { Dictionaries dictionaries = readUnCompiledDictionaries(p); dictionaries.setAnalyzerListener(listener); Paoding tempPaoding = createPaodingWithKnives(p); setDictionaries(tempPaoding, dictionaries); compiler.compile(dictionaries, tempPaoding, p); } // 使用编译后的词典 final Dictionaries dictionaries = compiler.readCompliedDictionaries(p); dictionaries.setAnalyzerListener(listener); setDictionaries(finalPaoding, dictionaries); // 启动字典动态转载/卸载检测器 // 侦测时间间隔(秒)。默认为60秒。如果设置为0或负数则表示不需要进行检测 String intervalStr = getProperty(p, Constants.DIC_DETECTOR_INTERVAL); int interval = Integer.parseInt(intervalStr); if (interval > 0) { dictionaries.startDetecting(interval, new DifferenceListener() { public void on(Difference diff) throws Exception { dictionaries.stopDetecting(); // 此处调用run方法,以当检测到**编译后**的词典变更/删除/增加时, // 重新编译源词典、重新创建并启动dictionaries自检测 run(); } }); } } } catch (LockObtainFailedException ex) { log.error("Obtain " + LOCK_FILE + " in " + dicHome + " failed:" + ex.getMessage()); throw ex; } catch (IOException ex) { log.error("Obtain " + LOCK_FILE + " in " + dicHome + " failed:" + ex.getMessage()); throw ex; } finally { if (obtained) { try { lock.close(); //lock.release() 此方法被刪除了 } catch (Exception ex) { } } } } }.run(); // Paoding对象创建成功!此时可以将它寄放到paodingHolder中,给下次重复利用 paodingHolder.set(paodingKey, paoding); return paoding; } catch (Exception e) { throw new PaodingAnalysisException("", e); } } private static Paoding createPaodingWithKnives(Properties p) throws Exception { // 如果PaodingHolder中并没有缓存该属性文件或对象对应的Paoding对象, // 则根据给定的属性创建一个新的Paoding对象,并在返回之前存入paodingHolder Paoding paoding = new Paoding(); // 寻找传说中的Knife。。。。 final Map knifeMap = new HashMap(); final List knifeList = new LinkedList(); final List functions = new LinkedList(); Iterator> iter = p.entrySet().iterator(); while (iter.hasNext()) { Map.Entry e = iter.next(); final String key = (String) e.getKey(); final String value = (String) e.getValue(); int index = key.indexOf(Constants.KNIFE_CLASS); if (index == 0 && key.length() > Constants.KNIFE_CLASS.length()) { final int end = key.indexOf('.', Constants.KNIFE_CLASS.length()); if (end == -1) { Class clazz = Class.forName(value); Knife knife = (Knife) clazz.newInstance(); knifeList.add(knife); knifeMap.put(key, knife); log.info("add knike: " + value); } else { // 由于属性对象属于hash表,key的读取顺序不和文件的顺序一致,不能保证属性设置时,knife对象已经创建 // 所以这里只定义函数放到functions中,待到所有的knife都创建之后,在执行该程序 functions.add(new Function() { public void run() throws Exception { String knifeName = key.substring(0, end); Object obj = knifeMap.get(knifeName); if (!obj.getClass().getName().equals("org.springframework.beans.BeanWrapperImpl")) { Class beanWrapperImplClass = Class.forName("org.springframework.beans.BeanWrapperImpl"); Method setWrappedInstance = beanWrapperImplClass.getMethod("setWrappedInstance", new Class[] { Object.class }); Object beanWrapperImpl = beanWrapperImplClass.newInstance(); setWrappedInstance.invoke(beanWrapperImpl, new Object[] { obj }); knifeMap.put(knifeName, (Knife) beanWrapperImpl); obj = beanWrapperImpl; } String propertyName = key.substring(end + 1); Method setPropertyValue = obj.getClass().getMethod("setPropertyValue", new Class[] { String.class, Object.class }); setPropertyValue.invoke(obj, new Object[] { propertyName, value }); } }); } } } // 完成所有留后执行的程序 for (Iterator iterator = functions.iterator(); iterator.hasNext();) { Function function = (Function) iterator.next(); function.run(); } // 把刀交给庖丁 paoding.setKnives(knifeList); return paoding; } private static Dictionaries readUnCompiledDictionaries(Properties p) { String skipPrefix = getProperty(p, Constants.DIC_SKIP_PREFIX); String noiseCharactor = getProperty(p, Constants.DIC_NOISE_CHARACTOR); String noiseWord = getProperty(p, Constants.DIC_NOISE_WORD); String unit = getProperty(p, Constants.DIC_UNIT); String confucianFamilyName = getProperty(p, Constants.DIC_CONFUCIAN_FAMILY_NAME); String combinatorics = getProperty(p, Constants.DIC_FOR_COMBINATORICS); String charsetName = getProperty(p, Constants.DIC_CHARSET); int maxWordLen = Integer.valueOf(getProperty(p, Constants.DIC_MAXWORDLEN)); Dictionaries dictionaries = new FileDictionaries(getDicHome(p), skipPrefix, noiseCharactor, noiseWord, unit, confucianFamilyName, combinatorics, charsetName, maxWordLen); return dictionaries; } private static void setDictionaries(Paoding paoding, Dictionaries dictionaries) { for (Knife knife : paoding.getKnives()) { if (knife instanceof DictionariesWare) { ((DictionariesWare) knife).setDictionaries(dictionaries); } } } private static String getUrlPath(URL url) { if (url == null) return null; String urlPath = null; try { urlPath = url.toURI().getPath(); } catch (URISyntaxException e) { } if (urlPath == null) { urlPath = url.getFile(); } return urlPath; } private static File getFile(String path) { File file; URL url; if (path.startsWith("classpath:")) { path = path.substring("classpath:".length()); url = getClassLoader().getResource(path); /* * Fix issue 42 : 读取配置文件的一个Bug */ if (url != null) { path = getUrlPath(url); } final boolean fileExist = url != null; file = new File(path) { private static final long serialVersionUID = 4009013298629147887L; public boolean exists() { return fileExist; } }; } else { file = new File(path); } return file; } private static ClassLoader getClassLoader() { ClassLoader loader = Thread.currentThread().getContextClassLoader(); if (loader == null) { loader = PaodingMaker.class.getClassLoader(); } return loader; } private static String getProperty(Properties p, String name) { return Constants.getProperty(p, name); } // -------------------------------------------------------------------- private static class ObjectHolder { private ObjectHolder() { } private Map objects = new HashMap(); public T get(Object name) { return objects.get(name); } public void set(Object name, T object) { objects.put(name, object); } public void remove(Object name) { objects.remove(name); } } private static interface Function { public void run() throws Exception; } private static String getSystemEnv(String name) { try { return System.getenv(name); } catch (Error error) { String osName = System.getProperty("os.name").toLowerCase(); try { String cmd; if (osName.indexOf("win") != -1) { cmd = "cmd /c SET"; } else { cmd = "/usr/bin/printenv"; } Process process = Runtime.getRuntime().exec(cmd); InputStreamReader isr = new InputStreamReader(process.getInputStream()); BufferedReader br = new BufferedReader(isr); String line; while ((line = br.readLine()) != null && line.startsWith(name)) { int index = line.indexOf(name + "="); if (index != -1) { return line.substring(index + name.length() + 1); } } } catch (Exception e) { log.warn("unable to read env from os." + e.getMessage(), e); } } return null; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy