com.jaeksoft.searchlib.analysis.Analyzer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you will be able to quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
The newest version!
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2008-2014 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.analysis;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import javax.xml.xpath.XPathExpressionException;
import org.w3c.dom.DOMException;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.tokenizer.TokenizerFactory;
import com.jaeksoft.searchlib.config.Config;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
/**
 * Editable definition of an analyzer: a name, a language, one tokenizer for
 * indexation, one tokenizer for queries and an ordered list of filters.
 * <p>
 * The compiled forms ({@link #getQueryAnalyzer()} and
 * {@link #getIndexAnalyzer()}) are built on first use and cached. Every
 * mutator that changes a tokenizer or the filter list drops the matching
 * cached instance so that it is rebuilt on the next request.
 * <p>
 * All accessors and mutators run under the read or write side of an internal
 * {@link ReadWriteLock}.
 */
public class Analyzer {

	private final ReadWriteLock rwl = new ReadWriteLock();

	private TokenizerFactory queryTokenizer;

	private TokenizerFactory indexTokenizer;

	private List<FilterFactory> filters;

	private String name;

	private LanguageEnum lang;

	private Config config;

	/** Lazily built by {@link #getQueryAnalyzer()}; null when not yet built. */
	private CompiledAnalyzer queryAnalyzer;

	/** Lazily built by {@link #getIndexAnalyzer()}; null when not yet built. */
	private CompiledAnalyzer indexAnalyzer;

	/**
	 * Creates an unnamed analyzer with an undefined language, the default
	 * tokenizer for both query and indexation, and no filter.
	 *
	 * @param config
	 *            current config
	 * @throws SearchLibException
	 *             inherited error
	 * @throws ClassNotFoundException
	 *             inherited error
	 */
	public Analyzer(Config config) throws SearchLibException, ClassNotFoundException {
		name = null;
		this.config = config;
		lang = LanguageEnum.UNDEFINED;
		setQueryTokenizer(TokenizerFactory.getDefaultTokenizer(config));
		setIndexTokenizer(TokenizerFactory.getDefaultTokenizer(config));
		filters = new ArrayList<FilterFactory>();
		queryAnalyzer = null;
		indexAnalyzer = null;
	}

	/**
	 * Builds the bare analyzer described by the attributes of an XML node. The
	 * tokenizers and filters are filled in afterwards by
	 * {@link #fromXmlConfig(Config, XPathParser, Node)}.
	 *
	 * @param config
	 *            current config
	 * @param xpp
	 *            an XPathParser instance (not used by this constructor)
	 * @param node
	 *            the node holding the "name" and "lang" attributes
	 * @throws SearchLibException
	 *             inherited error
	 */
	private Analyzer(Config config, XPathParser xpp, Node node) throws SearchLibException {
		this.config = config;
		this.name = XPathParser.getAttributeString(node, "name");
		this.lang = LanguageEnum.findByCode(XPathParser.getAttributeString(node, "lang"));
		this.filters = new ArrayList<FilterFactory>();
		this.queryAnalyzer = null;
		this.indexAnalyzer = null;
	}

	/**
	 * Drops both cached compiled analyzers so they are rebuilt on next use.
	 * Must be called with the write lock held.
	 */
	private void resetCompiledAnalyzers() {
		queryAnalyzer = null;
		indexAnalyzer = null;
	}

	/**
	 * Overwrites this analyzer with a copy of the given one.
	 *
	 * @param source
	 *            the analyzer to copy from
	 * @throws SearchLibException
	 *             inherited error
	 * @throws ClassNotFoundException
	 *             inherited error
	 */
	public void copyFrom(Analyzer source) throws SearchLibException, ClassNotFoundException {
		rwl.w.lock();
		try {
			source.copyTo(this);
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Copies the state of this analyzer into the target. Tokenizers and
	 * filters are cloned through their factories; the config reference is
	 * shared. The caller is expected to hold the target's write lock.
	 *
	 * @param target
	 *            the analyzer to overwrite
	 * @throws SearchLibException
	 *             inherited error
	 * @throws ClassNotFoundException
	 *             inherited error
	 */
	private void copyTo(Analyzer target) throws SearchLibException, ClassNotFoundException {
		rwl.r.lock();
		try {
			// Clone everything first: if any clone fails, the target must not
			// be left half-updated with stale compiled analyzers.
			TokenizerFactory queryCopy = (TokenizerFactory) ClassFactory.create(queryTokenizer);
			TokenizerFactory indexCopy = (TokenizerFactory) ClassFactory.create(indexTokenizer);
			List<FilterFactory> filterCopies = new ArrayList<FilterFactory>(filters.size());
			for (FilterFactory filter : filters) {
				filterCopies.add((FilterFactory) FilterFactory.create(filter));
			}
			target.name = this.name;
			target.lang = this.lang;
			target.queryTokenizer = queryCopy;
			target.indexTokenizer = indexCopy;
			target.filters = filterCopies;
			target.config = this.config;
			target.resetCompiledAnalyzers();
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * Closes the cached compiled analyzers, if any, and forgets them so that
	 * they are rebuilt on next use.
	 */
	public void recompile() {
		rwl.w.lock();
		try {
			if (queryAnalyzer != null) {
				queryAnalyzer.close();
			}
			queryAnalyzer = null;
			if (indexAnalyzer != null) {
				indexAnalyzer.close();
			}
			indexAnalyzer = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the tokenizer used for indexation
	 */
	public TokenizerFactory getIndexTokenizer() {
		rwl.r.lock();
		try {
			return indexTokenizer;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @return the tokenizer used for queries
	 */
	public TokenizerFactory getQueryTokenizer() {
		rwl.r.lock();
		try {
			return queryTokenizer;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * Sets the indexation tokenizer and drops the cached index analyzer. The
	 * stored value is the result of {@code TokenizerFactory.create(tokenizer)},
	 * not the argument itself.
	 *
	 * @param tokenizer
	 *            the tokenizer to set
	 * @throws SearchLibException
	 *             inherited error
	 * @throws ClassNotFoundException
	 *             inherited error
	 */
	public void setIndexTokenizer(TokenizerFactory tokenizer) throws SearchLibException, ClassNotFoundException {
		rwl.w.lock();
		try {
			this.indexTokenizer = TokenizerFactory.create(tokenizer);
			indexAnalyzer = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Sets the query tokenizer and drops the cached query analyzer. The stored
	 * value is the result of {@code TokenizerFactory.create(tokenizer)}, not
	 * the argument itself.
	 *
	 * @param tokenizer
	 *            the tokenizer to set
	 * @throws SearchLibException
	 *             inherited error
	 * @throws ClassNotFoundException
	 *             inherited error
	 */
	public void setQueryTokenizer(TokenizerFactory tokenizer) throws SearchLibException, ClassNotFoundException {
		rwl.w.lock();
		try {
			this.queryTokenizer = TokenizerFactory.create(tokenizer);
			queryAnalyzer = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Returns the internal filter list itself, not a copy.
	 *
	 * @return the filters
	 */
	public List<FilterFactory> getFilters() {
		rwl.r.lock();
		try {
			return filters;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * Replaces the filter list by the given list (stored as is, not copied)
	 * and drops both cached compiled analyzers.
	 *
	 * @param filters
	 *            the filters to set
	 */
	public void setFilters(List<FilterFactory> filters) {
		rwl.w.lock();
		try {
			this.filters = filters;
			resetCompiledAnalyzers();
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the name
	 */
	public String getName() {
		rwl.r.lock();
		try {
			return name;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * Sets the name. A non-null name is interned before being stored.
	 *
	 * @param name
	 *            the name to set
	 */
	public void setName(String name) {
		rwl.w.lock();
		try {
			this.name = name == null ? null : name.intern();
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the lang
	 */
	public LanguageEnum getLang() {
		rwl.r.lock();
		try {
			return lang;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param lang
	 *            the lang to set
	 */
	public void setLang(LanguageEnum lang) {
		rwl.w.lock();
		try {
			this.lang = lang;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Appends a filter and drops both cached compiled analyzers.
	 *
	 * @param filter
	 *            the filter to append
	 * @throws SearchLibException
	 *             inherited error
	 */
	public void add(FilterFactory filter) throws SearchLibException {
		rwl.w.lock();
		try {
			filters.add(filter);
			resetCompiledAnalyzers();
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Appends all the given filters and drops both cached compiled analyzers.
	 *
	 * @param filters
	 *            the filters to append
	 */
	public void add(Collection<? extends FilterFactory> filters) {
		rwl.w.lock();
		try {
			this.filters.addAll(filters);
			resetCompiledAnalyzers();
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Replace the old filter by the new one. The old filter is located by
	 * reference ({@code ==}); only its first occurrence is replaced. Nothing
	 * happens when it is not found.
	 *
	 * @param oldFilter
	 *            removed filter
	 * @param newFilter
	 *            new filter
	 */
	public void replace(FilterFactory oldFilter, FilterFactory newFilter) {
		rwl.w.lock();
		try {
			for (int pos = 0; pos < filters.size(); pos++) {
				if (filters.get(pos) == oldFilter) {
					filters.set(pos, newFilter);
					resetCompiledAnalyzers();
					return;
				}
			}
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Builds an analyzer from its XML description. The node carries the "name"
	 * and "lang" attributes, an optional "tokenizer" attribute, and optional
	 * "tokenizer", "queryTokenizer" and "filter" child nodes. A "tokenizer"
	 * child node takes precedence over the "tokenizer" attribute. Without a
	 * "queryTokenizer" node, the query tokenizer is created from the index
	 * tokenizer.
	 *
	 * @param config
	 *            current config
	 * @param xpp
	 *            and XPathParser instance
	 * @param node
	 *            the current node
	 * @return a new analyzer, or null when the node is null
	 * @throws SearchLibException
	 *             inherited error
	 * @throws XPathExpressionException
	 *             inherited error
	 * @throws ClassNotFoundException
	 *             inherited error
	 * @throws DOMException
	 *             inherited error
	 */
	public static Analyzer fromXmlConfig(Config config, XPathParser xpp, Node node)
			throws SearchLibException, XPathExpressionException, DOMException, ClassNotFoundException {
		if (node == null) {
			return null;
		}
		Analyzer analyzer = new Analyzer(config, xpp, node);

		// Attribute form of the index tokenizer.
		String indexTokenizer = XPathParser.getAttributeString(node, "tokenizer");
		if (indexTokenizer != null) {
			analyzer.setIndexTokenizer(TokenizerFactory.create(config, indexTokenizer));
		}
		// Child-node form of the index tokenizer; overrides the attribute.
		NodeList nodes = xpp.getNodeList(node, "tokenizer");
		if (nodes.getLength() > 0) {
			analyzer.setIndexTokenizer(TokenizerFactory.create(config, nodes.item(0)));
		}

		nodes = xpp.getNodeList(node, "queryTokenizer");
		if (nodes.getLength() > 0) {
			analyzer.setQueryTokenizer(TokenizerFactory.create(config, nodes.item(0)));
		} else {
			// NOTE(review): if neither tokenizer form was present,
			// indexTokenizer is still null here; the outcome then depends on
			// TokenizerFactory.create - confirm.
			analyzer.setQueryTokenizer(TokenizerFactory.create(analyzer.indexTokenizer));
		}

		nodes = xpp.getNodeList(node, "filter");
		for (int i = 0; i < nodes.getLength(); i++) {
			analyzer.add(FilterFactory.create(config, nodes.item(i)));
		}
		return analyzer;
	}

	/**
	 * Writes this analyzer as an "analyzer" element holding the "name" and
	 * "lang" attributes, followed by the index tokenizer ("tokenizer"), the
	 * query tokenizer ("queryTokenizer") and the filters. Null tokenizers and
	 * a null filter list are skipped.
	 *
	 * @param writer
	 *            the writer receiving the XML
	 * @throws SAXException
	 *             inherited error
	 */
	public void writeXmlConfig(XmlWriter writer) throws SAXException {
		rwl.r.lock();
		try {
			writer.startElement("analyzer", "name", getName(), "lang", lang != null ? lang.getCode() : null);
			if (indexTokenizer != null) {
				indexTokenizer.writeXmlConfig("tokenizer", writer);
			}
			if (queryTokenizer != null) {
				queryTokenizer.writeXmlConfig("queryTokenizer", writer);
			}
			if (filters != null) {
				for (FilterFactory filter : filters) {
					filter.writeXmlConfig(writer);
				}
			}
			writer.endElement();
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * Move filter up by one position. Nothing happens when the filter is not
	 * in the list or is already first.
	 *
	 * @param filter
	 *            the filter to move
	 */
	public void filterUp(FilterFactory filter) {
		rwl.w.lock();
		try {
			int i = filters.indexOf(filter);
			// indexOf yields -1 when absent; 0 means already at the top.
			if (i <= 0) {
				return;
			}
			filters.remove(i);
			filters.add(i - 1, filter);
			resetCompiledAnalyzers();
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Move filter down by one position. Nothing happens when the filter is
	 * not in the list or is already last.
	 *
	 * @param filter
	 *            the filter to move
	 */
	public void filterDown(FilterFactory filter) {
		rwl.w.lock();
		try {
			int i = filters.indexOf(filter);
			if (i == -1 || i == filters.size() - 1) {
				return;
			}
			filters.remove(i);
			filters.add(i + 1, filter);
			resetCompiledAnalyzers();
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Remove the filter and drop both cached compiled analyzers (the cache is
	 * dropped even when the filter was not in the list).
	 *
	 * @param filter
	 *            the filter to remove
	 */
	public void filterRemove(FilterFactory filter) {
		rwl.w.lock();
		try {
			filters.remove(filter);
			resetCompiledAnalyzers();
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Returns the compiled analyzer for queries, building and caching it on
	 * first use.
	 *
	 * @return the compiled analyzer for queries
	 * @throws SearchLibException
	 *             inherited error
	 */
	public CompiledAnalyzer getQueryAnalyzer() throws SearchLibException {
		// Fast path: already built, the read lock is enough.
		rwl.r.lock();
		try {
			if (queryAnalyzer != null) {
				return queryAnalyzer;
			}
		} finally {
			rwl.r.unlock();
		}
		rwl.w.lock();
		try {
			// Re-check: another thread may have built it in between.
			if (queryAnalyzer == null) {
				queryAnalyzer = new CompiledAnalyzer(queryTokenizer, filters, FilterScope.QUERY);
			}
			return queryAnalyzer;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * Returns the compiled analyzer for indexation, building and caching it on
	 * first use.
	 *
	 * @return the compiled analyzer for indexation
	 * @throws SearchLibException
	 *             inherited error
	 */
	public CompiledAnalyzer getIndexAnalyzer() throws SearchLibException {
		// Fast path: already built, the read lock is enough.
		rwl.r.lock();
		try {
			if (indexAnalyzer != null) {
				return indexAnalyzer;
			}
		} finally {
			rwl.r.unlock();
		}
		rwl.w.lock();
		try {
			// Re-check: another thread may have built it in between.
			if (indexAnalyzer == null) {
				indexAnalyzer = new CompiledAnalyzer(indexTokenizer, filters, FilterScope.INDEX);
			}
			return indexAnalyzer;
		} finally {
			rwl.w.unlock();
		}
	}
}