
// com.jaeksoft.searchlib.analysis.Analyzer Maven / Gradle / Ivy
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2008-2014 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.analysis;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import javax.xml.xpath.XPathExpressionException;
import org.w3c.dom.DOMException;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.tokenizer.TokenizerFactory;
import com.jaeksoft.searchlib.config.Config;
import com.jaeksoft.searchlib.util.ReadWriteLock;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
/**
 * Editable definition of an analyzer: a name, a language, one tokenizer for
 * indexation, one tokenizer for queries and an ordered list of filters.
 * <p>
 * The definition is turned on demand into {@link CompiledAnalyzer} instances
 * (one for the {@link FilterScope#QUERY} scope, one for the
 * {@link FilterScope#INDEX} scope) which are cached until the definition is
 * modified.
 * <p>
 * Instance methods read the fields while holding the read lock of
 * {@link #rwl} and modify them while holding its write lock.
 */
public class Analyzer {

    /** Lock guarding all the mutable fields below. */
    private final ReadWriteLock rwl = new ReadWriteLock();

    private TokenizerFactory queryTokenizer;

    private TokenizerFactory indexTokenizer;

    /** Ordered filter chain handed to the compiled analyzers. */
    private List<FilterFactory> filters;

    private String name;

    private LanguageEnum lang;

    private Config config;

    /** Cached compiled analyzer for queries, {@code null} until requested. */
    private CompiledAnalyzer queryAnalyzer;

    /** Cached compiled analyzer for indexation, {@code null} until requested. */
    private CompiledAnalyzer indexAnalyzer;

    /**
     * Creates an unnamed analyzer with an undefined language, the default
     * tokenizer for both indexation and queries, and no filter.
     *
     * @param config
     *            the configuration used to obtain the default tokenizer
     * @throws SearchLibException
     * @throws ClassNotFoundException
     */
    public Analyzer(Config config) throws SearchLibException,
            ClassNotFoundException {
        name = null;
        this.config = config;
        lang = LanguageEnum.UNDEFINED;
        setQueryTokenizer(TokenizerFactory.getDefaultTokenizer(config));
        setIndexTokenizer(TokenizerFactory.getDefaultTokenizer(config));
        filters = new ArrayList<FilterFactory>();
        queryAnalyzer = null;
        indexAnalyzer = null;
    }

    /**
     * Replaces the definition of this analyzer by a copy of the definition of
     * the given analyzer. The cached compiled analyzers of this instance are
     * discarded.
     *
     * @param source
     *            the analyzer to copy from
     * @throws SearchLibException
     * @throws ClassNotFoundException
     */
    public void copyFrom(Analyzer source) throws SearchLibException,
            ClassNotFoundException {
        rwl.w.lock();
        try {
            source.copyTo(this);
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Copies the definition of this analyzer into the target. The tokenizers
     * and the filters are duplicated, the configuration reference is shared.
     * <p>
     * This method only takes the read lock of this instance; the caller is
     * expected to hold the write lock of the target, as {@link #copyFrom}
     * does.
     *
     * @param target
     *            the analyzer to overwrite
     * @throws SearchLibException
     * @throws ClassNotFoundException
     */
    private void copyTo(Analyzer target) throws SearchLibException,
            ClassNotFoundException {
        rwl.r.lock();
        try {
            // Build every copy before touching the target. This way a failure
            // while duplicating leaves the target unchanged, and copying an
            // analyzer onto itself does not iterate over a list that was just
            // replaced by an empty one.
            TokenizerFactory queryTokenizerCopy = (TokenizerFactory) ClassFactory
                    .create(queryTokenizer);
            TokenizerFactory indexTokenizerCopy = (TokenizerFactory) ClassFactory
                    .create(indexTokenizer);
            List<FilterFactory> filtersCopy = new ArrayList<FilterFactory>();
            for (FilterFactory filter : filters) {
                filtersCopy.add((FilterFactory) FilterFactory.create(filter));
            }

            target.name = this.name;
            target.lang = this.lang;
            target.queryTokenizer = queryTokenizerCopy;
            target.indexTokenizer = indexTokenizerCopy;
            target.filters = filtersCopy;
            target.config = this.config;
            target.queryAnalyzer = null;
            target.indexAnalyzer = null;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Creates an analyzer whose name and language are read from the
     * {@code name} and {@code lang} attributes of the given node. The
     * tokenizers are left unset and the filter list is empty; they are filled
     * by {@link #fromXmlConfig}.
     *
     * @param config
     *            the configuration to attach to the analyzer
     * @param xpp
     *            not used by this constructor
     * @param node
     *            the XML node holding the attributes
     * @throws SearchLibException
     */
    private Analyzer(Config config, XPathParser xpp, Node node)
            throws SearchLibException {
        this.config = config;
        this.name = XPathParser.getAttributeString(node, "name");
        this.lang = LanguageEnum.findByCode(XPathParser.getAttributeString(
                node, "lang"));
        this.filters = new ArrayList<FilterFactory>();
        this.queryAnalyzer = null;
        this.indexAnalyzer = null;
    }

    /**
     * Drops both cached compiled analyzers without closing them. Must be
     * called while holding the write lock.
     */
    private void resetCompiledAnalyzers() {
        queryAnalyzer = null;
        indexAnalyzer = null;
    }

    /**
     * Closes and discards the cached compiled analyzers. They are built again
     * on the next call to {@link #getQueryAnalyzer()} or
     * {@link #getIndexAnalyzer()}.
     */
    public void recompile() {
        rwl.w.lock();
        try {
            if (queryAnalyzer != null) {
                queryAnalyzer.close();
            }
            queryAnalyzer = null;
            if (indexAnalyzer != null) {
                indexAnalyzer.close();
            }
            indexAnalyzer = null;
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * @return the tokenizer used for indexation
     */
    public TokenizerFactory getIndexTokenizer() {
        rwl.r.lock();
        try {
            return indexTokenizer;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * @return the tokenizer used for queries
     */
    public TokenizerFactory getQueryTokenizer() {
        rwl.r.lock();
        try {
            return queryTokenizer;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Sets the tokenizer used for indexation. The value stored is the result
     * of {@code TokenizerFactory.create(tokenizer)}, and the cached index
     * analyzer is discarded.
     *
     * @param tokenizer
     *            the tokenizer to set
     * @throws SearchLibException
     * @throws ClassNotFoundException
     */
    public void setIndexTokenizer(TokenizerFactory tokenizer)
            throws SearchLibException, ClassNotFoundException {
        rwl.w.lock();
        try {
            this.indexTokenizer = TokenizerFactory.create(tokenizer);
            indexAnalyzer = null;
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Sets the tokenizer used for queries. The value stored is the result of
     * {@code TokenizerFactory.create(tokenizer)}, and the cached query
     * analyzer is discarded.
     *
     * @param tokenizer
     *            the tokenizer to set
     * @throws SearchLibException
     * @throws ClassNotFoundException
     */
    public void setQueryTokenizer(TokenizerFactory tokenizer)
            throws SearchLibException, ClassNotFoundException {
        rwl.w.lock();
        try {
            this.queryTokenizer = TokenizerFactory.create(tokenizer);
            queryAnalyzer = null;
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Returns the internal filter list itself, not a copy. Changes made
     * directly on the returned list are not guarded by the lock and do not
     * discard the cached compiled analyzers.
     *
     * @return the filters
     */
    public List<FilterFactory> getFilters() {
        rwl.r.lock();
        try {
            return filters;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Replaces the filter list. The given list is stored as is (no copy) and
     * the cached compiled analyzers are discarded.
     *
     * @param filters
     *            the filters to set
     */
    public void setFilters(List<FilterFactory> filters) {
        rwl.w.lock();
        try {
            this.filters = filters;
            resetCompiledAnalyzers();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * @return the name
     */
    public String getName() {
        rwl.r.lock();
        try {
            return name;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Sets the name. A non-null name is stored in its interned form.
     *
     * @param name
     *            the name to set
     */
    public void setName(String name) {
        rwl.w.lock();
        try {
            this.name = name == null ? null : name.intern();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * @return the lang
     */
    public LanguageEnum getLang() {
        rwl.r.lock();
        try {
            return lang;
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * @param lang
     *            the lang to set
     */
    public void setLang(LanguageEnum lang) {
        // Guarded by the write lock like every other setter; no additional
        // monitor is taken.
        rwl.w.lock();
        try {
            this.lang = lang;
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Appends a filter at the end of the filter list and discards the cached
     * compiled analyzers.
     *
     * @param filter
     *            the filter to append
     * @throws SearchLibException
     */
    public void add(FilterFactory filter) throws SearchLibException {
        rwl.w.lock();
        try {
            filters.add(filter);
            resetCompiledAnalyzers();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Appends all the given filters at the end of the filter list and
     * discards the cached compiled analyzers.
     *
     * @param filters
     *            the filters to append
     */
    public void add(Collection<? extends FilterFactory> filters) {
        rwl.w.lock();
        try {
            this.filters.addAll(filters);
            resetCompiledAnalyzers();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Replace the old filter by the new one. The old filter is located by
     * reference ({@code ==}); only its first occurrence is replaced. Nothing
     * happens when the old filter is not in the list.
     *
     * @param oldFilter
     *            the filter instance to look for
     * @param newFilter
     *            the filter to put at the same position
     */
    public void replace(FilterFactory oldFilter, FilterFactory newFilter) {
        rwl.w.lock();
        try {
            for (int pos = 0; pos < filters.size(); pos++) {
                if (filters.get(pos) == oldFilter) {
                    filters.set(pos, newFilter);
                    resetCompiledAnalyzers();
                    return;
                }
            }
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Builds an analyzer from its XML description.
     * <p>
     * The index tokenizer is taken from the {@code tokenizer} attribute when
     * present, then overridden by the first {@code tokenizer} node when there
     * is one. The query tokenizer is taken from the first
     * {@code queryTokenizer} node, or else created from the index tokenizer.
     * Every {@code filter} node is appended in document order.
     *
     * @param config
     *            the configuration passed to the factories
     * @param xpp
     *            the parser used to look up the child nodes
     * @param node
     *            the analyzer node, may be {@code null}
     * @return the analyzer, or {@code null} when the node is {@code null}
     * @throws SearchLibException
     * @throws XPathExpressionException
     * @throws ClassNotFoundException
     * @throws DOMException
     */
    public static Analyzer fromXmlConfig(Config config, XPathParser xpp,
            Node node) throws SearchLibException, XPathExpressionException,
            DOMException, ClassNotFoundException {
        if (node == null) {
            return null;
        }
        Analyzer analyzer = new Analyzer(config, xpp, node);

        String tokenizerAttribute = XPathParser.getAttributeString(node,
                "tokenizer");
        if (tokenizerAttribute != null) {
            analyzer.setIndexTokenizer(TokenizerFactory.create(config,
                    tokenizerAttribute));
        }
        NodeList nodes = xpp.getNodeList(node, "tokenizer");
        if (nodes.getLength() > 0) {
            analyzer.setIndexTokenizer(TokenizerFactory.create(config,
                    nodes.item(0)));
        }

        nodes = xpp.getNodeList(node, "queryTokenizer");
        if (nodes.getLength() > 0) {
            analyzer.setQueryTokenizer(TokenizerFactory.create(config,
                    nodes.item(0)));
        } else {
            // NOTE(review): when neither a tokenizer attribute nor a
            // tokenizer node was found, indexTokenizer is still null here.
            // How TokenizerFactory.create handles null is not visible from
            // this file -- confirm.
            analyzer.setQueryTokenizer(TokenizerFactory
                    .create(analyzer.indexTokenizer));
        }

        nodes = xpp.getNodeList(node, "filter");
        for (int i = 0; i < nodes.getLength(); i++) {
            analyzer.add(FilterFactory.create(config, nodes.item(i)));
        }
        return analyzer;
    }

    /**
     * Writes this analyzer as an {@code analyzer} element carrying the
     * {@code name} and {@code lang} attributes, followed by the index
     * tokenizer ({@code tokenizer}), the query tokenizer
     * ({@code queryTokenizer}) and the filters. Unset tokenizers and an unset
     * filter list are skipped.
     *
     * @param writer
     *            the writer receiving the XML
     * @throws SAXException
     */
    public void writeXmlConfig(XmlWriter writer) throws SAXException {
        rwl.r.lock();
        try {
            // The read lock is already held: read the field directly rather
            // than going through getName() and locking a second time.
            writer.startElement("analyzer", "name", name, "lang",
                    lang != null ? lang.getCode() : null);
            if (indexTokenizer != null) {
                indexTokenizer.writeXmlConfig("tokenizer", writer);
            }
            if (queryTokenizer != null) {
                queryTokenizer.writeXmlConfig("queryTokenizer", writer);
            }
            if (filters != null) {
                for (FilterFactory filter : filters) {
                    filter.writeXmlConfig(writer);
                }
            }
            writer.endElement();
        } finally {
            rwl.r.unlock();
        }
    }

    /**
     * Move filter up: the filter is moved one position towards the beginning
     * of the list. Nothing happens when the filter is not in the list or is
     * already the first one.
     *
     * @param filter
     *            the filter to move
     */
    public void filterUp(FilterFactory filter) {
        rwl.w.lock();
        try {
            int index = filters.indexOf(filter);
            if (index <= 0) {
                return;
            }
            filters.remove(index);
            filters.add(index - 1, filter);
            resetCompiledAnalyzers();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Move filter down: the filter is moved one position towards the end of
     * the list. Nothing happens when the filter is not in the list or is
     * already the last one.
     *
     * @param filter
     *            the filter to move
     */
    public void filterDown(FilterFactory filter) {
        rwl.w.lock();
        try {
            int index = filters.indexOf(filter);
            if (index == -1 || index == filters.size() - 1) {
                return;
            }
            filters.remove(index);
            filters.add(index + 1, filter);
            resetCompiledAnalyzers();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Remove the filter. The cached compiled analyzers are discarded whether
     * or not the filter was present.
     *
     * @param filter
     *            the filter to remove
     */
    public void filterRemove(FilterFactory filter) {
        rwl.w.lock();
        try {
            filters.remove(filter);
            resetCompiledAnalyzers();
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Returns the compiled analyzer for queries. The cached instance is
     * returned when there is one; otherwise it is built from the query
     * tokenizer and the filters with the {@link FilterScope#QUERY} scope, and
     * cached.
     *
     * @return the compiled analyzer for queries
     * @throws SearchLibException
     */
    public CompiledAnalyzer getQueryAnalyzer() throws SearchLibException {
        rwl.r.lock();
        try {
            if (queryAnalyzer != null) {
                return queryAnalyzer;
            }
        } finally {
            rwl.r.unlock();
        }
        rwl.w.lock();
        try {
            // Checked again: the analyzer may have been built between the
            // release of the read lock and the acquisition of the write lock.
            if (queryAnalyzer == null) {
                queryAnalyzer = new CompiledAnalyzer(queryTokenizer, filters,
                        FilterScope.QUERY);
            }
            return queryAnalyzer;
        } finally {
            rwl.w.unlock();
        }
    }

    /**
     * Returns the compiled analyzer for indexation. The cached instance is
     * returned when there is one; otherwise it is built from the index
     * tokenizer and the filters with the {@link FilterScope#INDEX} scope, and
     * cached.
     *
     * @return the compiled analyzer for indexation
     * @throws SearchLibException
     */
    public CompiledAnalyzer getIndexAnalyzer() throws SearchLibException {
        rwl.r.lock();
        try {
            if (indexAnalyzer != null) {
                return indexAnalyzer;
            }
        } finally {
            rwl.r.unlock();
        }
        rwl.w.lock();
        try {
            // Checked again: the analyzer may have been built between the
            // release of the read lock and the acquisition of the write lock.
            if (indexAnalyzer == null) {
                indexAnalyzer = new CompiledAnalyzer(indexTokenizer, filters,
                        FilterScope.INDEX);
            }
            return indexAnalyzer;
        } finally {
            rwl.w.unlock();
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy