com.jaeksoft.searchlib.request.MoreLikeThisRequest Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you will be able to quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
The newest version!
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2012-2014 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.request;
import java.io.IOException;
import java.io.StringReader;
import java.util.Set;
import javax.xml.xpath.XPathExpressionException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.w3c.dom.DOMException;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.Analyzer;
import com.jaeksoft.searchlib.analysis.LanguageEnum;
import com.jaeksoft.searchlib.analysis.filter.stop.WordArray;
import com.jaeksoft.searchlib.config.Config;
import com.jaeksoft.searchlib.filter.FilterAbstract;
import com.jaeksoft.searchlib.filter.FilterList;
import com.jaeksoft.searchlib.filter.QueryFilter;
import com.jaeksoft.searchlib.function.expression.SyntaxError;
import com.jaeksoft.searchlib.index.IndexAbstract;
import com.jaeksoft.searchlib.index.ReaderInterface;
import com.jaeksoft.searchlib.index.ReaderLocal;
import com.jaeksoft.searchlib.query.ParseException;
import com.jaeksoft.searchlib.result.AbstractResult;
import com.jaeksoft.searchlib.result.AbstractResultSearch;
import com.jaeksoft.searchlib.result.ResultMoreLikeThis;
import com.jaeksoft.searchlib.schema.SchemaFieldList;
import com.jaeksoft.searchlib.util.StringUtils;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
import com.jaeksoft.searchlib.web.ServletTransaction;
/**
 * Request that finds documents similar to a reference document or text by
 * building a Lucene {@link MoreLikeThis} query.
 * <p>
 * The reference is either the first document matched by {@code docQuery}
 * or, failing that, the raw {@code likeText}. The resulting query is cached
 * in {@code mltQuery}; setters that change a query parameter reset that
 * cache to {@code null}. Field access goes through the inherited
 * {@code rwl} read/write lock.
 */
public class MoreLikeThisRequest extends AbstractRequest implements
RequestInterfaces.FilterListInterface,
RequestInterfaces.ReturnedFieldInterface {
// Search query whose first matching document is used as the reference.
private String docQuery;
// Raw reference text, used when docQuery is null or empty.
private String likeText;
// Language used, together with analyzerName, to look up the analyzer.
private LanguageEnum lang;
// Name of the schema analyzer; when null no analyzer is set on MoreLikeThis.
private String analyzerName;
// Fields whose names are passed to MoreLikeThis.setFieldNames.
private ReturnFieldList fieldList;
// The six tuning values below are passed to the MoreLikeThis setters of the same name.
private int minWordLen;
private int maxWordLen;
private int minDocFreq;
private int minTermFreq;
private int maxNumTokensParsed;
private int maxQueryTerms;
// Passed to MoreLikeThis.setBoost.
private boolean boost;
// Key handed to the stop words manager to obtain the stop word set
// (presumably the name of a stop word list -- confirm against StopWordsManager).
private String stopWords;
// Fields exposed through getReturnFieldList().
private ReturnFieldList returnFieldList;
// Filters attached to this request.
private FilterList filterList;
// Paging window: getEnd() returns start + rows.
private int start;
private int rows;
// Cached query built by getQuery(); null means "not built yet".
private Query mltQuery;
/**
 * Creates a request that is not bound to a configuration: a {@code null}
 * config is handed to the parent constructor.
 */
public MoreLikeThisRequest() {
super(null, RequestTypeEnum.MoreLikeThisRequest);
}
/**
 * Creates a request bound to the given configuration.
 *
 * @param config
 *            the configuration handed to the parent constructor
 */
public MoreLikeThisRequest(Config config) {
super(config, RequestTypeEnum.MoreLikeThisRequest);
}
/**
 * Puts every field of this request back to its initial value: empty
 * lists, no reference document or text, the Lucene {@link MoreLikeThis}
 * defaults for the tuning parameters, boost enabled, and a paging window
 * of 10 rows starting at 0.
 */
@Override
protected void setDefaultValues() {
	super.setDefaultValues();

	// Collections owned by this request.
	filterList = new FilterList(this.config);
	returnFieldList = new ReturnFieldList();
	fieldList = new ReturnFieldList();

	// No reference document, text, analyzer or stop words yet.
	docQuery = null;
	likeText = null;
	analyzerName = null;
	stopWords = null;
	lang = LanguageEnum.UNDEFINED;

	// Tuning parameters start from the Lucene defaults.
	minWordLen = MoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
	maxWordLen = MoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
	minDocFreq = MoreLikeThis.DEFAULT_MIN_DOC_FREQ;
	minTermFreq = MoreLikeThis.DEFAULT_MIN_TERM_FREQ;
	maxNumTokensParsed = MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED;
	maxQueryTerms = MoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
	boost = true;

	// Paging window and query cache.
	start = 0;
	rows = 10;
	mltQuery = null;
}
/**
 * Copies the state of another {@link MoreLikeThisRequest} into this one.
 * <p>
 * The field, filter and return-field lists are duplicated through their
 * copy constructors; every other value, including the cached query and
 * the paging window ({@code start} / {@code rows}), is copied as is.
 *
 * @param request
 *            the request to copy from; it is cast to
 *            {@link MoreLikeThisRequest}, so any other type raises a
 *            {@link ClassCastException}
 */
@Override
public void copyFrom(AbstractRequest request) {
	super.copyFrom(request);
	MoreLikeThisRequest mltRequest = (MoreLikeThisRequest) request;
	this.analyzerName = mltRequest.analyzerName;
	this.lang = mltRequest.lang;
	this.fieldList = new ReturnFieldList(mltRequest.fieldList);
	this.minWordLen = mltRequest.minWordLen;
	this.maxWordLen = mltRequest.maxWordLen;
	this.minDocFreq = mltRequest.minDocFreq;
	this.minTermFreq = mltRequest.minTermFreq;
	this.stopWords = mltRequest.stopWords;
	this.docQuery = mltRequest.docQuery;
	this.likeText = mltRequest.likeText;
	this.boost = mltRequest.boost;
	this.maxNumTokensParsed = mltRequest.maxNumTokensParsed;
	this.maxQueryTerms = mltRequest.maxQueryTerms;
	this.filterList = new FilterList(mltRequest.filterList);
	this.returnFieldList = new ReturnFieldList(mltRequest.returnFieldList);
	// The paging window belongs to the request state too; without these
	// two lines a copy would keep its own start/rows values.
	this.start = mltRequest.start;
	this.rows = mltRequest.rows;
	this.mltQuery = mltRequest.mltQuery;
}
@Override
public Query getQuery() throws SearchLibException, IOException {
rwl.r.lock();
try {
if (mltQuery != null)
return mltQuery;
} finally {
rwl.r.unlock();
}
rwl.w.lock();
try {
if (mltQuery != null)
return mltQuery;
Config config = getConfig();
IndexAbstract index = config.getIndexAbstract();
MoreLikeThis mlt = index.getMoreLikeThis();
mlt.setMinWordLen(minWordLen);
mlt.setMaxWordLen(maxWordLen);
mlt.setMinDocFreq(minDocFreq);
mlt.setMinTermFreq(minTermFreq);
mlt.setMaxNumTokensParsed(maxNumTokensParsed);
mlt.setMaxQueryTerms(maxQueryTerms);
mlt.setFieldNames(fieldList.getArrayName());
mlt.setBoost(boost);
if (analyzerName != null) {
Analyzer analyzer = config.getSchema().getAnalyzerList()
.get(analyzerName, lang);
if (analyzer != null)
mlt.setAnalyzer(analyzer.getQueryAnalyzer());
}
if (stopWords != null && stopWords.length() > 0) {
WordArray wordArray = getConfig().getStopWordsManager()
.getWordArray(stopWords, false);
if (wordArray != null) {
Set stopWords = wordArray.getWordSet();
if (stopWords != null)
mlt.setStopWords(stopWords);
}
}
if (docQuery != null && docQuery.length() > 0) {
AbstractSearchRequest searchRequest = new SearchPatternRequest(
config);
searchRequest.setRows(1);
searchRequest.setQueryString(docQuery);
AbstractResultSearch> result = (AbstractResultSearch>) index
.request(searchRequest);
if (result.getNumFound() == 0)
return mlt.like(new StringReader(""));
int docId = result.getDocs().getIds()[0];
mltQuery = mlt.like(docId);
} else if (likeText != null & likeText.length() > 0) {
mltQuery = mlt.like(new StringReader(likeText));
}
return mltQuery;
} finally {
rwl.w.unlock();
}
}
/**
 * Reads the document query under the read lock.
 *
 * @return the query used to locate the reference document, possibly
 *         {@code null}
 */
public String getDocQuery() {
	final String current;
	rwl.r.lock();
	try {
		current = this.docQuery;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new document query under the write lock and drops the cached
 * query so that it is rebuilt on the next {@code getQuery()} call.
 *
 * @param docQuery
 *            the query used to locate the reference document
 */
public void setDocQuery(String docQuery) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.docQuery = docQuery;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the reference text under the read lock.
 *
 * @return the text similar documents are searched for, possibly
 *         {@code null}
 */
public String getLikeText() {
	final String current;
	rwl.r.lock();
	try {
		current = this.likeText;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new reference text under the write lock and drops the cached
 * query so that it is rebuilt on the next {@code getQuery()} call.
 *
 * @param likeText
 *            the text similar documents are searched for
 */
public void setLikeText(String likeText) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.likeText = likeText;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the language under the read lock.
 *
 * @return the language used when looking up the analyzer
 */
public LanguageEnum getLang() {
	final LanguageEnum current;
	rwl.r.lock();
	try {
		current = lang;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new language under the write lock.
 * <p>
 * The language takes part in the analyzer lookup performed by
 * {@code getQuery()}, so the cached query is dropped as well; otherwise a
 * query built with the previous language would keep being returned.
 *
 * @param lang
 *            the language to set
 */
public void setLang(LanguageEnum lang) {
	rwl.w.lock();
	try {
		this.lang = lang;
		mltQuery = null;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the analyzer name under the read lock.
 *
 * @return the name of the analyzer looked up in the schema, possibly
 *         {@code null}
 */
public String getAnalyzerName() {
	final String current;
	rwl.r.lock();
	try {
		current = analyzerName;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new analyzer name under the write lock.
 * <p>
 * The analyzer name takes part in the analyzer lookup performed by
 * {@code getQuery()}, so the cached query is dropped as well; otherwise a
 * query built with the previous analyzer would keep being returned.
 *
 * @param analyzerName
 *            the name of the analyzer to look up in the schema, or
 *            {@code null} to set no analyzer
 */
public void setAnalyzerName(String analyzerName) {
	rwl.w.lock();
	try {
		this.analyzerName = analyzerName;
		mltQuery = null;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads, under the read lock, the list of fields whose names are given to
 * {@link MoreLikeThis} when the query is built. The list itself is
 * returned, not a copy.
 *
 * @return the field list
 */
public ReturnFieldList getFieldList() {
	final ReturnFieldList current;
	rwl.r.lock();
	try {
		current = this.fieldList;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Reads the minimum word length under the read lock.
 *
 * @return the value passed to {@code MoreLikeThis.setMinWordLen}
 */
public int getMinWordLen() {
	final int current;
	rwl.r.lock();
	try {
		current = this.minWordLen;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new minimum word length under the write lock and drops the
 * cached query.
 *
 * @param minWordLen
 *            the value to pass to {@code MoreLikeThis.setMinWordLen}
 */
public void setMinWordLen(int minWordLen) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.minWordLen = minWordLen;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the maximum word length under the read lock.
 *
 * @return the value passed to {@code MoreLikeThis.setMaxWordLen}
 */
public int getMaxWordLen() {
	final int current;
	rwl.r.lock();
	try {
		current = this.maxWordLen;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new maximum word length under the write lock and drops the
 * cached query.
 *
 * @param maxWordLen
 *            the value to pass to {@code MoreLikeThis.setMaxWordLen}
 */
public void setMaxWordLen(int maxWordLen) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.maxWordLen = maxWordLen;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the minimum document frequency under the read lock.
 *
 * @return the value passed to {@code MoreLikeThis.setMinDocFreq}
 */
public int getMinDocFreq() {
	final int current;
	rwl.r.lock();
	try {
		current = this.minDocFreq;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new minimum document frequency under the write lock and drops
 * the cached query.
 *
 * @param minDocFreq
 *            the value to pass to {@code MoreLikeThis.setMinDocFreq}
 */
public void setMinDocFreq(int minDocFreq) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.minDocFreq = minDocFreq;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the minimum term frequency under the read lock.
 *
 * @return the value passed to {@code MoreLikeThis.setMinTermFreq}
 */
public int getMinTermFreq() {
	final int current;
	rwl.r.lock();
	try {
		current = this.minTermFreq;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new minimum term frequency under the write lock and drops the
 * cached query.
 *
 * @param minTermFreq
 *            the value to pass to {@code MoreLikeThis.setMinTermFreq}
 */
public void setMinTermFreq(int minTermFreq) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.minTermFreq = minTermFreq;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the stop words key under the read lock.
 *
 * @return the value handed to the stop words manager when the query is
 *         built, possibly {@code null}
 */
public String getStopWords() {
	final String current;
	rwl.r.lock();
	try {
		current = this.stopWords;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new stop words key under the write lock and drops the cached
 * query.
 *
 * @param stopWords
 *            the value to hand to the stop words manager when the query
 *            is built
 */
public void setStopWords(String stopWords) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.stopWords = stopWords;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the boost flag under the read lock.
 *
 * @return the value passed to {@code MoreLikeThis.setBoost}
 */
public boolean getBoost() {
	final boolean current;
	rwl.r.lock();
	try {
		current = this.boost;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new boost flag under the write lock and drops the cached
 * query.
 *
 * @param boost
 *            the value to pass to {@code MoreLikeThis.setBoost}
 */
public void setBoost(boolean boost) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.boost = boost;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the filter list under the read lock. The list itself is returned,
 * not a copy.
 *
 * @return the filters attached to this request
 */
@Override
public FilterList getFilterList() {
	final FilterList current;
	rwl.r.lock();
	try {
		current = filterList;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Appends a query filter, tagged as coming from the request, to the
 * filter list and drops the cached query. Runs under the write lock.
 *
 * @param req
 *            the filter query
 * @param negative
 *            the negative flag given to the filter
 * @throws ParseException
 *             declared for the creation of the filter
 */
@Override
public void addFilter(String req, boolean negative) throws ParseException {
	rwl.w.lock();
	try {
		final QueryFilter requestFilter = new QueryFilter(req, negative,
				FilterAbstract.Source.REQUEST, null);
		filterList.add(requestFilter);
		mltQuery = null;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the returned-field list under the read lock. The list itself is
 * returned, not a copy.
 *
 * @return the list of returned fields
 */
@Override
public ReturnFieldList getReturnFieldList() {
	final ReturnFieldList current;
	rwl.r.lock();
	try {
		current = returnFieldList;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Adds a returned field and drops the cached query. Runs under the write
 * lock.
 * <p>
 * The field is looked up in the schema of the configuration and the name
 * reported by the schema entry is the one that gets stored. No check is
 * made on the lookup result.
 *
 * @param fieldName
 *            the name used to look the field up in the schema
 */
@Override
public void addReturnField(String fieldName) {
	rwl.w.lock();
	try {
		final ReturnField returnField = new ReturnField(config.getSchema()
				.getFieldList().get(fieldName).getName());
		returnFieldList.put(returnField);
		mltQuery = null;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Loads this request from its XML configuration node.
 * <p>
 * After the parent has read its own part, the scalar settings are taken
 * from the attributes of {@code node}, then the child elements are read
 * in this order: {@code fields/field}, {@code docQuery},
 * {@code likeText}, {@code filters/filter}, the text of
 * {@code returnFields} and finally {@code returnFields/field}.
 *
 * @param config
 *            the configuration whose schema supplies the field list used
 *            to filter the returned fields
 * @param xpp
 *            the XPath helper used to navigate below {@code node}
 * @param node
 *            the XML node describing the request
 */
@Override
protected void fromXmlConfigNoLock(Config config, XPathParser xpp, Node node)
		throws XPathExpressionException, DOMException, ParseException,
		InstantiationException, IllegalAccessException,
		ClassNotFoundException {
	super.fromXmlConfigNoLock(config, xpp, node);

	// Scalar settings stored as attributes of the request node.
	final String langCode = XPathParser.getAttributeString(node, "lang");
	setLang(LanguageEnum.findByCode(langCode));
	setAnalyzerName(XPathParser.getAttributeString(node, "analyzer"));
	setMinWordLen(XPathParser.getAttributeValue(node, "minWordLen"));
	setMaxWordLen(XPathParser.getAttributeValue(node, "maxWordLen"));
	setMinTermFreq(XPathParser.getAttributeValue(node, "minTermFreq"));
	setMinDocFreq(XPathParser.getAttributeValue(node, "minDocFreq"));
	setMaxNumTokensParsed(XPathParser.getAttributeValue(node,
			"maxNumTokensParsed"));
	setMaxQueryTerms(XPathParser.getAttributeValue(node, "maxQueryTerms"));
	final String boostAttribute = XPathParser.getAttributeString(node,
			"boost");
	setBoost(Boolean.TRUE.toString().equalsIgnoreCase(boostAttribute));
	setStopWords(XPathParser.getAttributeString(node, "stopWords"));
	setStart(XPathParser.getAttributeValue(node, "start"));
	setRows(XPathParser.getAttributeValue(node, "rows"));

	// Fields the similarity is computed on.
	final NodeList similarityFieldNodes = xpp.getNodeList(node,
			"fields/field");
	if (similarityFieldNodes != null) {
		final ReturnFieldList similarityFields = getFieldList();
		for (int pos = 0; pos < similarityFieldNodes.getLength(); pos++) {
			final ReturnField parsed = ReturnField
					.fromXmlConfig(similarityFieldNodes.item(pos));
			if (parsed != null)
				similarityFields.put(parsed);
		}
	}

	// Reference document query and reference text.
	final Node docQueryNode = xpp.getNode(node, "docQuery");
	if (docQueryNode != null)
		setDocQuery(xpp.getNodeString(docQueryNode, false));
	final Node likeTextNode = xpp.getNode(node, "likeText");
	if (likeTextNode != null)
		setLikeText(xpp.getNodeString(likeTextNode, false));

	// Filters: the "negative" attribute must be exactly "yes" to negate.
	final NodeList filterNodes = xpp.getNodeList(node, "filters/filter");
	for (int pos = 0; pos < filterNodes.getLength(); pos++) {
		final Node filterNode = filterNodes.item(pos);
		final String filterQuery = xpp.getNodeString(filterNode, false);
		final boolean negative = "yes".equals(XPathParser
				.getAttributeString(filterNode, "negative"));
		filterList.add(new QueryFilter(filterQuery, negative,
				FilterAbstract.Source.CONFIGXML, null));
	}

	// Returned fields: first the text of <returnFields>, filtered against
	// the schema, then each <field> child.
	final SchemaFieldList schemaFields = config.getSchema().getFieldList();
	returnFieldList.filterCopy(schemaFields,
			xpp.getNodeString(node, "returnFields"));
	final NodeList returnFieldNodes = xpp.getNodeList(node,
			"returnFields/field");
	for (int pos = 0; pos < returnFieldNodes.getLength(); pos++) {
		final ReturnField parsed = ReturnField
				.fromXmlConfig(returnFieldNodes.item(pos));
		if (parsed != null)
			returnFieldList.put(parsed);
	}
}
/**
 * Writes this request as an XML request element. Runs under the read
 * lock.
 * <p>
 * The scalar settings become attributes of the element. The
 * {@code fields}, {@code docQuery}, {@code likeText},
 * {@code returnFields} and {@code filters} children are only written when
 * the corresponding value is not empty.
 *
 * @param xmlWriter
 *            the writer receiving the XML
 * @throws SAXException
 *             declared for the writer calls
 */
@Override
public void writeXmlConfig(XmlWriter xmlWriter) throws SAXException {
	rwl.r.lock();
	try {
		// One attribute name / value pair per line.
		xmlWriter.startElement(XML_NODE_REQUEST,
				XML_ATTR_NAME, getRequestName(),
				XML_ATTR_TYPE, getType().name(),
				"minWordLen", Integer.toString(minWordLen),
				"maxWordLen", Integer.toString(maxWordLen),
				"minDocFreq", Integer.toString(minDocFreq),
				"minTermFreq", Integer.toString(minTermFreq),
				"maxNumTokensParsed", Integer.toString(maxNumTokensParsed),
				"maxQueryTerms", Integer.toString(maxQueryTerms),
				"boost", Boolean.toString(boost),
				"stopWords", stopWords,
				"start", Integer.toString(start),
				"rows", Integer.toString(rows),
				"lang", lang == null ? null : lang.getCode(),
				"analyzer", analyzerName);

		if (fieldList.size() > 0) {
			xmlWriter.startElement("fields");
			fieldList.writeXmlConfig(xmlWriter);
			xmlWriter.endElement();
		}

		final boolean hasDocQuery = docQuery != null
				&& docQuery.length() > 0;
		if (hasDocQuery) {
			xmlWriter.startElement("docQuery");
			xmlWriter.textNode(docQuery);
			xmlWriter.endElement();
		}

		final boolean hasLikeText = likeText != null
				&& likeText.length() > 0;
		if (hasLikeText) {
			xmlWriter.startElement("likeText");
			xmlWriter.textNode(likeText);
			xmlWriter.endElement();
		}

		if (returnFieldList.size() > 0) {
			xmlWriter.startElement("returnFields");
			returnFieldList.writeXmlConfig(xmlWriter);
			xmlWriter.endElement();
		}

		if (filterList.size() > 0)
			filterList.writeXmlConfig(xmlWriter, "filters");

		// Closes the request element opened at the top.
		xmlWriter.endElement();
	} finally {
		rwl.r.unlock();
	}
}
/**
 * Applies the parameters found in a servlet transaction to this request.
 * <p>
 * Each parameter name is the given prefix followed by a fixed suffix
 * ({@code mlt.docquery}, {@code mlt.liketext}, {@code mlt.minwordlen},
 * {@code mlt.maxwordlen}, {@code mlt.mindocfreq}, {@code mlt.mintermfreq},
 * {@code mlt.stopwords}, {@code lang} or else {@code mlt.lang},
 * {@code mlt.analyzer}, {@code mlt.boost}, {@code start}, {@code rows}).
 * A parameter that is absent ({@code null}) leaves the current value
 * untouched.
 *
 * @param transaction
 *            the transaction the parameters are read from
 * @param prefix
 *            the prefix put in front of every parameter name
 */
@Override
final protected void setFromServletNoLock(
		final ServletTransaction transaction, final String prefix) {
	final String docQueryParam = transaction
			.getParameterString(StringUtils.fastConcat(prefix,
					"mlt.docquery"));
	if (docQueryParam != null)
		setDocQuery(docQueryParam);

	final String likeTextParam = transaction
			.getParameterString(StringUtils.fastConcat(prefix,
					"mlt.liketext"));
	if (likeTextParam != null)
		setLikeText(likeTextParam);

	final Integer minWordLenParam = transaction
			.getParameterInteger(StringUtils.fastConcat(prefix,
					"mlt.minwordlen"));
	if (minWordLenParam != null)
		setMinWordLen(minWordLenParam);

	final Integer maxWordLenParam = transaction
			.getParameterInteger(StringUtils.fastConcat(prefix,
					"mlt.maxwordlen"));
	if (maxWordLenParam != null)
		setMaxWordLen(maxWordLenParam);

	final Integer minDocFreqParam = transaction
			.getParameterInteger(StringUtils.fastConcat(prefix,
					"mlt.mindocfreq"));
	if (minDocFreqParam != null)
		setMinDocFreq(minDocFreqParam);

	final Integer minTermFreqParam = transaction
			.getParameterInteger(StringUtils.fastConcat(prefix,
					"mlt.mintermfreq"));
	if (minTermFreqParam != null)
		setMinTermFreq(minTermFreqParam);

	final String stopWordsParam = transaction
			.getParameterString(StringUtils.fastConcat(prefix,
					"mlt.stopwords"));
	if (stopWordsParam != null)
		setStopWords(stopWordsParam);

	// "lang" wins; "mlt.lang" is only consulted when "lang" is absent.
	final String langParam = transaction.getParameterString(StringUtils
			.fastConcat(prefix, "lang"));
	if (langParam != null) {
		setLang(LanguageEnum.findByCode(langParam));
	} else {
		final String mltLangParam = transaction
				.getParameterString(StringUtils.fastConcat(prefix,
						"mlt.lang"));
		if (mltLangParam != null)
			setLang(LanguageEnum.findByCode(mltLangParam));
	}

	final String analyzerParam = transaction
			.getParameterString(StringUtils.fastConcat(prefix,
					"mlt.analyzer"));
	if (analyzerParam != null)
		setAnalyzerName(analyzerParam);

	final Boolean boostParam = transaction
			.getParameterBoolean(StringUtils.fastConcat(prefix,
					"mlt.boost"));
	if (boostParam != null)
		setBoost(boostParam);

	final Integer startParam = transaction
			.getParameterInteger(StringUtils.fastConcat(prefix, "start"));
	if (startParam != null)
		setStart(startParam);

	final Integer rowsParam = transaction
			.getParameterInteger(StringUtils.fastConcat(prefix, "rows"));
	if (rowsParam != null)
		setRows(rowsParam);
}
/**
 * Forgets the cached query so that the next {@code getQuery()} call
 * builds a new one. No lock is taken here.
 */
@Override
protected void resetNoLock() {
	this.mltQuery = null;
}
/**
 * Runs this request against the given reader and returns the result.
 * <p>
 * The reader is cast to {@link ReaderLocal}. Every checked exception
 * raised while the result is created is rethrown wrapped in a
 * {@link SearchLibException} that keeps the original as its cause.
 *
 * @param reader
 *            the reader to run against; must be a {@link ReaderLocal}
 * @return a new {@link ResultMoreLikeThis}
 * @throws SearchLibException
 *             when creating the result fails
 */
@Override
public AbstractResult execute(ReaderInterface reader)
		throws SearchLibException {
	try {
		final ReaderLocal localReader = (ReaderLocal) reader;
		return new ResultMoreLikeThis(localReader, this);
	} catch (IOException ioFailure) {
		throw new SearchLibException(ioFailure);
	} catch (ParseException parseFailure) {
		throw new SearchLibException(parseFailure);
	} catch (SyntaxError syntaxFailure) {
		throw new SearchLibException(syntaxFailure);
	} catch (InstantiationException instantiationFailure) {
		throw new SearchLibException(instantiationFailure);
	} catch (IllegalAccessException accessFailure) {
		throw new SearchLibException(accessFailure);
	} catch (ClassNotFoundException classFailure) {
		throw new SearchLibException(classFailure);
	}
}
/**
 * Builds a short description of this request under the read lock: the
 * document query and the reference text, each followed by a space when
 * present, then the string form of the field list.
 *
 * @return the description, never {@code null}
 */
@Override
public String getInfo() {
	rwl.r.lock();
	try {
		final StringBuilder info = new StringBuilder();
		if (docQuery != null)
			info.append(docQuery).append(' ');
		if (likeText != null)
			info.append(likeText).append(' ');
		if (fieldList != null)
			info.append(fieldList.toString());
		return info.toString();
	} finally {
		rwl.r.unlock();
	}
}
/**
 * Reads the start of the paging window under the read lock.
 *
 * @return the start value
 */
public int getStart() {
	final int current;
	rwl.r.lock();
	try {
		current = this.start;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Computes the end of the paging window under the read lock.
 *
 * @return {@code start + rows}
 */
public int getEnd() {
	final int end;
	rwl.r.lock();
	try {
		end = this.start + this.rows;
	} finally {
		rwl.r.unlock();
	}
	return end;
}
/**
 * Stores a new start of the paging window under the write lock. The
 * cached query is left untouched.
 *
 * @param start
 *            the start value to store
 */
public void setStart(int start) {
	rwl.w.lock();
	try {
		this.start = start;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the size of the paging window under the read lock.
 *
 * @return the rows value
 */
public int getRows() {
	final int current;
	rwl.r.lock();
	try {
		current = this.rows;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new size of the paging window under the write lock. The
 * cached query is left untouched.
 *
 * @param rows
 *            the rows value to store
 */
public void setRows(int rows) {
	rwl.w.lock();
	try {
		this.rows = rows;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the maximum number of parsed tokens under the read lock.
 *
 * @return the value passed to {@code MoreLikeThis.setMaxNumTokensParsed}
 */
public int getMaxNumTokensParsed() {
	final int current;
	rwl.r.lock();
	try {
		current = this.maxNumTokensParsed;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new maximum number of parsed tokens under the write lock and
 * drops the cached query.
 *
 * @param maxNumTokensParsed
 *            the value to pass to
 *            {@code MoreLikeThis.setMaxNumTokensParsed}
 */
public void setMaxNumTokensParsed(int maxNumTokensParsed) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.maxNumTokensParsed = maxNumTokensParsed;
	} finally {
		rwl.w.unlock();
	}
}
/**
 * Reads the maximum number of query terms under the read lock.
 *
 * @return the value passed to {@code MoreLikeThis.setMaxQueryTerms}
 */
public int getMaxQueryTerms() {
	final int current;
	rwl.r.lock();
	try {
		current = this.maxQueryTerms;
	} finally {
		rwl.r.unlock();
	}
	return current;
}
/**
 * Stores a new maximum number of query terms under the write lock and
 * drops the cached query.
 *
 * @param maxQueryTerms
 *            the value to pass to {@code MoreLikeThis.setMaxQueryTerms}
 */
public void setMaxQueryTerms(int maxQueryTerms) {
	rwl.w.lock();
	try {
		mltQuery = null;
		this.maxQueryTerms = maxQueryTerms;
	} finally {
		rwl.w.unlock();
	}
}
}