All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.jaeksoft.searchlib.request.MoreLikeThisRequest Maven / Gradle / Ivy

Go to download

OpenSearchServer is a powerful, enterprise-class, search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTFul API you will be able to integrate quickly and easily advanced full-text search capabilities in your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.

The newest version!
/**   
 * License Agreement for OpenSearchServer
 *
 * Copyright (C) 2012-2014 Emmanuel Keller / Jaeksoft
 * 
 * http://www.open-search-server.com
 * 
 * This file is part of OpenSearchServer.
 *
 * OpenSearchServer is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 * OpenSearchServer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with OpenSearchServer. 
 *  If not, see <http://www.gnu.org/licenses/>.
 **/

package com.jaeksoft.searchlib.request;

import java.io.IOException;
import java.io.StringReader;
import java.util.Set;

import javax.xml.xpath.XPathExpressionException;

import org.apache.lucene.search.Query;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.w3c.dom.DOMException;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.Analyzer;
import com.jaeksoft.searchlib.analysis.LanguageEnum;
import com.jaeksoft.searchlib.analysis.filter.stop.WordArray;
import com.jaeksoft.searchlib.config.Config;
import com.jaeksoft.searchlib.filter.FilterAbstract;
import com.jaeksoft.searchlib.filter.FilterList;
import com.jaeksoft.searchlib.filter.QueryFilter;
import com.jaeksoft.searchlib.function.expression.SyntaxError;
import com.jaeksoft.searchlib.index.IndexAbstract;
import com.jaeksoft.searchlib.index.ReaderInterface;
import com.jaeksoft.searchlib.index.ReaderLocal;
import com.jaeksoft.searchlib.query.ParseException;
import com.jaeksoft.searchlib.result.AbstractResult;
import com.jaeksoft.searchlib.result.AbstractResultSearch;
import com.jaeksoft.searchlib.result.ResultMoreLikeThis;
import com.jaeksoft.searchlib.schema.SchemaFieldList;
import com.jaeksoft.searchlib.util.StringUtils;
import com.jaeksoft.searchlib.util.XPathParser;
import com.jaeksoft.searchlib.util.XmlWriter;
import com.jaeksoft.searchlib.web.ServletTransaction;

public class MoreLikeThisRequest extends AbstractRequest implements
		RequestInterfaces.FilterListInterface,
		RequestInterfaces.ReturnedFieldInterface {

	private String docQuery;
	private String likeText;
	private LanguageEnum lang;
	private String analyzerName;
	private ReturnFieldList fieldList;
	private int minWordLen;
	private int maxWordLen;
	private int minDocFreq;
	private int minTermFreq;
	private int maxNumTokensParsed;
	private int maxQueryTerms;
	private boolean boost;
	private String stopWords;
	private ReturnFieldList returnFieldList;
	private FilterList filterList;
	private int start;
	private int rows;
	private Query mltQuery;

	public MoreLikeThisRequest() {
		super(null, RequestTypeEnum.MoreLikeThisRequest);
	}

	public MoreLikeThisRequest(Config config) {
		super(config, RequestTypeEnum.MoreLikeThisRequest);
	}

	@Override
	protected void setDefaultValues() {
		super.setDefaultValues();
		this.filterList = new FilterList(this.config);
		this.returnFieldList = new ReturnFieldList();
		this.docQuery = null;
		this.likeText = null;
		this.lang = LanguageEnum.UNDEFINED;
		this.analyzerName = null;
		this.fieldList = new ReturnFieldList();
		this.minWordLen = MoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
		this.maxWordLen = MoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
		this.minDocFreq = MoreLikeThis.DEFAULT_MIN_DOC_FREQ;
		this.minTermFreq = MoreLikeThis.DEFAULT_MIN_TERM_FREQ;
		this.maxNumTokensParsed = MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED;
		this.maxQueryTerms = MoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
		this.boost = true;
		this.stopWords = null;
		this.start = 0;
		this.rows = 10;
		this.mltQuery = null;
	}

	@Override
	public void copyFrom(AbstractRequest request) {
		super.copyFrom(request);
		MoreLikeThisRequest mltRequest = (MoreLikeThisRequest) request;
		this.analyzerName = mltRequest.analyzerName;
		this.lang = mltRequest.lang;
		this.fieldList = new ReturnFieldList(mltRequest.fieldList);
		this.minWordLen = mltRequest.minWordLen;
		this.maxWordLen = mltRequest.maxWordLen;
		this.minDocFreq = mltRequest.minDocFreq;
		this.minTermFreq = mltRequest.minTermFreq;
		this.stopWords = mltRequest.stopWords;
		this.docQuery = mltRequest.docQuery;
		this.likeText = mltRequest.likeText;
		this.boost = mltRequest.boost;
		this.maxNumTokensParsed = mltRequest.maxNumTokensParsed;
		this.maxQueryTerms = mltRequest.maxQueryTerms;
		this.filterList = new FilterList(mltRequest.filterList);
		this.returnFieldList = new ReturnFieldList(mltRequest.returnFieldList);
		this.mltQuery = mltRequest.mltQuery;
	}

	@Override
	public Query getQuery() throws SearchLibException, IOException {
		rwl.r.lock();
		try {
			if (mltQuery != null)
				return mltQuery;
		} finally {
			rwl.r.unlock();
		}
		rwl.w.lock();
		try {
			if (mltQuery != null)
				return mltQuery;
			Config config = getConfig();
			IndexAbstract index = config.getIndexAbstract();

			MoreLikeThis mlt = index.getMoreLikeThis();
			mlt.setMinWordLen(minWordLen);
			mlt.setMaxWordLen(maxWordLen);
			mlt.setMinDocFreq(minDocFreq);
			mlt.setMinTermFreq(minTermFreq);
			mlt.setMaxNumTokensParsed(maxNumTokensParsed);
			mlt.setMaxQueryTerms(maxQueryTerms);
			mlt.setFieldNames(fieldList.getArrayName());
			mlt.setBoost(boost);

			if (analyzerName != null) {
				Analyzer analyzer = config.getSchema().getAnalyzerList()
						.get(analyzerName, lang);
				if (analyzer != null)
					mlt.setAnalyzer(analyzer.getQueryAnalyzer());
			}
			if (stopWords != null && stopWords.length() > 0) {
				WordArray wordArray = getConfig().getStopWordsManager()
						.getWordArray(stopWords, false);
				if (wordArray != null) {
					Set stopWords = wordArray.getWordSet();
					if (stopWords != null)
						mlt.setStopWords(stopWords);
				}
			}

			if (docQuery != null && docQuery.length() > 0) {
				AbstractSearchRequest searchRequest = new SearchPatternRequest(
						config);
				searchRequest.setRows(1);
				searchRequest.setQueryString(docQuery);
				AbstractResultSearch result = (AbstractResultSearch) index
						.request(searchRequest);
				if (result.getNumFound() == 0)
					return mlt.like(new StringReader(""));
				int docId = result.getDocs().getIds()[0];
				mltQuery = mlt.like(docId);
			} else if (likeText != null & likeText.length() > 0) {
				mltQuery = mlt.like(new StringReader(likeText));
			}
			return mltQuery;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the docQuery
	 */
	public String getDocQuery() {
		rwl.r.lock();
		try {
			return docQuery;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param docQuery
	 *            the docQuery to set
	 */
	public void setDocQuery(String docQuery) {
		rwl.w.lock();
		try {
			this.docQuery = docQuery;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the likeText
	 */
	public String getLikeText() {
		rwl.r.lock();
		try {
			return likeText;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param likeText
	 *            the likeText to set
	 */
	public void setLikeText(String likeText) {
		rwl.w.lock();
		try {
			this.likeText = likeText;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * 
	 * @return the LanguageEnum
	 */
	public LanguageEnum getLang() {
		rwl.r.lock();
		try {
			return this.lang;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * 
	 * @param lang
	 *            The language to set
	 */
	public void setLang(LanguageEnum lang) {
		rwl.w.lock();
		try {
			this.lang = lang;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * 
	 * @return the AnalyzerName
	 */
	public String getAnalyzerName() {
		rwl.r.lock();
		try {
			return this.analyzerName;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * 
	 * @param lang
	 *            The language to set
	 */
	public void setAnalyzerName(String analyzerName) {
		rwl.w.lock();
		try {
			this.analyzerName = analyzerName;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the fieldList
	 */
	public ReturnFieldList getFieldList() {
		rwl.r.lock();
		try {
			return fieldList;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @return the minWordLen
	 */
	public int getMinWordLen() {
		rwl.r.lock();
		try {
			return minWordLen;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param minWordLen
	 *            the minWordLen to set
	 */
	public void setMinWordLen(int minWordLen) {
		rwl.w.lock();
		try {
			this.minWordLen = minWordLen;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the maxWordLen
	 */
	public int getMaxWordLen() {
		rwl.r.lock();
		try {
			return maxWordLen;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param maxWordLen
	 *            the maxWordLen to set
	 */
	public void setMaxWordLen(int maxWordLen) {
		rwl.w.lock();
		try {
			this.maxWordLen = maxWordLen;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the minDocFreq
	 */
	public int getMinDocFreq() {
		rwl.r.lock();
		try {
			return minDocFreq;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param minDocFreq
	 *            the minDocFreq to set
	 */
	public void setMinDocFreq(int minDocFreq) {
		rwl.w.lock();
		try {
			this.minDocFreq = minDocFreq;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the minTermFreq
	 */
	public int getMinTermFreq() {
		rwl.r.lock();
		try {
			return minTermFreq;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param minTermFreq
	 *            the minTermFreq to set
	 */
	public void setMinTermFreq(int minTermFreq) {
		rwl.w.lock();
		try {
			this.minTermFreq = minTermFreq;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the stopWords
	 */
	public String getStopWords() {
		rwl.r.lock();
		try {
			return stopWords;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param stopWords
	 *            the stopWords to set
	 */
	public void setStopWords(String stopWords) {
		rwl.w.lock();
		try {
			this.stopWords = stopWords;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the boost
	 */
	public boolean getBoost() {
		rwl.r.lock();
		try {
			return boost;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param boost
	 *            the boost to set
	 */
	public void setBoost(boolean boost) {
		rwl.w.lock();
		try {
			this.boost = boost;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	@Override
	public FilterList getFilterList() {
		rwl.r.lock();
		try {
			return this.filterList;
		} finally {
			rwl.r.unlock();
		}
	}

	@Override
	public void addFilter(String req, boolean negative) throws ParseException {
		rwl.w.lock();
		try {
			this.filterList.add(new QueryFilter(req, negative,
					FilterAbstract.Source.REQUEST, null));
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	@Override
	public ReturnFieldList getReturnFieldList() {
		rwl.r.lock();
		try {
			return this.returnFieldList;
		} finally {
			rwl.r.unlock();
		}
	}

	@Override
	public void addReturnField(String fieldName) {
		rwl.w.lock();
		try {
			returnFieldList.put(new ReturnField(config.getSchema()
					.getFieldList().get(fieldName).getName()));
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	@Override
	protected void fromXmlConfigNoLock(Config config, XPathParser xpp, Node node)
			throws XPathExpressionException, DOMException, ParseException,
			InstantiationException, IllegalAccessException,
			ClassNotFoundException {
		super.fromXmlConfigNoLock(config, xpp, node);
		setLang(LanguageEnum.findByCode(XPathParser.getAttributeString(node,
				"lang")));
		setAnalyzerName(XPathParser.getAttributeString(node, "analyzer"));
		setMinWordLen(XPathParser.getAttributeValue(node, "minWordLen"));
		setMaxWordLen(XPathParser.getAttributeValue(node, "maxWordLen"));
		setMinTermFreq(XPathParser.getAttributeValue(node, "minTermFreq"));
		setMinDocFreq(XPathParser.getAttributeValue(node, "minDocFreq"));
		setMaxNumTokensParsed(XPathParser.getAttributeValue(node,
				"maxNumTokensParsed"));
		setMaxQueryTerms(XPathParser.getAttributeValue(node, "maxQueryTerms"));
		setBoost(Boolean.TRUE.toString().equalsIgnoreCase(
				XPathParser.getAttributeString(node, "boost")));
		setStopWords(XPathParser.getAttributeString(node, "stopWords"));
		setStart(XPathParser.getAttributeValue(node, "start"));
		setRows(XPathParser.getAttributeValue(node, "rows"));

		NodeList mltFieldsNodes = xpp.getNodeList(node, "fields/field");
		if (mltFieldsNodes != null) {
			ReturnFieldList moreLikeThisFields = getFieldList();
			for (int i = 0; i < mltFieldsNodes.getLength(); i++) {
				ReturnField field = ReturnField.fromXmlConfig(mltFieldsNodes
						.item(i));
				if (field != null)
					moreLikeThisFields.put(field);
			}
		}

		Node mltDocQueryNode = xpp.getNode(node, "docQuery");
		if (mltDocQueryNode != null)
			setDocQuery(xpp.getNodeString(mltDocQueryNode, false));

		Node mltDocLikeText = xpp.getNode(node, "likeText");
		if (mltDocLikeText != null)
			setLikeText(xpp.getNodeString(mltDocLikeText, false));

		NodeList nodes = xpp.getNodeList(node, "filters/filter");
		for (int i = 0; i < nodes.getLength(); i++) {
			Node n = nodes.item(i);
			filterList.add(new QueryFilter(xpp.getNodeString(n, false), "yes"
					.equals(XPathParser.getAttributeString(n, "negative")),
					FilterAbstract.Source.CONFIGXML, null));
		}

		SchemaFieldList fieldList = config.getSchema().getFieldList();
		returnFieldList.filterCopy(fieldList,
				xpp.getNodeString(node, "returnFields"));
		nodes = xpp.getNodeList(node, "returnFields/field");
		for (int i = 0; i < nodes.getLength(); i++) {
			ReturnField field = ReturnField.fromXmlConfig(nodes.item(i));
			if (field != null)
				returnFieldList.put(field);
		}
	}

	@Override
	public void writeXmlConfig(XmlWriter xmlWriter) throws SAXException {
		rwl.r.lock();
		try {
			xmlWriter.startElement(XML_NODE_REQUEST, XML_ATTR_NAME,
					getRequestName(), XML_ATTR_TYPE, getType().name(),
					"minWordLen", Integer.toString(minWordLen), "maxWordLen",
					Integer.toString(maxWordLen), "minDocFreq",
					Integer.toString(minDocFreq), "minTermFreq",
					Integer.toString(minTermFreq), "maxNumTokensParsed",
					Integer.toString(maxNumTokensParsed), "maxQueryTerms",
					Integer.toString(maxQueryTerms), "boost",
					Boolean.toString(boost), "stopWords", stopWords, "start",
					Integer.toString(start), "rows", Integer.toString(rows),
					"lang", lang != null ? lang.getCode() : null, "analyzer",
					analyzerName);

			if (fieldList.size() > 0) {
				xmlWriter.startElement("fields");
				fieldList.writeXmlConfig(xmlWriter);
				xmlWriter.endElement();
			}
			if (docQuery != null && docQuery.length() > 0) {
				xmlWriter.startElement("docQuery");
				xmlWriter.textNode(docQuery);
				xmlWriter.endElement();
			}
			if (likeText != null && likeText.length() > 0) {
				xmlWriter.startElement("likeText");
				xmlWriter.textNode(likeText);
				xmlWriter.endElement();
			}
			if (returnFieldList.size() > 0) {
				xmlWriter.startElement("returnFields");
				returnFieldList.writeXmlConfig(xmlWriter);
				xmlWriter.endElement();
			}
			if (filterList.size() > 0)
				filterList.writeXmlConfig(xmlWriter, "filters");
			xmlWriter.endElement();
		} finally {
			rwl.r.unlock();
		}
	}

	@Override
	final protected void setFromServletNoLock(
			final ServletTransaction transaction, final String prefix) {
		String p;
		Integer i;
		Boolean b;

		if ((p = transaction.getParameterString(StringUtils.fastConcat(prefix,
				"mlt.docquery"))) != null)
			setDocQuery(p);

		if ((p = transaction.getParameterString(StringUtils.fastConcat(prefix,
				"mlt.liketext"))) != null)
			setLikeText(p);

		if ((i = transaction.getParameterInteger(StringUtils.fastConcat(prefix,
				"mlt.minwordlen"))) != null)
			setMinWordLen(i);

		if ((i = transaction.getParameterInteger(StringUtils.fastConcat(prefix,
				"mlt.maxwordlen"))) != null)
			setMaxWordLen(i);

		if ((i = transaction.getParameterInteger(StringUtils.fastConcat(prefix,
				"mlt.mindocfreq"))) != null)
			setMinDocFreq(i);

		if ((i = transaction.getParameterInteger(StringUtils.fastConcat(prefix,
				"mlt.mintermfreq"))) != null)
			setMinTermFreq(i);

		if ((p = transaction.getParameterString(StringUtils.fastConcat(prefix,
				"mlt.stopwords"))) != null)
			setStopWords(p);

		if ((p = transaction.getParameterString(StringUtils.fastConcat(prefix,
				"lang"))) != null)
			setLang(LanguageEnum.findByCode(p));
		else if ((p = transaction.getParameterString(StringUtils.fastConcat(
				prefix, "mlt.lang"))) != null)
			setLang(LanguageEnum.findByCode(p));

		if ((p = transaction.getParameterString(StringUtils.fastConcat(prefix,
				"mlt.analyzer"))) != null)
			setAnalyzerName(p);

		if ((b = transaction.getParameterBoolean(StringUtils.fastConcat(prefix,
				"mlt.boost"))) != null)
			setBoost(b);

		if ((i = transaction.getParameterInteger(StringUtils.fastConcat(prefix,
				"start"))) != null)
			setStart(i);

		if ((i = transaction.getParameterInteger(StringUtils.fastConcat(prefix,
				"rows"))) != null)
			setRows(i);
	}

	@Override
	protected void resetNoLock() {
		mltQuery = null;
	}

	@Override
	public AbstractResult execute(ReaderInterface reader)
			throws SearchLibException {
		try {
			return new ResultMoreLikeThis((ReaderLocal) reader, this);
		} catch (IOException e) {
			throw new SearchLibException(e);
		} catch (ParseException e) {
			throw new SearchLibException(e);
		} catch (SyntaxError e) {
			throw new SearchLibException(e);
		} catch (InstantiationException e) {
			throw new SearchLibException(e);
		} catch (IllegalAccessException e) {
			throw new SearchLibException(e);
		} catch (ClassNotFoundException e) {
			throw new SearchLibException(e);
		}
	}

	@Override
	public String getInfo() {
		rwl.r.lock();
		try {
			StringBuilder sb = new StringBuilder();
			if (docQuery != null) {
				sb.append(docQuery);
				sb.append(' ');
			}
			if (likeText != null) {
				sb.append(likeText);
				sb.append(' ');
			}
			if (fieldList != null)
				sb.append(fieldList.toString());
			return sb.toString();
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @return the start
	 */
	public int getStart() {
		rwl.r.lock();
		try {
			return start;
		} finally {
			rwl.r.unlock();
		}
	}

	public int getEnd() {
		rwl.r.lock();
		try {
			return start + rows;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param start
	 *            the start to set
	 */
	public void setStart(int start) {
		rwl.w.lock();
		try {
			this.start = start;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the rows
	 */
	public int getRows() {
		rwl.r.lock();
		try {
			return rows;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param rows
	 *            the rows to set
	 */
	public void setRows(int rows) {
		rwl.w.lock();
		try {
			this.rows = rows;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the maxNumTokensParsed
	 */
	public int getMaxNumTokensParsed() {
		rwl.r.lock();
		try {
			return maxNumTokensParsed;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param maxNumTokensParsed
	 *            the maxNumTokensParsed to set
	 */
	public void setMaxNumTokensParsed(int maxNumTokensParsed) {
		rwl.w.lock();
		try {
			this.maxNumTokensParsed = maxNumTokensParsed;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

	/**
	 * @return the maxQueryTerms
	 */
	public int getMaxQueryTerms() {
		rwl.r.lock();
		try {
			return maxQueryTerms;
		} finally {
			rwl.r.unlock();
		}
	}

	/**
	 * @param maxQueryTerms
	 *            the maxQueryTerms to set
	 */
	public void setMaxQueryTerms(int maxQueryTerms) {
		rwl.w.lock();
		try {
			this.maxQueryTerms = maxQueryTerms;
			mltQuery = null;
		} finally {
			rwl.w.unlock();
		}
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy