org.apache.solr.handler.MoreLikeThisHandler Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler;
import java.io.IOException;
import java.io.Reader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.MoreLikeThisParams.TermStyle;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.SolrPluginUtils;
/**
* Solr MoreLikeThis --
*
* Return similar documents either based on a single document or based on posted text.
*
* @since solr 1.3
*/
public class MoreLikeThisHandler extends RequestHandlerBase
{
// Delimiter pattern: matches either a single comma or a single space character.
// Compiled once and held in a static final field so the same instance is reused.
// Pattern is thread safe -- TODO? share this with general 'fl' param
// NOTE(review): the call sites are not visible in this part of the file; presumably
// this is used to split comma/space separated parameter lists -- confirm.
private static final Pattern splitList = Pattern.compile(",| ");
/**
 * Initializes this handler by handing the supplied arguments straight to the
 * superclass {@code init}. No handler-specific setup is performed here.
 *
 * @param args the initialization arguments, forwarded unchanged to {@code super.init}
 */
@Override
public void init( NamedList args )
{
  super.init( args );
}
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
{
SolrParams params = req.getParams();
SolrIndexSearcher searcher = req.getSearcher();
MoreLikeThisHelper mlt = new MoreLikeThisHelper( params, searcher );
List filters = SolrPluginUtils.parseFilterQueries(req);
// Hold on to the interesting terms if relevant
TermStyle termStyle = TermStyle.get( params.get( MoreLikeThisParams.INTERESTING_TERMS ) );
List interesting = (termStyle == TermStyle.NONE )
? null : new ArrayList( mlt.mlt.getMaxQueryTerms() );
DocListAndSet mltDocs = null;
String q = params.get( CommonParams.Q );
// Parse Required Params
// This will either have a single Reader or valid query
Reader reader = null;
try {
if (q == null || q.trim().length() < 1) {
Iterable streams = req.getContentStreams();
if (streams != null) {
Iterator iter = streams.iterator();
if (iter.hasNext()) {
reader = iter.next().getReader();
}
if (iter.hasNext()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"MoreLikeThis does not support multiple ContentStreams");
}
}
}
// What fields do we need to return
String fl = params.get(CommonParams.FL);
int flags = 0;
if (fl != null) {
flags |= SolrPluginUtils.setReturnFields(fl, rsp);
}
int start = params.getInt(CommonParams.START, 0);
int rows = params.getInt(CommonParams.ROWS, 10);
// Find documents MoreLikeThis - either with a reader or a query
// --------------------------------------------------------------------------------
if (reader != null) {
mltDocs = mlt.getMoreLikeThis(reader, start, rows, filters,
interesting, flags);
} else if (q != null) {
// Matching options
boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE,
true);
int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
// Find the base match
Query query = QueryParsing.parseQuery(q, params.get(CommonParams.DF),
params, req.getSchema());
DocList match = searcher.getDocList(query, null, null, matchOffset, 1,
flags); // only get the first one...
if (includeMatch) {
rsp.add("match", match);
}
// This is an iterator, but we only handle the first match
DocIterator iterator = match.iterator();
if (iterator.hasNext()) {
// do a MoreLikeThis query for each document in results
int id = iterator.nextDoc();
mltDocs = mlt.getMoreLikeThis(id, start, rows, filters, interesting,
flags);
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"MoreLikeThis requires either a query (?q=) or text to find similar documents.");
}
} finally {
if (reader != null) {
reader.close();
}
}
if( mltDocs == null ) {
mltDocs = new DocListAndSet(); // avoid NPE
}
rsp.add( "response", mltDocs.docList );
if( interesting != null ) {
if( termStyle == TermStyle.DETAILS ) {
NamedList it = new NamedList();
for( InterestingTerm t : interesting ) {
it.add( t.term.toString(), t.boost );
}
rsp.add( "interestingTerms", it );
}
else {
List it = new ArrayList( interesting.size() );
for( InterestingTerm t : interesting ) {
it.add( t.term.text());
}
rsp.add( "interestingTerms", it );
}
}
// maybe facet the results
if (params.getBool(FacetParams.FACET,false)) {
if( mltDocs.docSet == null ) {
rsp.add( "facet_counts", null );
}
else {
SimpleFacets f = new SimpleFacets(req, mltDocs.docSet, params );
rsp.add( "facet_counts", f.getFacetCounts() );
}
}
// Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
try {
NamedList