org.apache.solr.handler.component.MoreLikeThisComponent Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Show all versions of solr-core Show documentation
Apache Solr (module: core)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import static org.apache.solr.common.params.CommonParams.SORT;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.MoreLikeThisParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.MoreLikeThisHandler;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.QueryLimits;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
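* Search component that adds a {@code moreLikeThis} section to the response when
* {@link MoreLikeThisParams#MLT} is enabled on the request. For non-distributed requests the
* section is computed directly from the current result list; for shard requests without a
* {@link #DIST_DOC_ID} parameter it instead returns, per result document, the query parameters
* derived from that document's interesting terms.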
*
* @since solr 1.3
*/
public class MoreLikeThisComponent extends SearchComponent {
public static final String COMPONENT_NAME = "mlt";
public static final String DIST_DOC_ID = "mlt.dist.id";
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
 * Marks the request as needing a document list whenever {@link MoreLikeThisParams#MLT} is
 * enabled; otherwise leaves the response builder untouched.
 *
 * @param rb the response builder for the current request
 * @throws IOException declared by the overridden method; nothing in this body throws it directly
 */
@Override
public void prepare(ResponseBuilder rb) throws IOException {
  final boolean mltRequested = rb.req.getParams().getBool(MoreLikeThisParams.MLT, false);
  if (!mltRequested) {
    return;
  }
  rb.setNeedDocList(true);
}
/**
 * Produces the {@code moreLikeThis} section of the response when {@link MoreLikeThisParams#MLT}
 * is enabled; does nothing otherwise.
 *
 * <p>Two modes are handled:
 *
 * <ul>
 *   <li>Shard request ({@link ShardParams#IS_SHARD}) without a {@link #DIST_DOC_ID} parameter:
 *       for every document of the result list that has interesting terms, a set of query
 *       parameters (see {@code mltViaQueryParams}) is emitted, keyed by the document's printable
 *       unique key. An empty result list yields an empty {@code moreLikeThis} entry.
 *   <li>Every other request: the output of {@code getMoreLikeThese} for the current result list
 *       is added.
 * </ul>
 *
 * @param rb the response builder for the current request
 * @throws IOException propagated from the searcher and MoreLikeThis helper calls
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(MoreLikeThisParams.MLT, false)) {
    return;
  }

  ReturnFields returnFields = new SolrReturnFields(rb.req);
  int flags = 0;
  if (returnFields.wantsScore()) {
    flags |= SolrIndexSearcher.GET_SCORES;
  }
  rb.setFieldFlags(flags);

  if (log.isDebugEnabled()) {
    log.debug(
        "Starting MoreLikeThis.Process. isShard: {}", params.getBool(ShardParams.IS_SHARD));
  }
  SolrIndexSearcher searcher = rb.req.getSearcher();

  // Only a shard request that carries no mlt.dist.id takes the "return query params" path;
  // the non-distributed case and a shard request with mlt.dist.id both compute results directly.
  final boolean shardWithoutDistDocId =
      params.getBool(ShardParams.IS_SHARD, false)
          && params.get(MoreLikeThisComponent.DIST_DOC_ID) == null;
  if (!shardWithoutDistDocId) {
    // Wildcard type: getMoreLikeThese's exact element type is declared outside this method.
    NamedList<?> sim = getMoreLikeThese(rb, searcher, rb.getResults().docList, flags);
    rb.rsp.add("moreLikeThis", sim);
    return;
  }

  final DocList docList = rb.getResults().docList;
  if (docList.size() == 0) {
    // return empty response
    rb.rsp.add("moreLikeThis", new NamedList<>());
    return;
  }

  MoreLikeThisHandler.MoreLikeThisHelper mlt =
      new MoreLikeThisHandler.MoreLikeThisHelper(params, searcher);
  NamedList<NamedList<?>> mltQueryByDocKey = new NamedList<>();
  QueryLimits queryLimits = QueryLimits.getCurrentLimits();
  final IndexSchema schema = rb.req.getSchema();

  for (DocIterator results = docList.iterator(); results.hasNext(); ) {
    int docId = results.nextDoc();
    final List<MoreLikeThisHandler.InterestingTerm> interestingTerms =
        mlt.getInterestingTerms(mlt.getBoostedMLTQuery(docId), -1);
    if (interestingTerms.isEmpty()) {
      // Nothing to build a query from for this document.
      continue;
    }
    if (queryLimits.maybeExitWithPartialResults("MoreLikeThis process")) {
      // Query limits were hit: stop and report what has been collected so far.
      break;
    }
    // Looked up only once a document actually needs it, exactly as before, so a request
    // that never reaches this point does not touch the unique key field.
    final String uniqueKey = schema.getUniqueKeyField().getName();
    final Document document = searcher.doc(docId);
    final String uniqueVal = schema.printableUniqueKey(document);
    final NamedList<?> mltQ = mltViaQueryParams(schema, interestingTerms, uniqueKey, uniqueVal);
    mltQueryByDocKey.add(uniqueVal, mltQ);
  }

  rb.rsp.add("moreLikeThis", mltQueryByDocKey);
}
/**
 * Builds the request parameters describing a "more like this" query for one document.
 *
 * <p>The returned list contains one {@code mltqN} parameter per clause plus a {@link
 * CommonParams#Q} entry holding a {@code {!bool ...}} query that references those parameters:
 * {@code mltq0} is a {@code must_not} clause matching {@code uniqueVal} in {@code uniqueField},
 * and each interesting term contributes one {@code should} clause.
 *
 * @param schema schema used to turn indexed term bytes into their readable form
 * @param terms interesting terms (with boosts) to turn into {@code should} clauses
 * @param uniqueField name of the field used for the {@code must_not} clause
 * @param uniqueVal value matched by the {@code must_not} clause
 * @return the parameter list, in insertion order, ending with the {@code q} entry
 */
private static NamedList<String> mltViaQueryParams(
    IndexSchema schema,
    List<MoreLikeThisHandler.InterestingTerm> terms,
    String uniqueField,
    String uniqueVal) {
  final NamedList<String> mltQ = new NamedList<>();
  final StringBuilder q = new StringBuilder("{!bool");
  int cnt = 0;

  String param = "mltq" + (cnt++);
  q.append(" must_not=$").append(param);
  // Encode the field name like toParserParam does, so a name containing local-param
  // special characters cannot break the {!field ...} prefix.
  mltQ.add(
      param, "{!field f=" + ClientUtils.encodeLocalParamVal(uniqueField) + "}" + uniqueVal);

  // Both buffers are reused across terms to avoid per-term allocations.
  final StringBuilder reuseStr = new StringBuilder();
  final CharsRefBuilder reuseChar = new CharsRefBuilder();
  for (MoreLikeThisHandler.InterestingTerm term : terms) {
    param = "mltq" + (cnt++);
    q.append(" should=$").append(param);
    mltQ.add(param, toParserParam(schema, term.term, term.boost, reuseStr, reuseChar));
  }

  q.append("}");
  mltQ.add(CommonParams.Q, q.toString());
  return mltQ;
}
/**
 * Renders a single term as a local-params query string of the form
 * {@code {!term f=<field>}<readable value>}, prefixed with {@code {!boost b=<boost>}} when the
 * boost differs from {@code 1}.
 *
 * @param schema schema used to look up the term's field type
 * @param term1 the term to render
 * @param boost boost for the term; the boost prefix is omitted when it equals {@code 1f}
 * @param reuseStr scratch buffer; cleared on entry and left holding the rendered string
 * @param reuseChar scratch buffer handed to the field type's indexed-to-readable conversion
 * @return the rendered query string
 */
private static String toParserParam(
    IndexSchema schema,
    Term term1,
    float boost,
    StringBuilder reuseStr,
    CharsRefBuilder reuseChar) {
  // Start from an empty buffer; the caller shares it across invocations.
  reuseStr.setLength(0);

  final boolean boosted = boost != 1f;
  if (boosted) {
    reuseStr.append("{!boost b=").append(boost).append("}");
  }

  final String fieldName = term1.field();
  final CharsRef readableValue =
      schema.getField(fieldName).getType().indexedToReadable(term1.bytes(), reuseChar);

  reuseStr
      .append("{!term f=")
      .append(ClientUtils.encodeLocalParamVal(fieldName))
      .append("}")
      .append(readableValue);
  return reuseStr.toString();
}
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0
&& rb.req.getParams().getBool(COMPONENT_NAME, false)) {
if (log.isDebugEnabled()) {
log.debug("ShardRequest.response.size: {}", sreq.responses.size());
}
for (ShardResponse r : sreq.responses) {
if (r.getException() != null) {
// This should only happen in case of using shards.tolerant=true. Omit this ShardResponse
continue;
}
@SuppressWarnings("unchecked")
NamedList> moreLikeThisReponse =
(NamedList>) r.getSolrResponse().getResponse().get("moreLikeThis");
if (log.isDebugEnabled()) {
log.debug("ShardRequest.response.shard: {}", r.getShard());
}
if (moreLikeThisReponse != null) {
for (Entry> entry : moreLikeThisReponse) {
if (log.isDebugEnabled()) {
log.debug("id: '{}' Query: '{}'", entry.getKey(), entry.getValue());
}
ShardRequest s = buildShardQuery(rb, entry.getValue(), entry.getKey());
rb.addRequest(this, s);
}
}
}
}
if ((sreq.purpose & ShardRequest.PURPOSE_GET_MLT_RESULTS) != 0) {
for (ShardResponse r : sreq.responses) {
if (log.isDebugEnabled()) {
log.debug("MLT Query returned: {}", r.getSolrResponse().getResponse());
}
}
}
}
@Override
public void finishStage(ResponseBuilder rb) {
// Handling Responses in finishStage, because solrResponse will put
// moreLikeThis xml
// segment ahead of result/response.
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS
&& rb.req.getParams().getBool(COMPONENT_NAME, false)) {
Map