// org.apache.lucene.queries.mlt.MoreLikeThisQuery (Maven / Gradle / Ivy)
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queries.mlt;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
/**
* A simple wrapper for MoreLikeThis for use in scenarios where a Query object is required eg in
* custom QueryParser extensions. At query.rewrite() time the reader is used to construct the actual
* MoreLikeThis object and obtain the real Query object.
*/
public class MoreLikeThisQuery extends Query {
private String likeText;
private String[] moreLikeFields;
private Analyzer analyzer;
private final String fieldName;
private float percentTermsToMatch = 0.3f;
private int minTermFrequency = 1;
private int maxQueryTerms = 5;
private Set> stopWords = null;
private int minDocFreq = -1;
/**
* @param moreLikeFields fields used for similarity measure
*/
public MoreLikeThisQuery(
String likeText, String[] moreLikeFields, Analyzer analyzer, String fieldName) {
this.likeText = Objects.requireNonNull(likeText);
this.moreLikeFields = Objects.requireNonNull(moreLikeFields);
this.analyzer = Objects.requireNonNull(analyzer);
this.fieldName = Objects.requireNonNull(fieldName);
}
@Override
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
MoreLikeThis mlt = new MoreLikeThis(indexSearcher.getIndexReader());
mlt.setFieldNames(moreLikeFields);
mlt.setAnalyzer(analyzer);
mlt.setMinTermFreq(minTermFrequency);
if (minDocFreq >= 0) {
mlt.setMinDocFreq(minDocFreq);
}
mlt.setMaxQueryTerms(maxQueryTerms);
mlt.setStopWords(stopWords);
BooleanQuery bq = (BooleanQuery) mlt.like(fieldName, new StringReader(likeText));
BooleanQuery.Builder newBq = new BooleanQuery.Builder();
for (BooleanClause clause : bq) {
newBq.add(clause);
}
// make at least half the terms match
newBq.setMinimumNumberShouldMatch((int) (bq.clauses().size() * percentTermsToMatch));
return newBq.build();
}
/* (non-Javadoc)
* @see org.apache.lucene.search.Query#toString(java.lang.String)
*/
@Override
public String toString(String field) {
return "like:" + likeText;
}
public float getPercentTermsToMatch() {
return percentTermsToMatch;
}
public void setPercentTermsToMatch(float percentTermsToMatch) {
this.percentTermsToMatch = percentTermsToMatch;
}
public Analyzer getAnalyzer() {
return analyzer;
}
public void setAnalyzer(Analyzer analyzer) {
this.analyzer = analyzer;
}
public String getLikeText() {
return likeText;
}
public void setLikeText(String likeText) {
this.likeText = likeText;
}
public int getMaxQueryTerms() {
return maxQueryTerms;
}
public void setMaxQueryTerms(int maxQueryTerms) {
this.maxQueryTerms = maxQueryTerms;
}
public int getMinTermFrequency() {
return minTermFrequency;
}
public void setMinTermFrequency(int minTermFrequency) {
this.minTermFrequency = minTermFrequency;
}
public String[] getMoreLikeFields() {
return moreLikeFields;
}
public void setMoreLikeFields(String[] moreLikeFields) {
this.moreLikeFields = moreLikeFields;
}
public Set> getStopWords() {
return stopWords;
}
public void setStopWords(Set> stopWords) {
this.stopWords = stopWords;
}
public int getMinDocFreq() {
return minDocFreq;
}
public void setMinDocFreq(int minDocFreq) {
this.minDocFreq = minDocFreq;
}
@Override
public int hashCode() {
final int prime = 31;
int result = classHash();
result = prime * result + Objects.hash(analyzer, fieldName, likeText, stopWords);
result = prime * result + maxQueryTerms;
result = prime * result + minDocFreq;
result = prime * result + minTermFrequency;
result = prime * result + Arrays.hashCode(moreLikeFields);
result = prime * result + Float.floatToIntBits(percentTermsToMatch);
return result;
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) && equalsTo(getClass().cast(other));
}
private boolean equalsTo(MoreLikeThisQuery other) {
return maxQueryTerms == other.maxQueryTerms
&& minDocFreq == other.minDocFreq
&& minTermFrequency == other.minTermFrequency
&& Float.floatToIntBits(percentTermsToMatch)
== Float.floatToIntBits(other.percentTermsToMatch)
&& analyzer.equals(other.analyzer)
&& fieldName.equals(other.fieldName)
&& likeText.equals(other.likeText)
&& Arrays.equals(moreLikeFields, other.moreLikeFields)
&& Objects.equals(stopWords, other.stopWords);
}
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
}