All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.queries.mlt.MoreLikeThisQuery Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.queries.mlt;

import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;

/**
 * A simple wrapper for MoreLikeThis for use in scenarios where a Query object is required, e.g. in
 * custom QueryParser extensions. At {@link #rewrite(IndexSearcher)} time the searcher's index
 * reader is used to construct the actual MoreLikeThis object and obtain the real Query object.
 *
 * <p>The like text, the similarity fields, the analyzer and the field name are never {@code null}:
 * both the constructor and the corresponding setters reject {@code null}. The stop word set is
 * optional and may be {@code null}.
 */
public class MoreLikeThisQuery extends Query {

  private String likeText;
  private String[] moreLikeFields;
  private Analyzer analyzer;
  private final String fieldName;
  // Fraction of the generated clauses that must match; see rewrite().
  private float percentTermsToMatch = 0.3f;
  private int minTermFrequency = 1;
  private int maxQueryTerms = 5;
  // Raw type kept so the public getter/setter signatures stay source-compatible.
  private Set stopWords = null;
  // Negative means "not configured": rewrite() then leaves MoreLikeThis' own setting untouched.
  private int minDocFreq = -1;

  /**
   * Creates a query that looks for documents similar to the given text.
   *
   * @param likeText the text to find similar documents for
   * @param moreLikeFields fields used for similarity measure
   * @param analyzer the analyzer handed to MoreLikeThis
   * @param fieldName the field name passed to MoreLikeThis together with {@code likeText}
   * @throws NullPointerException if any argument is {@code null}
   */
  public MoreLikeThisQuery(
      String likeText, String[] moreLikeFields, Analyzer analyzer, String fieldName) {
    this.likeText = Objects.requireNonNull(likeText, "likeText");
    this.moreLikeFields = Objects.requireNonNull(moreLikeFields, "moreLikeFields");
    this.analyzer = Objects.requireNonNull(analyzer, "analyzer");
    this.fieldName = Objects.requireNonNull(fieldName, "fieldName");
  }

  /**
   * Builds the concrete query: configures a MoreLikeThis instance on the searcher's index reader,
   * lets it generate the clauses for the like text, and requires a configurable fraction of those
   * clauses to match.
   *
   * @param indexSearcher searcher whose index reader backs the MoreLikeThis instance
   * @return a BooleanQuery holding the generated clauses and the minimum-should-match threshold
   * @throws IOException if MoreLikeThis fails while generating the query
   */
  @Override
  public Query rewrite(IndexSearcher indexSearcher) throws IOException {
    MoreLikeThis mlt = new MoreLikeThis(indexSearcher.getIndexReader());

    mlt.setFieldNames(moreLikeFields);
    mlt.setAnalyzer(analyzer);
    mlt.setMinTermFreq(minTermFrequency);
    // Only override the minimum document frequency when one was explicitly configured.
    if (minDocFreq >= 0) {
      mlt.setMinDocFreq(minDocFreq);
    }
    mlt.setMaxQueryTerms(maxQueryTerms);
    mlt.setStopWords(stopWords);

    // The cast assumes MoreLikeThis.like produces a BooleanQuery.
    BooleanQuery bq = (BooleanQuery) mlt.like(fieldName, new StringReader(likeText));
    BooleanQuery.Builder newBq = new BooleanQuery.Builder();
    for (BooleanClause clause : bq) {
      newBq.add(clause);
    }
    // Require percentTermsToMatch (30% by default) of the generated clauses to match; the
    // product is truncated towards zero by the int cast.
    newBq.setMinimumNumberShouldMatch((int) (bq.clauses().size() * percentTermsToMatch));
    return newBq.build();
  }

  /**
   * Returns {@code "like:"} followed by the like text; the {@code field} argument is not used.
   *
   * @see org.apache.lucene.search.Query#toString(java.lang.String)
   */
  @Override
  public String toString(String field) {
    return "like:" + likeText;
  }

  /** Returns the fraction of generated clauses that must match (0.3 unless changed). */
  public float getPercentTermsToMatch() {
    return percentTermsToMatch;
  }

  /**
   * Sets the fraction of generated clauses that must match.
   *
   * @param percentTermsToMatch multiplier applied to the clause count in {@link
   *     #rewrite(IndexSearcher)}
   */
  public void setPercentTermsToMatch(float percentTermsToMatch) {
    this.percentTermsToMatch = percentTermsToMatch;
  }

  /** Returns the analyzer handed to MoreLikeThis; never {@code null}. */
  public Analyzer getAnalyzer() {
    return analyzer;
  }

  /**
   * Replaces the analyzer.
   *
   * @param analyzer the new analyzer
   * @throws NullPointerException if {@code analyzer} is {@code null}
   */
  public void setAnalyzer(Analyzer analyzer) {
    // Same non-null contract as the constructor; equals() and rewrite() rely on it.
    this.analyzer = Objects.requireNonNull(analyzer, "analyzer");
  }

  /** Returns the text similar documents are searched for; never {@code null}. */
  public String getLikeText() {
    return likeText;
  }

  /**
   * Replaces the like text.
   *
   * @param likeText the new text
   * @throws NullPointerException if {@code likeText} is {@code null}
   */
  public void setLikeText(String likeText) {
    // Same non-null contract as the constructor; equals() and rewrite() rely on it.
    this.likeText = Objects.requireNonNull(likeText, "likeText");
  }

  /** Returns the maximum number of query terms passed to MoreLikeThis (5 unless changed). */
  public int getMaxQueryTerms() {
    return maxQueryTerms;
  }

  /**
   * Sets the maximum number of query terms passed to MoreLikeThis.
   *
   * @param maxQueryTerms the new limit
   */
  public void setMaxQueryTerms(int maxQueryTerms) {
    this.maxQueryTerms = maxQueryTerms;
  }

  /** Returns the minimum term frequency passed to MoreLikeThis (1 unless changed). */
  public int getMinTermFrequency() {
    return minTermFrequency;
  }

  /**
   * Sets the minimum term frequency passed to MoreLikeThis.
   *
   * @param minTermFrequency the new minimum
   */
  public void setMinTermFrequency(int minTermFrequency) {
    this.minTermFrequency = minTermFrequency;
  }

  /**
   * Returns the fields used for the similarity measure. The internal array itself is returned,
   * not a copy.
   */
  public String[] getMoreLikeFields() {
    return moreLikeFields;
  }

  /**
   * Replaces the fields used for the similarity measure. The array is stored as-is, not copied.
   *
   * @param moreLikeFields the new field names
   * @throws NullPointerException if {@code moreLikeFields} is {@code null}
   */
  public void setMoreLikeFields(String[] moreLikeFields) {
    // Same non-null contract as the constructor.
    this.moreLikeFields = Objects.requireNonNull(moreLikeFields, "moreLikeFields");
  }

  /** Returns the stop word set passed to MoreLikeThis, or {@code null} if none was set. */
  public Set getStopWords() {
    return stopWords;
  }

  /**
   * Sets the stop word set passed to MoreLikeThis.
   *
   * @param stopWords the stop words; may be {@code null}
   */
  public void setStopWords(Set stopWords) {
    this.stopWords = stopWords;
  }

  /** Returns the configured minimum document frequency; negative (the default) means not set. */
  public int getMinDocFreq() {
    return minDocFreq;
  }

  /**
   * Sets the minimum document frequency. Negative values are stored but not forwarded to
   * MoreLikeThis by {@link #rewrite(IndexSearcher)}.
   *
   * @param minDocFreq the new minimum
   */
  public void setMinDocFreq(int minDocFreq) {
    this.minDocFreq = minDocFreq;
  }

  /** Combines the class hash with every field that takes part in {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = classHash();
    result = prime * result + Objects.hash(analyzer, fieldName, likeText, stopWords);
    result = prime * result + maxQueryTerms;
    result = prime * result + minDocFreq;
    result = prime * result + minTermFrequency;
    result = prime * result + Arrays.hashCode(moreLikeFields);
    result = prime * result + Float.floatToIntBits(percentTermsToMatch);
    return result;
  }

  /** Two instances are equal when they have the same class and all settings are equal. */
  @Override
  public boolean equals(Object other) {
    return sameClassAs(other) && equalsTo(getClass().cast(other));
  }

  /** Field-by-field comparison; cheap primitive checks come first. */
  private boolean equalsTo(MoreLikeThisQuery other) {
    return maxQueryTerms == other.maxQueryTerms
        && minDocFreq == other.minDocFreq
        && minTermFrequency == other.minTermFrequency
        // Compare bit patterns so the result agrees with hashCode().
        && Float.floatToIntBits(percentTermsToMatch)
            == Float.floatToIntBits(other.percentTermsToMatch)
        && analyzer.equals(other.analyzer)
        && fieldName.equals(other.fieldName)
        && likeText.equals(other.likeText)
        && Arrays.equals(moreLikeFields, other.moreLikeFields)
        && Objects.equals(stopWords, other.stopWords);
  }

  /** Reports this query to the visitor as a leaf. */
  @Override
  public void visit(QueryVisitor visitor) {
    visitor.visitLeaf(this);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy