All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.anserini.search.query.SdmQueryGenerator Maven / Gradle / Ivy

/*
 * Anserini: A Lucene toolkit for reproducible information retrieval research
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.anserini.search.query;

import io.anserini.analysis.AnalyzerUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import java.util.List;

/**
 * Builds the term dependency query of the Sequential Dependency Model (SDM). See:
 * D. Metzler and W. B. Croft. A markov random field model for term dependencies. In SIGIR ’05.
 *
 * <p>The generated query is a disjunction of three boosted sub-queries: the individual query
 * terms, ordered windows over each pair of adjacent terms, and unordered windows over each pair
 * of adjacent terms.
 */
public class SdmQueryGenerator extends QueryGenerator {
  /** Weights used by the no-argument constructor. */
  private static final float DEFAULT_TERM_WEIGHT = 0.85f;
  private static final float DEFAULT_ORDER_WINDOW_WEIGHT = 0.1f;
  private static final float DEFAULT_UNORDER_WINDOW_WEIGHT = 0.05f;

  /** Slop passed to the in-order {@link SpanNearQuery} built for each adjacent term pair. */
  private static final int ORDERED_WINDOW_SLOP = 1;
  /** Slop passed to the any-order {@link SpanNearQuery} built for each adjacent term pair. */
  private static final int UNORDERED_WINDOW_SLOP = 8;

  private final float termWeight;
  private final float orderWindowWeight;
  private final float unorderWindowWeight;

  /**
   * Creates a generator with the default weights: 0.85 for terms, 0.1 for ordered windows and
   * 0.05 for unordered windows.
   */
  public SdmQueryGenerator() {
    this(DEFAULT_TERM_WEIGHT, DEFAULT_ORDER_WINDOW_WEIGHT, DEFAULT_UNORDER_WINDOW_WEIGHT);
  }

  /**
   * Creates a generator with explicit weights.
   *
   * @param termWeight boost applied to the disjunction of single-term queries
   * @param orderWindowWeight boost applied to the disjunction of ordered-window queries
   * @param unorderWindowWeight boost applied to the disjunction of unordered-window queries
   */
  public SdmQueryGenerator(float termWeight, float orderWindowWeight, float unorderWindowWeight) {
    this.termWeight = termWeight;
    this.orderWindowWeight = orderWindowWeight;
    this.unorderWindowWeight = unorderWindowWeight;
  }

  /**
   * Builds the Sequential Dependency Model query for {@code queryText}.
   *
   * <ul>
   *   <li>No tokens after analysis: an empty {@link BooleanQuery} is returned.</li>
   *   <li>Exactly one token: an unboosted {@link BooleanQuery} holding a single optional
   *       {@link TermQuery} is returned, since there is no adjacent pair to build windows
   *       from.</li>
   *   <li>Otherwise: a {@link BooleanQuery} with three optional, boosted clauses (terms,
   *       ordered windows, unordered windows) is returned.</li>
   * </ul>
   *
   * @param field index field the query is issued against
   * @param analyzer analyzer used to tokenize {@code queryText}
   * @param queryText raw query text
   * @return the SDM query
   */
  @Override
  public Query buildQuery(String field, Analyzer analyzer, String queryText) {
    List<String> tokens = AnalyzerUtils.analyze(analyzer, queryText);

    BooleanQuery.Builder termsBuilder = new BooleanQuery.Builder();

    // Guard: without it, an empty token list falls through to tokens.get(-1) below
    // and fails with an IndexOutOfBoundsException.
    if (tokens.isEmpty()) {
      return termsBuilder.build();
    }

    if (tokens.size() == 1) {
      termsBuilder.add(new TermQuery(new Term(field, tokens.get(0))), BooleanClause.Occur.SHOULD);
      return termsBuilder.build();
    }

    // Unigram component: one optional clause per token, in query order.
    for (String token : tokens) {
      termsBuilder.add(new TermQuery(new Term(field, token)), BooleanClause.Occur.SHOULD);
    }

    // Window components: one ordered and one unordered span query per adjacent token pair.
    BooleanQuery.Builder orderedWindowBuilder = new BooleanQuery.Builder();
    BooleanQuery.Builder unorderedWindowBuilder = new BooleanQuery.Builder();
    for (int i = 0; i < tokens.size() - 1; i++) {
      SpanTermQuery first = new SpanTermQuery(new Term(field, tokens.get(i)));
      SpanTermQuery second = new SpanTermQuery(new Term(field, tokens.get(i + 1)));
      SpanQuery[] pair = new SpanQuery[] {first, second};

      orderedWindowBuilder.add(
          new SpanNearQuery(pair, ORDERED_WINDOW_SLOP, true), BooleanClause.Occur.SHOULD);
      unorderedWindowBuilder.add(
          new SpanNearQuery(pair, UNORDERED_WINDOW_SLOP, false), BooleanClause.Occur.SHOULD);
    }

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new BoostQuery(termsBuilder.build(), termWeight), BooleanClause.Occur.SHOULD);
    builder.add(
        new BoostQuery(orderedWindowBuilder.build(), orderWindowWeight),
        BooleanClause.Occur.SHOULD);
    builder.add(
        new BoostQuery(unorderedWindowBuilder.build(), unorderWindowWeight),
        BooleanClause.Occur.SHOULD);

    return builder.build();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy