org.apache.lucene.search.Weight Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.servicemix.bundles.lucene
This OSGi bundle wraps ${pkgArtifactId} ${pkgVersion} jar file.
There is a newer version: 6.4.2_1
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;

/**
 * Expert: Calculate query weights and build query scorers.
 *
 * <p>The purpose of {@link Weight} is to ensure searching does not modify a {@link Query}, so that
 * a {@link Query} instance can be reused.
 *
 * <p>{@link IndexSearcher} dependent state of the query should reside in the {@link Weight}.
 *
 * <p>{@link org.apache.lucene.index.LeafReader} dependent state should reside in the {@link
 * Scorer}.
 *
 * <p>Since {@link Weight} creates {@link Scorer} instances for a given {@link
 * org.apache.lucene.index.LeafReaderContext} ({@link
 * #scorer(org.apache.lucene.index.LeafReaderContext)}) callers must maintain the relationship
 * between the searcher's top-level {@link IndexReaderContext} and the context used to create a
 * {@link Scorer}.
 *
 * <p>A {@code Weight} is used in the following way:
 *
 * <ol>
 *   <li>A {@code Weight} is constructed by a top-level query, given an {@code IndexSearcher}
 *       ({@link Query#createWeight(IndexSearcher, ScoreMode, float)}).
 *   <li>A {@code Scorer} is constructed by {@link
 *       #scorer(org.apache.lucene.index.LeafReaderContext)}.
 * </ol>
 *
 * @since 2.9
 */
public abstract class Weight implements SegmentCacheable {

  /** The query this weight was created for; returned by {@link #getQuery()}. */
  protected final Query parentQuery;

  /**
   * Sole constructor, typically invoked by sub-classes.
   *
   * @param query the parent query; stored as-is, no null check is performed here
   */
  protected Weight(Query query) {
    this.parentQuery = query;
  }

  /**
   * Returns {@link Matches} for a specific document, or {@code null} if the document does not match
   * the parent query.
   *
   * <p>A query match that contains no position information (for example, a Point or DocValues
   * query) will return {@link MatchesUtils#MATCH_WITH_NO_TERMS}. This default implementation only
   * ever returns that constant or {@code null}.
   *
   * @param context the reader's context to create the {@link Matches} for
   * @param doc the document's id relative to the given context's reader
   * @return {@link MatchesUtils#MATCH_WITH_NO_TERMS} if the scorer for {@code context} lands on
   *     {@code doc} (and, for two-phase scorers, confirms the match); {@code null} otherwise,
   *     including when no scorer supplier is available for {@code context}
   * @throws IOException if the scorer supplier, scorer or iterators throw it
   * @lucene.experimental
   */
  public Matches matches(LeafReaderContext context, int doc) throws IOException {
    final ScorerSupplier supplier = scorerSupplier(context);
    if (supplier == null) {
      return null;
    }

    final Scorer docScorer = supplier.get(1);
    final TwoPhaseIterator twoPhase = docScorer.twoPhaseIterator();

    final boolean hit;
    if (twoPhase != null) {
      // The approximation must land on doc first; matches() is only consulted when it does.
      hit = twoPhase.approximation().advance(doc) == doc && twoPhase.matches();
    } else {
      hit = docScorer.iterator().advance(doc) == doc;
    }
    return hit ? MatchesUtils.MATCH_WITH_NO_TERMS : null;
  }

  /**
   * An explanation of the score computation for the named document.
   *
   * @param context the reader's context to create the {@link Explanation} for
   * @param doc the document's id relative to the given context's reader
   * @return an Explanation for the score
   * @throws IOException if an {@link IOException} occurs
   */
  public abstract Explanation explain(LeafReaderContext context, int doc) throws IOException;

  /**
   * The query that this concerns.
   *
   * @return the parent query that was handed to the constructor
   */
  public final Query getQuery() {
    return parentQuery;
  }

  /**
   * Convenience method that delegates to {@link #scorerSupplier(LeafReaderContext)}.
   *
   * <p>Returns a {@link Scorer} which can iterate in order over all matching documents and assign
   * them a score. A scorer for the same {@link LeafReaderContext} instance may be requested
   * multiple times as part of a single search call.
   *
   * <p><b>NOTE:</b> null can be returned if no documents will be scored by this query.
   *
   * <p><b>NOTE:</b> The returned {@link Scorer} does not have {@link LeafReader#getLiveDocs()}
   * applied, they need to be checked on top.
   *
   * @param context the {@link org.apache.lucene.index.LeafReaderContext} for which to return the
   *     {@link Scorer}.
   * @return the {@link Scorer} obtained from the scorer supplier with a lead cost of {@link
   *     Long#MAX_VALUE}, or {@code null} if {@link #scorerSupplier(LeafReaderContext)} returned
   *     {@code null}
   * @throws IOException if there is a low-level I/O error
   */
  public final Scorer scorer(LeafReaderContext context) throws IOException {
    final ScorerSupplier supplier = scorerSupplier(context);
    return supplier == null ? null : supplier.get(Long.MAX_VALUE);
  }

  /**
   * Get a {@link ScorerSupplier}, which allows knowing the cost of the {@link Scorer} before
   * building it. A scorer supplier for the same {@link LeafReaderContext} instance may be requested
   * multiple times as part of a single search call.
   *
   * <p><strong>Note:</strong> It must return null if the scorer is null.
   *
   * @param context the leaf reader context
   * @return a {@link ScorerSupplier} providing the scorer, or null if scorer is null
   * @throws IOException if an IOException occurs
   * @see Scorer
   * @see DefaultScorerSupplier
   */
  public abstract ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException;

  /**
   * Helper method that delegates to {@link #scorerSupplier(LeafReaderContext)}. It is equivalent
   * to:
   *
   * <pre class="prettyprint">
   * ScorerSupplier scorerSupplier = scorerSupplier(context);
   * if (scorerSupplier == null) {
   *   // No docs match
   *   return null;
   * }
   *
   * scorerSupplier.setTopLevelScoringClause();
   * return scorerSupplier.bulkScorer();
   * </pre>
   *
   * <p>A bulk scorer for the same {@link LeafReaderContext} instance may be requested multiple
   * times as part of a single search call.
   *
   * @param context the leaf reader context to build the bulk scorer for
   * @return the supplier's {@link BulkScorer}, or {@code null} if no scorer supplier is available
   *     for {@code context}
   * @throws IOException if the scorer supplier throws it
   */
  public final BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
    final ScorerSupplier supplier = scorerSupplier(context);
    if (supplier != null) {
      // Flag the supplier as the top-level scoring clause before asking it for a bulk scorer.
      supplier.setTopLevelScoringClause();
      return supplier.bulkScorer();
    }
    // No docs match
    return null;
  }

  /**
   * Counts the number of live documents that match a given {@link Weight#parentQuery} in a leaf.
   *
   * <p>The default implementation returns -1 for every query. This indicates that the count could
   * not be computed in sub-linear time.
   *
   * <p>Specific query classes should override it to provide other accurate sub-linear
   * implementations (that actually return the count). Look at {@link
   * MatchAllDocsQuery#createWeight(IndexSearcher, ScoreMode, float)} for an example.
   *
   * <p>We use this property of the function to count hits in {@link IndexSearcher#count(Query)}.
   *
   * @param context the {@link org.apache.lucene.index.LeafReaderContext} for which to return the
   *     count.
   * @return integer count of the number of matches, or -1 when the count could not be computed in
   *     sub-linear time
   * @throws IOException if there is a low-level I/O error
   */
  public int count(LeafReaderContext context) throws IOException {
    // Sentinel: this base implementation never computes a count.
    return -1;
  }

  /**
   * A {@link ScorerSupplier} backed by a {@link Scorer} that has already been built.
   *
   * @lucene.internal
   */
  protected static final class DefaultScorerSupplier extends ScorerSupplier {
    private final Scorer delegate;

    /**
     * Wraps the given scorer.
     *
     * @param scorer the scorer to hand out from {@link #get(long)}
     * @throws NullPointerException if {@code scorer} is null
     */
    public DefaultScorerSupplier(Scorer scorer) {
      Objects.requireNonNull(scorer, "Scorer must not be null");
      this.delegate = scorer;
    }

    /** Returns the wrapped scorer; {@code leadCost} is not consulted. */
    @Override
    public Scorer get(long leadCost) throws IOException {
      return delegate;
    }

    /** Returns the cost reported by the wrapped scorer's iterator. */
    @Override
    public long cost() {
      final DocIdSetIterator wrappedIterator = delegate.iterator();
      return wrappedIterator.cost();
    }
  }

  /**
   * Just wraps a Scorer and performs top scoring using it.
   *
   * @lucene.internal
   */
  protected static class DefaultBulkScorer extends BulkScorer {
    private final Scorer scorer;
    // Drives iteration: the two-phase approximation when there is one, the scorer's iterator
    // otherwise.
    private final DocIdSetIterator iterator;
    // Null when the scorer does not expose a two-phase iterator.
    private final TwoPhaseIterator twoPhase;

    /**
     * Sole constructor.
     *
     * @param scorer the scorer to wrap
     * @throws NullPointerException if {@code scorer} is null
     */
    public DefaultBulkScorer(Scorer scorer) {
      this.scorer = Objects.requireNonNull(scorer);
      final TwoPhaseIterator twoPhaseView = scorer.twoPhaseIterator();
      this.twoPhase = twoPhaseView;
      this.iterator = (twoPhaseView != null) ? twoPhaseView.approximation() : scorer.iterator();
    }

    /**
     * Returns the cost of the iterator that drives this bulk scorer, i.e. the two-phase
     * approximation when the wrapped scorer has one and the scorer's own iterator otherwise.
     */
    @Override
    public long cost() {
      return iterator.cost();
    }

    /**
     * Feeds the documents of the wrapped scorer that fall below {@code max} to {@code collector}.
     *
     * <p>The collector is first given the scorer and asked for a competitive iterator. The driving
     * iterator is then positioned on the first doc at or after {@code min} (or after the
     * competitive iterator's current doc, capped at {@code max}, if that is larger), and one of
     * four specialized loops is run depending on whether a two-phase iterator and/or a competitive
     * iterator are present.
     *
     * @param collector the collector receiving matching doc ids
     * @param acceptDocs bits of documents that may be collected, or null to allow all
     * @param min the first doc id to consider
     * @param max the exclusive upper bound of doc ids to consider
     * @return the doc id the driving iterator is positioned on when scoring stops
     * @throws IOException if the collector or an iterator throws it
     */
    @Override
    public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
        throws IOException {
      collector.setScorer(scorer);
      final DocIdSetIterator competitiveIterator = collector.competitiveIterator();

      if (competitiveIterator != null && competitiveIterator.docID() > min) {
        // The competitive iterator may not match any docs in the range, hence the cap at max.
        min = Math.min(competitiveIterator.docID(), max);
      }

      // Bring the driving iterator up to min: a single step when it sits right before min,
      // a regular advance otherwise.
      if (iterator.docID() < min) {
        if (iterator.docID() != min - 1) {
          iterator.advance(min);
        } else {
          iterator.nextDoc();
        }
      }

      // These various specializations help save some null checks in a hot loop, but as importantly
      // if not more importantly, they help reduce the polymorphism of calls sites to nextDoc() and
      // collect() because only a subset of collectors produce a competitive iterator, and the set
      // of implementing classes for two-phase approximations is smaller than the set of doc id set
      // iterator implementations.
      if (competitiveIterator == null) {
        if (twoPhase == null) {
          // Optimize simple iterators with collectors that can't skip
          scoreIterator(collector, acceptDocs, iterator, max);
        } else {
          scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max);
        }
      } else {
        if (twoPhase == null) {
          scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max);
        } else {
          scoreTwoPhaseOrCompetitiveIterator(
              collector, acceptDocs, iterator, twoPhase, competitiveIterator, max);
        }
      }

      return iterator.docID();
    }

    /**
     * Collects every doc of {@code iterator} below {@code max} that {@code acceptDocs} allows,
     * starting from the iterator's current doc. A null {@code acceptDocs} allows every doc.
     *
     * @param collector receives each accepted doc id
     * @param acceptDocs bits of documents that may be collected, or null to allow all
     * @param iterator the iterator to walk; it is left on the first doc at or beyond {@code max}
     * @param max the exclusive upper bound of doc ids to collect
     * @throws IOException if the iterator or the collector throws it
     */
    private static void scoreIterator(
        LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max)
        throws IOException {
      int current = iterator.docID();
      while (current < max) {
        if (acceptDocs == null || acceptDocs.get(current)) {
          collector.collect(current);
        }
        current = iterator.nextDoc();
      }
    }

    /**
     * Collects every doc of the approximation {@code iterator} below {@code max} that {@code
     * acceptDocs} allows and that {@code twoPhase} confirms. {@code twoPhase.matches()} is only
     * called for docs that passed the {@code acceptDocs} check.
     *
     * @param collector receives each confirmed doc id
     * @param acceptDocs bits of documents that may be collected, or null to allow all
     * @param iterator the approximation to walk, starting from its current doc
     * @param twoPhase the two-phase iterator used to confirm each candidate
     * @param max the exclusive upper bound of doc ids to collect
     * @throws IOException if an iterator or the collector throws it
     */
    private static void scoreTwoPhaseIterator(
        LeafCollector collector,
        Bits acceptDocs,
        DocIdSetIterator iterator,
        TwoPhaseIterator twoPhase,
        int max)
        throws IOException {
      int candidate = iterator.docID();
      while (candidate < max) {
        final boolean accepted = acceptDocs == null || acceptDocs.get(candidate);
        if (accepted && twoPhase.matches()) {
          collector.collect(candidate);
        }
        candidate = iterator.nextDoc();
      }
    }

    /**
     * Collects every doc below {@code max} on which both {@code iterator} and {@code
     * competitiveIterator} land and that {@code acceptDocs} allows (a null {@code acceptDocs}
     * allows every doc). Scoring starts from the current doc of {@code iterator}.
     *
     * @param collector receives each accepted doc id
     * @param acceptDocs bits of documents that may be collected, or null to allow all
     * @param iterator the iterator over candidate documents
     * @param competitiveIterator the iterator used to skip candidates; asserted to never be ahead
     *     of {@code iterator} at the top of each loop iteration
     * @param max the exclusive upper bound of doc ids to collect
     * @throws IOException if an iterator or the collector throws it
     */
    private static void scoreCompetitiveIterator(
        LeafCollector collector,
        Bits acceptDocs,
        DocIdSetIterator iterator,
        DocIdSetIterator competitiveIterator,
        int max)
        throws IOException {
      // No update clause in the for header: each path below moves `doc` forward itself.
      for (int doc = iterator.docID(); doc < max; ) {
        assert competitiveIterator.docID() <= doc; // invariant
        if (competitiveIterator.docID() < doc) {
          // Catch the competitive iterator up with the candidate.
          int competitiveNext = competitiveIterator.advance(doc);
          if (competitiveNext != doc) {
            // The competitive iterator skipped past the candidate: jump the main iterator to
            // where the competitive iterator landed and re-check from there.
            doc = iterator.advance(competitiveNext);
            continue;
          }
        }

        // Both iterators are on `doc` here.
        if ((acceptDocs == null || acceptDocs.get(doc))) {
          collector.collect(doc);
        }

        doc = iterator.nextDoc();
      }
    }

    /**
     * Collects every doc below {@code max} on which both the approximation {@code iterator} and
     * {@code competitiveIterator} land, that {@code acceptDocs} allows (a null {@code acceptDocs}
     * allows every doc) and that {@code twoPhase} confirms. Scoring starts from the current doc of
     * {@code iterator}.
     *
     * @param collector receives each confirmed doc id
     * @param acceptDocs bits of documents that may be collected, or null to allow all
     * @param iterator the approximation over candidate documents
     * @param twoPhase the two-phase iterator used to confirm each candidate
     * @param competitiveIterator the iterator used to skip candidates; asserted to never be ahead
     *     of {@code iterator} at the top of each loop iteration
     * @param max the exclusive upper bound of doc ids to collect
     * @throws IOException if an iterator or the collector throws it
     */
    private static void scoreTwoPhaseOrCompetitiveIterator(
        LeafCollector collector,
        Bits acceptDocs,
        DocIdSetIterator iterator,
        TwoPhaseIterator twoPhase,
        DocIdSetIterator competitiveIterator,
        int max)
        throws IOException {
      // No update clause in the for header: each path below moves `doc` forward itself.
      for (int doc = iterator.docID(); doc < max; ) {
        assert competitiveIterator.docID() <= doc; // invariant
        if (competitiveIterator.docID() < doc) {
          // Catch the competitive iterator up with the candidate.
          int competitiveNext = competitiveIterator.advance(doc);
          if (competitiveNext != doc) {
            // The competitive iterator skipped past the candidate: jump the approximation to
            // where the competitive iterator landed and re-check from there.
            doc = iterator.advance(competitiveNext);
            continue;
          }
        }

        // Both iterators are on `doc` here; matches() is only called once acceptDocs allows it.
        if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
          collector.collect(doc);
        }

        doc = iterator.nextDoc();
      }
    }
  }
}