All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.search.Weight Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;

/**
 * Expert: Calculate query weights and build query scorers.
 *
 * 

The purpose of {@link Weight} is to ensure searching does not modify a {@link Query}, so that * a {@link Query} instance can be reused. * *

{@link IndexSearcher} dependent state of the query should reside in the {@link Weight}. * *

{@link org.apache.lucene.index.LeafReader} dependent state should reside in the {@link * Scorer}. * *

Since {@link Weight} creates {@link Scorer} instances for a given {@link * org.apache.lucene.index.LeafReaderContext} ({@link * #scorer(org.apache.lucene.index.LeafReaderContext)}) callers must maintain the relationship * between the searcher's top-level {@link IndexReaderContext} and the context used to create a * {@link Scorer}. * *

A Weight is used in the following way: * *

    *
  1. A Weight is constructed by a top-level query, given a IndexSearcher * ({@link Query#createWeight(IndexSearcher, ScoreMode, float)}). *
  2. A Scorer is constructed by {@link * #scorer(org.apache.lucene.index.LeafReaderContext)}. *
* * @since 2.9 */ public abstract class Weight implements SegmentCacheable { protected final Query parentQuery; /** * Sole constructor, typically invoked by sub-classes. * * @param query the parent query */ protected Weight(Query query) { this.parentQuery = query; } /** * Returns {@link Matches} for a specific document, or {@code null} if the document does not match * the parent query * *

A query match that contains no position information (for example, a Point or DocValues * query) will return {@link MatchesUtils#MATCH_WITH_NO_TERMS} * * @param context the reader's context to create the {@link Matches} for * @param doc the document's id relative to the given context's reader * @lucene.experimental */ public Matches matches(LeafReaderContext context, int doc) throws IOException { ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } Scorer scorer = scorerSupplier.get(1); final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator(); if (twoPhase == null) { if (scorer.iterator().advance(doc) != doc) { return null; } } else { if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) { return null; } } return MatchesUtils.MATCH_WITH_NO_TERMS; } /** * An explanation of the score computation for the named document. * * @param context the readers context to create the {@link Explanation} for. * @param doc the document's id relative to the given context's reader * @return an Explanation for the score * @throws IOException if an {@link IOException} occurs */ public abstract Explanation explain(LeafReaderContext context, int doc) throws IOException; /** The query that this concerns. */ public final Query getQuery() { return parentQuery; } /** * Optional method that delegates to scorerSupplier. * *

Returns a {@link Scorer} which can iterate in order over all matching documents and assign * them a score. A scorer for the same {@link LeafReaderContext} instance may be requested * multiple times as part of a single search call. * *

NOTE: null can be returned if no documents will be scored by this query. * *

NOTE: The returned {@link Scorer} does not have {@link LeafReader#getLiveDocs()} * applied, they need to be checked on top. * * @param context the {@link org.apache.lucene.index.LeafReaderContext} for which to return the * {@link Scorer}. * @return a {@link Scorer} which scores documents in/out-of order. * @throws IOException if there is a low-level I/O error */ public final Scorer scorer(LeafReaderContext context) throws IOException { ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } return scorerSupplier.get(Long.MAX_VALUE); } /** * Get a {@link ScorerSupplier}, which allows knowing the cost of the {@link Scorer} before * building it. A scorer supplier for the same {@link LeafReaderContext} instance may be requested * multiple times as part of a single search call. * *

Note: It must return null if the scorer is null. * * @param context the leaf reader context * @return a {@link ScorerSupplier} providing the scorer, or null if scorer is null * @throws IOException if an IOException occurs * @see Scorer * @see DefaultScorerSupplier */ public abstract ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException; /** * Helper method that delegates to {@link #scorerSupplier(LeafReaderContext)}. It is implemented * as * *

   * ScorerSupplier scorerSupplier = scorerSupplier(context);
   * if (scorerSupplier == null) {
   *   // No docs match
   *   return null;
   * }
   *
   * scorerSupplier.setTopLevelScoringClause();
   * return scorerSupplier.bulkScorer();
   * 
* * A bulk scorer for the same {@link LeafReaderContext} instance may be requested multiple times * as part of a single search call. */ public final BulkScorer bulkScorer(LeafReaderContext context) throws IOException { ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { // No docs match return null; } scorerSupplier.setTopLevelScoringClause(); return scorerSupplier.bulkScorer(); } /** * Counts the number of live documents that match a given {@link Weight#parentQuery} in a leaf. * *

The default implementation returns -1 for every query. This indicates that the count could * not be computed in sub-linear time. * *

Specific query classes should override it to provide other accurate sub-linear * implementations (that actually return the count). Look at {@link * MatchAllDocsQuery#createWeight(IndexSearcher, ScoreMode, float)} for an example * *

We use this property of the function to count hits in {@link IndexSearcher#count(Query)}. * * @param context the {@link org.apache.lucene.index.LeafReaderContext} for which to return the * count. * @return integer count of the number of matches * @throws IOException if there is a low-level I/O error */ public int count(LeafReaderContext context) throws IOException { return -1; } /** * A wrap for default scorer supplier. * * @lucene.internal */ protected static final class DefaultScorerSupplier extends ScorerSupplier { private final Scorer scorer; public DefaultScorerSupplier(Scorer scorer) { this.scorer = Objects.requireNonNull(scorer, "Scorer must not be null"); } @Override public Scorer get(long leadCost) throws IOException { return scorer; } @Override public long cost() { return scorer.iterator().cost(); } } /** * Just wraps a Scorer and performs top scoring using it. * * @lucene.internal */ protected static class DefaultBulkScorer extends BulkScorer { private final Scorer scorer; private final DocIdSetIterator iterator; private final TwoPhaseIterator twoPhase; /** Sole constructor. */ public DefaultBulkScorer(Scorer scorer) { this.scorer = Objects.requireNonNull(scorer); this.twoPhase = scorer.twoPhaseIterator(); if (twoPhase == null) { this.iterator = scorer.iterator(); } else { this.iterator = twoPhase.approximation(); } } @Override public long cost() { return iterator.cost(); } @Override public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { collector.setScorer(scorer); DocIdSetIterator competitiveIterator = collector.competitiveIterator(); if (competitiveIterator != null) { if (competitiveIterator.docID() > min) { min = competitiveIterator.docID(); // The competitive iterator may not match any docs in the range. 
min = Math.min(min, max); } } if (iterator.docID() < min) { if (iterator.docID() == min - 1) { iterator.nextDoc(); } else { iterator.advance(min); } } // These various specializations help save some null checks in a hot loop, but as importantly // if not more importantly, they help reduce the polymorphism of calls sites to nextDoc() and // collect() because only a subset of collectors produce a competitive iterator, and the set // of implementing classes for two-phase approximations is smaller than the set of doc id set // iterator implementations. if (twoPhase == null && competitiveIterator == null) { // Optimize simple iterators with collectors that can't skip scoreIterator(collector, acceptDocs, iterator, max); } else if (competitiveIterator == null) { scoreTwoPhaseIterator(collector, acceptDocs, iterator, twoPhase, max); } else if (twoPhase == null) { scoreCompetitiveIterator(collector, acceptDocs, iterator, competitiveIterator, max); } else { scoreTwoPhaseOrCompetitiveIterator( collector, acceptDocs, iterator, twoPhase, competitiveIterator, max); } return iterator.docID(); } private static void scoreIterator( LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, int max) throws IOException { for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) { if (acceptDocs == null || acceptDocs.get(doc)) { collector.collect(doc); } } } private static void scoreTwoPhaseIterator( LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, int max) throws IOException { for (int doc = iterator.docID(); doc < max; doc = iterator.nextDoc()) { if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) { collector.collect(doc); } } } private static void scoreCompetitiveIterator( LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, DocIdSetIterator competitiveIterator, int max) throws IOException { for (int doc = iterator.docID(); doc < max; ) { assert competitiveIterator.docID() <= doc; 
// invariant if (competitiveIterator.docID() < doc) { int competitiveNext = competitiveIterator.advance(doc); if (competitiveNext != doc) { doc = iterator.advance(competitiveNext); continue; } } if ((acceptDocs == null || acceptDocs.get(doc))) { collector.collect(doc); } doc = iterator.nextDoc(); } } private static void scoreTwoPhaseOrCompetitiveIterator( LeafCollector collector, Bits acceptDocs, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, DocIdSetIterator competitiveIterator, int max) throws IOException { for (int doc = iterator.docID(); doc < max; ) { assert competitiveIterator.docID() <= doc; // invariant if (competitiveIterator.docID() < doc) { int competitiveNext = competitiveIterator.advance(doc); if (competitiveNext != doc) { doc = iterator.advance(competitiveNext); continue; } } if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) { collector.collect(doc); } doc = iterator.nextDoc(); } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy