org.apache.lucene.search.BooleanWeight Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
/**
* Expert: the Weight for BooleanQuery, used to
* normalize, score and explain these queries.
*/
final class BooleanWeight extends Weight {
/** The Similarity implementation. */
final Similarity similarity;
final BooleanQuery query;
final ArrayList weights;
final int maxCoord; // num optional + num required
final boolean disableCoord;
final boolean needsScores;
final float coords[];
BooleanWeight(BooleanQuery query, IndexSearcher searcher, boolean needsScores, boolean disableCoord) throws IOException {
super(query);
this.query = query;
this.needsScores = needsScores;
this.similarity = searcher.getSimilarity(needsScores);
weights = new ArrayList<>();
int i = 0;
int maxCoord = 0;
for (BooleanClause c : query) {
Weight w = searcher.createWeight(c.getQuery(), needsScores && c.isScoring());
weights.add(w);
if (c.isScoring()) {
maxCoord++;
}
i += 1;
}
this.maxCoord = maxCoord;
// precompute coords (0..N, N).
// set disableCoord when its explicit, scores are not needed, no scoring clauses, or the sim doesn't use it.
coords = new float[maxCoord+1];
Arrays.fill(coords, 1F);
coords[0] = 0f;
if (maxCoord > 0 && needsScores && disableCoord == false) {
// compute coords from the similarity, look for any actual ones.
boolean seenActualCoord = false;
for (i = 1; i < coords.length; i++) {
coords[i] = coord(i, maxCoord);
seenActualCoord |= (coords[i] != 1F);
}
this.disableCoord = seenActualCoord == false;
} else {
this.disableCoord = true;
}
}
@Override
public void extractTerms(Set terms) {
int i = 0;
for (BooleanClause clause : query) {
if (clause.isScoring() || (needsScores == false && clause.isProhibited() == false)) {
weights.get(i).extractTerms(terms);
}
i++;
}
}
@Override
public float getValueForNormalization() throws IOException {
float sum = 0.0f;
int i = 0;
for (BooleanClause clause : query) {
// call sumOfSquaredWeights for all clauses in case of side effects
float s = weights.get(i).getValueForNormalization(); // sum sub weights
if (clause.isScoring()) {
// only add to sum for scoring clauses
sum += s;
}
i += 1;
}
return sum ;
}
public float coord(int overlap, int maxOverlap) {
if (overlap == 0) {
// special case that there are only non-scoring clauses
return 0F;
} else if (maxOverlap == 1) {
// LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
// so coord() is not applied. But when BQ cannot optimize itself away
// for a single clause (minNrShouldMatch, prohibited clauses, etc), it's
// important not to apply coord(1,1) for consistency, it might not be 1.0F
return 1F;
} else {
// common case: use the similarity to compute the coord
return similarity.coord(overlap, maxOverlap);
}
}
@Override
public void normalize(float norm, float boost) {
for (Weight w : weights) {
// normalize all clauses, (even if non-scoring in case of side affects)
w.normalize(norm, boost);
}
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
final int minShouldMatch = query.getMinimumNumberShouldMatch();
List subs = new ArrayList<>();
int coord = 0;
float sum = 0.0f;
boolean fail = false;
int matchCount = 0;
int shouldMatchCount = 0;
Iterator cIter = query.iterator();
for (Iterator wIter = weights.iterator(); wIter.hasNext();) {
Weight w = wIter.next();
BooleanClause c = cIter.next();
Explanation e = w.explain(context, doc);
if (e.isMatch()) {
if (c.isScoring()) {
subs.add(e);
sum += e.getValue();
coord++;
} else if (c.isRequired()) {
subs.add(Explanation.match(0f, "match on required clause, product of:",
Explanation.match(0f, Occur.FILTER + " clause"), e));
} else if (c.isProhibited()) {
subs.add(Explanation.noMatch("match on prohibited clause (" + c.getQuery().toString() + ")", e));
fail = true;
}
if (!c.isProhibited()) {
matchCount++;
}
if (c.getOccur() == Occur.SHOULD) {
shouldMatchCount++;
}
} else if (c.isRequired()) {
subs.add(Explanation.noMatch("no match on required clause (" + c.getQuery().toString() + ")", e));
fail = true;
}
}
if (fail) {
return Explanation.noMatch("Failure to meet condition(s) of required/prohibited clause(s)", subs);
} else if (matchCount == 0) {
return Explanation.noMatch("No matching clauses", subs);
} else if (shouldMatchCount < minShouldMatch) {
return Explanation.noMatch("Failure to match minimum number of optional clauses: " + minShouldMatch, subs);
} else {
// we have a match
Explanation result = Explanation.match(sum, "sum of:", subs);
final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);
if (coordFactor != 1f) {
result = Explanation.match(sum * coordFactor, "product of:",
result, Explanation.match(coordFactor, "coord("+coord+"/"+maxCoord+")"));
}
return result;
}
}
static BulkScorer disableScoring(final BulkScorer scorer) {
return new BulkScorer() {
@Override
public int score(final LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
final LeafCollector noScoreCollector = new LeafCollector() {
FakeScorer fake = new FakeScorer();
@Override
public void setScorer(Scorer scorer) throws IOException {
collector.setScorer(fake);
}
@Override
public void collect(int doc) throws IOException {
fake.doc = doc;
collector.collect(doc);
}
};
return scorer.score(noScoreCollector, acceptDocs, min, max);
}
@Override
public long cost() {
return scorer.cost();
}
};
}
// Return a BulkScorer for the optional clauses only,
// or null if it is not applicable
// pkg-private for forcing use of BooleanScorer in tests
BulkScorer optionalBulkScorer(LeafReaderContext context) throws IOException {
List optional = new ArrayList();
Iterator cIter = query.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
if (c.getOccur() != Occur.SHOULD) {
continue;
}
BulkScorer subScorer = w.bulkScorer(context);
if (subScorer != null) {
optional.add(subScorer);
}
}
if (optional.size() == 0) {
return null;
}
if (query.getMinimumNumberShouldMatch() > optional.size()) {
return null;
}
if (optional.size() == 1) {
BulkScorer opt = optional.get(0);
if (!disableCoord && maxCoord > 1) {
return new BooleanTopLevelScorers.BoostedBulkScorer(opt, coord(1, maxCoord));
} else {
return opt;
}
}
return new BooleanScorer(this, disableCoord, maxCoord, optional, Math.max(1, query.getMinimumNumberShouldMatch()), needsScores);
}
// Return a BulkScorer for the required clauses only,
// or null if it is not applicable
private BulkScorer requiredBulkScorer(LeafReaderContext context) throws IOException {
BulkScorer scorer = null;
Iterator cIter = query.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
if (c.isRequired() == false) {
continue;
}
if (scorer != null) {
// we don't have a BulkScorer for conjunctions
return null;
}
scorer = w.bulkScorer(context);
if (scorer == null) {
// no matches
return null;
}
if (c.isScoring() == false) {
if (needsScores) {
scorer = disableScoring(scorer);
}
} else {
assert maxCoord == 1;
}
}
return scorer;
}
/** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer}
* cannot be used. */
BulkScorer booleanScorer(LeafReaderContext context) throws IOException {
final int numOptionalClauses = query.getClauses(Occur.SHOULD).size();
final int numRequiredClauses = query.getClauses(Occur.MUST).size() + query.getClauses(Occur.FILTER).size();
BulkScorer positiveScorer;
if (numRequiredClauses == 0) {
positiveScorer = optionalBulkScorer(context);
if (positiveScorer == null) {
return null;
}
// TODO: what is the right heuristic here?
final long costThreshold;
if (query.getMinimumNumberShouldMatch() <= 1) {
// when all clauses are optional, use BooleanScorer aggressively
// TODO: is there actually a threshold under which we should rather
// use the regular scorer?
costThreshold = -1;
} else {
// when a minimum number of clauses should match, BooleanScorer is
// going to score all windows that have at least minNrShouldMatch
// matches in the window. But there is no way to know if there is
// an intersection (all clauses might match a different doc ID and
// there will be no matches in the end) so we should only use
// BooleanScorer if matches are very dense
costThreshold = context.reader().maxDoc() / 3;
}
if (positiveScorer.cost() < costThreshold) {
return null;
}
} else if (numRequiredClauses == 1
&& numOptionalClauses == 0
&& query.getMinimumNumberShouldMatch() == 0) {
positiveScorer = requiredBulkScorer(context);
} else {
// TODO: there are some cases where BooleanScorer
// would handle conjunctions faster than
// BooleanScorer2...
return null;
}
if (positiveScorer == null) {
return null;
}
List prohibited = new ArrayList<>();
Iterator cIter = query.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
if (c.isProhibited()) {
Scorer scorer = w.scorer(context);
if (scorer != null) {
prohibited.add(scorer);
}
}
}
if (prohibited.isEmpty()) {
return positiveScorer;
} else {
Scorer prohibitedScorer = opt(prohibited, 1, true);
if (prohibitedScorer.twoPhaseIterator() != null) {
// ReqExclBulkScorer can't deal efficiently with two-phased prohibited clauses
return null;
}
return new ReqExclBulkScorer(positiveScorer, prohibitedScorer.iterator());
}
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
final BulkScorer bulkScorer = booleanScorer(context);
if (bulkScorer != null) {
// bulk scoring is applicable, use it
return bulkScorer;
} else {
// use a Scorer-based impl (BS2)
return super.bulkScorer(context);
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
// initially the user provided value,
// but if minNrShouldMatch == optional.size(),
// we will optimize and move these to required, making this 0
int minShouldMatch = query.getMinimumNumberShouldMatch();
List required = new ArrayList<>();
// clauses that are required AND participate in scoring, subset of 'required'
List requiredScoring = new ArrayList<>();
List prohibited = new ArrayList<>();
List optional = new ArrayList<>();
Iterator cIter = query.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
Scorer subScorer = w.scorer(context);
if (subScorer == null) {
if (c.isRequired()) {
return null;
}
} else if (c.isRequired()) {
required.add(subScorer);
if (c.isScoring()) {
requiredScoring.add(subScorer);
}
} else if (c.isProhibited()) {
prohibited.add(subScorer);
} else {
optional.add(subScorer);
}
}
// scorer simplifications:
if (optional.size() == minShouldMatch) {
// any optional clauses are in fact required
required.addAll(optional);
requiredScoring.addAll(optional);
optional.clear();
minShouldMatch = 0;
}
if (required.isEmpty() && optional.isEmpty()) {
// no required and optional clauses.
return null;
} else if (optional.size() < minShouldMatch) {
// either >1 req scorer, or there are 0 req scorers and at least 1
// optional scorer. Therefore if there are not enough optional scorers
// no documents will be matched by the query
return null;
}
// we don't need scores, so if we have required clauses, drop optional clauses completely
if (!needsScores && minShouldMatch == 0 && required.size() > 0) {
optional.clear();
}
// three cases: conjunction, disjunction, or mix
// pure conjunction
if (optional.isEmpty()) {
return excl(req(required, requiredScoring, disableCoord), prohibited);
}
// pure disjunction
if (required.isEmpty()) {
return excl(opt(optional, minShouldMatch, disableCoord), prohibited);
}
// conjunction-disjunction mix:
// we create the required and optional pieces with coord disabled, and then
// combine the two: if minNrShouldMatch > 0, then it's a conjunction: because the
// optional side must match. otherwise it's required + optional, factoring the
// number of optional terms into the coord calculation
Scorer req = excl(req(required, requiredScoring, true), prohibited);
Scorer opt = opt(optional, minShouldMatch, true);
// TODO: clean this up: it's horrible
if (disableCoord) {
if (minShouldMatch > 0) {
return new ConjunctionScorer(this, Arrays.asList(req, opt), Arrays.asList(req, opt), 1F);
} else {
return new ReqOptSumScorer(req, opt);
}
} else if (optional.size() == 1) {
if (minShouldMatch > 0) {
return new ConjunctionScorer(this, Arrays.asList(req, opt), Arrays.asList(req, opt), coord(requiredScoring.size()+1, maxCoord));
} else {
float coordReq = coord(requiredScoring.size(), maxCoord);
float coordBoth = coord(requiredScoring.size() + 1, maxCoord);
return new BooleanTopLevelScorers.ReqSingleOptScorer(req, opt, coordReq, coordBoth);
}
} else {
if (minShouldMatch > 0) {
return new BooleanTopLevelScorers.CoordinatingConjunctionScorer(this, coords, req, requiredScoring.size(), opt);
} else {
return new BooleanTopLevelScorers.ReqMultiOptScorer(req, opt, requiredScoring.size(), coords);
}
}
}
/** Create a new scorer for the given required clauses. Note that
* {@code requiredScoring} is a subset of {@code required} containing
* required clauses that should participate in scoring. */
private Scorer req(List required, List requiredScoring, boolean disableCoord) {
if (required.size() == 1) {
Scorer req = required.get(0);
if (needsScores == false) {
return req;
}
if (requiredScoring.isEmpty()) {
// Scores are needed but we only have a filter clause
// BooleanWeight expects that calling score() is ok so we need to wrap
// to prevent score() from being propagated
return new FilterScorer(req) {
@Override
public float score() throws IOException {
return 0f;
}
@Override
public int freq() throws IOException {
return 0;
}
};
}
float boost = 1f;
if (disableCoord == false) {
boost = coord(1, maxCoord);
}
if (boost == 1f) {
return req;
}
return new BooleanTopLevelScorers.BoostedScorer(req, boost);
} else {
return new ConjunctionScorer(this, required, requiredScoring,
disableCoord ? 1.0F : coord(requiredScoring.size(), maxCoord));
}
}
private Scorer excl(Scorer main, List prohibited) throws IOException {
if (prohibited.isEmpty()) {
return main;
} else if (prohibited.size() == 1) {
return new ReqExclScorer(main, prohibited.get(0));
} else {
float coords[] = new float[prohibited.size()+1];
Arrays.fill(coords, 1F);
return new ReqExclScorer(main, new DisjunctionSumScorer(this, prohibited, coords, false));
}
}
private Scorer opt(List optional, int minShouldMatch, boolean disableCoord) throws IOException {
if (optional.size() == 1) {
Scorer opt = optional.get(0);
if (!disableCoord && maxCoord > 1) {
return new BooleanTopLevelScorers.BoostedScorer(opt, coord(1, maxCoord));
} else {
return opt;
}
} else {
float coords[];
if (disableCoord) {
// sneaky: when we do a mixed conjunction/disjunction, we need a fake for the disjunction part.
coords = new float[optional.size()+1];
Arrays.fill(coords, 1F);
} else {
coords = this.coords;
}
if (minShouldMatch > 1) {
return new MinShouldMatchSumScorer(this, optional, minShouldMatch, coords);
} else {
return new DisjunctionSumScorer(this, optional, coords, needsScores);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy