All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.search.join.ToParentBlockJoinQuery Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.join;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Locale;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;

import static org.apache.lucene.search.ScoreMode.COMPLETE;

/**
 * This query requires that you index
 * children and parent docs as a single block, using the
 * {@link IndexWriter#addDocuments IndexWriter.addDocuments()} or {@link
 * IndexWriter#updateDocuments IndexWriter.updateDocuments()} API.  In each block, the
 * child documents must appear first, ending with the parent
 * document.  At search time you provide a Filter
 * identifying the parents, however this Filter must provide
 * an {@link BitSet} per sub-reader.
 *
 * 

Once the block index is built, use this query to wrap * any sub-query matching only child docs and join matches in that * child document space up to the parent document space. * You can then use this Query as a clause with * other queries in the parent document space.

* *

See {@link ToChildBlockJoinQuery} if you need to join * in the reverse order. * *

The child documents must be orthogonal to the parent * documents: the wrapped child query must never * return a parent document.

* *

See {@link org.apache.lucene.search.join} for an * overview.

* * @lucene.experimental */ public class ToParentBlockJoinQuery extends Query { private final BitSetProducer parentsFilter; private final Query childQuery; private final ScoreMode scoreMode; /** Create a ToParentBlockJoinQuery. * * @param childQuery Query matching child documents. * @param parentsFilter Filter identifying the parent documents. * @param scoreMode How to aggregate multiple child scores * into a single parent score. **/ public ToParentBlockJoinQuery(Query childQuery, BitSetProducer parentsFilter, ScoreMode scoreMode) { super(); this.childQuery = childQuery; this.parentsFilter = parentsFilter; this.scoreMode = scoreMode; } @Override public void visit(QueryVisitor visitor) { visitor.visitLeaf(this); } @Override public Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode weightScoreMode, float boost) throws IOException { ScoreMode childScoreMode = weightScoreMode.needsScores() ? scoreMode : ScoreMode.None; final Weight childWeight; if (childScoreMode == ScoreMode.None) { // we don't need to compute a score for the child query so we wrap // it under a constant score query that can early terminate if the // minimum score is greater than 0 and the total hits that match the // query is not requested. childWeight = searcher.rewrite(new ConstantScoreQuery(childQuery)).createWeight(searcher, weightScoreMode, 0f); } else { // if the score is needed we force the collection mode to COMPLETE because the child query cannot skip // non-competitive documents. childWeight = childQuery.createWeight(searcher, weightScoreMode.needsScores() ? COMPLETE : weightScoreMode, boost); } return new BlockJoinWeight(this, childWeight, parentsFilter, childScoreMode); } /** Return our child query. 
*/ public Query getChildQuery() { return childQuery; } private static class BlockJoinWeight extends FilterWeight { private final BitSetProducer parentsFilter; private final ScoreMode scoreMode; public BlockJoinWeight(Query joinQuery, Weight childWeight, BitSetProducer parentsFilter, ScoreMode scoreMode) { super(joinQuery, childWeight); this.parentsFilter = parentsFilter; this.scoreMode = scoreMode; } @Override public Scorer scorer(LeafReaderContext context) throws IOException { final ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } return scorerSupplier.get(Long.MAX_VALUE); } // NOTE: acceptDocs applies (and is checked) only in the // parent document space @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { final ScorerSupplier childScorerSupplier = in.scorerSupplier(context); if (childScorerSupplier == null) { return null; } // NOTE: this does not take accept docs into account, the responsibility // to not match deleted docs is on the scorer final BitSet parents = parentsFilter.getBitSet(context); if (parents == null) { // No matches return null; } return new ScorerSupplier() { @Override public Scorer get(long leadCost) throws IOException { return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(leadCost), parents, scoreMode); } @Override public long cost() { return childScorerSupplier.cost(); } }; } @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { BlockJoinScorer scorer = (BlockJoinScorer) scorer(context); if (scorer != null && scorer.iterator().advance(doc) == doc) { return scorer.explain(context, in); } return Explanation.noMatch("Not a match"); } @Override public Matches matches(LeafReaderContext context, int doc) throws IOException { // The default implementation would delegate to the joinQuery's Weight, which // matches on children. 
We need to match on the parent instead Scorer scorer = scorer(context); if (scorer == null) { return null; } final TwoPhaseIterator twoPhase = scorer.twoPhaseIterator(); if (twoPhase == null) { if (scorer.iterator().advance(doc) != doc) { return null; } } else { if (twoPhase.approximation().advance(doc) != doc || twoPhase.matches() == false) { return null; } } return MatchesUtils.MATCH_WITH_NO_TERMS; } } private static class ParentApproximation extends DocIdSetIterator { private final DocIdSetIterator childApproximation; private final BitSet parentBits; private int doc = -1; ParentApproximation(DocIdSetIterator childApproximation, BitSet parentBits) { this.childApproximation = childApproximation; this.parentBits = parentBits; } @Override public int docID() { return doc; } @Override public int nextDoc() throws IOException { return advance(doc + 1); } @Override public int advance(int target) throws IOException { if (target >= parentBits.length()) { return doc = NO_MORE_DOCS; } final int firstChildTarget = target == 0 ? 
0 : parentBits.prevSetBit(target - 1) + 1; int childDoc = childApproximation.docID(); if (childDoc < firstChildTarget) { childDoc = childApproximation.advance(firstChildTarget); } if (childDoc >= parentBits.length() - 1) { return doc = NO_MORE_DOCS; } return doc = parentBits.nextSetBit(childDoc + 1); } @Override public long cost() { return childApproximation.cost(); } } private static class ParentTwoPhase extends TwoPhaseIterator { private final ParentApproximation parentApproximation; private final DocIdSetIterator childApproximation; private final TwoPhaseIterator childTwoPhase; ParentTwoPhase(ParentApproximation parentApproximation, TwoPhaseIterator childTwoPhase) { super(parentApproximation); this.parentApproximation = parentApproximation; this.childApproximation = childTwoPhase.approximation(); this.childTwoPhase = childTwoPhase; } @Override public boolean matches() throws IOException { assert childApproximation.docID() < parentApproximation.docID(); do { if (childTwoPhase.matches()) { return true; } } while (childApproximation.nextDoc() < parentApproximation.docID()); return false; } @Override public float matchCost() { // TODO: how could we compute a match cost? 
return childTwoPhase.matchCost() + 10; } } static class BlockJoinScorer extends Scorer { private final Scorer childScorer; private final BitSet parentBits; private final ScoreMode scoreMode; private final DocIdSetIterator childApproximation; private final TwoPhaseIterator childTwoPhase; private final ParentApproximation parentApproximation; private final ParentTwoPhase parentTwoPhase; private float score; public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, ScoreMode scoreMode) { super(weight); //System.out.println("Q.init firstChildDoc=" + firstChildDoc); this.parentBits = parentBits; this.childScorer = childScorer; this.scoreMode = scoreMode; childTwoPhase = childScorer.twoPhaseIterator(); if (childTwoPhase == null) { childApproximation = childScorer.iterator(); parentApproximation = new ParentApproximation(childApproximation, parentBits); parentTwoPhase = null; } else { childApproximation = childTwoPhase.approximation(); parentApproximation = new ParentApproximation(childTwoPhase.approximation(), parentBits); parentTwoPhase = new ParentTwoPhase(parentApproximation, childTwoPhase); } } @Override public Collection getChildren() { return Collections.singleton(new ChildScorable(childScorer, "BLOCK_JOIN")); } @Override public DocIdSetIterator iterator() { if (parentTwoPhase == null) { // the approximation is exact return parentApproximation; } else { return TwoPhaseIterator.asDocIdSetIterator(parentTwoPhase); } } @Override public TwoPhaseIterator twoPhaseIterator() { return parentTwoPhase; } @Override public int docID() { return parentApproximation.docID(); } @Override public float score() throws IOException { setScoreAndFreq(); return score; } @Override public float getMaxScore(int upTo) throws IOException { if (scoreMode == ScoreMode.None) { return childScorer.getMaxScore(upTo); } return Float.POSITIVE_INFINITY; } @Override public void setMinCompetitiveScore(float minScore) throws IOException { if (scoreMode == ScoreMode.None) { 
childScorer.setMinCompetitiveScore(minScore); } } private void setScoreAndFreq() throws IOException { if (childApproximation.docID() >= parentApproximation.docID()) { return; } double score = scoreMode == ScoreMode.None ? 0 : childScorer.score(); int freq = 1; while (childApproximation.nextDoc() < parentApproximation.docID()) { if (childTwoPhase == null || childTwoPhase.matches()) { final float childScore = scoreMode == ScoreMode.None ? 0 : childScorer.score(); freq += 1; switch (scoreMode) { case Total: case Avg: score += childScore; break; case Min: score = Math.min(score, childScore); break; case Max: score = Math.max(score, childScore); break; case None: break; default: throw new AssertionError(); } } } if (childApproximation.docID() == parentApproximation.docID() && (childTwoPhase == null || childTwoPhase.matches())) { throw new IllegalStateException("Child query must not match same docs with parent filter. " + "Combine them as must clauses (+) to find a problem doc. " + "docId=" + parentApproximation.docID() + ", " + childScorer.getClass()); } if (scoreMode == ScoreMode.Avg) { score /= freq; } this.score = (float) score; } public Explanation explain(LeafReaderContext context, Weight childWeight) throws IOException { int prevParentDoc = parentBits.prevSetBit(parentApproximation.docID() - 1); int start = context.docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc int end = context.docBase + parentApproximation.docID() - 1; // -1 b/c parentDoc is parent doc Explanation bestChild = null; int matches = 0; for (int childDoc = start; childDoc <= end; childDoc++) { Explanation child = childWeight.explain(context, childDoc - context.docBase); if (child.isMatch()) { matches++; if (bestChild == null || child.getValue().floatValue() > bestChild.getValue().floatValue()) { bestChild = child; } } } return Explanation.match(score(), String.format(Locale.ROOT, "Score based on %d child docs in range from %d to %d, best match:", matches, start, end), 
bestChild ); } } @Override public Query rewrite(IndexReader reader) throws IOException { final Query childRewrite = childQuery.rewrite(reader); if (childRewrite != childQuery) { return new ToParentBlockJoinQuery(childRewrite, parentsFilter, scoreMode); } else { return super.rewrite(reader); } } @Override public String toString(String field) { return "ToParentBlockJoinQuery ("+childQuery.toString()+")"; } @Override public boolean equals(Object other) { return sameClassAs(other) && equalsTo(getClass().cast(other)); } private boolean equalsTo(ToParentBlockJoinQuery other) { return childQuery.equals(other.childQuery) && parentsFilter.equals(other.parentsFilter) && scoreMode == other.scoreMode; } @Override public int hashCode() { final int prime = 31; int hash = classHash(); hash = prime * hash + childQuery.hashCode(); hash = prime * hash + scoreMode.hashCode(); hash = prime * hash + parentsFilter.hashCode(); return hash; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy