
org.elasticsearch.percolator.PercolateQuery Maven / Gradle / Ivy
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.percolator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.core.CheckedFunction;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.Set;
final class PercolateQuery extends Query implements Accountable {
// cost of matching the query against the document, arbitrary as it would be really complex to estimate
private static final float MATCH_COST = 1000;
private final String name;
private final QueryStore queryStore;
private final List documents;
private final Query candidateMatchesQuery;
private final Query verifiedMatchesQuery;
private final IndexSearcher percolatorIndexSearcher;
private final Query nonNestedDocsFilter;
PercolateQuery(
String name,
QueryStore queryStore,
List documents,
Query candidateMatchesQuery,
IndexSearcher percolatorIndexSearcher,
Query nonNestedDocsFilter,
Query verifiedMatchesQuery
) {
this.name = name;
this.documents = Objects.requireNonNull(documents);
this.candidateMatchesQuery = Objects.requireNonNull(candidateMatchesQuery);
this.queryStore = Objects.requireNonNull(queryStore);
this.percolatorIndexSearcher = Objects.requireNonNull(percolatorIndexSearcher);
this.nonNestedDocsFilter = nonNestedDocsFilter;
this.verifiedMatchesQuery = Objects.requireNonNull(verifiedMatchesQuery);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = candidateMatchesQuery.rewrite(reader);
if (rewritten != candidateMatchesQuery) {
return new PercolateQuery(
name,
queryStore,
documents,
rewritten,
percolatorIndexSearcher,
nonNestedDocsFilter,
verifiedMatchesQuery
);
} else {
return this;
}
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
final Weight verifiedMatchesWeight = verifiedMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
final Weight candidateMatchesWeight = candidateMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
return new Weight(this) {
@Override
public void extractTerms(Set set) {}
@Override
public Explanation explain(LeafReaderContext leafReaderContext, int docId) throws IOException {
Scorer scorer = scorer(leafReaderContext);
if (scorer != null) {
TwoPhaseIterator twoPhaseIterator = scorer.twoPhaseIterator();
int result = twoPhaseIterator.approximation().advance(docId);
if (result == docId) {
if (twoPhaseIterator.matches()) {
if (scoreMode.needsScores()) {
CheckedFunction percolatorQueries = queryStore.getQueries(leafReaderContext);
Query query = percolatorQueries.apply(docId);
Explanation detail = percolatorIndexSearcher.explain(query, 0);
return Explanation.match(scorer.score(), "PercolateQuery", detail);
} else {
return Explanation.match(scorer.score(), "PercolateQuery");
}
}
}
}
return Explanation.noMatch("PercolateQuery");
}
@Override
public Scorer scorer(LeafReaderContext leafReaderContext) throws IOException {
final Scorer approximation = candidateMatchesWeight.scorer(leafReaderContext);
if (approximation == null) {
return null;
}
final CheckedFunction percolatorQueries = queryStore.getQueries(leafReaderContext);
if (scoreMode.needsScores()) {
return new BaseScorer(this, approximation) {
float score;
@Override
boolean matchDocId(int docId) throws IOException {
Query query = percolatorQueries.apply(docId);
if (query != null) {
if (nonNestedDocsFilter != null) {
query = new BooleanQuery.Builder().add(query, Occur.MUST)
.add(nonNestedDocsFilter, Occur.FILTER)
.build();
}
TopDocs topDocs = percolatorIndexSearcher.search(query, 1);
if (topDocs.scoreDocs.length > 0) {
score = topDocs.scoreDocs[0].score;
return true;
} else {
return false;
}
} else {
return false;
}
}
@Override
public float score() throws IOException {
return score;
}
};
} else {
ScorerSupplier verifiedDocsScorer = verifiedMatchesWeight.scorerSupplier(leafReaderContext);
Bits verifiedDocsBits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), verifiedDocsScorer);
return new BaseScorer(this, approximation) {
@Override
public float score() throws IOException {
return 0f;
}
boolean matchDocId(int docId) throws IOException {
// We use the verifiedDocsBits to skip the expensive MemoryIndex verification.
// If docId also appears in the verifiedDocsBits then that means during indexing
// we were able to extract all query terms and for this candidate match
// and we determined based on the nature of the query that it is safe to skip
// the MemoryIndex verification.
if (verifiedDocsBits.get(docId)) {
return true;
}
Query query = percolatorQueries.apply(docId);
if (query == null) {
return false;
}
if (nonNestedDocsFilter != null) {
query = new BooleanQuery.Builder().add(query, Occur.MUST).add(nonNestedDocsFilter, Occur.FILTER).build();
}
return Lucene.exists(percolatorIndexSearcher, query);
}
};
}
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
// This query uses a significant amount of memory, let's never
// cache it or compound queries that wrap it.
return false;
}
};
}
String getName() {
return name;
}
IndexSearcher getPercolatorIndexSearcher() {
return percolatorIndexSearcher;
}
boolean excludesNestedDocs() {
return nonNestedDocsFilter != null;
}
List getDocuments() {
return documents;
}
QueryStore getQueryStore() {
return queryStore;
}
Query getCandidateMatchesQuery() {
return candidateMatchesQuery;
}
Query getVerifiedMatchesQuery() {
return verifiedMatchesQuery;
}
// Comparing identity here to avoid being cached
// Note that in theory if the same instance gets used multiple times it could still get cached,
// however since we create a new query instance each time we this query this shouldn't happen and thus
// this risk neglectable.
@Override
public boolean equals(Object o) {
return this == o;
}
// Computing hashcode based on identity to avoid caching.
@Override
public int hashCode() {
return System.identityHashCode(this);
}
@Override
public String toString(String s) {
StringBuilder sources = new StringBuilder();
for (BytesReference document : documents) {
sources.append(document.utf8ToString());
sources.append('\n');
}
return "PercolateQuery{document_sources={" + sources + "},inner={" + candidateMatchesQuery.toString(s) + "}}";
}
@Override
public long ramBytesUsed() {
long ramUsed = 0L;
for (BytesReference document : documents) {
ramUsed += document.ramBytesUsed();
}
return ramUsed;
}
@FunctionalInterface
interface QueryStore {
CheckedFunction getQueries(LeafReaderContext ctx) throws IOException;
}
abstract static class BaseScorer extends Scorer {
final Scorer approximation;
BaseScorer(Weight weight, Scorer approximation) {
super(weight);
this.approximation = approximation;
}
@Override
public final DocIdSetIterator iterator() {
return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
}
@Override
public final TwoPhaseIterator twoPhaseIterator() {
return new TwoPhaseIterator(approximation.iterator()) {
@Override
public boolean matches() throws IOException {
return matchDocId(approximation.docID());
}
@Override
public float matchCost() {
return MATCH_COST;
}
};
}
@Override
public final int docID() {
return approximation.docID();
}
abstract boolean matchDocId(int docId) throws IOException;
@Override
public float getMaxScore(int upTo) throws IOException {
return Float.MAX_VALUE;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy