com.yahoo.search.predicate.PredicateIndex Maven / Gradle / Ivy
The newest version!
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.predicate;
import com.yahoo.api.annotations.Beta;
import com.yahoo.document.predicate.Predicate;
import com.yahoo.search.predicate.index.*;
import com.yahoo.search.predicate.index.conjunction.ConjunctionHit;
import com.yahoo.search.predicate.index.conjunction.ConjunctionIndex;
import com.yahoo.search.predicate.serialization.SerializationHelper;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Stream;
/**
* An index of {@link Predicate} objects.
*
* Use a {@link PredicateQuery} to find the ids of documents that have matching Predicates.
* Create an instance of {@link PredicateIndex} using the {@link PredicateIndexBuilder}.
*
* To build a {@link PredicateQuery} you add features and rangeFeatures with a 64-bit
* bitmap specifying which subqueries they appear in.
*
* To perform a search, create a {@link Searcher} and call its {@link Searcher#search(PredicateQuery)}
* method, which returns a stream of {@link Hit} objects,
* each of which contains a document id and a 64-bit bitmap specifying which subqueries the hit is for.
*
* Note that the {@link PredicateIndex} is thread-safe, but a {@link Searcher} is not.
* Each thread must use its own searcher.
*
* @author Magnar Nedland
* @author bjorncs
*/
@Beta
public class PredicateIndex {
private static final int SERIALIZATION_FORMAT_VERSION = 3;
private final PredicateRangeTermExpander expander;
private final int[] internalToExternalIdMapping;
private final byte[] minFeatureIndex;
private final short[] intervalEnds;
private final int highestIntervalEnd;
private final SimpleIndex intervalIndex;
private final SimpleIndex boundsIndex;
private final SimpleIndex conjunctionIntervalIndex;
private final PredicateIntervalStore intervalStore;
private final ConjunctionIndex conjunctionIndex;
private final int[] zeroConstraintDocuments;
private final Config config;
private final AtomicReference postingListCounter;
/**
* Package private as the index should be constructed using {@link PredicateIndexBuilder}.
*/
PredicateIndex(
Config config,
int[] internalToExternalIdMapping,
byte[] minFeatureIndex,
short[] intervalEnds,
int highestIntervalEnd,
SimpleIndex intervalIndex,
SimpleIndex boundsIndex,
SimpleIndex conjunctionIntervalIndex,
PredicateIntervalStore intervalStore,
ConjunctionIndex conjunctionIndex,
int[] zeroConstraintDocuments) {
this.internalToExternalIdMapping = internalToExternalIdMapping;
this.minFeatureIndex = minFeatureIndex;
this.intervalEnds = intervalEnds;
this.highestIntervalEnd = highestIntervalEnd;
this.intervalIndex = intervalIndex;
this.boundsIndex = boundsIndex;
this.conjunctionIntervalIndex = conjunctionIntervalIndex;
this.intervalStore = intervalStore;
this.conjunctionIndex = conjunctionIndex;
this.zeroConstraintDocuments = zeroConstraintDocuments;
this.expander = new PredicateRangeTermExpander(config.arity, config.lowerBound, config.upperBound);
this.config = config;
this.postingListCounter = new AtomicReference<>(new CachedPostingListCounter(internalToExternalIdMapping.length));
}
public void rebuildPostingListCache() {
postingListCounter.getAndUpdate(CachedPostingListCounter::rebuildCache);
}
/**
* Create a new searcher.
*/
public Searcher searcher() {
return new Searcher();
}
public void writeToOutputStream(DataOutputStream out) throws IOException {
out.writeInt(SERIALIZATION_FORMAT_VERSION);
config.writeToOutputStream(out);
SerializationHelper.writeIntArray(internalToExternalIdMapping, out);
SerializationHelper.writeByteArray(minFeatureIndex, out);
SerializationHelper.writeShortArray(intervalEnds, out);
out.writeInt(highestIntervalEnd);
SerializationHelper.writeIntArray(zeroConstraintDocuments, out);
intervalIndex.writeToOutputStream(out);
boundsIndex.writeToOutputStream(out);
conjunctionIntervalIndex.writeToOutputStream(out);
intervalStore.writeToOutputStream(out);
conjunctionIndex.writeToOutputStream(out);
}
public static PredicateIndex fromInputStream(DataInputStream in) throws IOException {
int version = in.readInt();
if (version != SERIALIZATION_FORMAT_VERSION) {
throw new IllegalArgumentException(String.format(
"Invalid serialization format version. Expected %d, was %d.", SERIALIZATION_FORMAT_VERSION, version));
}
Config config = Config.fromInputStream(in);
int[] internalToExternalIdMapping = SerializationHelper.readIntArray(in);
byte[] minFeatureIndex = SerializationHelper.readByteArray(in);
short[] intervalEnds = SerializationHelper.readShortArray(in);
int highestIntervalEnd = in.readInt();
int[] zeroConstraintDocuments = SerializationHelper.readIntArray(in);
SimpleIndex intervalIndex = SimpleIndex.fromInputStream(in);
SimpleIndex boundsIndex = SimpleIndex.fromInputStream(in);
SimpleIndex conjunctionIntervalIndex = SimpleIndex.fromInputStream(in);
PredicateIntervalStore intervalStore = PredicateIntervalStore.fromInputStream(in);
ConjunctionIndex conjunctionIndex = ConjunctionIndex.fromInputStream(in);
return new PredicateIndex(
config,
internalToExternalIdMapping,
minFeatureIndex,
intervalEnds,
highestIntervalEnd,
intervalIndex,
boundsIndex,
conjunctionIntervalIndex,
intervalStore,
conjunctionIndex,
zeroConstraintDocuments
);
}
@Beta
public class Searcher {
private final byte[] nPostingListsForDocument;
private final ConjunctionIndex.Searcher conjunctionIndexSearcher;
private Searcher() {
this.nPostingListsForDocument = new byte[internalToExternalIdMapping.length];
this.conjunctionIndexSearcher = conjunctionIndex.searcher();
}
/**
* Retrieves a stream of hits for the given query.
*
* @param query Specifies the boolean variables that are true.
* @return A stream of hits.
*/
public Stream search(PredicateQuery query) {
ArrayList postingLists = new ArrayList<>();
for (PredicateQuery.Feature feature : query.getFeatures()) {
addIntervalPostingList(feature.featureHash, feature.subqueryBitmap, postingLists);
}
for (PredicateQuery.RangeFeature feature : query.getRangeFeatures()) {
expander.expand(
feature.key,
feature.value,
featureHash -> addIntervalPostingList(featureHash, feature.subqueryBitmap, postingLists),
(featureHash, value) -> addBoundsPostingList(featureHash, value, feature.subqueryBitmap, postingLists));
}
addCompressedZStarPostingList(postingLists);
addConjunctionPostingLists(query, postingLists);
addZeroConstraintPostingList(postingLists);
CachedPostingListCounter counter = postingListCounter.get();
counter.registerUsage(postingLists);
counter.countPostingListsPerDocument(postingLists, nPostingListsForDocument);
return new PredicateSearch(
postingLists, nPostingListsForDocument, minFeatureIndex, intervalEnds, highestIntervalEnd).stream()
// Map to external id. Note that internal id for first document is 1.
.map(hit -> new Hit(internalToExternalIdMapping[hit.getDocId()], hit.getSubquery()));
}
private void addCompressedZStarPostingList(List postingLists) {
SimpleIndex.Entry e = intervalIndex.getPostingList(Feature.Z_STAR_COMPRESSED_ATTRIBUTE_HASH);
if (e != null) {
postingLists.add(new ZstarCompressedPostingList(intervalStore, e.docIds, e.dataRefs));
}
}
private void addBoundsPostingList(
long featureHash, int value, long subqueryBitMap, List postingLists) {
SimpleIndex.Entry e = boundsIndex.getPostingList(featureHash);
if (e != null) {
postingLists.add(new BoundsPostingList(intervalStore, e.docIds, e.dataRefs, subqueryBitMap, value));
}
}
private void addIntervalPostingList(long featureHash, long subqueryBitMap, List postingLists) {
SimpleIndex.Entry e = intervalIndex.getPostingList(featureHash);
if (e != null) {
postingLists.add(new IntervalPostingList(intervalStore, e.docIds, e.dataRefs, subqueryBitMap));
}
}
private void addConjunctionPostingLists(PredicateQuery query, List postingLists) {
List hits = conjunctionIndexSearcher.search(query);
for (ConjunctionHit hit : hits) {
SimpleIndex.Entry e = conjunctionIntervalIndex.getPostingList(hit.conjunctionId);
if (e != null) {
postingLists.add(new IntervalPostingList(intervalStore, e.docIds, e.dataRefs, hit.subqueryBitmap));
}
}
}
private void addZeroConstraintPostingList(ArrayList postingLists) {
if (zeroConstraintDocuments.length > 0) {
postingLists.add(new ZeroConstraintPostingList(zeroConstraintDocuments));
}
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy