All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.search.predicate.index.conjunction.ConjunctionIndex Maven / Gradle / Ivy

There is a newer version: 8.458.13
Show newest version
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.predicate.index.conjunction;

import com.yahoo.document.predicate.FeatureConjunction;
import com.yahoo.search.predicate.PredicateQuery;
import com.yahoo.search.predicate.SubqueryBitmap;
import com.yahoo.search.predicate.serialization.SerializationHelper;
import com.yahoo.search.predicate.utils.PrimitiveArraySorter;
import org.eclipse.collections.api.map.primitive.IntObjectMap;
import org.eclipse.collections.api.map.primitive.LongObjectMap;
import org.eclipse.collections.api.tuple.primitive.IntObjectPair;
import org.eclipse.collections.api.tuple.primitive.LongObjectPair;
import org.eclipse.collections.impl.map.mutable.primitive.IntObjectHashMap;
import org.eclipse.collections.impl.map.mutable.primitive.LongObjectHashMap;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;

/**
 * A searchable index of conjunctions (see {@link FeatureConjunction} / {@link IndexableFeatureConjunction}).
 * Implements the algorithm described in the paper
 * Indexing Boolean Expressions.
 *
 * @author Magnar Nedland
 * @author bjorncs
 */
public class ConjunctionIndex {

    // A map from K value to FeatureIndex
    private final IntObjectMap kIndex;
    private final int[] zList;
    private final long[] idMapping;

    public ConjunctionIndex(IntObjectMap kIndex, int[] zList, long[] idMapping) {
        this.kIndex = kIndex;
        this.zList = zList;
        this.idMapping = idMapping;
    }

    public Searcher searcher() {
        return new Searcher();
    }

    public void writeToOutputStream(DataOutputStream out) throws IOException {
        SerializationHelper.writeIntArray(zList, out);
        SerializationHelper.writeLongArray(idMapping, out);
        out.writeInt(kIndex.size());
        for (IntObjectPair p : kIndex.keyValuesView()) {
            out.writeInt(p.getOne());
            p.getTwo().writeToOutputStream(out);
        }
    }

    public static ConjunctionIndex fromInputStream(DataInputStream in) throws IOException {
        int[] zList = SerializationHelper.readIntArray(in);
        long[] idMapping = SerializationHelper.readLongArray(in);
        int kIndexSize = in.readInt();
        IntObjectHashMap kIndex = new IntObjectHashMap<>(kIndexSize);
        for (int i = 0; i < kIndexSize; i++) {
            int key = in.readInt();
            kIndex.put(key, FeatureIndex.fromInputStream(in));
        }
        kIndex.compact();
        return new ConjunctionIndex(kIndex, zList, idMapping);
    }

    public static class FeatureIndex {
        // Maps a feature id to conjunction id
        private final LongObjectMap map;

        public FeatureIndex(LongObjectMap map) {
            this.map = map;
        }

        public Optional getConjunctionIdsForFeature(long featureId) {
            return Optional.ofNullable(map.get(featureId));
        }

        public void writeToOutputStream(DataOutputStream out) throws IOException {
            out.writeInt(map.size());
            for (LongObjectPair p : map.keyValuesView()) {
                out.writeLong(p.getOne());
                SerializationHelper.writeIntArray(p.getTwo(), out);
            }
        }

        public static FeatureIndex fromInputStream(DataInputStream in) throws IOException {
            int mapSize = in.readInt();
            LongObjectHashMap map = new LongObjectHashMap<>(mapSize);
            for (int i = 0; i < mapSize; i++) {
                long key = in.readLong();
                map.put(key, SerializationHelper.readIntArray(in));
            }
            map.compact();
            return new FeatureIndex(map);
        }
    }

    public class Searcher {
        private final byte[] iteratorsPerConjunction;

        private Searcher() {
            this.iteratorsPerConjunction = new byte[idMapping.length];
        }

        /**
         * Retrieves a list of hits for the given query.
         *
         * @param query Specifies the boolean variables that are true.
         * @return List of hits
         */
        public List search(PredicateQuery query) {
            List conjunctionHits = new ArrayList<>();
            int uniqueKeys = (int) query.getFeatures().stream().map(e -> e.key).distinct().count();
            for (int k = uniqueKeys; k >= 0; k--) {
                List iterators = new ArrayList<>();
                getFeatureIndex(k)
                        .ifPresent(featureIndex -> addFeatureIterators(query, featureIndex, iterators));
                if (k == 0 && zList.length > 0) {
                    iterators.add(new ConjunctionIdIterator(SubqueryBitmap.ALL_SUBQUERIES, zList));
                }
                if (!iterators.isEmpty()) {
                    calculateIteratorsPerConjunction(iterators);
                    findMatchingConjunctions(k, iterators, conjunctionHits, iteratorsPerConjunction);
                }
            }
            return conjunctionHits;
        }

        private void calculateIteratorsPerConjunction(List iterators) {
            Arrays.fill(iteratorsPerConjunction, (byte)0);
            for (ConjunctionIdIterator iterator : iterators) {
                for (int id : iterator.getConjunctionIds()) {
                    if (ConjunctionId.isPositive(id)) {
                        ++iteratorsPerConjunction[id >>> 1];
                    }
                }
            }
        }

        private Optional getFeatureIndex(int k) {
            return Optional.ofNullable(kIndex.get(k));
        }

        private void addFeatureIterators(PredicateQuery query, FeatureIndex featureIndex, List iterators) {
            query.getFeatures().stream()
                    .map(e -> toSingleTermIterator(e, featureIndex))
                    .filter(Optional::isPresent)
                    .map(Optional::get)
                    .forEach(iterators::add);
        }

        private Optional toSingleTermIterator(PredicateQuery.Feature feature, FeatureIndex featureIndex) {
            return featureIndex.getConjunctionIdsForFeature(feature.featureHash)
                    .map(conjunctions -> new ConjunctionIdIterator(feature.subqueryBitmap, conjunctions));
        }

        private void findMatchingConjunctions(int k, List iterators, List matchingIds, byte[] iteratorsPerConjunction) {
            if (k == 0) {
                k = 1;
            }
            int nextId = getNextId(0, k, iteratorsPerConjunction);
            if (nextId == -1) {
                return; // no hits
            }

            int nIterators = iterators.size();
            if (nIterators < k) {
                return; // No hits
            }
            short[] sortedIndexes = new short[nIterators];
            short[] sortedIndexesMergeBuffer = new short[nIterators];
            for (short i = 0; i < nIterators; ++i) {
                sortedIndexes[i] = i;
            }

            int[] currentIds = new int[nIterators];
            int nCompleted = initializeIterators(iterators, sortedIndexes, currentIds, nextId);
            nIterators -= nCompleted;

            while (nIterators >= k) {
                int id0 = currentIds[sortedIndexes[0]];
                int idK = currentIds[sortedIndexes[k - 1]];

                // There should be at least k iterators for conjunction.
                if (ConjunctionId.equals(id0, idK)) {
                    long matchingSubqueries = SubqueryBitmap.ALL_SUBQUERIES;
                    // Find first positive conjunction
                    int firstPositive = 0;
                    while (firstPositive < nIterators && !ConjunctionId.isPositive(currentIds[sortedIndexes[firstPositive]])) {
                        // AND in the complement of the bitmap for negative conjunctions.
                        matchingSubqueries &= ~iterators.get(sortedIndexes[firstPositive]).getSubqueryBitmap();
                        ++firstPositive;
                    }
                    if (firstPositive + k <= nIterators) {
                        // Verify that at there are k positive iterators for the current conjunction.
                        id0 = currentIds[sortedIndexes[firstPositive]];
                        idK = currentIds[sortedIndexes[firstPositive + k - 1]];
                        if (id0 == idK) { // We know that id0 is positive conjunction
                            for (int i = firstPositive; i < firstPositive + k; i++) {
                                matchingSubqueries &= iterators.get(sortedIndexes[i]).getSubqueryBitmap();
                            }
                            if (matchingSubqueries != 0) {
                                matchingIds.add(new ConjunctionHit(toExternalId(id0), matchingSubqueries));
                            }
                        }
                    }
                }

                // Advance iterators to next conjunction.
                nextId = getNextId(ConjunctionId.nextId(id0), k, iteratorsPerConjunction);
                if (nextId == -1) {
                    return;
                }
                int completed = 0;
                int i;
                for (i = 0; i < nIterators; ++i) {
                    short index = sortedIndexes[i];
                    if (ConjunctionId.compare(currentIds[index], nextId) < 0) {
                        ConjunctionIdIterator iterator = iterators.get(index);
                        if (iterator.next(nextId)) {
                            currentIds[index] = iterator.getConjunctionId();
                        } else {
                            currentIds[index] = Integer.MAX_VALUE;
                            ++completed;
                        }
                    } else {
                        break;
                    }
                }
                if (i > 0 && nIterators - completed >= k) {
                    boolean swapMergeBuffer =
                            PrimitiveArraySorter.sortAndMerge(sortedIndexes, sortedIndexesMergeBuffer, i, nIterators,
                                    (a, b) -> Integer.compare(currentIds[a], currentIds[b]));
                    if (swapMergeBuffer) {
                        short[] temp = sortedIndexes;
                        sortedIndexes = sortedIndexesMergeBuffer;
                        sortedIndexesMergeBuffer = temp;
                    }
                }
                nIterators -= completed;
            }
        }

        private int initializeIterators(List iterators, short[] sortedIndexes, int[] currentIds, int nextId) {
            int nCompleted = 0;
            int nIterators = iterators.size();
            for (int i = 0; i < nIterators; i++) {
                ConjunctionIdIterator iterator = iterators.get(i);
                if (iterator.next(nextId)) {
                    currentIds[i] = iterator.getConjunctionId();
                } else {
                    currentIds[i] = Integer.MAX_VALUE;
                    ++nCompleted;

                }
            }
            PrimitiveArraySorter.sort(sortedIndexes, (a, b) -> Integer.compare(currentIds[a], currentIds[b]));
            return nCompleted;
        }

        private int getNextId(int fromId, int k, byte[] iteratorsPerConjunction) {
            int id = fromId >>> 1;
            int nDocuments = iteratorsPerConjunction.length;
            while (id < nDocuments && iteratorsPerConjunction[id] < k) {
                ++id;
            }
            return id == nDocuments ? -1 : ((id << 1) | 1);

        }

        private long toExternalId(int internalId) {
            return idMapping[internalId >>> 1];
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy