All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.percolator.PercolatorMatchedSlotSubFetchPhase Maven / Gradle / Ivy

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */
package org.elasticsearch.percolator;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.elasticsearch.Version;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.FetchSubPhaseProcessor;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.elasticsearch.percolator.PercolatorHighlightSubFetchPhase.locatePercolatorQuery;

/**
 * Adds a special field to a percolator query hit to indicate which documents matched with the percolator query.
 * This is useful when multiple documents are being percolated in a single request.
 */
final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {

    static final String FIELD_NAME_PREFIX = "_percolator_document_slot";

    @Override
    public FetchSubPhaseProcessor getProcessor(FetchContext fetchContext) throws IOException {

        List percolateContexts = new ArrayList<>();
        List percolateQueries = locatePercolatorQuery(fetchContext.query());
        boolean singlePercolateQuery = percolateQueries.size() == 1;
        for (PercolateQuery pq : percolateQueries) {
            percolateContexts.add(new PercolateContext(pq, singlePercolateQuery));
        }
        if (percolateContexts.isEmpty()) {
            return null;
        }

        return new FetchSubPhaseProcessor() {

            LeafReaderContext ctx;

            @Override
            public void setNextReader(LeafReaderContext readerContext) {
                this.ctx = readerContext;
            }

            @Override
            public void process(HitContext hitContext) throws IOException {
                for (PercolateContext pc : percolateContexts) {
                    String fieldName = pc.fieldName();
                    Query query = pc.percolateQuery.getQueryStore().getQueries(ctx).apply(hitContext.docId());
                    if (query == null) {
                        // This is not a document with a percolator field.
                        continue;
                    }
                    query = pc.filterNestedDocs(query);
                    IndexSearcher percolatorIndexSearcher = pc.percolateQuery.getPercolatorIndexSearcher();
                    int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc();
                    TopDocs topDocs = percolatorIndexSearcher.search(query, memoryIndexMaxDoc, new Sort(SortField.FIELD_DOC));
                    if (topDocs.totalHits.value == 0) {
                        // This hit didn't match with a percolate query,
                        // likely to happen when percolating multiple documents
                        continue;
                    }

                    IntStream slots = convertTopDocsToSlots(topDocs, pc.rootDocsBySlot);
                    // _percolator_document_slot fields are document fields and should be under "fields" section in a hit
                    hitContext.hit().setDocumentField(fieldName, new DocumentField(fieldName, slots.boxed().collect(Collectors.toList())));
                }
            }
        };
    }

    static class PercolateContext {
        final PercolateQuery percolateQuery;
        final boolean singlePercolateQuery;
        final int[] rootDocsBySlot;

        PercolateContext(PercolateQuery pq, boolean singlePercolateQuery) throws IOException {
            this.percolateQuery = pq;
            this.singlePercolateQuery = singlePercolateQuery;
            IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher();
            Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter(Version.CURRENT));
            Weight weight = percolatorIndexSearcher.createWeight(nonNestedFilter, ScoreMode.COMPLETE_NO_SCORES, 1f);
            Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0));
            int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc();
            BitSet rootDocs = BitSet.of(s.iterator(), memoryIndexMaxDoc);
            boolean hasNestedDocs = rootDocs.cardinality() != percolatorIndexSearcher.getIndexReader().numDocs();
            if (hasNestedDocs) {
                this.rootDocsBySlot = buildRootDocsSlots(rootDocs);
            } else {
                this.rootDocsBySlot = null;
            }
        }

        String fieldName() {
            return singlePercolateQuery ? FIELD_NAME_PREFIX : FIELD_NAME_PREFIX + "_" + percolateQuery.getName();
        }

        Query filterNestedDocs(Query in) {
            if (rootDocsBySlot != null) {
                // Ensures that we filter out nested documents
                return new BooleanQuery.Builder().add(in, BooleanClause.Occur.MUST)
                    .add(Queries.newNonNestedFilter(Version.CURRENT), BooleanClause.Occur.FILTER)
                    .build();
            }
            return in;
        }
    }

    static IntStream convertTopDocsToSlots(TopDocs topDocs, int[] rootDocsBySlot) {
        IntStream stream = Arrays.stream(topDocs.scoreDocs).mapToInt(scoreDoc -> scoreDoc.doc);
        if (rootDocsBySlot != null) {
            stream = stream.map(docId -> Arrays.binarySearch(rootDocsBySlot, docId));
        }
        return stream;
    }

    static int[] buildRootDocsSlots(BitSet rootDocs) {
        int slot = 0;
        int[] rootDocsBySlot = new int[rootDocs.cardinality()];
        BitSetIterator iterator = new BitSetIterator(rootDocs, 0);
        for (int rootDocId = iterator.nextDoc(); rootDocId != NO_MORE_DOCS; rootDocId = iterator.nextDoc()) {
            rootDocsBySlot[slot++] = rootDocId;
        }
        return rootDocsBySlot;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy