org.elasticsearch.search.facet.terms.strings.TermsStringOrdinalsFacetExecutor Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of elasticsearch
ElasticSearch - Open Source, Distributed, RESTful Search Engine
There is a newer version: 8.14.1
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.facet.terms.strings;

import com.google.common.collect.ImmutableSet;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.collect.BoundedTreeSet;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs.Iter;
import org.elasticsearch.search.facet.FacetExecutor;
import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 *
 */
public class TermsStringOrdinalsFacetExecutor extends FacetExecutor {

    private final IndexFieldData.WithOrdinals indexFieldData;

    private final TermsFacet.ComparatorType comparatorType;
    private final int size;
    private final int minCount;
    private final ImmutableSet excluded;
    private final Matcher matcher;
    final int ordinalsCacheAbove;

    final List aggregators;
    long missing;
    long total;

    public TermsStringOrdinalsFacetExecutor(IndexFieldData.WithOrdinals indexFieldData, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context,
                                            ImmutableSet excluded, Pattern pattern, int ordinalsCacheAbove) {
        this.indexFieldData = indexFieldData;
        this.size = size;
        this.comparatorType = comparatorType;
        this.ordinalsCacheAbove = ordinalsCacheAbove;

        if (excluded == null || excluded.isEmpty()) {
            this.excluded = null;
        } else {
            this.excluded = excluded;
        }
        this.matcher = pattern != null ? pattern.matcher("") : null;

        // minCount is offset by -1
        if (allTerms) {
            minCount = -1;
        } else {
            minCount = 0;
        }

        this.aggregators = new ArrayList(context.searcher().getIndexReader().leaves().size());
    }

    @Override
    public Collector collector() {
        return new Collector();
    }

    @Override
    public InternalFacet buildFacet(String facetName) {
        final CharsRef spare = new CharsRef();
        AggregatorPriorityQueue queue = new AggregatorPriorityQueue(aggregators.size());
        for (ReaderAggregator aggregator : aggregators) {
            if (aggregator.nextPosition()) {
                queue.add(aggregator);
            }
        }

        // YACK, we repeat the same logic, but once with an optimizer priority queue for smaller sizes
        if (size < EntryPriorityQueue.LIMIT) {
            // optimize to use priority size
            EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator());

            while (queue.size() > 0) {
                ReaderAggregator agg = queue.top();
                BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we get around this?)
                int count = 0;
                do {
                    count += agg.counts[agg.position];
                    if (agg.nextPosition()) {
                        agg = queue.updateTop();
                    } else {
                        // we are done with this reader
                        queue.pop();
                        agg = queue.top();
                    }
                } while (agg != null && value.equals(agg.current));

                if (count > minCount) {
                    if (excluded != null && excluded.contains(value)) {
                        continue;
                    }
                    if (matcher != null) {
                        UnicodeUtil.UTF8toUTF16(value, spare);
                        assert spare.toString().equals(value.utf8ToString());
                        if (!matcher.reset(spare).matches()) {
                            continue;
                        }
                    }
                    InternalStringTermsFacet.TermEntry entry = new InternalStringTermsFacet.TermEntry(value, count);
                    ordered.insertWithOverflow(entry);
                }
            }
            InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()];
            for (int i = ordered.size() - 1; i >= 0; i--) {
                list[i] = (InternalStringTermsFacet.TermEntry) ordered.pop();
            }

            for (ReaderAggregator aggregator : aggregators) {
                if (aggregator.counts.length > ordinalsCacheAbove) {
                    CacheRecycler.pushIntArray(aggregator.counts);
                }
            }

            return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total);
        }

        BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size);

        while (queue.size() > 0) {
            ReaderAggregator agg = queue.top();
            BytesRef value = agg.values.makeSafe(agg.current); // we need to makeSafe it, since we end up pushing it... (can we work around that?)
            int count = 0;
            do {
                count += agg.counts[agg.position];
                if (agg.nextPosition()) {
                    agg = queue.updateTop();
                } else {
                    // we are done with this reader
                    queue.pop();
                    agg = queue.top();
                }
            } while (agg != null && value.equals(agg.current));

            if (count > minCount) {
                if (excluded != null && excluded.contains(value)) {
                    continue;
                }
                if (matcher != null) {
                    UnicodeUtil.UTF8toUTF16(value, spare);
                    assert spare.toString().equals(value.utf8ToString());
                    if (!matcher.reset(spare).matches()) {
                        continue;
                    }
                }
                InternalStringTermsFacet.TermEntry entry = new InternalStringTermsFacet.TermEntry(value, count);
                ordered.add(entry);
            }
        }


        for (ReaderAggregator aggregator : aggregators) {
            if (aggregator.counts.length > ordinalsCacheAbove) {
                CacheRecycler.pushIntArray(aggregator.counts);
            }
        }

        return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing, total);
    }

    class Collector extends FacetExecutor.Collector {

        private long missing;
        private long total;
        private BytesValues.WithOrdinals values;
        private ReaderAggregator current;
        private Ordinals.Docs ordinals;

        @Override
        public void setNextReader(AtomicReaderContext context) throws IOException {
            if (current != null) {
                missing += current.counts[0];
                total += current.total - current.counts[0];
                if (current.values.ordinals().getNumOrds() > 0) {
                    aggregators.add(current);
                }
            }
            values = indexFieldData.load(context).getBytesValues();
            current = new ReaderAggregator(values, ordinalsCacheAbove);
            ordinals = values.ordinals();
        }

        @Override
        public void collect(int doc) throws IOException {
            Iter iter = ordinals.getIter(doc);
            int ord = iter.next();
            current.onOrdinal(doc, ord);
            while((ord = iter.next()) != 0) {
                current.onOrdinal(doc, ord);
            }
        }

        @Override
        public void postCollection() {
            if (current != null) {
                missing += current.counts[0];
                total += current.total - current.counts[0];
                // if we have values for this one, add it
                if (current.values.ordinals().getNumOrds() > 0) {
                    aggregators.add(current);
                }
                current = null;
            }
            TermsStringOrdinalsFacetExecutor.this.missing = missing;
            TermsStringOrdinalsFacetExecutor.this.total = total;
        }
    }

    public static final class ReaderAggregator {

        final BytesValues.WithOrdinals values;
        final int[] counts;

        int position = 0;
        BytesRef current;
        int total;
        private final int maxOrd;

        public ReaderAggregator(BytesValues.WithOrdinals values, int ordinalsCacheLimit) {
            this.values = values;
            this.maxOrd = values.ordinals().getMaxOrd();

            if (maxOrd > ordinalsCacheLimit) {
                this.counts = CacheRecycler.popIntArray(maxOrd);
            } else {
                this.counts = new int[maxOrd];
            }
        }

        final void onOrdinal(int docId, int ordinal) {
            counts[ordinal]++;
            total++;
        }

        public boolean nextPosition() {
            if (++position >= maxOrd) {
                return false;
            }
            current = values.getValueByOrd(position);
            return true;
        }
    }

    public static class AggregatorPriorityQueue extends PriorityQueue {

        public AggregatorPriorityQueue(int size) {
            super(size);
        }

        @Override
        protected boolean lessThan(ReaderAggregator a, ReaderAggregator b) {
            return a.current.compareTo(b.current) < 0;
        }
    }
}