org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregator Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :distribution:archives:integ-test-zip
There is a newer version: 8.14.0
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */
package org.elasticsearch.search.aggregations.bucket.histogram;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.Rounding;
import org.elasticsearch.common.Rounding.DateTimeUnit;
import org.elasticsearch.core.CheckedFunction;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.AdaptingAggregator;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.CardinalityUpperBound;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator;
import org.elasticsearch.search.aggregations.bucket.range.InternalDateRange;
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator;
import org.elasticsearch.search.aggregations.bucket.range.RangeAggregatorSupplier;
import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.BiConsumer;

/**
 * Aggregator for {@code date_histogram} that rounds values using
 * {@link Rounding}. See {@link FromDateRange} which also aggregates for
 * {@code date_histogram} but does so by running a {@code range} aggregation
 * over the date and transforming the results. In general
 * {@link FromDateRange} is faster than {@link DateHistogramAggregator}
 * but {@linkplain DateHistogramAggregator} works when we can't precalculate
 * all of the {@link Rounding.Prepared#fixedRoundingPoints() fixed rounding points}.
 */
class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAggregator {
    /**
     * Build an {@link Aggregator} for a {@code date_histogram} aggregation.
     * If we can determine the bucket boundaries from
     * {@link Rounding.Prepared#fixedRoundingPoints()} we use
     * {@link RangeAggregator} to do the actual collecting, otherwise we use
     * an specialized {@link DateHistogramAggregator Aggregator} specifically
     * for the {@code date_histogram}s. We prefer to delegate to the
     * {@linkplain RangeAggregator} because it can sometimes be further
     * optimized into a {@link FiltersAggregator}. Even when it can't be
     * optimized, it is going to be marginally faster and consume less memory
     * than the {@linkplain DateHistogramAggregator} because it doesn't need
     * to the round points and because it can pass precise cardinality
     * estimates to its child aggregations.
     */
    public static Aggregator build(
        String name,
        AggregatorFactories factories,
        Rounding rounding,
        BucketOrder order,
        boolean keyed,
        long minDocCount,
        @Nullable LongBounds extendedBounds,
        @Nullable LongBounds hardBounds,
        ValuesSourceConfig valuesSourceConfig,
        AggregationContext context,
        Aggregator parent,
        CardinalityUpperBound cardinality,
        Map metadata
    ) throws IOException {
        Rounding.Prepared preparedRounding = valuesSourceConfig.roundingPreparer().apply(rounding);
        Aggregator asRange = adaptIntoRangeOrNull(
            name,
            factories,
            rounding,
            preparedRounding,
            order,
            keyed,
            minDocCount,
            extendedBounds,
            hardBounds,
            valuesSourceConfig,
            context,
            parent,
            cardinality,
            metadata
        );
        if (asRange != null) {
            return asRange;
        }
        return new DateHistogramAggregator(
            name,
            factories,
            rounding,
            preparedRounding,
            order,
            keyed,
            minDocCount,
            extendedBounds,
            hardBounds,
            valuesSourceConfig,
            context,
            parent,
            cardinality,
            metadata
        );
    }

    private static FromDateRange adaptIntoRangeOrNull(
        String name,
        AggregatorFactories factories,
        Rounding rounding,
        Rounding.Prepared preparedRounding,
        BucketOrder order,
        boolean keyed,
        long minDocCount,
        @Nullable LongBounds extendedBounds,
        @Nullable LongBounds hardBounds,
        ValuesSourceConfig valuesSourceConfig,
        AggregationContext context,
        Aggregator parent,
        CardinalityUpperBound cardinality,
        Map metadata
    ) throws IOException {
        long[] fixedRoundingPoints = preparedRounding.fixedRoundingPoints();
        if (fixedRoundingPoints == null) {
            return null;
        }
        // Range aggs use a double to aggregate and we don't want to lose precision.
        long min = fixedRoundingPoints[0];
        long max = fixedRoundingPoints[fixedRoundingPoints.length - 1];
        if (min < -RangeAggregator.MAX_ACCURATE_BOUND || min > RangeAggregator.MAX_ACCURATE_BOUND) {
            return null;
        }
        if (max < -RangeAggregator.MAX_ACCURATE_BOUND || max > RangeAggregator.MAX_ACCURATE_BOUND) {
            return null;
        }
        RangeAggregatorSupplier rangeSupplier = context.getValuesSourceRegistry()
            .getAggregator(RangeAggregationBuilder.REGISTRY_KEY, valuesSourceConfig);
        if (rangeSupplier == null) {
            return null;
        }
        RangeAggregator.Range[] ranges = ranges(hardBounds, fixedRoundingPoints);
        return new DateHistogramAggregator.FromDateRange(
            parent,
            factories,
            subAggregators -> rangeSupplier.build(
                name,
                subAggregators,
                valuesSourceConfig,
                InternalDateRange.FACTORY,
                ranges,
                false,
                context,
                parent,
                cardinality,
                metadata
            ),
            valuesSourceConfig.format(),
            rounding,
            preparedRounding,
            order,
            minDocCount,
            extendedBounds,
            keyed,
            fixedRoundingPoints
        );
    }

    private static RangeAggregator.Range[] ranges(LongBounds hardBounds, long[] fixedRoundingPoints) {
        if (hardBounds == null) {
            RangeAggregator.Range[] ranges = new RangeAggregator.Range[fixedRoundingPoints.length];
            for (int i = 0; i < fixedRoundingPoints.length - 1; i++) {
                ranges[i] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[i], (double) fixedRoundingPoints[i + 1]);
            }
            ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[fixedRoundingPoints.length - 1], null);
            return ranges;
        }
        List ranges = new ArrayList<>(fixedRoundingPoints.length);
        for (int i = 0; i < fixedRoundingPoints.length - 1; i++) {
            if (hardBounds.contain(fixedRoundingPoints[i])) {
                ranges.add(new RangeAggregator.Range(null, (double) fixedRoundingPoints[i], (double) fixedRoundingPoints[i + 1]));
            }
        }
        if (hardBounds.contain(fixedRoundingPoints[fixedRoundingPoints.length - 1])) {
            ranges.add(new RangeAggregator.Range(null, (double) fixedRoundingPoints[fixedRoundingPoints.length - 1], null));
        }
        return ranges.toArray(new RangeAggregator.Range[0]);
    }

    private final ValuesSource.Numeric valuesSource;
    private final DocValueFormat formatter;
    private final Rounding rounding;
    /**
     * The rounding prepared for rewriting the data in the shard.
     */
    private final Rounding.Prepared preparedRounding;
    private final BucketOrder order;
    private final boolean keyed;

    private final long minDocCount;
    private final LongBounds extendedBounds;
    private final LongBounds hardBounds;

    private final LongKeyedBucketOrds bucketOrds;

    DateHistogramAggregator(
        String name,
        AggregatorFactories factories,
        Rounding rounding,
        Rounding.Prepared preparedRounding,
        BucketOrder order,
        boolean keyed,
        long minDocCount,
        @Nullable LongBounds extendedBounds,
        @Nullable LongBounds hardBounds,
        ValuesSourceConfig valuesSourceConfig,
        AggregationContext context,
        Aggregator parent,
        CardinalityUpperBound cardinality,
        Map metadata
    ) throws IOException {
        super(name, factories, context, parent, CardinalityUpperBound.MANY, metadata);
        this.rounding = rounding;
        this.preparedRounding = preparedRounding;
        this.order = order;
        order.validate(this);
        this.keyed = keyed;
        this.minDocCount = minDocCount;
        this.extendedBounds = extendedBounds;
        this.hardBounds = hardBounds;
        // TODO: Stop using null here
        this.valuesSource = valuesSourceConfig.hasValues() ? (ValuesSource.Numeric) valuesSourceConfig.getValuesSource() : null;
        this.formatter = valuesSourceConfig.format();

        bucketOrds = LongKeyedBucketOrds.build(bigArrays(), cardinality);
    }

    @Override
    public ScoreMode scoreMode() {
        if (valuesSource != null && valuesSource.needsScores()) {
            return ScoreMode.COMPLETE;
        }
        return super.scoreMode();
    }

    @Override
    public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
        if (valuesSource == null) {
            return LeafBucketCollector.NO_OP_COLLECTOR;
        }
        SortedNumericDocValues values = valuesSource.longValues(ctx);
        return new LeafBucketCollectorBase(sub, values) {
            @Override
            public void collect(int doc, long owningBucketOrd) throws IOException {
                if (values.advanceExact(doc)) {
                    int valuesCount = values.docValueCount();

                    long previousRounded = Long.MIN_VALUE;
                    for (int i = 0; i < valuesCount; ++i) {
                        long value = values.nextValue();
                        long rounded = preparedRounding.round(value);
                        assert rounded >= previousRounded;
                        if (rounded == previousRounded) {
                            continue;
                        }
                        if (hardBounds == null || hardBounds.contain(rounded)) {
                            long bucketOrd = bucketOrds.add(owningBucketOrd, rounded);
                            if (bucketOrd < 0) { // already seen
                                bucketOrd = -1 - bucketOrd;
                                collectExistingBucket(sub, doc, bucketOrd);
                            } else {
                                collectBucket(sub, doc, bucketOrd);
                            }
                        }
                        previousRounded = rounded;
                    }
                }
            }
        };
    }

    @Override
    public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
        return buildAggregationsForVariableBuckets(
            owningBucketOrds,
            bucketOrds,
            (bucketValue, docCount, subAggregationResults) -> {
                return new InternalDateHistogram.Bucket(bucketValue, docCount, keyed, formatter, subAggregationResults);
            },
            (owningBucketOrd, buckets) -> {
                // the contract of the histogram aggregation is that shards must return buckets ordered by key in ascending order
                CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator());

                InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
                    ? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds)
                    : null;
                return new InternalDateHistogram(
                    name,
                    buckets,
                    order,
                    minDocCount,
                    rounding.offset(),
                    emptyBucketInfo,
                    formatter,
                    keyed,
                    metadata()
                );
            }
        );
    }

    @Override
    public InternalAggregation buildEmptyAggregation() {
        InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
            ? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds)
            : null;
        return new InternalDateHistogram(
            name,
            Collections.emptyList(),
            order,
            minDocCount,
            rounding.offset(),
            emptyBucketInfo,
            formatter,
            keyed,
            metadata()
        );
    }

    @Override
    public void doClose() {
        Releasables.close(bucketOrds);
    }

    @Override
    public void collectDebugInfo(BiConsumer add) {
        add.accept("total_buckets", bucketOrds.size());
    }

    /**
     * Returns the size of the bucket in specified units.
     *
     * If unitSize is null, returns 1.0
     */
    @Override
    public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) {
        if (unitSize != null) {
            return preparedRounding.roundingSize(bucketOrds.get(bucket), unitSize);
        } else {
            return 1.0;
        }
    }

    @Override
    public double bucketSize(Rounding.DateTimeUnit unitSize) {
        if (unitSize != null) {
            return preparedRounding.roundingSize(unitSize);
        } else {
            return 1.0;
        }
    }

    static class FromDateRange extends AdaptingAggregator implements SizedBucketAggregator {
        private final DocValueFormat format;
        private final Rounding rounding;
        private final Rounding.Prepared preparedRounding;
        private final BucketOrder order;
        private final long minDocCount;
        private final LongBounds extendedBounds;
        private final boolean keyed;
        private final long[] fixedRoundingPoints;

        FromDateRange(
            Aggregator parent,
            AggregatorFactories subAggregators,
            CheckedFunction delegate,
            DocValueFormat format,
            Rounding rounding,
            Rounding.Prepared preparedRounding,
            BucketOrder order,
            long minDocCount,
            LongBounds extendedBounds,
            boolean keyed,
            long[] fixedRoundingPoints
        ) throws IOException {
            super(parent, subAggregators, delegate);
            this.format = format;
            this.rounding = rounding;
            this.preparedRounding = preparedRounding;
            this.order = order;
            order.validate(this);
            this.minDocCount = minDocCount;
            this.extendedBounds = extendedBounds;
            this.keyed = keyed;
            this.fixedRoundingPoints = fixedRoundingPoints;
        }

        @Override
        protected InternalAggregation adapt(InternalAggregation delegateResult) {
            InternalDateRange range = (InternalDateRange) delegateResult;
            List buckets = new ArrayList<>(range.getBuckets().size());
            // This code below was converted from a regular for loop to a stream forEach to avoid
            // JDK-8285835. It needs to stay in this form until we upgrade our JDK distribution to
            // pick up a fix for the compiler crash.
            range.getBuckets().stream().forEach(rangeBucket -> {
                if (rangeBucket.getDocCount() > 0) {
                    buckets.add(
                        new InternalDateHistogram.Bucket(
                            rangeBucket.getFrom().toInstant().toEpochMilli(),
                            rangeBucket.getDocCount(),
                            keyed,
                            format,
                            rangeBucket.getAggregations()
                        )
                    );
                }
            });

            CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator());

            InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0
                ? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds)
                : null;
            return new InternalDateHistogram(
                range.getName(),
                buckets,
                order,
                minDocCount,
                rounding.offset(),
                emptyBucketInfo,
                format,
                keyed,
                range.getMetadata()
            );
        }

        public final InternalAggregations buildEmptySubAggregations() {
            List aggs = new ArrayList<>();
            for (Aggregator aggregator : subAggregators()) {
                aggs.add(aggregator.buildEmptyAggregation());
            }
            return InternalAggregations.from(aggs);
        }

        @Override
        public double bucketSize(long bucket, DateTimeUnit unitSize) {
            if (unitSize != null) {
                long startPoint = bucket < fixedRoundingPoints.length ? fixedRoundingPoints[(int) bucket] : Long.MIN_VALUE;
                return preparedRounding.roundingSize(startPoint, unitSize);
            } else {
                return 1.0;
            }
        }

        @Override
        public double bucketSize(DateTimeUnit unitSize) {
            if (unitSize != null) {
                return preparedRounding.roundingSize(unitSize);
            } else {
                return 1.0;
            }
        }
    }
}