All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.cassandra.index.sasi.utils.RangeIterator Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.index.sasi.utils;

import java.io.Closeable;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

import com.google.common.annotations.VisibleForTesting;

public abstract class RangeIterator, T extends CombinedValue> extends AbstractIterator implements Closeable
{
    private final K min, max;
    private final long count;
    private K current;

    protected RangeIterator(Builder.Statistics statistics)
    {
        this(statistics.min, statistics.max, statistics.tokenCount);
    }

    public RangeIterator(RangeIterator range)
    {
        this(range == null ? null : range.min, range == null ? null : range.max, range == null ? -1 : range.count);
    }

    public RangeIterator(K min, K max, long count)
    {
        if (min == null || max == null || count == 0)
            assert min == null && max == null && (count == 0 || count == -1);

        this.min = min;
        this.current = min;
        this.max = max;
        this.count = count;
    }

    public final K getMinimum()
    {
        return min;
    }

    public final K getCurrent()
    {
        return current;
    }

    public final K getMaximum()
    {
        return max;
    }

    public final long getCount()
    {
        return count;
    }

    /**
     * When called, this iterators current position should
     * be skipped forwards until finding either:
     *   1) an element equal to or bigger than next
     *   2) the end of the iterator
     *
     * @param nextToken value to skip the iterator forward until matching
     *
     * @return The next current token after the skip was performed
     */
    public final T skipTo(K nextToken)
    {
        if (min == null || max == null)
            return endOfData();

        if (current.compareTo(nextToken) >= 0)
            return next == null ? recomputeNext() : next;

        if (max.compareTo(nextToken) < 0)
            return endOfData();

        performSkipTo(nextToken);
        return recomputeNext();
    }

    protected abstract void performSkipTo(K nextToken);

    protected T recomputeNext()
    {
        return tryToComputeNext() ? peek() : endOfData();
    }

    protected boolean tryToComputeNext()
    {
        boolean hasNext = super.tryToComputeNext();
        current = hasNext ? next.get() : getMaximum();
        return hasNext;
    }

    public static abstract class Builder, D extends CombinedValue>
    {
        public enum IteratorType
        {
            UNION, INTERSECTION
        }

        @VisibleForTesting
        protected final Statistics statistics;

        @VisibleForTesting
        protected final PriorityQueue> ranges;

        public Builder(IteratorType type)
        {
            statistics = new Statistics<>(type);
            ranges = new PriorityQueue<>(16, (Comparator>) (a, b) -> a.getCurrent().compareTo(b.getCurrent()));
        }

        public K getMinimum()
        {
            return statistics.min;
        }

        public K getMaximum()
        {
            return statistics.max;
        }

        public long getTokenCount()
        {
            return statistics.tokenCount;
        }

        public int rangeCount()
        {
            return ranges.size();
        }

        public Builder add(RangeIterator range)
        {
            if (range == null)
                return this;

            if (range.getCount() > 0)
                ranges.add(range);
            statistics.update(range);

            return this;
        }

        public Builder add(List> ranges)
        {
            if (ranges == null || ranges.isEmpty())
                return this;

            ranges.forEach(this::add);
            return this;
        }

        public final RangeIterator build()
        {
            if (rangeCount() == 0)
                return new EmptyRangeIterator<>();
            else
                return buildIterator();
        }

        public static class EmptyRangeIterator, D extends CombinedValue> extends RangeIterator
        {
            EmptyRangeIterator() { super(null, null, 0); }
            public D computeNext() { return endOfData(); }
            protected void performSkipTo(K nextToken) { }
            public void close() { }
        }

        protected abstract RangeIterator buildIterator();

        public static class Statistics, D extends CombinedValue>
        {
            protected final IteratorType iteratorType;

            protected K min, max;
            protected long tokenCount;

            // iterator with the least number of items
            protected RangeIterator minRange;
            // iterator with the most number of items
            protected RangeIterator maxRange;

            // tracks if all of the added ranges overlap, which is useful in case of intersection,
            // as it gives direct answer as to such iterator is going to produce any results.
            private boolean isOverlapping = true;

            public Statistics(IteratorType iteratorType)
            {
                this.iteratorType = iteratorType;
            }

            /**
             * Update statistics information with the given range.
             *
             * Updates min/max of the combined range, token count and
             * tracks range with the least/most number of tokens.
             *
             * @param range The range to update statistics with.
             */
            public void update(RangeIterator range)
            {
                switch (iteratorType)
                {
                    case UNION:
                        min = nullSafeMin(min, range.getMinimum());
                        max = nullSafeMax(max, range.getMaximum());
                        break;

                    case INTERSECTION:
                        // minimum of the intersection is the biggest minimum of individual iterators
                        min = nullSafeMax(min, range.getMinimum());
                        // maximum of the intersection is the smallest maximum of individual iterators
                        max = nullSafeMin(max, range.getMaximum());
                        break;

                    default:
                        throw new IllegalStateException("Unknown iterator type: " + iteratorType);
                }

                // check if new range is disjoint with already added ranges, which means that this intersection
                // is not going to produce any results, so we can cleanup range storage and never added anything to it.
                isOverlapping &= isOverlapping(min, max, range);

                minRange = minRange == null ? range : min(minRange, range);
                maxRange = maxRange == null ? range : max(maxRange, range);

                tokenCount += range.getCount();
            }

            private RangeIterator min(RangeIterator a, RangeIterator b)
            {
                return a.getCount() > b.getCount() ? b : a;
            }

            private RangeIterator max(RangeIterator a, RangeIterator b)
            {
                return a.getCount() > b.getCount() ? a : b;
            }

            public boolean isDisjoint()
            {
                return !isOverlapping;
            }

            public double sizeRatio()
            {
                return minRange.getCount() * 1d / maxRange.getCount();
            }
        }
    }

    @VisibleForTesting
    protected static , D extends CombinedValue> boolean isOverlapping(RangeIterator a, RangeIterator b)
    {
        return isOverlapping(a.getCurrent(), a.getMaximum(), b);
    }

    /**
     * Ranges are overlapping the following cases:
     *
     *   * When they have a common subrange:
     *
     *   min       b.current      max          b.max
     *   +---------|--------------+------------|
     *
     *   b.current      min       max          b.max
     *   |--------------+---------+------------|
     *
     *   min        b.current     b.max        max
     *   +----------|-------------|------------+
     *
     *
     *  If either range is empty, they're disjoint.
     */
    @VisibleForTesting
    protected static , D extends CombinedValue> boolean isOverlapping(K min, K max, RangeIterator b)
    {
        return (min != null && max != null) &&
               b.getCount() != 0 &&
               (min.compareTo(b.getMaximum()) <= 0 && b.getCurrent().compareTo(max) <= 0);
    }

    @SuppressWarnings("unchecked")
    private static  T nullSafeMin(T a, T b)
    {
        if (a == null) return b;
        if (b == null) return a;

        return a.compareTo(b) > 0 ? b : a;
    }

    @SuppressWarnings("unchecked")
    private static  T nullSafeMax(T a, T b)
    {
        if (a == null) return b;
        if (b == null) return a;

        return a.compareTo(b) > 0 ? a : b;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy