All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.cassandra.utils.HistogramBuilder Maven / Gradle / Ivy

Go to download

The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.

There is a newer version: 5.0.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.utils;

import java.util.Arrays;

/**
 * Simple class for constructing an EsimtatedHistogram from a set of predetermined values
 */
public class HistogramBuilder
{

    public static final long[] EMPTY_LONG_ARRAY = new long[]{};
    public static final long[] ZERO = new long[]{ 0 };

    public HistogramBuilder() {}
    public HistogramBuilder(long[] values)
    {
        for (long value : values)
        {
            add(value);
        }
    }

    private long[] values = new long[10];
    int count = 0;

    public void add(long value)
    {
        if (count == values.length)
            values = Arrays.copyOf(values, values.length << 1);
        values[count++] = value;
    }

    /**
     * See {@link #buildWithStdevRangesAroundMean(int)}
     * @return buildWithStdevRangesAroundMean(3)
     */
    public EstimatedHistogram buildWithStdevRangesAroundMean()
    {
        return buildWithStdevRangesAroundMean(3);
    }

    /**
     * Calculate the min, mean, max and standard deviation of the items in the builder, and
     * generate an EstimatedHistogram with upto maxdev stdev size ranges  either
     * side of the mean, until min/max are hit; if either min/max are not reached a further range is
     * inserted at the relevant ends. e.g., with a maxdevs of 3, there may be up to 8 ranges
     * (between 9 boundaries, the middle being the mean); the middle 6 will have the same size (stdev)
     * with the outermost two stretching out to min and max.
     *
     * @param maxdevs
     * @return
     */
    public EstimatedHistogram buildWithStdevRangesAroundMean(int maxdevs)
    {
        if (maxdevs < 0)
            throw new IllegalArgumentException("maxdevs must be greater than or equal to zero");

        final int count = this.count;
        final long[] values = this.values;

        if (count == 0)
            return new EstimatedHistogram(EMPTY_LONG_ARRAY, ZERO);

        long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
        double sum = 0, sumsq = 0;
        for (int i = 0 ; i < count ; i++)
        {
            final long value = values[i];
            sum += value;
            sumsq += value * value;
            if (value < min)
                min = value;
            if (value > max)
                max = value;
        }

        final long mean = Math.round(sum / count);
        final double stdev =
                Math.sqrt((sumsq / count) - (mean * (double) mean));

        // build the ranges either side of the mean
        final long[] lowhalf = buildRange(mean, min, true, stdev, maxdevs);
        final long[] highhalf = buildRange(mean, max, false, stdev, maxdevs);

        // combine the ranges
        final long[] ranges = new long[lowhalf.length + highhalf.length + 1];
        System.arraycopy(lowhalf, 0, ranges, 0, lowhalf.length);
        ranges[lowhalf.length] = mean;
        System.arraycopy(highhalf, 0, ranges, lowhalf.length + 1, highhalf.length);

        final EstimatedHistogram hist = new EstimatedHistogram(ranges, new long[ranges.length + 1]);
        for (int i = 0 ; i < count ; i++)
            hist.add(values[i]);
        return hist;
    }

    private static long[] buildRange(long mean, long minormax, boolean ismin, double stdev, int maxdevs)
    {
        if (minormax == mean)
            // minormax == mean we have no range to produce, but given the exclusive starts
            // that begin at zero by default (or -Inf) in EstimatedHistogram we have to generate a min range
            // to indicate where we start from
            return ismin ? new long[] { mean - 1 } : EMPTY_LONG_ARRAY;

        if (stdev < 1)
        {
            // deal with stdevs too small to generate sensible ranges
            return ismin ? new long[] { minormax - 1, mean - 1 } :
                           new long[] { minormax };
        }

        long larger, smaller;
        if (ismin) { larger = mean;     smaller = minormax; }
        else       { larger = minormax; smaller = mean;     }

        double stdevsTo = (larger - smaller) / stdev;
        if (stdevsTo > 0 && stdevsTo < 1)
            // always round up if there's just one non-empty range
            stdevsTo = 1;
        else
            // otherwise round to the nearest half stdev, to avoid tiny ranges at the start/end
            stdevsTo = Math.round(stdevsTo);

        // limit to 4 stdev ranges - last range will contain everything to boundary
        final int len = Math.min(maxdevs + 1, (int) stdevsTo);
        final long[] range = new long[len];
        long next = ismin ? minormax - 1 : minormax;
        for (int i = 0 ; i < range.length ; i++)
        {
            long delta = (range.length - (i + 1)) * (long) stdev;
            if (ismin)
            {
                range[i] = next;
                next = mean - delta;
            }
            else
            {
                range[len - 1 - i] = next;
                next = mean + delta;
            }
        }
        return range;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy