All downloads are free. Search and download functionality uses the official Maven repository.

com.threerings.parlor.rating.util.Percentiler Maven / Gradle / Ivy

The newest version!
//
// $Id$
//
// Vilya library - tools for developing networked games
// Copyright (C) 2002-2012 Three Rings Design, Inc., All Rights Reserved
// http://code.google.com/p/vilya/
//
// This library is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation; either version 2.1 of the License, or
// (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package com.threerings.parlor.rating.util;

import java.io.PrintWriter;

import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.nio.LongBuffer;

import com.samskivert.util.StringUtil;

import static com.threerings.parlor.Log.log;

/**
 * Used to keep track of the percentile distribution of positive values (generally puzzle scores).
 *
 * <p>Recorded values are accumulated into a histogram of {@link #BUCKET_COUNT} equally sized
 * buckets spanning the range from {@link #getMinScore} to {@link #getMaxScore}. When a value is
 * recorded outside that range, the range is grown and the existing counts are redistributed over
 * the new buckets. The percentile tables are derived from the histogram by {@link
 * #recomputePercentiles}, which is invoked periodically as values are recorded.
 *
 * <p>This class performs no synchronization of its own.
 */
public class Percentiler
{
    /**
     * Creates an empty percentiler. Its range is established by the first value recorded.
     */
    public Percentiler ()
    {
    }

    /**
     * Creates a percentiler that expects values to fall within the given range. Recording a value
     * outside the range is permitted but logs a warning and causes the range to grow.
     *
     * @param min the lower bound of the expected range.
     * @param max the upper bound of the expected range. If it is smaller than {@code min} a
     * warning is logged and the two bounds are swapped.
     */
    public Percentiler (int min, int max)
    {
        // an inverted range would make every value look out of bounds and produce negative
        // bucket widths, so cope with it up front like we do for bogus serialized data
        if (max < min) {
            log.warning("Percentiler created with inverted range. Swapping.",
                "min", min, "max", max);
            int swap = min;
            min = max;
            max = swap;
        }
        _min = min;
        _max = max;
        _fixedRange = true;
    }

    /**
     * Creates a percentiler from its serialized representation (see {@link #toBytes}).
     *
     * <p>The expected layout is: the maximum (int), {@link #BUCKET_COUNT} bucket counts (ints),
     * the total (long) and, optionally, the minimum (int). Data written before the minimum was
     * added ends after the total, in which case the minimum is taken to be zero.
     *
     * @param data the serialized percentiler.
     * @throws java.nio.BufferUnderflowException if the data is too short to hold the fields
     * described above.
     */
    public Percentiler (byte[] data)
    {
        ByteBuffer in = ByteBuffer.wrap(data);

        // read our int data
        _max = in.getInt();
        for (int ii = 0; ii < BUCKET_COUNT; ii++) {
            _counts[ii] = in.getInt();
        }

        // read our long data
        _snapTotal = (_total = in.getLong());

        // read our min value (which was added afterwards and is absent from legacy data)
        _min = in.hasRemaining() ? in.getInt() : 0;

        // Un-break percentilers that have been stored with bogus data
        if (_max < _min) {
            log.warning("Percentiler initialized with bogus range. Coping.",
                "min", _min, "max", _max);
            _max = _min + 1;
        }

        // compute our percentiles
        recomputePercentiles();
    }

    /**
     * Records a value, updating the histogram. The percentiles are not necessarily updated (that
     * is sufficiently expensive that it isn't done every time a value is added); they are
     * recomputed when the range changes and each time the total has grown by roughly 5%. Call
     * {@link #recomputePercentiles} to force an update.
     *
     * <p>This is equivalent to calling {@link #recordValue(float,boolean)} with {@code true}.
     */
    public void recordValue (float value)
    {
        recordValue(value, true);
    }

    /**
     * See {@link #recordValue(float)}.
     *
     * @param value the value to record. NaN and infinite values cannot be placed in the histogram
     * and are ignored (with a logged warning).
     * @param logNewMax whether an info message should be logged if recording this value causes
     * the range of the histogram to be resized.
     */
    public void recordValue (float value, boolean logNewMax)
    {
        // NaN compares false against both bounds and would silently land in bucket zero, and an
        // infinite value would overflow the integer range computations below
        if (Float.isNaN(value) || Float.isInfinite(value)) {
            log.warning("Ignoring non-finite value", "value", value);
            return;
        }

        // if this is the first value ever recorded; note our min and max
        if (_total == 0 && !_fixedRange) {
            _min = (int)Math.floor(value);
            _max = Math.max((int)Math.ceil(value), _min + 1);
        }

        // if this value is outside our bounds, we need to redistribute our buckets
        if (value < _min || value > _max) {
            if (_fixedRange) {
                log.warning("Recording value outside of initially fixed range",
                    "min", _min, "max", _max, "value", value);
                _fixedRange = false;
            }

            // expand by 20% in the direction of either our new minimum or new maximum
            int newmin = (value < _min) ? (_max - (int)Math.ceil((_max - value) * 1.2f)) : _min;
            int newmax = (value > _max) ? (_min + (int)Math.ceil((value - _min) * 1.2f)) : _max;

            if (newmin > _min || newmax < _max) {
                log.warning("Grew our range in crazy ways?!", "value", value, "total", _total,
                            "new", ("" + newmin + ":" + newmax), "old", ("" + _min + ":" + _max));
            }

            if (logNewMax) {
                log.info("Resizing", "value", value, "total", _total,
                         "new", ("" + newmin + ":" + newmax), "old", ("" + _min + ":" + _max));
            }

            // remap the histogram onto the new range
            redistribute(newmin, newmax);

            // force a recalculation
            _nextRecomp = 0;
        }

        // increment the bucket associated with this value
        _counts[toBucketIndex(value)]++;
        _total++;

        // see if it's time to recompute
        if (_nextRecomp-- <= 0) {
            recomputePercentiles();
            // recompute again when we've grown by 5%
            _nextRecomp = (int)(_total/20);
        }
    }

    /**
     * Returns the total number of values ever recorded to this percentiler.
     */
    public long getRecordedCount ()
    {
        return _total;
    }

    /**
     * Returns true if this percentiler has been modified since it was created or since the last
     * call to {@link #clearModified}.
     */
    public boolean isModified ()
    {
        return (_total != _snapTotal);
    }

    /**
     * Clears this percentiler's "is modified" state.
     */
    public void clearModified ()
    {
        _snapTotal = _total;
    }

    /**
     * Returns the percent of all numbers seen that are lower than the specified value. This value
     * can range from zero to 100 (100 in the case where the value is above the range covered by
     * this percentiler). This value reflects the percentiles computed as of the most recent call
     * to {@link #recomputePercentiles}.
     */
    public int getPercentile (float value)
    {
        if (value < _min) {
            return 0;
        } else if (value > _max) {
            return 100;
        } else {
            return _percentile[toBucketIndex(value)];
        }
    }

    /**
     * Returns the score necessary to attain the specified percentile.  This value reflects the
     * percentiles computed as of the most recent call to {@link #recomputePercentiles}.
     *
     * @param percentile the desired percentile (from 0 to 99 inclusive); values outside that
     * range are clamped into it.
     */
    public float getRequiredScore (int percentile)
    {
        percentile = Math.max(0, Math.min(99, percentile)); // bound this!
        return _reverse[percentile] * ((float)(_max - _min) / BUCKET_COUNT) + _min;
    }

    /**
     * Returns the upper bound of the range covered by this percentiler. Because the range is
     * grown with some headroom when a value falls outside it, this may be larger than the
     * largest value actually recorded.
     */
    public int getMaxScore ()
    {
        return _max;
    }

    /**
     * Returns the lower bound of the range covered by this percentiler. Because the range is
     * grown with some headroom when a value falls outside it, this may be smaller than the
     * smallest value actually recorded.
     */
    public int getMinScore ()
    {
        return _min;
    }

    /**
     * Returns the scores required to obtain a percentile rating from 0 to 99. Element {@code N}
     * of the returned array is the result of {@link #getRequiredScore} for percentile {@code N}.
     */
    public float[] getRequiredScores ()
    {
        float[] scores = new float[100];
        for (int ii = 0; ii < 100; ii++) {
            scores[ii] = getRequiredScore(ii);
        }
        return scores;
    }

    /**
     * Returns a copy of the counts for each bucket.
     */
    public int[] getCounts ()
    {
        return _counts.clone();
    }

    /**
     * Recomputes the percentile cutoffs based on the values recorded since the last percentile
     * computation.
     */
    public void recomputePercentiles ()
    {
        // compute the forward mapping (score to percentile); the percentile of a bucket is the
        // percent of values in all lower buckets, so the first bucket is always left at zero
        long accum = 0;
        for (int ii = 0; ii < BUCKET_COUNT-1; ii++) {
            accum += _counts[ii];
            if (_total == 0) {
                _percentile[ii+1] = 50;
            } else {
                // clamp so that counts that disagree with the total (e.g. from bogus stored
                // data) cannot wrap around when narrowed to a byte
                long pct = accum*100/_total;
                _percentile[ii+1] = (byte)Math.max(0L, Math.min(100L, pct));
            }
        }

        // compute the reverse mapping (percentile to minimum score)
        for (int ii = 0, pp = 0; ii < BUCKET_COUNT; ii++) {
            // scan forward to the percentile bucket that maps to this percentile
            while (_percentile[pp] < ii && pp < (BUCKET_COUNT-1)) {
                pp++;
            }
            _reverse[ii] = (byte)pp;
        }
    }

    /**
     * Converts this percentiler to a byte array so that it may be stored into a database. The
     * result can be turned back into a percentiler with {@link #Percentiler(byte[])}.
     */
    public byte[] toBytes ()
    {
        byte[] data = new byte[(BUCKET_COUNT+4) * INT_SIZE];
        ByteBuffer out = ByteBuffer.wrap(data);

        // write our int data
        out.putInt(_max);
        for (int count : _counts) {
            out.putInt(count);
        }

        // write our long data
        out.putLong(_total);

        // write our min value (added later so we can't write it above like we wish we could)
        out.putInt(_min);

        return data;
    }

    @Override
    public String toString ()
    {
        StringBuilder buf = new StringBuilder();
        buf.append("[total=").append(_total);
        buf.append(", min=").append(_min);
        buf.append(", max=").append(_max);
        // summarize the distribution with the scores required for every tenth percentile
        buf.append(", pcts=(");
        for (int ii = 0; ii < 10; ii++) {
            if (ii > 0) {
                buf.append("-");
            }
            buf.append(StringUtil.format(getRequiredScore(10*ii)));
        }
        return buf.append(")]").toString();
    }

    /**
     * Dumps out our data in a format that can be used to generate a gnuplot. One line is written
     * per bucket, containing the bucket's lower bound, its percentile and its count.
     */
    public void dumpGnuPlot (PrintWriter out)
    {
        float delta = (_max - _min) / (float)BUCKET_COUNT;
        for (int ii = 0; ii < BUCKET_COUNT; ii++) {
            out.println((_min + ii * delta) + " " + _percentile[ii] + " " + _counts[ii]);
        }
    }

    /**
     * Dumps a text representation of this percentiler to the supplied print stream: a ten row
     * bar chart of the bucket counts (one column per bucket), followed by the digits of each
     * bucket's percentile and a summary line.
     */
    public void dump (PrintWriter out)
    {
        // obtain our maximum count
        int max = 0;
        for (int count : _counts) {
            max = Math.max(max, count);
        }

        // figure out how many digits are needed to display the biggest bucket's size; measuring
        // the printed number is exact where a logarithm comes up short for powers of ten
        int digits = String.valueOf(max).length();

        // output each bucket in a column of its own
        for (int rr = 9; rr >= 0; rr--) {
            // print the "value" of this row (computed as a long to avoid overflow)
            long rowValue = (rr+1) * (long)max / 10;
            out.print(StringUtil.pad("" + rowValue, digits) + " ");
            for (int ii = 0; ii < BUCKET_COUNT; ii++) {
                // an empty histogram has no bars at all (and must not divide by zero)
                boolean filled = (max > 0) && ((long)_counts[ii] * 10 / max > rr);
                out.print(filled ? "*" : " ");
            }
            out.println("");
        }

        // the row labels above are followed by a single space, so the rows below are indented
        // by one extra column to line up with the bars (assuming pad() yields labels that are
        // exactly "digits" wide)
        String indent = spaces(digits + 1);

        out.print(indent);
        for (int ii = 0; ii < BUCKET_COUNT; ii++) {
            out.print("-");
        }
        out.println("");

        // the ones digit of each bucket's percentile
        out.print(indent);
        for (int ii = 0; ii < BUCKET_COUNT; ii++) {
            out.print(_percentile[ii]%10);
        }
        out.println("");

        // the tens digit of each bucket's percentile
        out.print(indent);
        for (int ii = 0; ii < BUCKET_COUNT; ii++) {
            out.print((_percentile[ii]/10)%10);
        }
        out.println("");

        // print out a scale along the very bottom
        out.println("");
        out.println("total: " + _total + " min: " + _min + " max: " + _max +
                    " delta: " + ((float)(_max - _min) / BUCKET_COUNT));
    }

    /**
     * Returns a string made up of the specified number of spaces.
     */
    protected final String spaces (int count)
    {
        StringBuilder buf = new StringBuilder();
        for (int ii = 0; ii < count; ii++) {
            buf.append(" ");
        }
        return buf.toString();
    }

    /**
     * Returns the histogram bucket to which this value is assigned. The result is always a valid
     * index; a value that maps below the first bucket is logged and assigned to bucket zero and
     * a value that maps beyond the last bucket is assigned to the last bucket.
     */
    protected final int toBucketIndex (float value)
    {
        // NOTE(review): this rounds to the nearest bucket rather than flooring, whereas the
        // redistribution code treats bucket N as starting at min + N * delta; the rounding is
        // left as is because stored histograms were built with it
        int idx = Math.round((value - _min) * BUCKET_COUNT / (_max - _min));
        idx = Math.min(idx, BUCKET_COUNT-1);
        if (idx < 0) {
            log.warning("Bogus bucket index, using 0", "value", value, "max", _max, "min", _min,
                        "idx", idx, new Throwable());
            return 0;
        }
        return idx;
    }

    /**
     * Replaces the current histogram with one covering the supplied range, distributing the
     * contents of each old bucket across the new bucket(s) that it overlaps.
     */
    private void redistribute (int newmin, int newmax)
    {
        float ndelta = (newmax - newmin) / (float)BUCKET_COUNT;
        float odelta = (_max - _min) / (float)BUCKET_COUNT;
        int[] counts = new int[BUCKET_COUNT];

        for (int ii = 0; ii < BUCKET_COUNT; ii++) {
            // determine the first new bucket that contains some or all of the old bucket
            float obot = (_min + odelta * ii) - newmin;
            int newidx = (int)Math.floor(obot / ndelta);
            newidx = Math.max(0, Math.min(BUCKET_COUNT-1, newidx));

            // compute how much of this bucket (if any) spills over into the next bucket; the
            // offset must be measured from the bottom of the new bucket chosen above (an IEEE
            // remainder rounds to the nearest multiple and so goes negative, hiding the spill,
            // whenever the old bucket starts in the upper half of the new one)
            float newoff = obot - newidx * ndelta;
            float nextfrac = (newoff + odelta) - ndelta;

            // now put this bucket's contents into either one or two new buckets
            if (nextfrac <= 0 || newidx == BUCKET_COUNT-1) {
                counts[newidx] += _counts[ii];
            } else {
                // never move more than the bucket holds, whatever the rounding does
                int next = Math.min(_counts[ii], Math.round(_counts[ii] * nextfrac / odelta));
                counts[newidx] += (_counts[ii] - next);
                counts[newidx+1] += next;
            }
        }

        // put the remapped histogram into place
        _min = newmin;
        _max = newmax;
        _counts = counts;
    }

    /** If this Percentiler was created with a fixed range (and has not yet exceeded it). */
    protected boolean _fixedRange;

    /** The total number of data points seen by this percentiler. */
    protected long _total;

    /** The value of {@link #_total} at creation time or as of a call to {@link #clearModified}. */
    protected long _snapTotal;

    /** The lower bound of the range covered by our histogram. */
    protected int _min;

    /** The upper bound of the range covered by our histogram. */
    protected int _max;

    /** Counts down to our next recalculation. */
    protected int _nextRecomp;

    /** A histogram of all values recorded to this percentiler. */
    protected int[] _counts = new int[BUCKET_COUNT];

    /** The percentile associated with each bucket. */
    protected byte[] _percentile = new byte[BUCKET_COUNT];

    /** The bucket associated with each percentile. */
    protected byte[] _reverse = new byte[BUCKET_COUNT];

    /** The number of divisions between our minimum and maximum values, which defines the
     * granularity of our histogram. */
    protected static final int BUCKET_COUNT = 100;

    /** Number of bytes in an int; makes code clearer. */
    protected static final int INT_SIZE = 4;
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy