com.caseystella.analytics.distribution.Distribution Maven / Gradle / Ivy
/**
* Copyright (C) 2016 Hurence ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.caseystella.analytics.distribution;
import com.caseystella.analytics.DataPoint;
import com.caseystella.analytics.distribution.config.RotationConfig;
import com.caseystella.analytics.distribution.config.Type;
import com.caseystella.analytics.distribution.sampling.ExponentiallyBiasedAChao;
import com.caseystella.analytics.distribution.scaling.ScalingFunction;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.twitter.algebird.QTree;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import scala.Tuple2;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
public class Distribution implements Measurable {
/**
 * Mutable rolling state built up one {@link DataPoint} at a time.
 *
 * <p>It tracks the current distribution, the distribution that was current
 * before the most recent rotation, a list of chunk distributions ordered
 * newest-first, and an optional reservoir sample.
 */
public static class Context {
    private Distribution currentDistribution;
    private Distribution previousDistribution;
    // Newest chunk sits at the head, oldest at the tail (see addChunk / rotate).
    private LinkedList<Distribution> chunks = new LinkedList<>();
    // NOTE(review): the reservoir's element type is not visible from this class,
    // so the declaration is left raw -- confirm and parameterize.
    private ExponentiallyBiasedAChao reservoir;

    /**
     * Creates an empty context.
     *
     * @param reservoirSize size handed to the reservoir; when it is not
     *                      positive no reservoir is created and
     *                      {@link #getSample()} returns {@code null}
     * @param decayRate     decay rate handed to the reservoir
     */
    public Context(int reservoirSize, double decayRate) {
        // The reservoir is driven by a Random with the fixed seed 0.
        reservoir = reservoirSize > 0
                ? new ExponentiallyBiasedAChao<>(reservoirSize, decayRate, new Random(0))
                : null;
    }

    /**
     * @return the distribution that was current before the last rotation, or
     *         {@code null} if no rotation has happened yet
     */
    public Distribution getPreviousDistribution() {
        return previousDistribution;
    }

    /**
     * @return the current distribution; {@code null} before any data point
     *         has been added
     */
    public Distribution getCurrentDistribution() {
        return currentDistribution;
    }

    /**
     * @return the live internal chunk list (not a copy), newest chunk first
     */
    public LinkedList<Distribution> getChunks() {
        return chunks;
    }

    /**
     * @return the reservoir, or {@code null} when the context was created
     *         with a non-positive reservoir size
     */
    public ExponentiallyBiasedAChao getSample() {
        return reservoir;
    }

    /**
     * @return the amount reported by the current distribution, or {@code 0}
     *         when there is no current distribution
     */
    public long getAmount() {
        if (currentDistribution == null) {
            return 0L;
        }
        return currentDistribution.getAmount();
    }

    /**
     * Feeds one data point into the current distribution and into the newest
     * chunk, opening a new chunk and rotating when the policies say so.
     *
     * @param dp              the data point to add
     * @param rotationPolicy  policy deciding when to rotate
     * @param chunkingPolicy  policy deciding when the newest chunk is full
     * @param scalingFunction scaling function passed to the distributions
     * @param stats           statistics passed to newly created distributions
     */
    public void addDataPoint( DataPoint dp
                            , RotationConfig rotationPolicy
                            , RotationConfig chunkingPolicy
                            , ScalingFunction scalingFunction
                            , GlobalStatistics stats
                            )
    {
        if (currentDistribution == null) {
            currentDistribution = new Distribution(dp, scalingFunction, stats);
        }
        else {
            currentDistribution.addDataPoint(dp, scalingFunction);
        }

        // A new chunk is needed when there is none yet or the newest one is full.
        boolean needNewChunk = chunks.isEmpty() || outOfPolicy(getCurrentChunk(), chunkingPolicy);
        if (!needNewChunk) {
            getCurrentChunk().addDataPoint(dp, scalingFunction);
            return;
        }
        addChunk(new Distribution(dp, scalingFunction, stats));

        // Rotation is only considered right after a new chunk was opened. Both the
        // current distribution and the summary of every chunk except the oldest
        // must be out of policy before the oldest chunk is dropped.
        boolean needsRotation = outOfPolicy(currentDistribution, rotationPolicy)
                && outOfPolicy(sketch(Iterables.limit(chunks, chunks.size() - 1)), rotationPolicy);
        if (needsRotation) {
            rotate();
        }
    }

    /**
     * Makes {@code d} the newest chunk by inserting it at the head of the list.
     *
     * @param d the chunk to add
     */
    protected void addChunk(Distribution d) {
        chunks.addFirst(d);
    }

    /**
     * Drops the oldest chunk, remembers the current distribution as the
     * previous one, rebuilds the current distribution by merging the remaining
     * chunks and, when a reservoir exists, advances its period.
     */
    protected void rotate() {
        chunks.removeLast();
        previousDistribution = currentDistribution;
        currentDistribution = Distribution.merge(chunks);
        if (reservoir != null) {
            reservoir.advancePeriod();
        }
    }

    /** Returns the newest chunk (head of the list). */
    private Distribution getCurrentChunk() {
        return chunks.getFirst();
    }

    /**
     * Summarizes a group of distributions as a single {@link Measurable}:
     * smallest begin, largest end and summed amount. For an empty group the
     * result reports begin {@code Long.MAX_VALUE}, end {@code -1}, amount 0.
     */
    private Measurable sketch(Iterable<Distribution> parts) {
        long begin = Long.MAX_VALUE;
        long end = -1;
        long amount = 0;
        for (Distribution part : parts) {
            begin = Math.min(begin, part.getBegin());
            end = Math.max(end, part.getEnd());
            amount += part.getAmount();
        }
        // Anonymous classes can only capture effectively final locals.
        final long measurableBegin = begin;
        final long measurableEnd = end;
        final long measurableAmount = amount;
        return new Measurable() {
            @Override
            public long getAmount() {
                return measurableAmount;
            }

            @Override
            public Long getBegin() {
                return measurableBegin;
            }

            @Override
            public Long getEnd() {
                return measurableEnd;
            }
        };
    }

    /**
     * Tells whether {@code dist} has reached the limit described by
     * {@code policy}.
     *
     * @throws IllegalStateException if the policy type is not one of
     *         BY_AMOUNT, BY_TIME or NEVER
     */
    private boolean outOfPolicy(Measurable dist, RotationConfig policy) {
        if (policy.getType() == Type.BY_AMOUNT) {
            return dist.getAmount() >= policy.getAmount();
        }
        if (policy.getType() == Type.BY_TIME) {
            // NOTE(review): this compares the point count with the value produced by
            // the policy's unit function and never reads policy.getAmount() --
            // confirm this is the intended time-based check.
            return dist.getAmount() >= policy.getUnit().apply(dist);
        }
        if (policy.getType() == Type.NEVER) {
            return false;
        }
        throw new IllegalStateException("Unsupported type: " + policy.getType());
    }
}
QTree
© 2015 - 2025 Weber Informatics LLC | Privacy Policy