All downloads are free. Search and download functionality uses the official Maven repository.

com.wavefront.agent.histogram.HistogramRecompressor Maven / Gradle / Ivy

There is a newer version: 9999.0
Show newest version
package com.wavefront.agent.histogram;

import com.google.common.annotations.VisibleForTesting;
import com.tdunning.math.stats.AgentDigest;
import com.wavefront.common.TaggedMetricName;
import com.wavefront.common.Utils;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Counter;
import wavefront.report.Histogram;
import wavefront.report.HistogramType;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Supplier;

import static com.wavefront.agent.histogram.HistogramUtils.mergeHistogram;

/**
 * Recompresses histograms to reduce their size.
 *
 * <p>A histogram is first compacted by merging runs of adjacent centroids that share the same
 * value. If the number of bins still exceeds twice the configured storage accuracy, the
 * histogram is merged into an {@link AgentDigest}, compressed, and converted back.
 *
 * @author [email protected]
 */
public class HistogramRecompressor implements Function<Histogram, Histogram> {
  private final Supplier<Short> storageAccuracySupplier;
  // Counters are created lazily, on first use, via Utils.lazySupplier.
  private final Supplier<Counter> histogramsCompacted = Utils.lazySupplier(() ->
      Metrics.newCounter(new TaggedMetricName("histogram", "histograms_compacted")));
  private final Supplier<Counter> histogramsRecompressed = Utils.lazySupplier(() ->
      Metrics.newCounter(new TaggedMetricName("histogram", "histograms_recompressed")));

  /**
   * @param storageAccuracySupplier Supplier for histogram storage accuracy
   */
  public HistogramRecompressor(Supplier<Short> storageAccuracySupplier) {
    this.storageAccuracySupplier = storageAccuracySupplier;
  }

  /**
   * Compacts and, if still necessary, recompresses the given histogram.
   *
   * @param input histogram to process
   * @return the input itself if no processing was needed, otherwise a new, smaller histogram
   */
  @Override
  public Histogram apply(Histogram input) {
    Histogram result = input;
    if (hasDuplicateCentroids(input)) {
      // merge centroids with identical values first, and if we get the number of centroids
      // low enough, we might not need to incur recompression overhead after all.
      result = compactCentroids(input);
      histogramsCompacted.get().inc();
    }
    // Read the accuracy once so the threshold check and the digest agree on the same value.
    final short storageAccuracy = storageAccuracySupplier.get();
    if (result.getBins().size() > 2 * storageAccuracy) {
      AgentDigest digest = new AgentDigest(storageAccuracy, 0);
      mergeHistogram(digest, result);
      digest.compress();
      result = digest.toHistogram(input.getDuration());
      histogramsRecompressed.get().inc();
    }
    return result;
  }

  /**
   * Checks whether any bin value occurs more than once in the histogram.
   *
   * @param histogram histogram to inspect
   * @return {@code true} as soon as a repeated bin value is found, {@code false} otherwise
   */
  @VisibleForTesting
  static boolean hasDuplicateCentroids(wavefront.report.Histogram histogram) {
    Set<Double> uniqueBins = new HashSet<>();
    for (Double bin : histogram.getBins()) {
      // Set.add returns false when the value is already present.
      if (!uniqueBins.add(bin)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Merges runs of consecutive centroids with the same value into a single centroid whose count
   * is the sum of the merged counts.
   *
   * <p>Only adjacent equal values are merged; equal values separated by a different value are
   * left as separate centroids. NOTE(review): this presumably relies on bins being sorted by
   * value - confirm against the producers of these histograms.
   *
   * @param histogram histogram to compact
   * @return a new histogram with the same duration, the merged bins and counts, and type
   *     {@link HistogramType#TDIGEST}
   */
  @VisibleForTesting
  static wavefront.report.Histogram compactCentroids(wavefront.report.Histogram histogram) {
    List<Double> bins = histogram.getBins();
    List<Integer> counts = histogram.getCounts();
    // Guard against mismatched list lengths by only walking the common prefix.
    int numCentroids = Math.min(bins.size(), counts.size());

    List<Double> newBins = new ArrayList<>(numCentroids);
    List<Integer> newCounts = new ArrayList<>(numCentroids);

    Double accumulatedValue = null;
    int accumulatedCount = 0;
    for (int i = 0; i < numCentroids; ++i) {
      double value = bins.get(i);
      int count = counts.get(i);
      if (accumulatedValue == null) {
        accumulatedValue = value;
      } else if (value != accumulatedValue) {
        // Value changed: flush the finished run and start a new one.
        newBins.add(accumulatedValue);
        newCounts.add(accumulatedCount);
        accumulatedValue = value;
        accumulatedCount = 0;
      }
      accumulatedCount += count;
    }
    // Flush the last run (absent only when there were no centroids at all).
    if (accumulatedValue != null) {
      newCounts.add(accumulatedCount);
      newBins.add(accumulatedValue);
    }
    return wavefront.report.Histogram.newBuilder()
        .setDuration(histogram.getDuration())
        .setBins(newBins)
        .setCounts(newCounts)
        .setType(HistogramType.TDIGEST)
        .build();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy