All downloads are free. Search and download functionality uses the official Maven repository.

com.wavefront.ingester.ReportHistogramDecoder Maven / Gradle / Ivy

There is a newer version: 2023-22.3
Show newest version
package com.wavefront.ingester;

import javax.annotation.Nullable;
import java.util.List;
import java.util.function.Supplier;

import com.google.common.collect.ImmutableList;
import com.tdunning.math.stats.AVLTreeDigest;
import com.tdunning.math.stats.Centroid;
import com.tdunning.math.stats.TDigest;
import com.wavefront.data.ParseException;
import com.wavefront.data.TooManyCentroidException;

import org.apache.commons.lang.time.DateUtils;
import wavefront.report.Histogram;
import wavefront.report.HistogramType;
import wavefront.report.ReportHistogram;

import static com.wavefront.ingester.IngesterContext.DEFAULT_HISTOGRAM_COMPRESS_LIMIT_RATIO;

/**
 * Decoder that takes in histograms of the type:
 *
 * <pre>[BinType] [Timestamp] [Centroids] [Metric] [Annotations]</pre>
 *
 * <p>{@code BinType} is one of the case-sensitive literals {@code !M}, {@code !H} or {@code !D}
 * (minute, hour or day bins); the timestamp is optional.
 *
 * @author Tim Schmidt
 */
public class ReportHistogramDecoder implements ReportableEntityDecoder<String, ReportHistogram> {

  /** Parser pipeline; elements are consumed in the order they are declared here. */
  private static final AbstractIngesterFormatter<ReportHistogram> FORMAT =
      ReportHistogramIngesterFormatter.newBuilder().
          caseSensitiveLiterals(ImmutableList.of("!M", "!H", "!D"),
              ReportHistogramDecoder::setBinType).
          optionalTimestamp(ReportHistogram::setTimestamp).
          centroids().
          text(ReportHistogram::setMetric).
          annotationList(ReportHistogram::setAnnotations).
          build();

  /** Supplies the host name handed to the formatter on every {@code decode} call. */
  private final Supplier<String> defaultHostNameSupplier;

  /** Creates a decoder whose default host name is the literal {@code "unknown"}. */
  public ReportHistogramDecoder() {
    this("unknown");
  }

  /**
   * Creates a decoder with a fixed default host name.
   *
   * @param defaultHostName host name passed to the formatter as the default
   */
  public ReportHistogramDecoder(String defaultHostName) {
    this(() -> defaultHostName);
  }

  /**
   * Creates a decoder whose default host name is obtained from a supplier.
   *
   * @param defaultHostNameSupplier supplier passed to the formatter on every decode call
   */
  public ReportHistogramDecoder(Supplier<String> defaultHostNameSupplier) {
    this.defaultHostNameSupplier = defaultHostNameSupplier;
  }

  /**
   * Parses {@code msg} and, if the formatter yields a histogram, appends a copy of it to
   * {@code out} with its timestamp truncated to a multiple of the bin duration.
   *
   * <p>When {@code ctx} is non-null the centroid limit is enforced and, if
   * {@code ctx.isOptimizeHistograms()} is set, the bins/counts lists are optimized in place
   * before the histogram is emitted.
   *
   * @param msg        the line to decode
   * @param out        list that receives the decoded histogram
   * @param customerId customer id forwarded to the formatter
   * @param ctx        optional ingester context carrying limits and optimization settings
   * @throws TooManyCentroidException if {@code ctx} is non-null and the histogram has more
   *                                  counts than {@code ctx.getHistogramCentroidsLimit()}
   */
  @Override
  public void decode(String msg, List<ReportHistogram> out, String customerId,
                     @Nullable IngesterContext ctx) {
    // The seven null arguments are positional parameters of AbstractIngesterFormatter.drive
    // that this decoder does not supply; their meaning is defined by that method.
    ReportHistogram histogram = FORMAT.drive(msg, defaultHostNameSupplier, customerId,
        null, null, null, null, null, null, null, ctx);
    if (histogram == null) {
      return;
    }
    Histogram value = histogram.getValue();
    if (ctx != null) {
      List<Integer> counts = value.getCounts();
      if (counts.size() > ctx.getHistogramCentroidsLimit()) {
        throw new TooManyCentroidException("Too many centroids (max: " +
            ctx.getHistogramCentroidsLimit() + ")");
      }
      if (ctx.isOptimizeHistograms()) {
        optimizeForStorage(value.getBins(), counts, counts.size(),
            ctx.getTargetHistogramAccuracy());
      }
    }
    // adjust timestamp according to histogram bin first
    long duration = value.getDuration();
    histogram.setTimestamp((histogram.getTimestamp() / duration) * duration);
    out.add(ReportHistogram.newBuilder(histogram).build());
  }

  /**
   * Not supported by this decoder.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void decode(String msg, List<ReportHistogram> out) {
    throw new UnsupportedOperationException("Customer ID extraction is not supported");
  }

  /**
   * Formatter callback for the bin-type literal: attaches a fresh {@link Histogram} of type
   * {@code TDIGEST} to {@code target}, with the duration (in milliseconds) matching the literal.
   *
   * @param target  the histogram report being populated
   * @param binType one of {@code "!M"}, {@code "!H"}, {@code "!D"}
   * @throws ParseException if {@code binType} is any other value
   */
  private static void setBinType(ReportHistogram target, String binType) {
    int durationMillis;
    switch (binType) {
      case "!M":
        durationMillis = (int) DateUtils.MILLIS_PER_MINUTE;
        break;
      case "!H":
        durationMillis = (int) DateUtils.MILLIS_PER_HOUR;
        break;
      case "!D":
        durationMillis = (int) DateUtils.MILLIS_PER_DAY;
        break;
      default:
        throw new ParseException("Unknown BinType " + binType);
    }
    Histogram histogram = new Histogram();
    histogram.setDuration(durationMillis);
    histogram.setType(HistogramType.TDIGEST);
    target.setValue(histogram);
  }

  /**
   * Optimize the means/counts pair if necessary. Both lists are modified in place.
   *
   * <p>The pair is rewritten when there are more centroids than
   * {@code DEFAULT_HISTOGRAM_COMPRESS_LIMIT_RATIO * storageAccuracy}, and (re)checked afterwards
   * for counts below 1 or means that are not strictly increasing, either of which triggers a
   * rewrite as well. Nothing happens if either list is null or empty.
   *
   * @param means           centroids means
   * @param counts          centroid counts
   * @param size            number of centroids used for the "too many centroids" check
   * @param storageAccuracy compression passed to the temporary digest used for rewriting
   */
  private static void optimizeForStorage(@Nullable List<Double> means,
                                         @Nullable List<Integer> counts,
                                         int size, int storageAccuracy) {
    if (means == null || means.isEmpty() || counts == null || counts.isEmpty()) {
      return;
    }

    if (size > DEFAULT_HISTOGRAM_COMPRESS_LIMIT_RATIO * storageAccuracy) { // Too many centroids
      rewrite(means, counts, storageAccuracy);
    }

    // Bogus counts are checked first; ordering is only inspected when all counts are valid.
    if (hasBogusCount(counts) || !isStrictlyIncreasing(means)) {
      rewrite(means, counts, storageAccuracy);
    }
  }

  /** Returns true if any count is below 1. */
  private static boolean hasBogusCount(List<Integer> counts) {
    return counts.stream().anyMatch(i -> i < 1);
  }

  /** Returns true if every mean is strictly greater than its predecessor. */
  private static boolean isStrictlyIncreasing(List<Double> means) {
    for (int i = 1; i < means.size(); ++i) {
      if (means.get(i - 1) >= means.get(i)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Reorganizes a mean/count list pair by feeding every centroid with a positive count into a
   * temporary {@link AVLTreeDigest}, compressing it, and replacing the contents of both lists
   * with the digest's centroids. Only the first {@code min(means.size(), counts.size())}
   * entries are considered.
   *
   * @param means           centroids means (cleared and refilled)
   * @param counts          centroid counts (cleared and refilled)
   * @param storageAccuracy compression of the temporary digest
   */
  private static void rewrite(List<Double> means, List<Integer> counts, int storageAccuracy) {
    TDigest temp = new AVLTreeDigest(storageAccuracy);
    int size = Math.min(means.size(), counts.size());
    for (int i = 0; i < size; ++i) {
      int count = counts.get(i);
      if (count > 0) { // entries with non-positive counts are dropped
        temp.add(means.get(i), count);
      }
    }
    temp.compress();

    means.clear();
    counts.clear();
    for (Centroid c : temp.centroids()) {
      means.add(c.mean());
      counts.add(c.count());
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy