
net.opentsdb.core.SpanGroup

OpenTSDB is a distributed, scalable Time Series Database (TSDB) written on top of HBase. OpenTSDB was written to address a common need: store, index and serve metrics collected from computer systems (network gear, operating systems, applications) at a large scale, and make this data easily accessible and graphable.

// This file is part of OpenTSDB.
// Copyright (C) 2010-2012  The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
// General Public License for more details.  You should have received a copy
// of the GNU Lesser General Public License along with this program.  If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.core;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.hbase.async.Bytes;
import org.hbase.async.Bytes.ByteMap;

import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;

import net.opentsdb.meta.Annotation;

/**
 * Groups multiple spans together and offers a dynamic "view" on them.
 * <p>
 * This is used for queries to the TSDB, where we might group multiple
 * {@link Span}s that are for the same time series but different tags
 * together.  We need to "hide" data points that are outside of the
 * time period of the query and do on-the-fly aggregation of the data
 * points coming from the different Spans, using an {@link Aggregator}.
 * Since not all the Spans will have their data points at exactly the
 * same time, we also do on-the-fly linear interpolation.  If needed,
 * this view can also return the rate of change instead of the actual
 * data points.
 * <p>
 * This is one of the rare (if not the only) implementations of
 * {@link DataPoints} for which {@link #getTags} can potentially return
 * an empty map.
 * <p>
 * The implementation can also dynamically downsample the data when a
 * sampling interval and a downsampling function (in the form of an
 * {@link Aggregator}) are given.  This is done by using a special
 * iterator when using the {@link Span.DownsamplingIterator}.
 */
final class SpanGroup implements DataPoints {

  /** Annotations */
  private final ArrayList<Annotation> annotations;

  /** Start time (UNIX timestamp in seconds or ms) on 32 bits ("unsigned" int). */
  private final long start_time;

  /** End time (UNIX timestamp in seconds or ms) on 32 bits ("unsigned" int). */
  private final long end_time;

  /**
   * The tags of this group.
   * This is the intersection set between the tags of all the Spans
   * in this group.
   * @see #computeTags
   */
  private Map<String, String> tags;

  private ByteMap<byte[]> tag_uids;

  /**
   * The names of the tags that aren't shared by every single data point.
   * This is the symmetric difference between the tags of all the Spans
   * in this group.
   * @see #computeTags
   */
  private List<String> aggregated_tags;

  private Set<byte[]> aggregated_tag_uids;

  /** Spans in this group.  They must all be for the same metric. */
  private final ArrayList<Span> spans = new ArrayList<Span>();

  /** If true, use rate of change instead of actual values. */
  private final boolean rate;

  /** Specifies the various options for rate calculations */
  private RateOptions rate_options;

  /** Aggregator to use to aggregate data points from different Spans. */
  private final Aggregator aggregator;

  /**
   * Downsampling function to use, if any (can be {@code null}).
   * If this is non-null, {@code sample_interval} must be strictly positive.
   */
  private final Aggregator downsampler;

  /** Minimum time interval (in seconds) wanted between each data point. */
  private final long sample_interval;

  /** Index of the query in the TSQuery class */
  private final int query_index;

  /** Downsampling fill policy. */
  private final FillPolicy fill_policy;

  /** The TSDB to which we belong, used for resolution */
  private final TSDB tsdb;

  /**
   * Ctor.
   * @param tsdb The TSDB we belong to.
   * @param start_time Any data point strictly before this timestamp will be
   * ignored.
   * @param end_time Any data point strictly after this timestamp will be
   * ignored.
   * @param spans A sequence of initial {@link Spans} to add to this group.
   * Ignored if {@code null}.  Additional spans can be added with {@link #add}.
   * @param rate If {@code true}, the rate of the series will be used instead
   * of the actual values.
   * @param aggregator The aggregation function to use.
   * @param interval Number of milliseconds wanted between each data point.
   * @param downsampler Aggregation function to use to group data points
   * within an interval.
   */
  SpanGroup(final TSDB tsdb,
            final long start_time, final long end_time,
            final Iterable<Span> spans,
            final boolean rate,
            final Aggregator aggregator, final long interval,
            final Aggregator downsampler) {
    this(tsdb, start_time, end_time, spans, rate,
         new RateOptions(false, Long.MAX_VALUE, RateOptions.DEFAULT_RESET_VALUE),
         aggregator, interval, downsampler);
  }

  /**
   * Ctor.
   * @param tsdb The TSDB we belong to.
   * @param start_time Any data point strictly before this timestamp will be
   * ignored.
   * @param end_time Any data point strictly after this timestamp will be
   * ignored.
   * @param spans A sequence of initial {@link Spans} to add to this group.
   * Ignored if {@code null}.  Additional spans can be added with {@link #add}.
   * @param rate If {@code true}, the rate of the series will be used instead
   * of the actual values.
   * @param rate_options Specifies the optional additional rate calculation options.
   * @param aggregator The aggregation function to use.
   * @param interval Number of milliseconds wanted between each data point.
   * @param downsampler Aggregation function to use to group data points
   * within an interval.
   * @since 2.0
   */
  SpanGroup(final TSDB tsdb,
            final long start_time, final long end_time,
            final Iterable<Span> spans,
            final boolean rate, final RateOptions rate_options,
            final Aggregator aggregator, final long interval,
            final Aggregator downsampler) {
    this(tsdb, start_time, end_time, spans, rate, rate_options, aggregator,
         interval, downsampler, -1, FillPolicy.NONE);
  }

  /**
   * Ctor.
   * @param tsdb The TSDB we belong to.
   * @param start_time Any data point strictly before this timestamp will be
   * ignored.
   * @param end_time Any data point strictly after this timestamp will be
   * ignored.
   * @param spans A sequence of initial {@link Spans} to add to this group.
   * Ignored if {@code null}.  Additional spans can be added with {@link #add}.
   * @param rate If {@code true}, the rate of the series will be used instead
   * of the actual values.
   * @param rate_options Specifies the optional additional rate calculation options.
   * @param aggregator The aggregation function to use.
   * @param interval Number of milliseconds wanted between each data point.
   * @param downsampler Aggregation function to use to group data points
   * within an interval.
   * @param query_index index of the original query
   * @param fill_policy Policy specifying whether to interpolate or to fill
   * missing intervals with special values.
   * @since 2.2
   */
  SpanGroup(final TSDB tsdb,
            final long start_time, final long end_time,
            final Iterable<Span> spans,
            final boolean rate, final RateOptions rate_options,
            final Aggregator aggregator, final long interval,
            final Aggregator downsampler, final int query_index,
            final FillPolicy fill_policy) {
    annotations = new ArrayList<Annotation>();
    this.start_time = (start_time & Const.SECOND_MASK) == 0 ?
        start_time * 1000 : start_time;
    this.end_time = (end_time & Const.SECOND_MASK) == 0 ?
        end_time * 1000 : end_time;
    if (spans != null) {
      for (final Span span : spans) {
        add(span);
      }
    }
    this.rate = rate;
    this.rate_options = rate_options;
    this.aggregator = aggregator;
    this.downsampler = downsampler;
    this.sample_interval = interval;
    this.query_index = query_index;
    this.fill_policy = fill_policy;
    this.tsdb = tsdb;
  }

  /**
   * Adds a span to this group, provided that it's in the right time range.
   * Must not be called once {@link #getTags} or
   * {@link #getAggregatedTags} has been called on this instance.
   * @param span The span to add to this group.  If none of the data points
   * fall within our time range, this method will silently ignore that span.
   */
  void add(final Span span) {
    if (tags != null) {
      throw new AssertionError("The set of tags has already been computed"
                               + ", you can't add more Spans to " + this);
    }

    // normalize timestamps to milliseconds for proper comparison
    final long start = (start_time & Const.SECOND_MASK) == 0 ?
        start_time * 1000 : start_time;
    final long end = (end_time & Const.SECOND_MASK) == 0 ?
        end_time * 1000 : end_time;

    if (span.size() == 0) {
      // copy annotations that are in the time range
      for (Annotation annot : span.getAnnotations()) {
        long annot_start = annot.getStartTime();
        if ((annot_start & Const.SECOND_MASK) == 0) {
          annot_start *= 1000;
        }
        long annot_end = annot.getStartTime();
        if ((annot_end & Const.SECOND_MASK) == 0) {
          annot_end *= 1000;
        }
        if (annot_end >= start && annot_start <= end) {
          annotations.add(annot);
        }
      }
    } else {
      long first_dp = span.timestamp(0);
      if ((first_dp & Const.SECOND_MASK) == 0) {
        first_dp *= 1000;
      }
      // The following call to timestamp() will throw an
      // IndexOutOfBoundsException if size == 0, which is OK since it would
      // be a programming error.
      long last_dp = span.timestamp(span.size() - 1);
      if ((last_dp & Const.SECOND_MASK) == 0) {
        last_dp *= 1000;
      }
      if (first_dp <= end && last_dp >= start) {
        this.spans.add(span);
        annotations.addAll(span.getAnnotations());
      }
    }
  }
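  // Worked example of the timestamp normalization used by the constructor and
  // add() above (assuming Const.SECOND_MASK masks the upper 32 bits of a long):
  //   1356998400    -> upper bits clear, value is in seconds -> * 1000 = 1356998400000
  //   1356998400000 -> upper bits already set, value is in milliseconds -> kept as-is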
  /**
   * Computes the intersection set + symmetric difference of tags in all spans.
   * This method loads the UID aggregated list and tag pair maps with byte arrays
   * but does not actually resolve the UIDs to strings.
   * On the first run, it will initialize the UID collections (which may be empty)
   * and subsequent calls will skip processing.
   */
  private void computeTags() {
    if (tag_uids != null && aggregated_tag_uids != null) {
      return;
    }
    if (spans.isEmpty()) {
      tag_uids = new ByteMap<byte[]>();
      aggregated_tag_uids = new HashSet<byte[]>();
      return;
    }

    // local tag uids
    final ByteMap<byte[]> tag_set = new ByteMap<byte[]>();
    // value is always null, we just want the set of unique keys
    final ByteMap<byte[]> discards = new ByteMap<byte[]>();
    final Iterator<Span> it = spans.iterator();
    while (it.hasNext()) {
      final Span span = it.next();
      final ByteMap<byte[]> uids = span.getTagUids();
      for (final Map.Entry<byte[], byte[]> tag_pair : uids.entrySet()) {
        // we already know it's an aggregated tag
        if (discards.containsKey(tag_pair.getKey())) {
          continue;
        }
        final byte[] tag_value = tag_set.get(tag_pair.getKey());
        if (tag_value == null) {
          tag_set.put(tag_pair.getKey(), tag_pair.getValue());
        } else if (Bytes.memcmp(tag_value, tag_pair.getValue()) != 0) {
          // bump to aggregated tags
          discards.put(tag_pair.getKey(), null);
          tag_set.remove(tag_pair.getKey());
        }
      }
    }
    aggregated_tag_uids = discards.keySet();
    tag_uids = tag_set;
  }
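  // Worked example for computeTags() above, with hypothetical resolved values:
  // given two spans tagged {host=web01, dc=lga} and {host=web02, dc=lga}, the
  // "dc" pair is common to both spans and stays in tag_uids, while "host" has
  // differing values, so its key is moved to aggregated_tag_uids instead.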
  public String metricName() {
    try {
      return metricNameAsync().joinUninterruptibly();
    } catch (RuntimeException e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("Should never be here", e);
    }
  }

  public Deferred<String> metricNameAsync() {
    return spans.isEmpty() ? Deferred.fromResult("")
                           : spans.get(0).metricNameAsync();
  }

  public Map<String, String> getTags() {
    try {
      return getTagsAsync().joinUninterruptibly();
    } catch (RuntimeException e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("Should never be here", e);
    }
  }

  public Deferred<Map<String, String>> getTagsAsync() {
    if (tags != null) {
      return Deferred.fromResult(tags);
    }
    if (spans.isEmpty()) {
      tags = new HashMap<String, String>(0);
      return Deferred.fromResult(tags);
    }
    if (tag_uids == null) {
      computeTags();
    }
    return resolveTags(tag_uids);
  }

  @Override
  public ByteMap<byte[]> getTagUids() {
    if (tag_uids == null) {
      computeTags();
    }
    return tag_uids;
  }

  public List<String> getAggregatedTags() {
    try {
      return getAggregatedTagsAsync().joinUninterruptibly();
    } catch (RuntimeException e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("Should never be here", e);
    }
  }

  public Deferred<List<String>> getAggregatedTagsAsync() {
    if (aggregated_tags != null) {
      return Deferred.fromResult(aggregated_tags);
    }
    if (spans.isEmpty()) {
      aggregated_tags = new ArrayList<String>(0);
      return Deferred.fromResult(aggregated_tags);
    }
    if (aggregated_tag_uids == null) {
      computeTags();
    }
    return resolveAggTags(aggregated_tag_uids);
  }

  public List<String> getTSUIDs() {
    List<String> tsuids = new ArrayList<String>(spans.size());
    for (Span sp : spans) {
      tsuids.addAll(sp.getTSUIDs());
    }
    return tsuids;
  }

  /**
   * Compiles the annotations for each span into a new array list
   * @return Null if none of the spans had any annotations, a list if one or
   * more were found
   */
  public List<Annotation> getAnnotations() {
    return annotations.isEmpty() ? null : annotations;
  }

  public int size() {
    // TODO(tsuna): There is a way of doing this way more efficiently by
    // inspecting the Spans and counting only data points that fall in
    // our time range.
    final SeekableView it = iterator();
    int size = 0;
    while (it.hasNext()) {
      it.next();
      size++;
    }
    return size;
  }

  public int aggregatedSize() {
    int size = 0;
    for (final Span span : spans) {
      size += span.size();
    }
    return size;
  }

  public SeekableView iterator() {
    return AggregationIterator.create(spans, start_time, end_time,
        aggregator, aggregator.interpolationMethod(),
        downsampler, sample_interval, rate, rate_options, fill_policy);
  }

  /**
   * Finds the {@code i}th data point of this group in {@code O(n)}.
   * Where {@code n} is the number of data points in this group.
   */
  private DataPoint getDataPoint(int i) {
    if (i < 0) {
      throw new IndexOutOfBoundsException("negative index: " + i);
    }
    final int saved_i = i;
    final SeekableView it = iterator();
    DataPoint dp = null;
    while (it.hasNext() && i >= 0) {
      dp = it.next();
      i--;
    }
    if (i != -1 || dp == null) {
      throw new IndexOutOfBoundsException("index " + saved_i
          + " too large (it's >= " + size() + ") for " + this);
    }
    return dp;
  }

  public long timestamp(final int i) {
    return getDataPoint(i).timestamp();
  }

  public boolean isInteger(final int i) {
    return getDataPoint(i).isInteger();
  }

  public double doubleValue(final int i) {
    return getDataPoint(i).doubleValue();
  }

  public long longValue(final int i) {
    return getDataPoint(i).longValue();
  }

  @Override
  public String toString() {
    return "SpanGroup(" + toStringSharedAttributes()
      + ", spans=" + spans
      + ')';
  }

  private String toStringSharedAttributes() {
    return "start_time=" + start_time
      + ", end_time=" + end_time
      + ", tags=" + tags
      + ", aggregated_tags=" + aggregated_tags
      + ", rate=" + rate
      + ", aggregator=" + aggregator
      + ", downsampler=" + downsampler
      + ", sample_interval=" + sample_interval
      + ')';
  }

  public int getQueryIndex() {
    return query_index;
  }
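  // Note: the two private helpers below resolve UID byte arrays to strings
  // asynchronously via Deferreds; synchronous callers simply join on the
  // result, as getTags() and getAggregatedTags() above do with
  // joinUninterruptibly().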
  /**
   * Resolves the set of tag keys to their string names.
   * @param tagks The set of unique tag names
   * @return a deferred to wait on for all of the tag keys to be resolved.  The
   * result should be null.
   */
  private Deferred<List<String>> resolveAggTags(final Set<byte[]> tagks) {
    if (aggregated_tags != null) {
      return Deferred.fromResult(null);
    }

    aggregated_tags = new ArrayList<String>(tagks.size());
    final List<Deferred<String>> names =
        new ArrayList<Deferred<String>>(tagks.size());
    for (final byte[] tagk : tagks) {
      names.add(tsdb.tag_names.getNameAsync(tagk));
    }

    /** Adds the names to the aggregated_tags list */
    final class ResolveCB implements Callback<List<String>, ArrayList<String>> {
      @Override
      public List<String> call(final ArrayList<String> names) throws Exception {
        for (final String name : names) {
          aggregated_tags.add(name);
        }
        return aggregated_tags;
      }
    }

    return Deferred.group(names).addCallback(new ResolveCB());
  }

  /**
   * Resolves the tags to their names, loading them into {@link tags} after
   * initializing that map.
   * @param tag_uids The tag UIDs
   * @return A deferred to wait on for resolution to complete, the result
   * should be null.
   */
  private Deferred<Map<String, String>> resolveTags(final ByteMap<byte[]> tag_uids) {
    if (tags != null) {
      return Deferred.fromResult(null);
    }

    tags = new HashMap<String, String>(tag_uids.size());
    final List<Deferred<Object>> deferreds =
        new ArrayList<Deferred<Object>>(tag_uids.size());

    /** Dumps the pairs into the map in the correct order */
    final class PairCB implements Callback<Object, ArrayList<String>> {
      @Override
      public Object call(final ArrayList<String> pair) throws Exception {
        tags.put(pair.get(0), pair.get(1));
        return null;
      }
    }

    /** Callback executed once all of the pairs are resolved and stored
     * in the map */
    final class GroupCB implements Callback<Map<String, String>, ArrayList<Object>> {
      @Override
      public Map<String, String> call(final ArrayList<Object> group) throws Exception {
        return tags;
      }
    }

    for (Map.Entry<byte[], byte[]> tag_pair : tag_uids.entrySet()) {
      final List<Deferred<String>> resolve_pair =
          new ArrayList<Deferred<String>>(2);
      resolve_pair.add(tsdb.tag_names.getNameAsync(tag_pair.getKey()));
      resolve_pair.add(tsdb.tag_values.getNameAsync(tag_pair.getValue()));
      deferreds.add(Deferred.groupInOrder(resolve_pair).addCallback(new PairCB()));
    }

    return Deferred.group(deferreds).addCallback(new GroupCB());
  }
}
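For reference, a minimal sketch of how a SpanGroup might be driven from within the net.opentsdb.core package (SpanGroup and Span are package-private). The tsdb and spans variables are hypothetical placeholders, and Aggregators.SUM / Aggregators.AVG are assumed to be available from the core package; in practice the query layer builds these groups rather than application code.

// Illustrative sketch only; `tsdb` (a TSDB) and `spans` (an Iterable<Span>
// already loaded for one metric) are placeholders, not defined here.
final SpanGroup group = new SpanGroup(tsdb,
    1356998400L,        // start time in seconds; normalized to ms internally
    1357002000L,        // end time in seconds
    spans,              // initial spans; may be null and added later via add()
    false,              // rate: report raw values, not rate of change
    Aggregators.SUM,    // aggregate data points across the grouped spans
    60000L,             // downsample interval in milliseconds
    Aggregators.AVG);   // downsampling function (null disables downsampling)

final SeekableView view = group.iterator();
while (view.hasNext()) {
  final DataPoint dp = view.next();
  System.out.println(dp.timestamp() + " "
      + (dp.isInteger() ? dp.longValue() : dp.doubleValue()));
}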