net.opentsdb.search.TimeSeriesLookup.orig Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of opentsdb Show documentation
OpenTSDB is a distributed, scalable Time Series Database (TSDB) written on top of HBase. OpenTSDB was written to address a common need: store, index and serve metrics collected from computer systems (network gear, operating systems, applications) at a large scale, and make this data easily accessible and graphable.
There is a newer version: 2.4.1
Show newest version
// This file is part of OpenTSDB.
// Copyright (C) 2010-2014  The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
// General Public License for more details.  You should have received a copy
// of the GNU Lesser General Public License along with this program.  If not,
// see .
package net.opentsdb.search;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import net.opentsdb.core.Const;
import net.opentsdb.core.Internal;
import net.opentsdb.core.RowKey;
import net.opentsdb.core.TSDB;
import net.opentsdb.core.Tags;
import net.opentsdb.meta.TSMeta;
import net.opentsdb.query.QueryUtil;
import net.opentsdb.uid.NoSuchUniqueId;
import net.opentsdb.uid.NoSuchUniqueName;
import net.opentsdb.uid.UniqueId;
import net.opentsdb.uid.UniqueId.UniqueIdType;
import net.opentsdb.utils.ByteArrayPair;
import net.opentsdb.utils.Exceptions;
import net.opentsdb.utils.Pair;

import org.hbase.async.Bytes;
import org.hbase.async.KeyValue;
import org.hbase.async.Scanner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;
import com.stumbleupon.async.DeferredGroupException;

/**
 * Lookup series related to a metric, tagk, tagv or any combination thereof.
 * This class doesn't handle wild-card searching yet.
 * 
 * When dealing with tags, we can lookup on tagks, tagvs or pairs. Thus:
 * tagk, null  <- lookup all series with a tagk
 * tagk, tagv  <- lookup all series with a tag pair
 * null, tagv  <- lookup all series with a tag value somewhere
 * 
 * The user can supply multiple tags in a query so the logic is a little goofy
 * but here it is:
 * - Different tagks are AND'd, e.g. given "host=web01 dc=lga" we will lookup
 *   series that contain both of those tag pairs. Also when given "host= dc="
 *   then we lookup series with both tag keys regardless of their values.
 * - Tagks without a tagv will override tag pairs. E.g. "host=web01 host=" will
 *   return all series with the "host" tagk.
 * - Tagvs without a tagk are OR'd. Given "=lga =phx" the lookup will fetch 
 *   anything with either "lga" or "phx" as the value for a pair. When combined
 *   with a tagk, e.g. "host=web01 =lga" then it will return any series with the
 *   tag pair AND any tag with the "lga" value.
 *  
 * To avoid running performance degrading regexes in HBase regions, we'll double
 * filter when necessary. If tagks are present, those are used in the rowkey 
 * filter and a secondary filter is applied in the TSD with remaining tagvs.
 * E.g. the query "host=web01 =lga" will issue a rowkey filter with "host=web01"
 * then within the TSD scanner, we'll filter out only the rows that contain an
 * "lga" tag value. We don't know where in a row key the tagv may fall, so we
 * would have to first match on the pair, then backtrack to find the value and 
 * make sure the pair is skipped. Thus its easier on the region server to execute
 * a simpler rowkey regex, pass all the results to the TSD, then let us filter on
 * tag values only when necessary. (if a query only has tag values, then this is
 * moot and we can pass them in a rowkey filter since they're OR'd).
 * 
 * @since 2.1
 */
public class TimeSeriesLookup {
  private static final Logger LOG = 
      LoggerFactory.getLogger(TimeSeriesLookup.class);
  
  /** Charset used to convert Strings to byte arrays and back. */
  private static final Charset CHARSET = Charset.forName("ISO-8859-1");
  
  /** The query with metrics and/or tags to use */
  private final SearchQuery query;
  
  /** Whether or not to dump the output to standard out for CLI commands */
  private boolean to_stdout;
  
  /** The TSD to use for lookups */
  private final TSDB tsdb;
  
  /** The metric UID if given by the query, post resolution */
  private byte[] metric_uid;
  
  /** Tag UID pairs if given in the query. Key or value may be null. */
  private List pairs;
  
  /** The compiled row key regex for HBase filtering */
  private String rowkey_regex;
  
  /** Post scan filtering if we have a lot of values to look at */
  private String tagv_filter;
  
  /** The results to send to the caller */
  private final List tsuids;
  
  /**
   * Default ctor
   * @param tsdb The TSD to which we belong
   * @param metric A metric to match on, may be null
   * @param tags One or more tags to match on, may be null
   */
  public TimeSeriesLookup(final TSDB tsdb, final SearchQuery query) {
    this.tsdb = tsdb;
    this.query = query;
    tsuids = Collections.synchronizedList(new ArrayList());
  }
  
  /**
   * Lookup time series associated with the given metric, tagk, tagv or tag 
   * pairs. Either the meta table or the data table will be scanned. If no
   * metric is given, a full table scan must be performed and this call may take
   * a long time to complete. 
   * When dumping to stdout, if an ID can't be looked up, it will be logged and
   * skipped.
   * @return A list of TSUIDs matching the given lookup query.
   * @throws NoSuchUniqueName if any of the given names fail to resolve to a 
   * UID.
   */
  public List lookup() {
<<<<<<< HEAD
    LOG.info(query.toString());
    boolean limit_reached = false;
    final StringBuilder tagv_filter = new StringBuilder();
    final Scanner scanner = getScanner(tagv_filter);
    final List tsuids = new ArrayList();
    final Pattern tagv_regex = tagv_filter.length() > 1 ? 
        Pattern.compile(tagv_filter.toString()) : null;
=======
    try {
      return lookupAsync().join();
    } catch (InterruptedException e) {
      LOG.error("Interrupted performing lookup", e);
      Thread.currentThread().interrupt();
      return null;
    } catch (DeferredGroupException e) {
      final Throwable ex = Exceptions.getCause(e);
      if (ex instanceof NoSuchUniqueName) {
        throw (NoSuchUniqueName)ex;
      }
      throw new RuntimeException("Unexpected exception", ex);
    } catch (NoSuchUniqueName e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("Unexpected exception", e);
    }
  }
  
  /**
   * Lookup time series associated with the given metric, tagk, tagv or tag 
   * pairs. Either the meta table or the data table will be scanned. If no
   * metric is given, a full table scan must be performed and this call may take
   * a long time to complete. 
   * When dumping to stdout, if an ID can't be looked up, it will be logged and
   * skipped.
   * @return A list of TSUIDs matching the given lookup query.
   * @throws NoSuchUniqueName if any of the given names fail to resolve to a 
   * UID.
   * @since 2.2
   */
  public Deferred> lookupAsync() {
    final Pattern tagv_regex = tagv_filter != null ? 
        Pattern.compile(tagv_filter) : null;
    
>>>>>>> next
    // we don't really know what size the UIDs will resolve to so just grab
    // a decent amount.
    final StringBuffer buf = to_stdout ? new StringBuffer(2048) : null;
    final long start = System.currentTimeMillis();
    final int limit;
    if (query.getLimit() > 0) {
      if (query.useMeta() || Const.SALT_WIDTH() < 1) {
        limit = query.getLimit();
      } else if (query.getLimit() < Const.SALT_BUCKETS()) {
        limit = 1;
      } else {
        limit = query.getLimit() / Const.SALT_BUCKETS();
      }
    } else {
      limit = 0;
    }
        
    class ScannerCB implements Callback>, 
      ArrayList>> {
      private final Scanner scanner;
      // used to avoid dupes when scanning the data table
      private byte[] last_tsuid = null;
      private int rows_read;
      
      ScannerCB(final Scanner scanner) {
        this.scanner = scanner;
      }
      
      Deferred> scan() {
        return scanner.nextRows().addCallbackDeferring(this);
      }
      
      @Override
      public Deferred> call(final ArrayList> rows)
          throws Exception {
        if (rows == null) {
          scanner.close();
          if (query.useMeta() || Const.SALT_WIDTH() < 1) {
            LOG.debug("Lookup query matched " + tsuids.size() + " time series in " +
                (System.currentTimeMillis() - start) + " ms");
          }
          return Deferred.fromResult(tsuids);
        }
        
        for (final ArrayList row : rows) {
          if (limit > 0 && rows_read >= limit) {
            // little recursion to close the scanner and log above.
            return call(null);
          }
          final byte[] tsuid = query.useMeta() ? row.get(0).key() : 
            UniqueId.getTSUIDFromKey(row.get(0).key(), TSDB.metrics_width(), 
                Const.TIMESTAMP_BYTES);
          
          // TODO - there MUST be a better way than creating a ton of temp
          // string objects.
          if (tagv_regex != null && 
              !tagv_regex.matcher(new String(tsuid, CHARSET)).find()) {
            continue;
          }
          
          if (to_stdout) {
            if (last_tsuid != null && Bytes.memcmp(last_tsuid, tsuid) == 0) {
              continue;
            }
            last_tsuid = tsuid;
            
            try {
              buf.append(UniqueId.uidToString(tsuid)).append(" ");
              buf.append(RowKey.metricNameAsync(tsdb, tsuid)
                  .joinUninterruptibly());
              buf.append(" ");
              
              final List tag_ids = UniqueId.getTagPairsFromTSUID(tsuid);
              final Map resolved_tags = 
                  Tags.resolveIdsAsync(tsdb, tag_ids).joinUninterruptibly();
              for (final Map.Entry tag_pair : 
                  resolved_tags.entrySet()) {
                buf.append(tag_pair.getKey()).append("=")
                   .append(tag_pair.getValue()).append(" ");
              }
            } catch (NoSuchUniqueId nsui) {
              LOG.error("Unable to resolve UID in TSUID (" + 
                  UniqueId.uidToString(tsuid) + ") " + nsui.getMessage());
            }
            buf.setLength(0); // reset the buffer so we can re-use it
          } else {
            if(tsuids.size() < query.getLimit()) {
              tsuids.add(tsuid);
            } else {
              limit_reached = true;
              break;
            }
          }
          ++rows_read;
        }
<<<<<<< HEAD
        if(limit_reached) {
          break;
        }
=======
        
        return scan();
      }
      
      @Override
      public String toString() {
        return "Scanner callback";
      }
    }
    
    class CompleteCB implements Callback, ArrayList>> {
      @Override
      public List call(final ArrayList> unused) throws Exception {
        LOG.debug("Lookup query matched " + tsuids.size() + " time series in " +
            (System.currentTimeMillis() - start) + " ms");
        return tsuids;
      }
      @Override
      public String toString() {
        return "Final async lookup callback";
>>>>>>> next
      }
    }
    
    class UIDCB implements Callback>, Object> {
      @Override
      public Deferred> call(Object arg0) throws Exception {
        if (!query.useMeta() && Const.SALT_WIDTH() > 0 && metric_uid != null) {
          final ArrayList>> deferreds = 
              new ArrayList>>(Const.SALT_BUCKETS());
          for (int i = 0; i < Const.SALT_BUCKETS(); i++) {
            deferreds.add(new ScannerCB(getScanner(i)).scan());
          }
          return Deferred.group(deferreds).addCallback(new CompleteCB());
        } else {
          return new ScannerCB(getScanner(0)).scan();
        }
      }
      @Override
      public String toString() {
        return "UID resolution callback";
      }
    }
    
    return resolveUIDs().addCallbackDeferring(new UIDCB());
  }
  
  /**
   * Resolves the metric and tag strings to their UIDs
   * @return A deferred to wait on for resolution to complete.
   */
  private Deferred