/**
 * Copyright 2015-2018 The OpenZipkin Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package zipkin.storage.cassandra;

import com.datastax.driver.core.BoundStatement;
import com.datastax.driver.core.PreparedStatement;
import com.datastax.driver.core.ProtocolVersion;
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.querybuilder.QueryBuilder;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.ContiguousSet;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.common.collect.Ordering;
import com.google.common.collect.Range;
import com.google.common.util.concurrent.AsyncFunction;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import zipkin.Codec;
import zipkin.DependencyLink;
import zipkin.Span;
import zipkin.internal.CorrectForClockSkew;
import zipkin.internal.Dependencies;
import zipkin.internal.DependencyLinker;
import zipkin.internal.GroupByTraceId;
import zipkin.internal.MergeById;
import zipkin.internal.Nullable;
import zipkin.storage.QueryRequest;
import zipkin.storage.guava.GuavaSpanStore;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.DiscreteDomain.integers;
import static com.google.common.util.concurrent.Futures.allAsList;
import static com.google.common.util.concurrent.Futures.immediateFailedFuture;
import static com.google.common.util.concurrent.Futures.immediateFuture;
import static com.google.common.util.concurrent.Futures.transform;
import static zipkin.internal.Util.getDays;

public final class CassandraSpanStore implements GuavaSpanStore {
  private static final Logger LOG = LoggerFactory.getLogger(CassandraSpanStore.class);

  static final ListenableFuture<List<String>> EMPTY_LIST =
      immediateFuture(Collections.<String>emptyList());

  private final int maxTraceCols;
  private final int indexFetchMultiplier;
  private final boolean strictTraceId;
  private final Session session;
  private final TimestampCodec timestampCodec;
  private final Set<Integer> buckets;
  private final PreparedStatement selectTraces;
  private final PreparedStatement selectDependencies;
  private final PreparedStatement selectServiceNames;
  private final PreparedStatement selectSpanNames;
  private final PreparedStatement selectTraceIdsByServiceName;
  private final PreparedStatement selectTraceIdsByServiceNames;
  private final PreparedStatement selectTraceIdsBySpanName;
  private final PreparedStatement selectTraceIdsByAnnotation;
  private final Function<ResultSet, Map<Long, Long>> traceIdToTimestamp;

  CassandraSpanStore(Session session, int bucketCount, int maxTraceCols, int indexFetchMultiplier,
      boolean strictTraceId) {
    this.session = session;
    this.maxTraceCols = maxTraceCols;
    this.indexFetchMultiplier = indexFetchMultiplier;
    this.strictTraceId = strictTraceId;

    ProtocolVersion protocolVersion = session.getCluster()
        .getConfiguration().getProtocolOptions().getProtocolVersion();
    this.timestampCodec = new TimestampCodec(protocolVersion);
    this.buckets = ContiguousSet.create(Range.closedOpen(0, bucketCount), integers());
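    // Index tables shard their partitions "bucketCount" ways to spread write load; queries pass
    // the whole bucket set back via IN (see selectTraceIdsByServiceName and
    // selectTraceIdsByAnnotation below).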

    selectTraces = session.prepare(
        QueryBuilder.select("trace_id", "span")
            .from("traces")
            .where(QueryBuilder.in("trace_id", QueryBuilder.bindMarker("trace_id")))
            .limit(QueryBuilder.bindMarker("limit_")));
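    // "span" cells are thrift-encoded (read via Codec.THRIFT below); "limit_" caps the total
    // rows fetched across all requested trace_ids, bound to maxTraceCols in getSpansByTraceIds.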

    selectDependencies = session.prepare(
        QueryBuilder.select("dependencies")
            .from("dependencies")
            .where(QueryBuilder.in("day", QueryBuilder.bindMarker("days"))));

    selectServiceNames = session.prepare(
        QueryBuilder.select("service_name")
            .from(Tables.SERVICE_NAMES));

    selectSpanNames = session.prepare(
        QueryBuilder.select("span_name")
            .from(Tables.SPAN_NAMES)
            .where(QueryBuilder.eq("service_name", QueryBuilder.bindMarker("service_name")))
            .and(QueryBuilder.eq("bucket", QueryBuilder.bindMarker("bucket")))
            .limit(QueryBuilder.bindMarker("limit_")));

    selectTraceIdsByServiceName = session.prepare(
        QueryBuilder.select("ts", "trace_id")
            .from(Tables.SERVICE_NAME_INDEX)
            .where(QueryBuilder.eq("service_name", QueryBuilder.bindMarker("service_name")))
            .and(QueryBuilder.in("bucket", QueryBuilder.bindMarker("bucket")))
            .and(QueryBuilder.gte("ts", QueryBuilder.bindMarker("start_ts")))
            .and(QueryBuilder.lte("ts", QueryBuilder.bindMarker("end_ts")))
            .limit(QueryBuilder.bindMarker("limit_"))
            .orderBy(QueryBuilder.desc("ts")));
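    // Reads each (service_name, bucket) partition across the time window, newest first, so the
    // row limit keeps the most recent trace ids.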

    selectTraceIdsBySpanName = session.prepare(
        QueryBuilder.select("ts", "trace_id")
            .from(Tables.SERVICE_SPAN_NAME_INDEX)
            .where(
                QueryBuilder.eq("service_span_name", QueryBuilder.bindMarker("service_span_name")))
            .and(QueryBuilder.gte("ts", QueryBuilder.bindMarker("start_ts")))
            .and(QueryBuilder.lte("ts", QueryBuilder.bindMarker("end_ts")))
            .limit(QueryBuilder.bindMarker("limit_"))
            .orderBy(QueryBuilder.desc("ts")));
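    // Unlike the service-name index, this one keys its partition on the combined
    // "service_span_name" (serviceName + "." + spanName), so there is no bucket fan-out.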

    selectTraceIdsByAnnotation = session.prepare(
        QueryBuilder.select("ts", "trace_id")
            .from(Tables.ANNOTATIONS_INDEX)
            .where(QueryBuilder.eq("annotation", QueryBuilder.bindMarker("annotation")))
            .and(QueryBuilder.in("bucket", QueryBuilder.bindMarker("bucket")))
            .and(QueryBuilder.gte("ts", QueryBuilder.bindMarker("start_ts")))
            .and(QueryBuilder.lte("ts", QueryBuilder.bindMarker("end_ts")))
            .limit(QueryBuilder.bindMarker("limit_"))
            .orderBy(QueryBuilder.desc("ts")));

    if (protocolVersion.compareTo(ProtocolVersion.V4) < 0) {
      LOG.warn("Please update Cassandra to 2.2 or later, as some features may fail");
      // Log vs failing on "Partition KEY part service_name cannot be restricted by IN relation"
      selectTraceIdsByServiceNames = null;
    } else {
      selectTraceIdsByServiceNames = session.prepare(
          QueryBuilder.select("ts", "trace_id")
              .from(Tables.SERVICE_NAME_INDEX)
              .where(QueryBuilder.in("service_name", QueryBuilder.bindMarker("service_name")))
              .and(QueryBuilder.in("bucket", QueryBuilder.bindMarker("bucket")))
              .and(QueryBuilder.gte("ts", QueryBuilder.bindMarker("start_ts")))
              .and(QueryBuilder.lte("ts", QueryBuilder.bindMarker("end_ts")))
              .limit(QueryBuilder.bindMarker("limit_"))
              .orderBy(QueryBuilder.desc("ts")));
    }

    traceIdToTimestamp = new Function<ResultSet, Map<Long, Long>>() {
      @Override public Map<Long, Long> apply(ResultSet input) {
        Map<Long, Long> result = new LinkedHashMap<>();
        for (Row row : input) {
          result.put(row.getLong("trace_id"), timestampCodec.deserialize(row, "ts"));
        }
        return result;
      }
    };
  }

  /**
   * This fans out into a potentially large number of requests related to the number of annotations
   * queried. The returned future will fail if any of the inputs fail.
   *
   * <p>When {@link QueryRequest#serviceName service name} is unset, service names will be
   * fetched eagerly, implying an additional query.
   */
  @Override public ListenableFuture<List<List<Span>>> getTraces(final QueryRequest request) {
    checkArgument(request.minDuration == null,
        "getTraces with duration is unsupported. Upgrade to the new cassandra3 schema.");
    // Over fetch on indexes as they don't return distinct (trace id, timestamp) rows.
    final int traceIndexFetchSize = request.limit * indexFetchMultiplier;
    ListenableFuture<Map<Long, Long>> traceIdToTimestamp;
    if (request.spanName != null) {
      traceIdToTimestamp = getTraceIdsBySpanName(request.serviceName, request.spanName,
          request.endTs * 1000, request.lookback * 1000, traceIndexFetchSize);
    } else if (request.serviceName != null) {
      traceIdToTimestamp = getTraceIdsByServiceNames(Collections.singletonList(request.serviceName),
          request.endTs * 1000, request.lookback * 1000, traceIndexFetchSize);
    } else {
      checkArgument(selectTraceIdsByServiceNames != null,
          "getTraces without serviceName requires Cassandra 2.2 or later");
      traceIdToTimestamp = transform(getServiceNames(),
          new AsyncFunction<List<String>, Map<Long, Long>>() {
            @Override public ListenableFuture<Map<Long, Long>> apply(
                @Nullable List<String> serviceNames) {
              return getTraceIdsByServiceNames(serviceNames,
                  request.endTs * 1000, request.lookback * 1000, traceIndexFetchSize);
            }
          });
    }

    List<String> annotationKeys = CassandraUtil.annotationKeys(request);

    ListenableFuture<Set<Long>> traceIds;
    if (annotationKeys.isEmpty()) {
      // Simplest case is when there is no annotation query. Limit is valid since there's no AND
      // query that could reduce the results returned to less than the limit.
      traceIds = Futures.transform(traceIdToTimestamp, CassandraUtil.keyset());
    } else {
      // While a valid port of the scala cassandra span store (from zipkin 1.35), there is a fault.
      // each annotation key is an intersection, meaning we likely return < traceIndexFetchSize.
      List<ListenableFuture<Map<Long, Long>>> futureKeySetsToIntersect = new ArrayList<>();
      if (request.spanName != null) {
        futureKeySetsToIntersect.add(traceIdToTimestamp);
      }
      for (String annotationKey : annotationKeys) {
        futureKeySetsToIntersect.add(getTraceIdsByAnnotation(annotationKey,
            request.endTs * 1000, request.lookback * 1000, traceIndexFetchSize));
      }
      // We achieve the AND goal, by intersecting each of the key sets.
      traceIds = Futures.transform(allAsList(futureKeySetsToIntersect),
          CassandraUtil.intersectKeySets());
    }
    return transform(traceIds, new AsyncFunction<Set<Long>, List<List<Span>>>() {
      @Override public ListenableFuture<List<List<Span>>> apply(@Nullable Set<Long> traceIds) {
        traceIds = ImmutableSet.copyOf(Iterators.limit(traceIds.iterator(), request.limit));
        return transform(getSpansByTraceIds(traceIds, maxTraceCols),
            new Function<List<Span>, List<List<Span>>>() {
              @Override public List<List<Span>> apply(@Nullable List<Span> input) {
                // Indexes only contain Span.traceId, so our matches are imprecise on Span.traceIdHigh
                return FluentIterable.from(GroupByTraceId.apply(input, strictTraceId, true))
                    .filter(new Predicate<List<Span>>() {
                      @Override public boolean apply(List<Span> trace) {
                        return trace.get(0).traceIdHigh == 0 || request.test(trace);
                      }
                    })
                    .toList();
              }
            });
      }

      @Override public String toString() {
        return "getSpansByTraceIds";
      }
    });
  }
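  // Illustrative usage, assuming a constructed CassandraSpanStore named "store" (hypothetical
  // variable; QueryRequest is the builder-based type from zipkin.storage):
  //   ListenableFuture<List<List<Span>>> traces = store.getTraces(QueryRequest.builder()
  //       .serviceName("frontend").spanName("get").limit(10).build());
  // With no serviceName, the store first lists all service names (one extra round trip), then
  // queries the service-name index for each of them.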
  @Override public ListenableFuture<List<Span>> getRawTrace(long traceId) {
    return getRawTrace(0L, traceId);
  }

  /**
   * Since the schema doesn't have a unique index on {@link Span#traceIdHigh}, we have to filter
   * client-side.
   */
  @Override public ListenableFuture<List<Span>> getRawTrace(final long traceIdHigh,
      long traceIdLow) {
    return transform(getSpansByTraceIds(Collections.singleton(traceIdLow), maxTraceCols),
        new Function<List<Span>, List<Span>>() {
          @Override public List<Span> apply(@Nullable List<Span> input) {
            if (strictTraceId) {
              Iterator<Span> spans = input.iterator();
              while (spans.hasNext()) {
                long nextTraceIdHigh = spans.next().traceIdHigh;
                if (nextTraceIdHigh != 0L && nextTraceIdHigh != traceIdHigh) {
                  spans.remove();
                }
              }
            }
            return input.isEmpty() ? null : input;
          }
        });
  }

  @Override public ListenableFuture<List<Span>> getTrace(long traceId) {
    return getTrace(0L, traceId);
  }

  @Override public ListenableFuture<List<Span>> getTrace(long traceIdHigh, long traceIdLow) {
    return transform(getRawTrace(traceIdHigh, traceIdLow), AdjustTrace.INSTANCE);
  }

  enum AdjustTrace implements Function<Collection<Span>, List<Span>> {
    INSTANCE;

    @Override public List<Span> apply(@Nullable Collection<Span> input) {
      List<Span> result = CorrectForClockSkew.apply(MergeById.apply(input));
      return result.isEmpty() ? null : result;
    }

    @Override public String toString() {
      return "AdjustTrace";
    }
  }
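  // getTrace cleans the raw result: MergeById collapses spans reported under the same id, then
  // CorrectForClockSkew realigns timestamps where client and server annotations disagree.
  // Returning null (rather than an empty list) signals "trace not found" to callers.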
  @Override public ListenableFuture<List<String>> getServiceNames() {
    try {
      BoundStatement bound = CassandraUtil.bindWithName(selectServiceNames, "select-service-names");
      return transform(session.executeAsync(bound),
          new Function<ResultSet, List<String>>() {
            @Override public List<String> apply(@Nullable ResultSet input) {
              Set<String> serviceNames = new HashSet<>();
              for (Row row : input) {
                serviceNames.add(row.getString("service_name"));
              }
              return Ordering.natural().sortedCopy(serviceNames);
            }
          }
      );
    } catch (RuntimeException ex) {
      return immediateFailedFuture(ex);
    }
  }

  @Override public ListenableFuture<List<String>> getSpanNames(String serviceName) {
    if (serviceName == null || serviceName.isEmpty()) return EMPTY_LIST;
    serviceName = checkNotNull(serviceName, "serviceName").toLowerCase();
    int bucket = 0;
    try {
      BoundStatement bound = CassandraUtil.bindWithName(selectSpanNames, "select-span-names")
          .setString("service_name", serviceName)
          .setInt("bucket", bucket)
          // no one is ever going to browse so many span names
          .setInt("limit_", 1000);

      return transform(session.executeAsync(bound),
          new Function<ResultSet, List<String>>() {
            @Override public List<String> apply(@Nullable ResultSet input) {
              Set<String> spanNames = new HashSet<>();
              for (Row row : input) {
                spanNames.add(row.getString("span_name"));
              }
              return Ordering.natural().sortedCopy(spanNames);
            }
          }
      );
    } catch (RuntimeException ex) {
      return immediateFailedFuture(ex);
    }
  }

  @Override public ListenableFuture<List<DependencyLink>> getDependencies(long endTs,
      @Nullable Long lookback) {
    List<Date> days = getDays(endTs, lookback);
    try {
      BoundStatement bound = CassandraUtil.bindWithName(selectDependencies, "select-dependencies")
          .setList("days", days);
      return transform(session.executeAsync(bound), ConvertDependenciesResponse.INSTANCE);
    } catch (RuntimeException ex) {
      return immediateFailedFuture(ex);
    }
  }

  enum ConvertDependenciesResponse implements Function<ResultSet, List<DependencyLink>> {
    INSTANCE;

    @Override public List<DependencyLink> apply(@Nullable ResultSet rs) {
      ImmutableList.Builder<DependencyLink> unmerged = ImmutableList.builder();
      for (Row row : rs) {
        ByteBuffer encodedDayOfDependencies = row.getBytes("dependencies");
        for (DependencyLink link : Dependencies.fromThrift(encodedDayOfDependencies).links) {
          unmerged.add(link);
        }
      }
      return DependencyLinker.merge(unmerged.build());
    }

    @Override public String toString() {
      return "MergeDependencies";
    }
  }
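  // Each "dependencies" row holds one day of pre-aggregated, thrift-encoded links (written
  // out-of-band, e.g. by the zipkin-dependencies job); DependencyLinker.merge combines links
  // that share a parent and child across the selected days.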
  /**
   * Get the available trace information from the storage system. Spans in trace should be sorted
   * by the first annotation timestamp in that span. First event should be first in the spans list.
   *
   * <p>The return list will contain only spans that have been found, thus the return list may not
   * match the provided list of ids.
   */
  ListenableFuture<List<Span>> getSpansByTraceIds(Set<Long> traceIds, int limit) {
    checkNotNull(traceIds, "traceIds");
    if (traceIds.isEmpty()) {
      return immediateFuture(Collections.<Span>emptyList());
    }

    try {
      BoundStatement bound = CassandraUtil.bindWithName(selectTraces, "select-traces")
          .setSet("trace_id", traceIds)
          .setInt("limit_", limit);

      bound.setFetchSize(Integer.MAX_VALUE);

      return transform(session.executeAsync(bound),
          new Function<ResultSet, List<Span>>() {
            @Override public List<Span> apply(@Nullable ResultSet input) {
              List<Span> result = new ArrayList<>(input.getAvailableWithoutFetching());
              for (Row row : input) {
                result.add(Codec.THRIFT.readSpan(row.getBytes("span")));
              }
              return result;
            }
          }
      );
    } catch (RuntimeException ex) {
      return immediateFailedFuture(ex);
    }
  }

  ListenableFuture<Map<Long, Long>> getTraceIdsByServiceNames(List<String> serviceNames,
      long endTs, long lookback, int limit) {
    if (serviceNames.isEmpty()) return immediateFuture(Collections.<Long, Long>emptyMap());

    long startTs = Math.max(endTs - lookback, 0); // >= 1970
    try {
      // This guards use of "in" query to give people a little more time to move off Cassandra 2.1
      // Note that it will still fail when serviceNames.size() > 1
      BoundStatement bound = serviceNames.size() == 1
          ? CassandraUtil.bindWithName(selectTraceIdsByServiceName, "select-trace-ids-by-service-name")
              .setString("service_name", serviceNames.get(0))
              .setSet("bucket", buckets)
              .setBytesUnsafe("start_ts", timestampCodec.serialize(startTs))
              .setBytesUnsafe("end_ts", timestampCodec.serialize(endTs))
              .setInt("limit_", limit)
          : CassandraUtil.bindWithName(selectTraceIdsByServiceNames, "select-trace-ids-by-service-names")
              .setList("service_name", serviceNames)
              .setSet("bucket", buckets)
              .setBytesUnsafe("start_ts", timestampCodec.serialize(startTs))
              .setBytesUnsafe("end_ts", timestampCodec.serialize(endTs))
              .setInt("limit_", limit);

      bound.setFetchSize(Integer.MAX_VALUE);

      return transform(session.executeAsync(bound), traceIdToTimestamp);
    } catch (RuntimeException ex) {
      return immediateFailedFuture(ex);
    }
  }

  ListenableFuture<Map<Long, Long>> getTraceIdsBySpanName(String serviceName, String spanName,
      long endTs, long lookback, int limit) {
    checkArgument(serviceName != null, "serviceName required on spanName query");
    checkArgument(spanName != null, "spanName required on spanName query");
    String serviceSpanName = serviceName + "." + spanName;
    long startTs = Math.max(endTs - lookback, 0); // >= 1970
    try {
      BoundStatement bound =
          CassandraUtil.bindWithName(selectTraceIdsBySpanName, "select-trace-ids-by-span-name")
              .setString("service_span_name", serviceSpanName)
              .setBytesUnsafe("start_ts", timestampCodec.serialize(startTs))
              .setBytesUnsafe("end_ts", timestampCodec.serialize(endTs))
              .setInt("limit_", limit);

      return transform(session.executeAsync(bound), traceIdToTimestamp);
    } catch (RuntimeException ex) {
      return immediateFailedFuture(ex);
    }
  }
  ListenableFuture<Map<Long, Long>> getTraceIdsByAnnotation(String annotationKey,
      long endTs, long lookback, int limit) {
    long startTs = Math.max(endTs - lookback, 0); // >= 1970
    try {
      BoundStatement bound =
          CassandraUtil.bindWithName(selectTraceIdsByAnnotation, "select-trace-ids-by-annotation")
              .setBytes("annotation", CassandraUtil.toByteBuffer(annotationKey))
              .setSet("bucket", buckets)
              .setBytesUnsafe("start_ts", timestampCodec.serialize(startTs))
              .setBytesUnsafe("end_ts", timestampCodec.serialize(endTs))
              .setInt("limit_", limit);

      bound.setFetchSize(Integer.MAX_VALUE);

      return transform(session.executeAsync(bound),
          new Function<ResultSet, Map<Long, Long>>() {
            @Override public Map<Long, Long> apply(@Nullable ResultSet input) {
              Map<Long, Long> traceIdsToTimestamps = new LinkedHashMap<>();
              for (Row row : input) {
                traceIdsToTimestamps.put(row.getLong("trace_id"),
                    timestampCodec.deserialize(row, "ts"));
              }
              return traceIdsToTimestamps;
            }
          }
      );
    } catch (CharacterCodingException | RuntimeException ex) {
      return immediateFailedFuture(ex);
    }
  }
}