All Downloads are FREE. Search and download functionalities are using the official Maven repository.

zipkin.storage.elasticsearch.ElasticsearchSpanStore Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/**
 * Copyright 2015-2016 The OpenZipkin Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package zipkin.storage.elasticsearch;

import com.google.common.base.Function;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Ordering;
import com.google.common.util.concurrent.AsyncFunction;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Strings;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.RangeQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.nested.Nested;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Order;
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
import org.elasticsearch.search.aggregations.metrics.tophits.TopHits;
import zipkin.Codec;
import zipkin.DependencyLink;
import zipkin.Span;
import zipkin.internal.CorrectForClockSkew;
import zipkin.internal.MergeById;
import zipkin.internal.Nullable;
import zipkin.internal.Util;
import zipkin.storage.QueryRequest;
import zipkin.storage.guava.GuavaSpanStore;

import static com.google.common.util.concurrent.Futures.immediateFuture;
import static com.google.common.util.concurrent.Futures.transform;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.termsQuery;

final class ElasticsearchSpanStore implements GuavaSpanStore {
  /**
   * The maximum count of raw spans returned in a trace query.
   *
   * 

Not configurable as it implies adjustments to the index template (index.max_result_window) * and user settings * *

See https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-from-size.html */ static final int MAX_RAW_SPANS = 10000; // the default elasticsearch allowed limit static final long ONE_DAY_IN_MILLIS = TimeUnit.DAYS.toMillis(1); static final ListenableFuture> EMPTY_LIST = immediateFuture(Collections.emptyList()); static final Ordering> TRACE_DESCENDING = Ordering.from(new Comparator>() { @Override public int compare(List left, List right) { return right.get(0).compareTo(left.get(0)); } }); private final Client client; private final IndexNameFormatter indexNameFormatter; ElasticsearchSpanStore(Client client, IndexNameFormatter indexNameFormatter) { this.client = client; this.indexNameFormatter = indexNameFormatter; } @Override public ListenableFuture>> getTraces(QueryRequest request) { long endMillis = request.endTs; long beginMillis = endMillis - request.lookback; BoolQueryBuilder filter = boolQuery() .must(rangeQuery("timestamp") .gte(TimeUnit.MILLISECONDS.toMicros(beginMillis)) .lte(TimeUnit.MILLISECONDS.toMicros(endMillis))); if (request.serviceName != null) { filter.must(boolQuery() .should(termQuery("annotations.endpoint.serviceName", request.serviceName)) .should(nestedQuery( "binaryAnnotations", termQuery("binaryAnnotations.endpoint.serviceName", request.serviceName)))); } if (request.spanName != null) { filter.must(termQuery("name", request.spanName)); } for (String annotation : request.annotations) { filter.must(termQuery("annotations.value", annotation)); } for (Map.Entry annotation : request.binaryAnnotations.entrySet()) { // In our index template, we make sure the binaryAnnotation value is indexed as string, // meaning non-string values won't even be indexed at all. This means that we can only // match string values here, which happens to be exactly what we want. filter.must(nestedQuery("binaryAnnotations", boolQuery() .must(termQuery("binaryAnnotations.key", annotation.getKey())) .must(termQuery("binaryAnnotations.value", annotation.getValue())))); } if (request.minDuration != null) { RangeQueryBuilder durationQuery = rangeQuery("duration").gte(request.minDuration); if (request.maxDuration != null) { durationQuery.lte(request.maxDuration); } filter.must(durationQuery); } List strings = computeIndices(beginMillis, endMillis); final String[] indices = strings.toArray(new String[strings.size()]); // We need to filter to traces that contain at least one span that matches the request, // but the zipkin API is supposed to order traces by first span, regardless of if it was // filtered or not. This is not possible without either multiple, heavyweight queries // or complex multiple indexing, defeating much of the elegance of using elasticsearch for this. // So we fudge and order on the first span among the filtered spans - in practice, there should // be no significant difference in user experience since span start times are usually very // close to each other in human time. SearchRequestBuilder elasticRequest = client.prepareSearch(indices) .setIndicesOptions(IndicesOptions.lenientExpandOpen()) .setTypes(ElasticsearchConstants.SPAN) .setQuery(boolQuery().must(matchAllQuery()).filter(filter)) .setSize(0) .addAggregation( AggregationBuilders.terms("traceId_agg") .field("traceId") .subAggregation(AggregationBuilders.min("timestamps_agg").field("timestamp")) .order(Order.aggregation("timestamps_agg", false)) .size(request.limit)); ListenableFuture traceIds = ElasticFutures.toGuava(elasticRequest.execute()); return transform(traceIds, new AsyncFunction>>() { @Override public ListenableFuture>> apply(SearchResponse input) { if (input.getAggregations() == null || input.getAggregations().get("traceId_agg") == null) { return Futures.immediateFuture(Collections.>emptyList()); } Terms traceIdsAgg = input.getAggregations().get("traceId_agg"); List traceIds = new ArrayList<>(); for (Terms.Bucket bucket : traceIdsAgg.getBuckets()) { traceIds.add(Util.lowerHexToUnsignedLong(bucket.getKeyAsString())); } return getTracesByIds(traceIds, indices); } } ); } @Override public ListenableFuture> getTrace(long traceId) { return transform(getRawTrace(traceId), new Function, List>() { @Override public List apply(List input) { return input == null ? null : CorrectForClockSkew.apply(MergeById.apply(input)); } }); } @Override public ListenableFuture> getRawTrace(long traceId) { SearchRequestBuilder elasticRequest = client.prepareSearch(indexNameFormatter.catchAll()) .setTypes(ElasticsearchConstants.SPAN) .setSize(MAX_RAW_SPANS) .setQuery(termQuery("traceId", Util.toLowerHex(traceId))); return Futures.transform(ElasticFutures.toGuava(elasticRequest.execute()), new Function>() { @Override public List apply(SearchResponse response) { if (response.getHits().totalHits() == 0) { return null; } ImmutableList.Builder trace = ImmutableList.builder(); for (SearchHit hit : response.getHits()) { trace.add(Codec.JSON.readSpan(hit.getSourceRef().toBytes())); } return trace.build(); } }); } ListenableFuture>> getTracesByIds(Collection traceIds, String[] indices) { List traceIdsStr = new ArrayList<>(traceIds.size()); for (long traceId : traceIds) { traceIdsStr.add(Util.toLowerHex(traceId)); } SearchRequestBuilder elasticRequest = client.prepareSearch(indices) .setIndicesOptions(IndicesOptions.lenientExpandOpen()) .setTypes(ElasticsearchConstants.SPAN) .setSize(MAX_RAW_SPANS) .setQuery(termsQuery("traceId", traceIdsStr)); return Futures.transform(ElasticFutures.toGuava(elasticRequest.execute()), ConvertTracesResponse.INSTANCE); } enum ConvertTracesResponse implements Function>> { INSTANCE; @Override public List> apply(SearchResponse response) { ArrayListMultimap groupedSpans = ArrayListMultimap.create(); for (SearchHit hit : response.getHits()) { Span span = Codec.JSON.readSpan(hit.getSourceRef().toBytes()); groupedSpans.put(span.traceId, span); } List> result = new ArrayList<>(groupedSpans.size()); for (Long traceId : groupedSpans.keySet()) { result.add(CorrectForClockSkew.apply(MergeById.apply(groupedSpans.get(traceId)))); } return TRACE_DESCENDING.immutableSortedCopy(result); } } @Override public ListenableFuture> getServiceNames() { SearchRequestBuilder elasticRequest = client.prepareSearch(indexNameFormatter.catchAll()) .setTypes(ElasticsearchConstants.SPAN) .setQuery(matchAllQuery()) .setSize(0) .addAggregation(AggregationBuilders.terms("annotationServiceName_agg") .field("annotations.endpoint.serviceName") .size(0)) .addAggregation(AggregationBuilders.nested("binaryAnnotations_agg") .path("binaryAnnotations") .subAggregation(AggregationBuilders.terms("binaryAnnotationsServiceName_agg") .field("binaryAnnotations.endpoint.serviceName") .size(0))); return Futures.transform(ElasticFutures.toGuava(elasticRequest.execute()), ConvertServiceNamesResponse.INSTANCE); } enum ConvertServiceNamesResponse implements Function> { INSTANCE; @Override public List apply(SearchResponse response) { if (response.getAggregations() == null) { return Collections.emptyList(); } SortedSet serviceNames = new TreeSet<>(); Terms annotationServiceNamesAgg = response.getAggregations().get("annotationServiceName_agg"); if (annotationServiceNamesAgg != null) { for (Terms.Bucket bucket : annotationServiceNamesAgg.getBuckets()) { if (!bucket.getKeyAsString().isEmpty()) { serviceNames.add(bucket.getKeyAsString()); } } } Nested binaryAnnotationsAgg = response.getAggregations().get("binaryAnnotations_agg"); if (binaryAnnotationsAgg != null && binaryAnnotationsAgg.getAggregations() != null) { Terms binaryAnnotationServiceNamesAgg = binaryAnnotationsAgg.getAggregations() .get("binaryAnnotationsServiceName_agg"); if (binaryAnnotationServiceNamesAgg != null) { for (Terms.Bucket bucket : binaryAnnotationServiceNamesAgg.getBuckets()) { if (!bucket.getKeyAsString().isEmpty()) { serviceNames.add(bucket.getKeyAsString()); } } } } return ImmutableList.copyOf(serviceNames); } } @Override public ListenableFuture> getSpanNames(String serviceName) { if (Strings.isNullOrEmpty(serviceName)) { return EMPTY_LIST; } serviceName = serviceName.toLowerCase(); QueryBuilder filter = boolQuery() .should(termQuery("annotations.endpoint.serviceName", serviceName)) .should(termQuery("binaryAnnotations.endpoint.serviceName", serviceName)); SearchRequestBuilder elasticRequest = client.prepareSearch(indexNameFormatter.catchAll()) .setTypes(ElasticsearchConstants.SPAN) .setQuery(boolQuery().must(matchAllQuery()).filter(filter)) .setSize(0) .addAggregation(AggregationBuilders.terms("name_agg") .order(Order.term(true)) .field("name") .size(0)); return Futures.transform(ElasticFutures.toGuava(elasticRequest.execute()), ConvertSpanNameResponse.INSTANCE); } enum ConvertSpanNameResponse implements Function> { INSTANCE; @Override public List apply(SearchResponse response) { Terms namesAgg = response.getAggregations().get("name_agg"); if (namesAgg == null) { return Collections.emptyList(); } ImmutableList.Builder spanNames = ImmutableList.builder(); for (Terms.Bucket bucket : namesAgg.getBuckets()) { spanNames.add(bucket.getKeyAsString()); } return spanNames.build(); } } @Override public ListenableFuture> getDependencies(long endMillis, @Nullable Long lookback) { long beginMillis = lookback != null ? endMillis - lookback : 0; // We just return all dependencies in the days that fall within endTs and lookback as // dependency links themselves don't have timestamps. List strings = computeIndices(beginMillis, endMillis); SearchRequestBuilder elasticRequest = client.prepareSearch( strings.toArray(new String[strings.size()])) .setIndicesOptions(IndicesOptions.lenientExpandOpen()) .setTypes(ElasticsearchConstants.DEPENDENCY_LINK) .addAggregation(AggregationBuilders.terms("parent_child_agg") .field("parent_child") .subAggregation(AggregationBuilders.topHits("hits_agg") .setSize(1)) .subAggregation(AggregationBuilders.sum("callCount_agg") .field("callCount"))) .setQuery(matchAllQuery()); return Futures.transform(ElasticFutures.toGuava(elasticRequest.execute()), ConvertDependenciesResponse.INSTANCE); } enum ConvertDependenciesResponse implements Function> { INSTANCE; @Override public List apply(SearchResponse response) { if (response.getAggregations() == null) { return Collections.emptyList(); } Terms parentChildAgg = response.getAggregations().get("parent_child_agg"); if (parentChildAgg == null) { return Collections.emptyList(); } ImmutableList.Builder links = ImmutableList.builder(); for (Terms.Bucket bucket : parentChildAgg.getBuckets()) { TopHits hitsAgg = bucket.getAggregations().get("hits_agg"); Sum callCountAgg = bucket.getAggregations().get("callCount_agg"); // We would have no bucket if there wasn't a hit, so this should always be non-empty. SearchHit hit = hitsAgg.getHits().getAt(0); DependencyLink link = Codec.JSON.readDependencyLink(hit.getSourceRef().toBytes()); link = link.toBuilder().callCount((long) callCountAgg.getValue()).build(); links.add(link); } return links.build(); } } private List computeIndices(long beginMillis, long endMillis) { beginMillis = Util.midnightUTC(beginMillis); endMillis = Util.midnightUTC(endMillis); List indices = new ArrayList<>(); // If a leap second is involved, the same index will be specified twice. // It shouldn't be a big deal. for (long currentMillis = beginMillis; currentMillis <= endMillis; currentMillis += ONE_DAY_IN_MILLIS) { indices.add(indexNameFormatter.indexNameForTimestamp(currentMillis)); } return indices; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy