/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.query.select;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Supplier;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.inject.Inject;
import io.druid.java.util.common.StringUtils;
import io.druid.java.util.common.granularity.Granularity;
import io.druid.java.util.common.guava.Comparators;
import io.druid.java.util.common.guava.Sequence;
import io.druid.java.util.common.guava.nary.BinaryFn;
import io.druid.query.CacheStrategy;
import io.druid.query.DefaultGenericQueryMetricsFactory;
import io.druid.query.GenericQueryMetricsFactory;
import io.druid.query.IntervalChunkingQueryRunnerDecorator;
import io.druid.query.Query;
import io.druid.query.QueryMetrics;
import io.druid.query.QueryPlus;
import io.druid.query.QueryRunner;
import io.druid.query.QueryToolChest;
import io.druid.query.Result;
import io.druid.query.ResultGranularTimestampComparator;
import io.druid.query.ResultMergeQueryRunner;
import io.druid.query.aggregation.MetricManipulationFn;
import io.druid.query.dimension.DimensionSpec;
import io.druid.query.filter.DimFilter;
import io.druid.timeline.DataSegmentUtils;
import io.druid.timeline.LogicalSegment;
import org.joda.time.DateTime;
import org.joda.time.Interval;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

/**
 * {@link QueryToolChest} for Druid's Select query. Handles merging of per-segment results,
 * the per-segment cache strategy, pre-merge query decoration, and pruning of segments that a
 * paged query no longer needs to scan.
 */
public class SelectQueryQueryToolChest extends QueryToolChest<Result<SelectResultValue>, SelectQuery>
{
  private static final byte SELECT_QUERY = 0x16;
  private static final TypeReference<Object> OBJECT_TYPE_REFERENCE =
      new TypeReference<Object>()
      {
      };
  private static final TypeReference<Result<SelectResultValue>> TYPE_REFERENCE =
      new TypeReference<Result<SelectResultValue>>()
      {
      };

  private final ObjectMapper jsonMapper;
  private final IntervalChunkingQueryRunnerDecorator intervalChunkingQueryRunnerDecorator;
  private final Supplier<SelectQueryConfig> configSupplier;
  private final GenericQueryMetricsFactory queryMetricsFactory;

  public SelectQueryQueryToolChest(
      ObjectMapper jsonMapper,
      IntervalChunkingQueryRunnerDecorator intervalChunkingQueryRunnerDecorator,
      Supplier<SelectQueryConfig> configSupplier
  )
  {
    this(jsonMapper, intervalChunkingQueryRunnerDecorator, configSupplier, new DefaultGenericQueryMetricsFactory(jsonMapper));
  }

  @Inject
  public SelectQueryQueryToolChest(
      ObjectMapper jsonMapper,
      IntervalChunkingQueryRunnerDecorator intervalChunkingQueryRunnerDecorator,
      Supplier<SelectQueryConfig> configSupplier,
      GenericQueryMetricsFactory queryMetricsFactory
  )
  {
    this.jsonMapper = jsonMapper;
    this.intervalChunkingQueryRunnerDecorator = intervalChunkingQueryRunnerDecorator;
    this.configSupplier = configSupplier;
    this.queryMetricsFactory = queryMetricsFactory;
  }

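  /**
   * Merges partial results from multiple runners. Results are ordered by their granular
   * timestamp (honoring the query's descending flag) and combined pairwise with
   * {@link SelectBinaryFn}, which merges the two sides' paging identifiers and events
   * within each granular bucket.
   */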
  @Override
  public QueryRunner<Result<SelectResultValue>> mergeResults(
      QueryRunner<Result<SelectResultValue>> queryRunner
  )
  {
    return new ResultMergeQueryRunner<Result<SelectResultValue>>(queryRunner)
    {
      @Override
      protected Ordering<Result<SelectResultValue>> makeOrdering(Query<Result<SelectResultValue>> query)
      {
        return ResultGranularTimestampComparator.create(
            ((SelectQuery) query).getGranularity(), query.isDescending()
        );
      }

      @Override
      protected BinaryFn<Result<SelectResultValue>, Result<SelectResultValue>, Result<SelectResultValue>> createMergeFn(
          Query<Result<SelectResultValue>> input
      )
      {
        SelectQuery query = (SelectQuery) input;
        return new SelectBinaryFn(
            query.getGranularity(),
            query.getPagingSpec(),
            query.isDescending()
        );
      }
    };
  }

  @Override
  public QueryMetrics<Query<?>> makeMetrics(SelectQuery query)
  {
    return queryMetricsFactory.makeMetrics(query);
  }

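  /**
   * Select results carry raw events rather than aggregated values, so there is nothing to
   * manipulate pre-merge; the identity function is returned.
   */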
  @Override
  public Function<Result<SelectResultValue>, Result<SelectResultValue>> makePreComputeManipulatorFn(
      final SelectQuery query, final MetricManipulationFn fn
  )
  {
    return Functions.identity();
  }

  @Override
  public TypeReference<Result<SelectResultValue>> getResultTypeReference()
  {
    return TYPE_REFERENCE;
  }

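  /**
   * Cache strategy for per-segment result caching. The cached form is the plain list built in
   * {@code prepareForCache}, and {@code computeCacheKey} folds in every query field that can
   * change the result (filter, granularity, paging spec, dimensions, metrics, virtual columns,
   * descending flag) so that only equivalent queries share cache entries.
   */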
  @Override
  public CacheStrategy<Result<SelectResultValue>, Object, SelectQuery> getCacheStrategy(final SelectQuery query)
  {
    return new CacheStrategy<Result<SelectResultValue>, Object, SelectQuery>()
    {
      private final List<DimensionSpec> dimensionSpecs =
          query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
      private final List<String> dimOutputNames = dimensionSpecs.size() > 0 ?
          Lists.transform(dimensionSpecs, DimensionSpec::getOutputName) : Collections.<String>emptyList();

      @Override
      public boolean isCacheable(SelectQuery query, boolean willMergeRunners)
      {
        return true;
      }

      @Override
      public byte[] computeCacheKey(SelectQuery query)
      {
        final DimFilter dimFilter = query.getDimensionsFilter();
        final byte[] filterBytes = dimFilter == null ? new byte[]{} : dimFilter.getCacheKey();
        final byte[] granularityBytes = query.getGranularity().getCacheKey();

        final List<DimensionSpec> dimensionSpecs =
            query.getDimensions() != null ? query.getDimensions() : Collections.<DimensionSpec>emptyList();
        final byte[][] dimensionsBytes = new byte[dimensionSpecs.size()][];
        int dimensionsBytesSize = 0;
        int index = 0;
        for (DimensionSpec dimension : dimensionSpecs) {
          dimensionsBytes[index] = dimension.getCacheKey();
          dimensionsBytesSize += dimensionsBytes[index].length;
          ++index;
        }

        final Set<String> metrics = Sets.newTreeSet();
        if (query.getMetrics() != null) {
          metrics.addAll(query.getMetrics());
        }

        final byte[][] metricBytes = new byte[metrics.size()][];
        int metricBytesSize = 0;
        index = 0;
        for (String metric : metrics) {
          metricBytes[index] = StringUtils.toUtf8(metric);
          metricBytesSize += metricBytes[index].length;
          ++index;
        }

        final byte[] virtualColumnsCacheKey = query.getVirtualColumns().getCacheKey();
        final byte isDescendingByte = query.isDescending() ? (byte) 1 : 0;

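        // Key layout: query type id, granularity, filter, paging spec, descending flag,
        // then dimensions, metrics, and virtual columns. The leading "2" in the allocation
        // accounts for the two single bytes: SELECT_QUERY and isDescendingByte.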
        final ByteBuffer queryCacheKey = ByteBuffer
            .allocate(
                2
                + granularityBytes.length
                + filterBytes.length
                + query.getPagingSpec().getCacheKey().length
                + dimensionsBytesSize
                + metricBytesSize
                + virtualColumnsCacheKey.length
            )
            .put(SELECT_QUERY)
            .put(granularityBytes)
            .put(filterBytes)
            .put(query.getPagingSpec().getCacheKey())
            .put(isDescendingByte);

        for (byte[] dimensionsByte : dimensionsBytes) {
          queryCacheKey.put(dimensionsByte);
        }

        for (byte[] metricByte : metricBytes) {
          queryCacheKey.put(metricByte);
        }

        queryCacheKey.put(virtualColumnsCacheKey);

        return queryCacheKey.array();
      }

      @Override
      public TypeReference<Object> getCacheObjectClazz()
      {
        return OBJECT_TYPE_REFERENCE;
      }

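      /**
       * Serializes a result to its cached form: a list of [timestampMillis, pagingIdentifiers,
       * dimensions, metrics, events], with the dimension output names appended when the query
       * declares dimensions, so a later cache hit can re-map renamed dimensions
       * (see {@code pullFromCache}).
       */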
      @Override
      public Function<Result<SelectResultValue>, Object> prepareForCache()
      {
        return new Function<Result<SelectResultValue>, Object>()
        {
          @Override
          public Object apply(final Result<SelectResultValue> input)
          {
            if (!dimOutputNames.isEmpty()) {
              return Arrays.asList(
                  input.getTimestamp().getMillis(),
                  input.getValue().getPagingIdentifiers(),
                  input.getValue().getDimensions(),
                  input.getValue().getMetrics(),
                  input.getValue().getEvents(),
                  dimOutputNames
              );
            }
            return Arrays.asList(
                input.getTimestamp().getMillis(),
                input.getValue().getPagingIdentifiers(),
                input.getValue().getDimensions(),
                input.getValue().getMetrics(),
                input.getValue().getEvents()
            );
          }
        };
      }

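      /**
       * Rebuilds a {@code Result<SelectResultValue>} from the cached list written by
       * {@code prepareForCache}, renaming event keys wherever the cached dimension output
       * names differ from those of the current query.
       */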
      @Override
      public Function<Object, Result<SelectResultValue>> pullFromCache()
      {
        return new Function<Object, Result<SelectResultValue>>()
        {
          private final Granularity granularity = query.getGranularity();

          @Override
          public Result<SelectResultValue> apply(Object input)
          {
            List<Object> results = (List<Object>) input;
            Iterator<Object> resultIter = results.iterator();

            DateTime timestamp = granularity.toDateTime(((Number) resultIter.next()).longValue());

            Map<String, Integer> pageIdentifier = jsonMapper.convertValue(
                resultIter.next(), new TypeReference<Map<String, Integer>>() {}
            );
            Set<String> dimensionSet = jsonMapper.convertValue(
                resultIter.next(), new TypeReference<Set<String>>() {}
            );
            Set<String> metricSet = jsonMapper.convertValue(
                resultIter.next(), new TypeReference<Set<String>>() {}
            );
            List<EventHolder> eventHolders = jsonMapper.convertValue(
                resultIter.next(), new TypeReference<List<EventHolder>>() {}
            );
            // If the cached result carried dimension output names, rename any that differ
            // from the current query's output names.
            if (resultIter.hasNext()) {
              List<String> cachedOutputNames = (List<String>) resultIter.next();
              Preconditions.checkArgument(
                  cachedOutputNames.size() == dimOutputNames.size(),
                  "Cache hit but different number of dimensions??"
              );
              for (int idx = 0; idx < dimOutputNames.size(); idx++) {
                if (!cachedOutputNames.get(idx).equals(dimOutputNames.get(idx))) {
                  // rename outputName in the EventHolder
                  for (EventHolder eventHolder : eventHolders) {
                    Object obj = eventHolder.getEvent().remove(cachedOutputNames.get(idx));
                    if (obj != null) {
                      eventHolder.getEvent().put(dimOutputNames.get(idx), obj);
                    }
                  }
                }
              }
            }

            return new Result<>(
                timestamp,
                new SelectResultValue(
                    pageIdentifier,
                    dimensionSet,
                    metricSet,
                    eventHolders
                )
            );
          }
        };
      }
    };
  }

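  /**
   * Optimizes the dimension filter, if present, before handing the query to the
   * interval-chunking decorator, so every chunked sub-query runs with the optimized filter.
   */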
  @Override
  public QueryRunner<Result<SelectResultValue>> preMergeQueryDecoration(final QueryRunner<Result<SelectResultValue>> runner)
  {
    return intervalChunkingQueryRunnerDecorator.decorate(
        new QueryRunner<Result<SelectResultValue>>()
        {
          @Override
          public Sequence<Result<SelectResultValue>> run(
              QueryPlus<Result<SelectResultValue>> queryPlus, Map<String, Object> responseContext
          )
          {
            SelectQuery selectQuery = (SelectQuery) queryPlus.getQuery();
            if (selectQuery.getDimensionsFilter() != null) {
              selectQuery = selectQuery.withDimFilter(selectQuery.getDimensionsFilter().optimize());
              queryPlus = queryPlus.withQuery(selectQuery);
            }
            return runner.run(queryPlus, responseContext);
          }
        }, this);
  }

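  /**
   * Prunes segments for paged select queries: once a query carries paging identifiers, only
   * segments that can still contribute rows at or beyond the paging threshold of their
   * granularity bucket need to be scanned. A rough sketch of the effect, assuming ascending
   * order, day granularity, and a query already paged into 2000-01-02/2000-01-03:
   *
   * <pre>{@code
   * // segments:       [01-01/01-02], [01-02/01-03], [01-03/01-04]
   * // paging key in:  01-02/01-03
   * // kept:           [01-02/01-03], [01-03/01-04]
   * }</pre>
   */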
  @Override
  public <T extends LogicalSegment> List<T> filterSegments(SelectQuery query, List<T> segments)
  {
    // at the point where this code is called, only one datasource should exist.
    final String dataSource = Iterables.getOnlyElement(query.getDataSource().getNames());

    PagingSpec pagingSpec = query.getPagingSpec();
    Map<String, Integer> paging = pagingSpec.getPagingIdentifiers();
    if (paging == null || paging.isEmpty()) {
      return segments;
    }

    final Granularity granularity = query.getGranularity();

    // A paged select query using a UnionDataSource will return pagingIdentifiers from segments in more than one
    // dataSource which confuses subsequent queries and causes a failure. To avoid this, filter only the paging keys
    // that are applicable to this dataSource so that each dataSource in a union query gets the appropriate keys.
    final Iterable<String> filteredPagingKeys = Iterables.filter(
        paging.keySet(), new Predicate<String>()
        {
          @Override
          public boolean apply(String input)
          {
            return DataSegmentUtils.valueOf(dataSource, input) != null;
          }
        }
    );

    List<Interval> intervals = Lists.newArrayList(
        Iterables.transform(filteredPagingKeys, DataSegmentUtils.INTERVAL_EXTRACTOR(dataSource))
    );
    Collections.sort(
        intervals, query.isDescending() ? Comparators.intervalsByEndThenStart()
                                        : Comparators.intervalsByStartThenEnd()
    );

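    // For each granularity bucket, remember the most permissive paging threshold seen:
    // the maximum interval end when descending, the minimum interval start when ascending.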
    TreeMap<Long, Long> granularThresholds = Maps.newTreeMap();
    for (Interval interval : intervals) {
      if (query.isDescending()) {
        long granularEnd = granularity.bucketStart(interval.getEnd()).getMillis();
        Long currentEnd = granularThresholds.get(granularEnd);
        if (currentEnd == null || interval.getEndMillis() > currentEnd) {
          granularThresholds.put(granularEnd, interval.getEndMillis());
        }
      } else {
        long granularStart = granularity.bucketStart(interval.getStart()).getMillis();
        Long currentStart = granularThresholds.get(granularStart);
        if (currentStart == null || interval.getStartMillis() < currentStart) {
          granularThresholds.put(granularStart, interval.getStartMillis());
        }
      }
    }

    List<T> queryIntervals = Lists.newArrayList(segments);

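    // Drop segments that cannot contain rows at or beyond the paging threshold of their
    // granularity bucket (in scan order).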
    Iterator<T> it = queryIntervals.iterator();
    if (query.isDescending()) {
      while (it.hasNext()) {
        Interval interval = it.next().getInterval();
        Map.Entry<Long, Long> ceiling = granularThresholds.ceilingEntry(granularity.bucketStart(interval.getEnd()).getMillis());
        if (ceiling == null || interval.getStartMillis() >= ceiling.getValue()) {
          it.remove();
        }
      }
    } else {
      while (it.hasNext()) {
        Interval interval = it.next().getInterval();
        Map.Entry<Long, Long> floor = granularThresholds.floorEntry(granularity.bucketStart(interval.getStart()).getMillis());
        if (floor == null || interval.getEndMillis() <= floor.getValue()) {
          it.remove();
        }
      }
    }
    return queryIntervals;
  }
}