All downloads are free. The search and download features use the official Maven repository.

io.druid.benchmark.query.SelectBenchmark Maven / Gradle / Ivy

/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.benchmark.query;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Files;
import io.druid.benchmark.datagen.BenchmarkDataGenerator;
import io.druid.benchmark.datagen.BenchmarkSchemaInfo;
import io.druid.benchmark.datagen.BenchmarkSchemas;
import io.druid.concurrent.Execs;
import io.druid.data.input.InputRow;
import io.druid.data.input.Row;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.hll.HyperLogLogHash;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.java.util.common.guava.Sequence;
import io.druid.java.util.common.guava.Sequences;
import io.druid.java.util.common.logger.Logger;
import io.druid.query.Druids;
import io.druid.query.FinalizeResultsQueryRunner;
import io.druid.query.Query;
import io.druid.query.QueryRunner;
import io.druid.query.QueryRunnerFactory;
import io.druid.query.QueryToolChest;
import io.druid.query.Result;
import io.druid.query.TableDataSource;
import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.select.EventHolder;
import io.druid.query.select.PagingSpec;
import io.druid.query.select.SelectQuery;
import io.druid.query.select.SelectQueryConfig;
import io.druid.query.select.SelectQueryEngine;
import io.druid.query.select.SelectQueryQueryToolChest;
import io.druid.query.select.SelectQueryRunnerFactory;
import io.druid.query.select.SelectResultValue;
import io.druid.query.spec.MultipleIntervalSegmentSpec;
import io.druid.query.spec.QuerySegmentSpec;
import io.druid.segment.IncrementalIndexSegment;
import io.druid.segment.IndexIO;
import io.druid.segment.IndexMergerV9;
import io.druid.segment.IndexSpec;
import io.druid.segment.QueryableIndex;
import io.druid.segment.QueryableIndexSegment;
import io.druid.segment.column.ColumnConfig;
import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IncrementalIndexSchema;
import io.druid.segment.incremental.OnheapIncrementalIndex;
import io.druid.segment.serde.ComplexMetrics;
import org.apache.commons.io.FileUtils;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;

@State(Scope.Benchmark)
@Fork(jvmArgsPrepend = "-server", value = 1)
@Warmup(iterations = 10)
@Measurement(iterations = 25)
public class SelectBenchmark
{
  @Param({"1"})
  private int numSegments;

  @Param({"25000"})
  private int rowsPerSegment;

  @Param({"basic.A"})
  private String schemaAndQuery;

  @Param({"1000"})
  private int pagingThreshold;

  private static final Logger log = new Logger(SelectBenchmark.class);
  private static final int RNG_SEED = 9999;
  private static final IndexMergerV9 INDEX_MERGER_V9;
  private static final IndexIO INDEX_IO;
  public static final ObjectMapper JSON_MAPPER;

  private List incIndexes;
  private List qIndexes;

  private QueryRunnerFactory factory;

  private BenchmarkSchemaInfo schemaInfo;
  private Druids.SelectQueryBuilder queryBuilder;
  private SelectQuery query;
  private File tmpDir;

  private ExecutorService executorService;

  static {
    JSON_MAPPER = new DefaultObjectMapper();
    INDEX_IO = new IndexIO(
        JSON_MAPPER,
        new ColumnConfig()
        {
          @Override
          public int columnCacheSizeBytes()
          {
            return 0;
          }
        }
    );
    INDEX_MERGER_V9 = new IndexMergerV9(JSON_MAPPER, INDEX_IO);
  }

  private static final Map> SCHEMA_QUERY_MAP = new LinkedHashMap<>();

  private void setupQueries()
  {
    // queries for the basic schema
    Map basicQueries = new LinkedHashMap<>();
    BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");

    { // basic.A
      QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));

      Druids.SelectQueryBuilder queryBuilderA =
          Druids.newSelectQueryBuilder()
                .dataSource(new TableDataSource("blah"))
                .dimensionSpecs(DefaultDimensionSpec.toSpec(Arrays.asList()))
                .metrics(Arrays.asList())
                .intervals(intervalSpec)
                .granularity(Granularities.ALL)
                .descending(false);

      basicQueries.put("A", queryBuilderA);
    }

    SCHEMA_QUERY_MAP.put("basic", basicQueries);
  }

  @Setup
  public void setup() throws IOException
  {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());

    if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
      ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
    }

    executorService = Execs.multiThreaded(numSegments, "SelectThreadPool");

    setupQueries();

    String[] schemaQuery = schemaAndQuery.split("\\.");
    String schemaName = schemaQuery[0];
    String queryName = schemaQuery[1];

    schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schemaName);
    queryBuilder = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);
    queryBuilder.pagingSpec(PagingSpec.newSpec(pagingThreshold));
    query = queryBuilder.build();

    incIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
      BenchmarkDataGenerator gen = new BenchmarkDataGenerator(
          schemaInfo.getColumnSchemas(),
          RNG_SEED + i,
          schemaInfo.getDataInterval(),
          rowsPerSegment
      );

      IncrementalIndex incIndex = makeIncIndex();

      for (int j = 0; j < rowsPerSegment; j++) {
        InputRow row = gen.nextRow();
        if (j % 10000 == 0) {
          log.info(j + " rows generated.");
        }
        incIndex.add(row);
      }
      incIndexes.add(incIndex);
    }

    tmpDir = Files.createTempDir();
    log.info("Using temp dir: " + tmpDir.getAbsolutePath());

    qIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
      File indexFile = INDEX_MERGER_V9.persist(
          incIndexes.get(i),
          tmpDir,
          new IndexSpec()
      );
      QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
      qIndexes.add(qIndex);
    }

    final Supplier selectConfigSupplier = Suppliers.ofInstance(new SelectQueryConfig(true));

    factory = new SelectQueryRunnerFactory(
        new SelectQueryQueryToolChest(
            JSON_MAPPER,
            QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator(),
            selectConfigSupplier
        ),
        new SelectQueryEngine(selectConfigSupplier),
        QueryBenchmarkUtil.NOOP_QUERYWATCHER
    );
  }

  @TearDown
  public void tearDown() throws IOException
  {
    FileUtils.deleteDirectory(tmpDir);
  }

  private IncrementalIndex makeIncIndex()
  {
    return new OnheapIncrementalIndex(
        new IncrementalIndexSchema.Builder()
            .withQueryGranularity(Granularities.NONE)
            .withMetrics(schemaInfo.getAggsArray())
            .withDimensionsSpec(new DimensionsSpec(null, null, null))
            .build(),
        true,
        false,
        true,
        rowsPerSegment
    );
  }

  private static  List runQuery(QueryRunnerFactory factory, QueryRunner runner, Query query)
  {

    QueryToolChest toolChest = factory.getToolchest();
    QueryRunner theRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(toolChest.preMergeQueryDecoration(runner)),
        toolChest
    );

    Sequence queryResult = theRunner.run(query, Maps.newHashMap());
    return Sequences.toList(queryResult, Lists.newArrayList());
  }

  // don't run this benchmark with a query that doesn't use QueryGranularities.ALL,
  // this pagination function probably doesn't work correctly in that case.
  private SelectQuery incrementQueryPagination(SelectQuery query, SelectResultValue prevResult)
  {
    Map pagingIdentifiers = prevResult.getPagingIdentifiers();
    Map newPagingIdentifers = new HashMap<>();

    for (String segmentId : pagingIdentifiers.keySet()) {
      int newOffset = pagingIdentifiers.get(segmentId) + 1;
      newPagingIdentifers.put(segmentId, newOffset);
    }

    return query.withPagingSpec(new PagingSpec(newPagingIdentifers, pagingThreshold));
  }

  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void queryIncrementalIndex(Blackhole blackhole) throws Exception
  {
    SelectQuery queryCopy = query.withPagingSpec(PagingSpec.newSpec(pagingThreshold));

    String segmentId = "incIndex";
    QueryRunner runner = QueryBenchmarkUtil.makeQueryRunner(
        factory,
        segmentId,
        new IncrementalIndexSegment(incIndexes.get(0), segmentId)
    );

    boolean done = false;
    while (!done) {
      List> results = SelectBenchmark.runQuery(factory, runner, queryCopy);
      SelectResultValue result = results.get(0).getValue();
      if (result.getEvents().size() == 0) {
        done = true;
      } else {
        for (EventHolder eh : result.getEvents()) {
          blackhole.consume(eh);
        }
        queryCopy = incrementQueryPagination(queryCopy, result);
      }
    }
  }


  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void queryQueryableIndex(Blackhole blackhole) throws Exception
  {
    SelectQuery queryCopy = query.withPagingSpec(PagingSpec.newSpec(pagingThreshold));

    String segmentId = "qIndex";
    QueryRunner> runner = QueryBenchmarkUtil.makeQueryRunner(
        factory,
        segmentId,
        new QueryableIndexSegment(segmentId, qIndexes.get(0))
    );

    boolean done = false;
    while (!done) {
      List> results = SelectBenchmark.runQuery(factory, runner, queryCopy);
      SelectResultValue result = results.get(0).getValue();
      if (result.getEvents().size() == 0) {
        done = true;
      } else {
        for (EventHolder eh : result.getEvents()) {
          blackhole.consume(eh);
        }
        queryCopy = incrementQueryPagination(queryCopy, result);
      }
    }
  }


  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public void queryMultiQueryableIndex(Blackhole blackhole) throws Exception
  {
    SelectQuery queryCopy = query.withPagingSpec(PagingSpec.newSpec(pagingThreshold));

    String segmentName;
    List>> singleSegmentRunners = Lists.newArrayList();
    QueryToolChest toolChest = factory.getToolchest();
    for (int i = 0; i < numSegments; i++) {
      segmentName = "qIndex" + i;
      QueryRunner> runner = QueryBenchmarkUtil.makeQueryRunner(
          factory,
          segmentName,
          new QueryableIndexSegment(segmentName, qIndexes.get(i))
      );
      singleSegmentRunners.add(toolChest.preMergeQueryDecoration(runner));
    }

    QueryRunner theRunner = toolChest.postMergeQueryDecoration(
        new FinalizeResultsQueryRunner<>(
            toolChest.mergeResults(factory.mergeRunners(executorService, singleSegmentRunners)),
            toolChest
        )
    );


    boolean done = false;
    while (!done) {
      Sequence> queryResult = theRunner.run(queryCopy, Maps.newHashMap());
      List> results = Sequences.toList(queryResult, Lists.>newArrayList());
      
      SelectResultValue result = results.get(0).getValue();

      if (result.getEvents().size() == 0) {
        done = true;
      } else {
        for (EventHolder eh : result.getEvents()) {
          blackhole.consume(eh);
        }
        queryCopy = incrementQueryPagination(queryCopy, result);
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy