All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pinot.perf.BenchmarkRangeIndex Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pinot.perf;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.function.DoubleSupplier;
import java.util.function.LongSupplier;
import java.util.stream.IntStream;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.segment.local.segment.creator.impl.inv.BitSlicedRangeIndexCreator;
import org.apache.pinot.segment.local.segment.creator.impl.inv.RangeIndexCreator;
import org.apache.pinot.segment.local.segment.index.readers.BitSlicedRangeIndexReader;
import org.apache.pinot.segment.local.segment.index.readers.RangeIndexReaderImpl;
import org.apache.pinot.segment.spi.ColumnMetadata;
import org.apache.pinot.segment.spi.index.creator.RawValueBasedInvertedIndexCreator;
import org.apache.pinot.segment.spi.index.metadata.ColumnMetadataImpl;
import org.apache.pinot.segment.spi.index.reader.RangeIndexReader;
import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.roaringbitmap.IntIterator;
import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
import org.roaringbitmap.buffer.MutableRoaringBitmap;

import static org.apache.pinot.segment.spi.V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION;


public class BenchmarkRangeIndex {

  private static final String COLUMN_NAME = "col";

  @State(Scope.Benchmark)
  public static class BaseState {
    @Param({"INT", "LONG", "FLOAT", "DOUBLE"})
    protected FieldSpec.DataType _dataType;

    @Param({
        "NORMAL(0,1)", "NORMAL(10000000,1000)",
        "EXP(0.0001)", "EXP(0.5)",
        "UNIFORM(0,100000000000)", "UNIFORM(100000000000, 100000000100)",
        "POWER(0,1000000,3)", "POWER(0,1000000000,1)"
    })
    protected String _scenario;

    @Param({"1000000", "10000000", "100000000"})
    protected int _numDocs;

    @Param("42")
    long _seed;

    protected FieldSpec _fieldSpec;

    protected File _indexDir;

    protected Object _values;

    public void setup()
        throws IOException {
      _fieldSpec = new DimensionFieldSpec(COLUMN_NAME, _dataType, true);
      _indexDir = new File(FileUtils.getTempDirectory(), "BenchmarkRangeIndex");
      FileUtils.forceMkdir(_indexDir);
      switch (_dataType) {
        case INT: {
          LongSupplier supplier = Distribution.createLongSupplier(_seed, _scenario);
          int[] values = new int[_numDocs];
          for (int i = 0; i < values.length; i++) {
            values[i] = (int) supplier.getAsLong();
          }
          _values = values;
          break;
        }
        case LONG: {
          LongSupplier supplier = Distribution.createLongSupplier(_seed, _scenario);
          long[] values = new long[_numDocs];
          for (int i = 0; i < values.length; i++) {
            values[i] = supplier.getAsLong();
          }
          _values = values;
          break;
        }
        case FLOAT: {
          DoubleSupplier supplier = Distribution.createDoubleSupplier(_seed, _scenario);
          float[] values = new float[_numDocs];
          for (int i = 0; i < values.length; i++) {
            values[i] = (float) supplier.getAsDouble();
          }
          _values = values;
          break;
        }
        case DOUBLE: {
          DoubleSupplier supplier = Distribution.createDoubleSupplier(_seed, _scenario);
          double[] values = new double[_numDocs];
          for (int i = 0; i < values.length; i++) {
            values[i] = supplier.getAsDouble();
          }
          _values = values;
          break;
        }
        default:
          throw new RuntimeException("impossible");
      }
    }

    @TearDown(Level.Trial)
    public void tearDown()
        throws IOException {
      FileUtils.deleteQuietly(_indexDir);
    }

    protected Comparable max() {
      if (_values instanceof int[]) {
        return Arrays.stream((int[]) _values).max().orElse(0);
      }
      if (_values instanceof long[]) {
        return Arrays.stream((long[]) _values).max().orElse(0);
      }
      if (_values instanceof double[]) {
        return Arrays.stream((double[]) _values).max().orElse(0);
      }
      if (_values instanceof float[]) {
        return ((float[]) _values)[IntStream.range(0, _numDocs)
            .reduce(0, (i, j) -> ((float[]) _values)[i] >= ((float[]) _values)[j] ? i : j)];
      }
      return null;
    }

    protected Comparable min() {
      if (_values instanceof int[]) {
        return Arrays.stream((int[]) _values).min().orElse(0);
      }
      if (_values instanceof long[]) {
        return Arrays.stream((long[]) _values).min().orElse(0);
      }
      if (_values instanceof double[]) {
        return Arrays.stream((double[]) _values).min().orElse(0);
      }
      if (_values instanceof float[]) {
        return ((float[]) _values)[IntStream.range(0, _numDocs)
            .reduce(0, (i, j) -> ((float[]) _values)[i] < ((float[]) _values)[j] ? i : j)];
      }
      return null;
    }
  }

  @State(Scope.Benchmark)
  public static class RangeIndexV1CreatorState extends BaseState {

    RangeIndexCreator _creator;

    @Setup(Level.Iteration)
    public void setup()
        throws IOException {
      super.setup();
      _creator = new RangeIndexCreator(_indexDir, _fieldSpec, _dataType, -1, -1, _numDocs, _numDocs);
    }
  }

  @State(Scope.Benchmark)
  public static class RangeIndexV2CreatorState extends BaseState {

    BitSlicedRangeIndexCreator _creator;

    @Setup(Level.Iteration)
    public void setup()
        throws IOException {
      super.setup();
      _creator = new BitSlicedRangeIndexCreator(_indexDir, _fieldSpec, min(), max());
    }
  }

  @State(Scope.Benchmark)
  public static abstract class QueryState extends BaseState {
    @Param({"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"})
    int _decile;

    Object _deciles;

    PinotDataBuffer _buffer;

    @Setup(Level.Iteration)
    public void setup()
        throws IOException {
      super.setup();
      try (RawValueBasedInvertedIndexCreator creator = newCreator()) {
        addValues(creator, _dataType, _values);
      }
      _buffer = PinotDataBuffer.mapReadOnlyBigEndianFile(
          new File(_indexDir, COLUMN_NAME + BITMAP_RANGE_INDEX_FILE_EXTENSION));
      computeDeciles();
    }

    @TearDown(Level.Trial)
    public void tearDown()
        throws IOException {
      _buffer.close();
      super.tearDown();
    }

    protected abstract RawValueBasedInvertedIndexCreator newCreator()
        throws IOException;

    private void computeDeciles() {
      switch (_dataType) {
        case INT: {
          int[] deciles = new int[11];
          int[] sorted = Arrays.copyOf((int[]) _values, _numDocs);
          Arrays.sort(sorted);
          for (int i = 0, d = 0; i < sorted.length; i += (sorted.length / 10)) {
            deciles[d++] = sorted[i];
          }
          deciles[10] = sorted[_numDocs - 1];
          _deciles = deciles;
          break;
        }
        case LONG: {
          long[] deciles = new long[11];
          long[] sorted = Arrays.copyOf((long[]) _values, _numDocs);
          Arrays.sort(sorted);
          for (int i = 0, d = 0; i < sorted.length; i += (sorted.length / 10)) {
            deciles[d++] = sorted[i];
          }
          deciles[10] = sorted[_numDocs - 1];
          _deciles = deciles;
          break;
        }
        case FLOAT: {
          float[] deciles = new float[11];
          float[] sorted = Arrays.copyOf((float[]) _values, _numDocs);
          Arrays.sort(sorted);
          for (int i = 0, d = 0; i < sorted.length; i += (sorted.length / 10)) {
            deciles[d++] = sorted[i];
          }
          deciles[10] = sorted[_numDocs - 1];
          _deciles = deciles;
          break;
        }
        case DOUBLE: {
          double[] deciles = new double[11];
          double[] sorted = Arrays.copyOf((double[]) _values, _numDocs);
          Arrays.sort(sorted);
          for (int i = 0, d = 0; i < sorted.length; i += (sorted.length / 10)) {
            deciles[d++] = sorted[i];
          }
          deciles[10] = sorted[_numDocs - 1];
          _deciles = deciles;
          break;
        }
        default:
          throw new RuntimeException("impossible");
      }
    }
  }

  @State(Scope.Benchmark)
  public static class RangeIndexV1State extends QueryState {

    RangeIndexReader _reader;

    @Setup(Level.Trial)
    public void setup()
        throws IOException {
      super.setup();
      _reader = new RangeIndexReaderImpl(_buffer);
    }

    @Override
    protected RawValueBasedInvertedIndexCreator newCreator()
        throws IOException {
      return new RangeIndexCreator(_indexDir, _fieldSpec, _dataType, -1, -1, _numDocs, _numDocs);
    }
  }

  @State(Scope.Benchmark)
  public static class RangeIndexV2State extends QueryState {

    RangeIndexReader _reader;

    @Setup(Level.Trial)
    public void setup()
        throws IOException {
      super.setup();
      _reader = new BitSlicedRangeIndexReader(_buffer, metadata());
    }

    private ColumnMetadata metadata() {
      return new ColumnMetadataImpl.Builder()
          .setFieldSpec(_fieldSpec)
          .setTotalDocs(_numDocs)
          .setHasDictionary(false)
          .setMaxValue(max())
          .setMinValue(min())
          .build();
    }

    @Override
    protected RawValueBasedInvertedIndexCreator newCreator() {
      return new BitSlicedRangeIndexCreator(_indexDir, _fieldSpec, min(), max());
    }
  }

  @Benchmark
  @BenchmarkMode(Mode.SingleShotTime)
  public void createV1(RangeIndexV1CreatorState state)
      throws IOException {
    try (RangeIndexCreator creator = state._creator) {
      addValues(creator, state._dataType, state._values);
    }
  }

  @Benchmark
  @BenchmarkMode(Mode.SingleShotTime)
  public void createV2(RangeIndexV2CreatorState state)
      throws IOException {
    try (BitSlicedRangeIndexCreator creator = state._creator) {
      addValues(creator, state._dataType, state._values);
    }
  }

  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  public ImmutableRoaringBitmap queryV1(RangeIndexV1State state) {
    return query(state._reader, state._dataType, state._decile, state._deciles, state._values);
  }

  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  public ImmutableRoaringBitmap queryV2(RangeIndexV2State state) {
    return query(state._reader, state._dataType, state._decile, state._deciles, state._values);
  }

  private static ImmutableRoaringBitmap query(RangeIndexReader reader,
      FieldSpec.DataType dataType, int decile, Object deciles, Object values) {
    switch (dataType) {
      case INT: {
        int[] ints = (int[]) deciles;
        ImmutableRoaringBitmap matching = reader.getMatchingDocIds(ints[decile], ints[decile + 1]);
        ImmutableRoaringBitmap partial = reader.getPartiallyMatchingDocIds(ints[decile], ints[decile + 1]);
        // emulate SVScanDocIdIterator without needing to set it up
        if (partial != null) {
          int[] intValues = (int[]) values;
          int min = ints[decile];
          int max = ints[decile + 1];
          MutableRoaringBitmap result = new MutableRoaringBitmap();
          IntIterator docIdIterator = partial.getIntIterator();
          while (docIdIterator.hasNext()) {
            int next = docIdIterator.next();
            if (intValues[next] >= min && intValues[next] <= max) {
              result.add(next);
            }
          }
          if (matching != null) {
            result.or(matching);
          }
          return result;
        }
        return matching;
      }
      case LONG: {
        long[] longs = (long[]) deciles;
        ImmutableRoaringBitmap matching = reader.getMatchingDocIds(longs[decile], longs[decile + 1]);
        ImmutableRoaringBitmap partial = reader.getPartiallyMatchingDocIds(longs[decile], longs[decile + 1]);
        // emulate SVScanDocIdIterator without needing to set it up
        if (partial != null) {
          long[] longValues = (long[]) values;
          long min = longs[decile];
          long max = longs[decile + 1];
          MutableRoaringBitmap result = new MutableRoaringBitmap();
          IntIterator docIdIterator = partial.getIntIterator();
          while (docIdIterator.hasNext()) {
            int next = docIdIterator.next();
            if (longValues[next] >= min && longValues[next] <= max) {
              result.add(next);
            }
          }
          if (matching != null) {
            result.or(matching);
          }
          return result;
        }
        return matching;
      }
      case FLOAT: {
        float[] floats = (float[]) deciles;
        ImmutableRoaringBitmap matching = reader.getMatchingDocIds(floats[decile], floats[decile + 1]);
        ImmutableRoaringBitmap partial = reader.getPartiallyMatchingDocIds(floats[decile], floats[decile + 1]);
        // emulate SVScanDocIdIterator without needing to set it up
        if (partial != null) {
          float[] floatValues = (float[]) values;
          float min = floats[decile];
          float max = floats[decile + 1];
          MutableRoaringBitmap result = new MutableRoaringBitmap();
          IntIterator docIdIterator = partial.getIntIterator();
          while (docIdIterator.hasNext()) {
            int next = docIdIterator.next();
            if (floatValues[next] >= min && floatValues[next] <= max) {
              result.add(next);
            }
          }
          if (matching != null) {
            result.or(matching);
          }
          return result;
        }
        return matching;
      }
      case DOUBLE: {
        double[] doubles = (double[]) deciles;
        ImmutableRoaringBitmap matching = reader.getMatchingDocIds(doubles[decile], doubles[decile + 1]);
        ImmutableRoaringBitmap partial = reader.getPartiallyMatchingDocIds(doubles[decile], doubles[decile + 1]);
        // emulate SVScanDocIdIterator without needing to set it up
        if (partial != null) {
          double[] doubleValues = (double[]) values;
          double min = doubles[decile];
          double max = doubles[decile + 1];
          MutableRoaringBitmap result = new MutableRoaringBitmap();
          IntIterator docIdIterator = partial.getIntIterator();
          while (docIdIterator.hasNext()) {
            int next = docIdIterator.next();
            if (doubleValues[next] >= min && doubleValues[next] <= max) {
              result.add(next);
            }
          }
          if (matching != null) {
            result.or(matching);
          }
          return result;
        }
        return matching;
      }
      default:
        throw new RuntimeException("impossible");
    }
  }

  private static void addValues(RawValueBasedInvertedIndexCreator creator, FieldSpec.DataType dataType, Object values)
      throws IOException {
    switch (dataType) {
      case INT:
        for (int value : (int[]) values) {
          creator.add(value);
        }
        break;
      case LONG:
        for (long value : (long[]) values) {
          creator.add(value);
        }
        break;
      case FLOAT:
        for (float value : (float[]) values) {
          creator.add(value);
        }
        break;
      case DOUBLE:
        for (double value : (double[]) values) {
          creator.add(value);
        }
        break;
      default:
        throw new RuntimeException("won't happen");
    }
    creator.seal();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy