All downloads are free. The search and download features use the official Maven repository.

org.apache.druid.segment.QueryableIndexIndexableAdapter Maven / Gradle / Ivy

There is a newer version: 30.0.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.segment;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.column.BaseColumn;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnFormat;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.data.BitmapValues;
import org.apache.druid.segment.data.CloseableIndexed;
import org.apache.druid.segment.data.ImmutableBitmapValues;
import org.apache.druid.segment.data.IndexedIterable;
import org.apache.druid.segment.index.semantic.DictionaryEncodedValueIndex;
import org.apache.druid.segment.nested.NestedCommonFormatColumn;
import org.apache.druid.segment.nested.NestedDataComplexTypeSerde;
import org.apache.druid.segment.nested.SortedValueDictionary;
import org.apache.druid.segment.selector.settable.SettableColumnValueSelector;
import org.apache.druid.segment.selector.settable.SettableLongColumnValueSelector;
import org.apache.druid.utils.CloseableUtils;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

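/**
 * {@link IndexableAdapter} implementation backed by a {@link QueryableIndex}. Names, capabilities, formats and
 * bitmap values are read from the wrapped index and its column holders, and {@link #getRows()} returns an
 * iterator over the rows of the wrapped index.
 */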
public class QueryableIndexIndexableAdapter implements IndexableAdapter
{
  /** Row count of {@link #input}, read once at construction. */
  private final int numRows;
  /** The wrapped index that every method of this adapter reads from. */
  private final QueryableIndex input;
  /** Immutable copy of the dimension names reported by {@link #input} at construction. */
  private final ImmutableList<String> availableDimensions;
  /** Metadata object obtained from {@link #input} at construction. */
  private final Metadata metadata;

  /**
   * Creates an adapter over the given index. The row count, the available dimension names and the metadata are
   * read from the index here and kept for the lifetime of the adapter.
   *
   * @param input the index to adapt
   */
  public QueryableIndexIndexableAdapter(QueryableIndex input)
  {
    this.input = input;
    this.numRows = input.getNumRows();
    // Typed as ImmutableList<String> (instead of a raw ImmutableList) so the element type is compiler-checked.
    this.availableDimensions = ImmutableList.copyOf(input.getAvailableDimensions());
    this.metadata = input.getMetadata();
  }

  /**
   * Returns the index this adapter was constructed with.
   *
   * @return the wrapped {@link QueryableIndex}
   */
  public QueryableIndex getQueryableIndex()
  {
    return this.input;
  }

  /**
   * Returns the data interval reported by the wrapped index.
   *
   * @return the result of {@code getDataInterval()} on the wrapped index
   */
  @Override
  public Interval getDataInterval()
  {
    final Interval dataInterval = input.getDataInterval();
    return dataInterval;
  }

  /**
   * Returns the number of rows, as captured from the wrapped index when this adapter was constructed.
   *
   * @return the cached row count
   */
  @Override
  public int getNumRows()
  {
    return this.numRows;
  }

  /**
   * Returns the dimension names captured from the wrapped index when this adapter was constructed.
   *
   * @return the immutable list of dimension names held by this adapter
   */
  @Override
  public List<String> getDimensionNames()
  {
    return availableDimensions;
  }

  /**
   * Returns the names of all columns of the wrapped index that are not listed by {@link #getDimensionNames()}.
   * The result keeps the order in which the wrapped index reports its column names.
   *
   * @return an immutable list of the non-dimension column names
   */
  @Override
  public List<String> getMetricNames()
  {
    // Linked set keeps the column order of the index; Sets.difference iterates in the order of its first argument.
    final Set<String> columns = Sets.newLinkedHashSet(input.getColumnNames());
    final Set<String> dimensions = Sets.newHashSet(getDimensionNames());
    return ImmutableList.copyOf(Sets.difference(columns, dimensions));
  }

  @Nullable
  @Override
  public > CloseableIndexed getDimValueLookup(String dimension)
  {
    final ColumnHolder columnHolder = input.getColumnHolder(dimension);

    if (columnHolder == null) {
      return null;
    }

    final BaseColumn col = columnHolder.getColumn();

    if (!(col instanceof DictionaryEncodedColumn)) {
      // this shouldn't happen, but if it does, try to close to prevent a leak
      try {
        col.close();
      }
      catch (IOException e) {
        throw new RuntimeException(e);
      }
      return null;
    }

    @SuppressWarnings("unchecked")
    DictionaryEncodedColumn dict = (DictionaryEncodedColumn) col;

    return new CloseableIndexed()
    {

      @Override
      public int size()
      {
        return dict.getCardinality();
      }

      @Override
      public T get(int index)
      {
        return dict.lookupName(index);
      }

      @Override
      public int indexOf(T value)
      {
        return dict.lookupId(value);
      }

      @Override
      public Iterator iterator()
      {
        return IndexedIterable.create(this).iterator();
      }

      @Override
      public void inspectRuntimeShape(RuntimeShapeInspector inspector)
      {
        inspector.visit("dict", dict);
      }

      @Override
      public void close() throws IOException
      {
        dict.close();
      }
    };
  }

  /**
   * Builds a {@link NestedColumnMergable} for a nested column of the wrapped index.
   *
   * <p>The mergeable is built from the column's string, long, double and array dictionaries and its field type
   * information. A column obtained from the holder that is not a {@link NestedCommonFormatColumn} is closed before
   * returning.
   *
   * @param columnName name of the column to look up
   * @return the mergeable, or {@code null} if the column does not exist, its format is neither
   *         {@link NestedCommonFormatColumn.Format} nor {@link NestedDataComplexTypeSerde.NestedColumnFormatV4},
   *         or the column itself is not a {@link NestedCommonFormatColumn}
   * @throws RuntimeException wrapping the {@link IOException} if closing an unexpected column fails
   */
  @Nullable
  @Override
  public NestedColumnMergable getNestedColumnMergeables(String columnName)
  {
    final ColumnHolder holder = input.getColumnHolder(columnName);
    if (holder == null) {
      return null;
    }

    final ColumnFormat columnFormat = holder.getColumnFormat();
    final boolean isCommonFormat = columnFormat instanceof NestedCommonFormatColumn.Format;
    final boolean isV4Format = columnFormat instanceof NestedDataComplexTypeSerde.NestedColumnFormatV4;
    if (!isCommonFormat && !isV4Format) {
      return null;
    }

    final BaseColumn baseColumn = holder.getColumn();
    if (!(baseColumn instanceof NestedCommonFormatColumn)) {
      // not expected given the format check above; close the column anyway so it is not leaked
      try {
        baseColumn.close();
      }
      catch (IOException e) {
        throw new RuntimeException(e);
      }
      return null;
    }

    final NestedCommonFormatColumn nested = (NestedCommonFormatColumn) baseColumn;
    final SortedValueDictionary dictionaries = new SortedValueDictionary(
        nested.getStringDictionary(),
        nested.getLongDictionary(),
        nested.getDoubleDictionary(),
        nested.getArrayDictionary(),
        nested
    );
    return new NestedColumnMergable(
        dictionaries,
        nested.getFieldTypeInfo(),
        ColumnType.NESTED_DATA.equals(nested.getLogicalType()),
        false,
        null
    );
  }

  /**
   * Creates a new row iterator over the wrapped index. Every call constructs a fresh {@link RowIteratorImpl}.
   *
   * @return a newly constructed iterator
   */
  @Override
  public RowIteratorImpl getRows()
  {
    final RowIteratorImpl rows = new RowIteratorImpl();
    return rows;
  }

  /**
   * On {@link #moveToNext()} and {@link #mark()}, this class copies all column values into a set of {@link
   * SettableColumnValueSelector} instances. An alternative approach would be to save only the offset in the column
   * and use the same column value selectors as in {@link QueryableIndexStorageAdapter}. The approach with "caching"
   * in {@link SettableColumnValueSelector}s was chosen for two reasons:
   *  1) It avoids re-reading column values from the serialized format multiple times (because they are accessed
   *     multiple times). By comparison, this is not a factor for {@link QueryableIndexStorageAdapter}, because
   *     during query processing column values are usually accessed just once per offset, if the aggregator or
   *     query runner is written sanely. Avoiding re-reads is especially important for object columns, because
   *     object deserialization is potentially expensive.
   *  2) {@link #mark()} is "lookbehind"-style functionality; in a compressed columnar format, it would cause
   *     repeated, excessive decompressions at block boundaries. E.g., see {@link
   *     org.apache.druid.segment.data.BlockLayoutColumnarDoublesSupplier} and similar classes. Some special support
   *     for "lookbehind" could be added to these classes, but that would add significant extra complexity.
   */
  class RowIteratorImpl implements TransformableRowIterator
  {
    /** Closed by {@link #close()}; also handed to {@link #columnCache} at construction. */
    private final Closer closer;
    /** Column cache over the wrapped index, used by the column selector factory built in the constructor. */
    private final ColumnCache columnCache;

    /** Current read position; advanced by {@link #moveToNext()}. */
    private final SimpleAscendingOffset offset = new SimpleAscendingOffset(numRows);
    /** Largest offset {@link #moveToNext()} will advance to: {@code numRows - 1}. */
    private final int maxValidOffset = numRows - 1;

    // Selectors that read column values of the wrapped index at the current offset.
    private final ColumnValueSelector offsetTimestampSelector;
    private final ColumnValueSelector[] offsetDimensionValueSelectors;
    private final ColumnValueSelector[] offsetMetricSelectors;

    // Settable copies of the current row's values, filled by setRowPointerValues() and exposed via rowPointer.
    private final SettableLongColumnValueSelector rowTimestampSelector = new SettableLongColumnValueSelector();
    private final SettableColumnValueSelector[] rowDimensionValueSelectors;
    private final SettableColumnValueSelector[] rowMetricSelectors;
    private final RowPointer rowPointer;

    // Settable copies of the marked row's values, filled by mark() and exposed via markedRowPointer.
    private final SettableLongColumnValueSelector markedTimestampSelector = new SettableLongColumnValueSelector();
    private final SettableColumnValueSelector[] markedDimensionValueSelectors;
    private final SettableColumnValueSelector[] markedMetricSelectors;
    private final TimeAndDimsPointer markedRowPointer;

    /** True until the first call to {@link #moveToNext()}, which reads the initial offset without incrementing. */
    boolean first = true;

    /**
     * Builds the offset-bound selectors for the time column, every dimension handler of the wrapped index and
     * every metric name, plus two parallel sets of settable selectors (current row and marked row) that the
     * pointers returned by {@link #getPointer()} and {@link #getMarkedPointer()} read from.
     */
    RowIteratorImpl()
    {
      this.closer = Closer.create();
      this.columnCache = new ColumnCache(input, closer);

      final ColumnSelectorFactory columnSelectorFactory = new QueryableIndexColumnSelectorFactory(
          VirtualColumns.EMPTY,
          false,
          offset,
          columnCache
      );

      offsetTimestampSelector = columnSelectorFactory.makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);

      // The element type must be declared: the method references below only resolve on a typed stream.
      final List<DimensionHandler> dimensionHandlers = new ArrayList<>(input.getDimensionHandlers().values());

      offsetDimensionValueSelectors = dimensionHandlers
          .stream()
          .map(DimensionHandler::getDimensionName)
          .map(columnSelectorFactory::makeColumnValueSelector)
          .toArray(ColumnValueSelector[]::new);

      // Declared as List<String> so the metric-name lambdas below can pass each name to getColumnHolder(String).
      final List<String> metricNames = getMetricNames();
      offsetMetricSelectors =
          metricNames.stream().map(columnSelectorFactory::makeColumnValueSelector).toArray(ColumnValueSelector[]::new);

      rowDimensionValueSelectors = dimensionHandlers
          .stream()
          .map(DimensionHandler::makeNewSettableEncodedValueSelector)
          .toArray(SettableColumnValueSelector[]::new);
      rowMetricSelectors = metricNames
          .stream()
          .map(metric -> input.getColumnHolder(metric).makeNewSettableColumnValueSelector())
          .toArray(SettableColumnValueSelector[]::new);

      rowPointer = new RowPointer(
          rowTimestampSelector,
          rowDimensionValueSelectors,
          dimensionHandlers,
          rowMetricSelectors,
          metricNames,
          offset::getOffset
      );

      markedDimensionValueSelectors = dimensionHandlers
          .stream()
          .map(DimensionHandler::makeNewSettableEncodedValueSelector)
          .toArray(SettableColumnValueSelector[]::new);
      markedMetricSelectors = metricNames
          .stream()
          .map(metric -> input.getColumnHolder(metric).makeNewSettableColumnValueSelector())
          .toArray(SettableColumnValueSelector[]::new);
      markedRowPointer = new TimeAndDimsPointer(
          markedTimestampSelector,
          markedDimensionValueSelectors,
          dimensionHandlers,
          markedMetricSelectors,
          metricNames
      );
    }

    /**
     * Returns the pointer backed by the marked-row selectors, which are filled by {@link #mark()}.
     *
     * @return the marked-row pointer; the same instance on every call
     */
    @Override
    public TimeAndDimsPointer getMarkedPointer()
    {
      return this.markedRowPointer;
    }

    /**
     * Reports whether the current row differs from the marked row, by comparing the marked pointer with the
     * current row pointer.
     *
     * When a segment is produced using "rollup", each row is guaranteed to have different dimensions, so this
     * method could be optimized to have just "return true;" body.
     * TODO record in the segment metadata if each row has different dims or not, to be able to apply this
     *  optimization.
     *
     * @return {@code true} if the comparison of the marked pointer to the current pointer is non-zero
     */
    @Override
    public boolean hasTimeAndDimsChangedSinceMark()
    {
      final int comparison = markedRowPointer.compareTo(rowPointer);
      return comparison != 0;
    }

    /**
     * Closes this iterator's {@link Closer} through {@link CloseableUtils#closeAndWrapExceptions}.
     */
    @Override
    public void close()
    {
      CloseableUtils.closeAndWrapExceptions(this.closer);
    }

    /**
     * Returns the pointer backed by the current-row selectors, which are refreshed by {@link #moveToNext()}.
     *
     * @return the current-row pointer; the same instance on every call
     */
    @Override
    public RowPointer getPointer()
    {
      return this.rowPointer;
    }

    /**
     * Advances to the next row and copies its values into the current-row selectors.
     *
     * <p>The first call does not increment the offset; it reads the row at the initial offset if that offset is
     * within bounds. Later calls increment the offset only while it is below the last valid offset.
     *
     * @return {@code true} if a row was loaded, {@code false} if there are no more rows (in which case the
     *         current-row selectors are left untouched)
     */
    @Override
    public boolean moveToNext()
    {
      if (first) {
        first = false;
        if (!offset.withinBounds()) {
          return false;
        }
      } else {
        if (offset.getOffset() >= maxValidOffset) {
          // Don't update rowPointer's values here, to conform to the RowIterator.getPointer() specification.
          return false;
        }
        offset.increment();
      }
      setRowPointerValues();
      return true;
    }

    /**
     * Copies the timestamp, every dimension value and every metric value at the current offset into the
     * current-row settable selectors.
     */
    private void setRowPointerValues()
    {
      rowTimestampSelector.setValue(offsetTimestampSelector.getLong());

      int dim = 0;
      while (dim < offsetDimensionValueSelectors.length) {
        rowDimensionValueSelectors[dim].setValueFrom(offsetDimensionValueSelectors[dim]);
        dim++;
      }

      int metric = 0;
      while (metric < offsetMetricSelectors.length) {
        rowMetricSelectors[metric].setValueFrom(offsetMetricSelectors[metric]);
        metric++;
      }
    }

    /**
     * Copies the timestamp, dimension values and metric values of the current row into the marked-row
     * selectors, so that {@link #getMarkedPointer()} reflects the row that was current when this was called.
     */
    @Override
    public void mark()
    {
      markedTimestampSelector.setValue(rowTimestampSelector.getLong());

      int dim = 0;
      while (dim < rowDimensionValueSelectors.length) {
        markedDimensionValueSelectors[dim].setValueFrom(rowDimensionValueSelectors[dim]);
        dim++;
      }

      int metric = 0;
      while (metric < rowMetricSelectors.length) {
        markedMetricSelectors[metric].setValueFrom(rowMetricSelectors[metric]);
        metric++;
      }
    }
  }

  /**
   * Returns the capabilities of the named column, as reported by its column holder in the wrapped index.
   * The holder is dereferenced without a null check.
   *
   * @param column name of the column
   * @return the capabilities reported by the column holder
   */
  @Override
  public ColumnCapabilities getCapabilities(String column)
  {
    final ColumnHolder holder = input.getColumnHolder(column);
    return holder.getCapabilities();
  }

  /**
   * Returns the format of the named column, as reported by its column holder in the wrapped index.
   * The holder is dereferenced without a null check.
   *
   * @param column name of the column
   * @return the column format reported by the column holder
   */
  @Override
  public ColumnFormat getFormat(String column)
  {
    final ColumnHolder holder = input.getColumnHolder(column);
    return holder.getColumnFormat();
  }

  /**
   * Returns the bitmap values stored for one dictionary id of a column.
   *
   * @param dimension name of the column
   * @param dictId    dictionary id whose bitmap is requested
   * @return the bitmap for {@code dictId} wrapped in {@link ImmutableBitmapValues}, or {@link BitmapValues#EMPTY}
   *         if the column does not exist, has no index supplier, the supplier provides no
   *         {@link DictionaryEncodedValueIndex}, or {@code dictId} is negative
   */
  @Override
  public BitmapValues getBitmapValues(String dimension, int dictId)
  {
    final ColumnHolder holder = input.getColumnHolder(dimension);
    if (holder == null) {
      return BitmapValues.EMPTY;
    }

    final ColumnIndexSupplier supplier = holder.getIndexSupplier();
    if (supplier == null) {
      return BitmapValues.EMPTY;
    }

    final DictionaryEncodedValueIndex valueIndex = supplier.as(DictionaryEncodedValueIndex.class);
    if (valueIndex == null || dictId < 0) {
      return BitmapValues.EMPTY;
    }

    return new ImmutableBitmapValues(valueIndex.getBitmap(dictId));
  }

  /**
   * Returns the metadata object obtained from the wrapped index when this adapter was constructed.
   *
   * @return the cached metadata reference
   */
  @Override
  public Metadata getMetadata()
  {
    return this.metadata;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy