All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.druid.segment.join.table.FrameBasedIndexedTable Maven / Gradle / Ivy

There is a newer version: 30.0.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.segment.join.table;

import com.google.common.base.Preconditions;
import com.google.common.math.IntMath;
import org.apache.druid.frame.Frame;
import org.apache.druid.frame.read.FrameReader;
import org.apache.druid.frame.read.columnar.FrameColumnReader;
import org.apache.druid.frame.read.columnar.FrameColumnReaders;
import org.apache.druid.frame.segment.FrameStorageAdapter;
import org.apache.druid.frame.segment.columnar.FrameQueryableIndex;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.query.FrameBasedInlineDataSource;
import org.apache.druid.query.FrameSignaturePair;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.Cursor;
import org.apache.druid.segment.NilColumnValueSelector;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.SimpleAscendingOffset;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.BaseColumn;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;

import javax.annotation.Nullable;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

public class FrameBasedIndexedTable implements IndexedTable
{
  private static final Logger LOG = new Logger(FrameBasedIndexedTable.class);

  private final Set keyColumns;
  private final RowSignature rowSignature;
  private final String version;
  private final List keyColumnsIndexes;
  private final int numRows;
  private final List frameQueryableIndexes = new ArrayList<>();
  private final List cumulativeRowCount = new ArrayList<>();


  public FrameBasedIndexedTable(
      final FrameBasedInlineDataSource frameBasedInlineDataSource,
      final Set keyColumns,
      final String version
  )
  {
    this.keyColumns = keyColumns;
    this.version = version;
    this.rowSignature = frameBasedInlineDataSource.getRowSignature();

    int rowCount = 0;
    for (FrameSignaturePair frameSignaturePair : frameBasedInlineDataSource.getFrames()) {
      Frame frame = frameSignaturePair.getFrame();
      RowSignature frameRowSignature = frameSignaturePair.getRowSignature();
      frameQueryableIndexes.add(new FrameQueryableIndex(
          frame,
          frameRowSignature,
          createColumnReaders(frameRowSignature)
      ));
      rowCount += frame.numRows();
      cumulativeRowCount.add(rowCount);
    }

    this.numRows = rowCount;

    final ArrayList indexBuilders = new ArrayList<>(rowSignature.size());
    final List keyColumnNames = new ArrayList<>(keyColumns.size());

    for (int i = 0; i < rowSignature.size(); i++) {
      final RowBasedIndexBuilder m;
      final String columnName = rowSignature.getColumnName(i);
      if (keyColumns.contains(columnName)) {
        final ColumnType keyType =
            rowSignature.getColumnType(i).orElse(IndexedTableJoinMatcher.DEFAULT_KEY_TYPE);

        m = new RowBasedIndexBuilder(keyType);
        keyColumnNames.add(columnName);
      } else {
        m = null;
      }
      indexBuilders.add(m);
    }

    final Sequence cursors = Sequences.concat(
        frameBasedInlineDataSource
            .getFrames()
            .stream()
            .map(frameSignaturePair -> {
              Frame frame = frameSignaturePair.getFrame();
              RowSignature rowSignature = frameSignaturePair.getRowSignature();
              FrameStorageAdapter frameStorageAdapter =
                  new FrameStorageAdapter(frame, FrameReader.create(rowSignature), Intervals.ETERNITY);
              return frameStorageAdapter.makeCursors(
                                            null,
                                            Intervals.ETERNITY,
                                            VirtualColumns.EMPTY,
                                            Granularities.ALL,
                                            false,
                                            null
                                        );
            })
            .collect(Collectors.toList())
    );

    final Sequence sequence = Sequences.map(
        cursors,
        cursor -> {
          if (cursor == null) {
            return 0;
          }
          int rowNumber = 0;
          ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory();

          // this should really be optimized to use dimension selectors where possible to populate indexes from bitmap
          // indexes, but, an optimization for another day
          final List selectors = keyColumnNames
              .stream()
              .map(columnSelectorFactory::makeColumnValueSelector)
              .collect(Collectors.toList());

          while (!cursor.isDone()) {
            for (int keyColumnSelectorIndex = 0; keyColumnSelectorIndex < selectors.size(); keyColumnSelectorIndex++) {
              final String keyColumnName = keyColumnNames.get(keyColumnSelectorIndex);
              final int columnPosition = rowSignature.indexOf(keyColumnName);
              final RowBasedIndexBuilder keyColumnIndexBuilder = indexBuilders.get(columnPosition);
              keyColumnIndexBuilder.add(selectors.get(keyColumnSelectorIndex).getObject());
            }

            if (rowNumber % 100_000 == 0) {
              if (rowNumber == 0) {
                LOG.debug("Indexed first row for frame based datasource");
              } else {
                LOG.debug("Indexed row %s for frame based datasource", rowNumber);
              }
            }
            rowNumber++;
            cursor.advance();
          }
          return rowNumber;
        }
    );

    Integer totalRows = sequence.accumulate(0, (accumulated, in) -> accumulated += in);

    this.keyColumnsIndexes = indexBuilders.stream()
                                          .map(builder -> builder != null ? builder.build() : null)
                                          .collect(Collectors.toList());

    LOG.info("Created FrameBasedIndexedTable with %s rows.", totalRows);
  }

  @Override
  public String version()
  {
    return version;
  }

  @Override
  public Set keyColumns()
  {
    return keyColumns;
  }

  @Override
  public RowSignature rowSignature()
  {
    return rowSignature;
  }

  @Override
  public int numRows()
  {
    return numRows;
  }

  @Override
  public Index columnIndex(int column)
  {
    return RowBasedIndexedTable.getKeyColumnIndex(column, keyColumnsIndexes);

  }

  @Override
  public Reader columnReader(int column)
  {

    if (!rowSignature.contains(column)) {
      throw new IAE("Column[%d] is not a valid column for the frame based datasource", column);
    }

    String columnName = rowSignature.getColumnName(column);
    final SimpleAscendingOffset offset = new SimpleAscendingOffset(numRows());
    final List> columnValueSelectors = new ArrayList<>();
    final Set closeables = new HashSet<>();

    for (QueryableIndex frameQueryableIndex : frameQueryableIndexes) {
      ColumnHolder columnHolder = frameQueryableIndex.getColumnHolder(columnName);
      if (columnHolder == null) {
        columnValueSelectors.add(NilColumnValueSelector.instance());
      } else {
        BaseColumn baseColumn = columnHolder.getColumn();
        columnValueSelectors.add(baseColumn.makeColumnValueSelector(offset));
        closeables.add(baseColumn);
      }
    }

    return new Reader()
    {
      @Nullable
      @Override
      public Object read(int row)
      {
        int frameIndex = binSearch(cumulativeRowCount, row);
        if (frameIndex == frameQueryableIndexes.size()) {
          throw new IndexOutOfBoundsException(
              StringUtils.format("Requested row index [%d], Max row count [%d]", row, numRows())
          );
        }
        // The offset needs to be changed as well
        int adjustedOffset = frameIndex == 0
                             ? row
                             : IntMath.checkedSubtract(row, cumulativeRowCount.get(frameIndex - 1));
        offset.setCurrentOffset(adjustedOffset);
        return columnValueSelectors.get(frameIndex).getObject();
      }

      @Override
      public void close() throws IOException
      {
        for (Closeable c : closeables) {
          c.close();
        }
      }
    };
  }

  @Override
  public boolean isCacheable()
  {
    return false;
  }

  @Override
  public void close()
  {

  }

  @Override
  public Optional acquireReferences()
  {
    return Optional.of(
        () -> {
        }
    );
  }

  private List createColumnReaders(RowSignature rowSignature)
  {
    final List columnReaders = new ArrayList<>(rowSignature.size());

    for (int columnNumber = 0; columnNumber < rowSignature.size(); columnNumber++) {
      ColumnType columnType = Preconditions.checkNotNull(
          rowSignature.getColumnType(columnNumber).orElse(null),
          "Type for column [%s]",
          rowSignature.getColumnName(columnNumber)
      );
      columnReaders.add(FrameColumnReaders.create(rowSignature.getColumnName(columnNumber), columnNumber, columnType));
    }

    return columnReaders;
  }

  /**
   * This method finds out the frame which contains the row indexed "row" from the cumulative array
   * This is basically a binary search where we have to find the FIRST element which is STRICTLY GREATER than
   * the "row" provided
   * 

* Note: row is the index (therefore it is 0-indexed) */ private int binSearch(List arr, int row) { int start = 0; int end = arr.size(); while (start < end) { // This will be rounded down in case (start + end) is odd. Therefore middle will always be less than // end and will be equal to or greater than start int middle = start + (end - start) / 2; // If the "middle" satisfies the below predicate, then we can move the end backward because every element after // middle would be satisfying the predicate if (arr.get(middle) > row) { end = middle; } else { start = middle + 1; } } return start; // Note: at this point, end == start } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy