/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.source.stats;
import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.common.util.collection.Tuple3;
import org.apache.hudi.common.util.hash.ColumnIndexID;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.metadata.HoodieMetadataPayload;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.hudi.util.AvroSchemaConverter;
import org.apache.hudi.util.AvroToRowDataConverters;
import org.apache.hudi.util.RowDataProjection;
import org.apache.avro.generic.GenericRecord;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.common.util.ValidationUtils.checkState;
/**
* Utilities that abstract away the heavy lifting of interacting with the Metadata Table's Column Stats Index,
* providing convenient interfaces to read and transpose it.
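*
* <p>A minimal usage sketch (the base path, the filter column {@code "A"} and {@code tableRowType}
* below are placeholders, assuming a table with the column stats index enabled):
* <pre>{@code
*   HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).build();
*   List<RowData> colStats = ColumnStatsIndices.readColumnStatsIndex(
*       "/path/to/table", metadataConfig, new String[] {"A"});
*   Pair<List<RowData>, String[]> transposed =
*       ColumnStatsIndices.transposeColumnStatsIndex(colStats, new String[] {"A"}, tableRowType);
* }</pre>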
*/
public class ColumnStatsIndices {
private static final DataType METADATA_DATA_TYPE = getMetadataDataType();
private static final DataType COL_STATS_DATA_TYPE = getColStatsDataType();
private static final int[] COL_STATS_TARGET_POS = getColStatsTargetPos();
// the column schema:
// |- file_name: string
// |- min_val: row
// |- max_val: row
// |- null_cnt: long
// |- val_cnt: long
// |- column_name: string
private static final int ORD_FILE_NAME = 0;
private static final int ORD_MIN_VAL = 1;
private static final int ORD_MAX_VAL = 2;
private static final int ORD_NULL_CNT = 3;
private static final int ORD_VAL_CNT = 4;
private static final int ORD_COL_NAME = 5;
private ColumnStatsIndices() {
}
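/**
* Reads the raw Column Stats Index records for the given columns from the Metadata Table.
*
* @param basePath base path of the Hudi table
* @param metadataConfig metadata table configuration
* @param targetColumns columns referenced by the push-down filters, must be non-empty
* @return list of {@link RowData} bearing the raw Column Stats Index table
*/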
public static List<RowData> readColumnStatsIndex(String basePath, HoodieMetadataConfig metadataConfig, String[] targetColumns) {
// NOTE: If specific columns have been provided, we can considerably trim down amount of data fetched
// by only fetching Column Stats Index records pertaining to the requested columns.
// Otherwise, we fall back to read whole Column Stats Index
ValidationUtils.checkArgument(targetColumns.length > 0,
"Column stats is only valid when push down filters have referenced columns");
final List<RowData> metadataRows = readColumnStatsIndexByColumns(basePath, targetColumns, metadataConfig);
return projectNestedColStatsColumns(metadataRows);
}
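/**
* Projects the nested column-stats struct out of every raw metadata record, keeping only
* the fields at {@link #COL_STATS_TARGET_POS} (file name, min/max, null/value counts, column name).
*/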
private static List<RowData> projectNestedColStatsColumns(List<RowData> rows) {
int pos = HoodieMetadataRecord.SCHEMA$.getField(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS).pos();
RowDataProjection projection = RowDataProjection.instanceV2((RowType) COL_STATS_DATA_TYPE.getLogicalType(), COL_STATS_TARGET_POS);
return rows.stream().parallel()
.map(row -> {
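// read the nested column-stats struct (the second argument is its declared field count)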
RowData columnStatsField = row.getRow(pos, 9);
return projection.project(columnStatsField);
}).collect(Collectors.toList());
}
/**
* Transposes and converts the raw table format of the Column Stats Index representation,
* where each row/record corresponds to individual (column, file) pair, into the table format
* where each row corresponds to single file with statistic for individual columns collated
* w/in such row:
*
* Metadata Table Column Stats Index format:
*
* <pre>
*  +---------------------------+------------+------------+------------+-------------+
*  | fileName                  | columnName |  minValue  |  maxValue  |  num_nulls  |
*  +---------------------------+------------+------------+------------+-------------+
*  | one_base_file.parquet     |          A |          1 |         10 |           0 |
*  | another_base_file.parquet |          A |        -10 |          0 |           5 |
*  +---------------------------+------------+------------+------------+-------------+
* </pre>
*
* Returned table format:
*
* <pre>
*  +---------------------------+------------+------------+-------------+
*  | file                      | A_minValue | A_maxValue | A_nullCount |
*  +---------------------------+------------+------------+-------------+
*  | one_base_file.parquet     |          1 |         10 |           0 |
*  | another_base_file.parquet |        -10 |          0 |           5 |
*  +---------------------------+------------+------------+-------------+
* </pre>
*
* NOTE: Column Stats Index might potentially contain statistics for many columns (if not all), while
* query at hand might only be referencing a handful of those. As such, we collect all the
* column references from the filtering expressions, and only transpose records corresponding to the
* columns referenced in those filters.
*
* @param colStats RowData list bearing raw Column Stats Index table
* @param queryColumns target columns to be included into the final table
* @param tableSchema schema of the source data table
* @return reshaped table according to the format outlined above
*/
public static Pair<List<RowData>, String[]> transposeColumnStatsIndex(List<RowData> colStats, String[] queryColumns, RowType tableSchema) {
Map<String, LogicalType> tableFieldTypeMap = tableSchema.getFields().stream()
.collect(Collectors.toMap(RowType.RowField::getName, RowType.RowField::getType));
// NOTE: We have to collect list of indexed columns to make sure we properly align the rows
// w/in the transposed dataset: since some files might not have all the columns indexed
// either due to the Column Stats Index config changes, schema evolution, etc. we have
// to make sure that all the rows w/in transposed data-frame are properly padded (with null
// values) for such file-column combinations
Set<String> indexedColumns = colStats.stream().map(row -> row.getString(ORD_COL_NAME)
.toString()).collect(Collectors.toSet());
// NOTE: We're sorting the columns to make sure final index schema matches layout
// of the transposed table
TreeSet<String> sortedTargetColumns = Arrays.stream(queryColumns).sorted()
.filter(indexedColumns::contains)
.collect(Collectors.toCollection(TreeSet::new));
final Map<LogicalType, AvroToRowDataConverters.AvroToRowDataConverter> converters = new ConcurrentHashMap<>();
Map<StringData, List<RowData>> fileNameToRows = colStats.stream().parallel()
.filter(row -> sortedTargetColumns.contains(row.getString(ORD_COL_NAME).toString()))
.map(row -> {
if (row.isNullAt(ORD_MIN_VAL) && row.isNullAt(ORD_MAX_VAL)) {
// Corresponding row could be null in either of the 2 cases
// - Column contains only null values (in that case both min/max have to be nulls)
// - This is a stubbed Column Stats record (used as a tombstone)
return row;
} else {
String colName = row.getString(ORD_COL_NAME).toString();
LogicalType colType = tableFieldTypeMap.get(colName);
return unpackMinMaxVal(row, colType, converters);
}
}).collect(Collectors.groupingBy(rowData -> rowData.getString(ORD_FILE_NAME)));
return Pair.of(foldRowsByFiles(sortedTargetColumns, fileNameToRows), sortedTargetColumns.toArray(new String[0]));
}
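/**
* Folds the per-(column, file) stats rows grouped by file name into a single row per file with the
* layout {@code (fileName, valueCount, col1_min, col1_max, col1_nullCount, col2_min, ...)} following
* the sorted target columns; columns not indexed for a file are padded with null min/max and a
* null-count equal to the value-count.
*/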
private static List<RowData> foldRowsByFiles(
TreeSet<String> sortedTargetColumns,
Map<StringData, List<RowData>> fileNameToRows) {
return fileNameToRows.values().stream().parallel().map(rows -> {
// Rows seq is always non-empty (otherwise it won't be grouped into)
StringData fileName = rows.get(0).getString(ORD_FILE_NAME);
long valueCount = rows.get(0).getLong(ORD_VAL_CNT);
// To properly align individual rows (corresponding to a file) w/in the transposed projection, we need
// to align existing column-stats for individual file with the list of expected ones for the
// whole transposed projection (a superset of all files)
Map<String, RowData> columnRowsMap = rows.stream()
.collect(Collectors.toMap(row -> row.getString(ORD_COL_NAME).toString(), row -> row));
SortedMap<String, RowData> alignedColumnRowsMap = new TreeMap<>();
sortedTargetColumns.forEach(col -> alignedColumnRowsMap.put(col, columnRowsMap.get(col)));
List<Tuple3<Object, Object, Object>> columnStats = alignedColumnRowsMap.values().stream().map(row -> {
if (row == null) {
// NOTE: Since we're assuming missing column to essentially contain exclusively
// null values, we set null-count to be equal to value-count (this behavior is
// consistent with reading non-existent columns from Parquet)
return Tuple3.of(null, null, valueCount);
} else {
GenericRowData gr = (GenericRowData) row;
return Tuple3.of(gr.getField(ORD_MIN_VAL), gr.getField(ORD_MAX_VAL), gr.getField(ORD_NULL_CNT));
}
}).collect(Collectors.toList());
GenericRowData foldedRow = new GenericRowData(2 + 3 * columnStats.size());
foldedRow.setField(0, fileName);
foldedRow.setField(1, valueCount);
for (int i = 0; i < columnStats.size(); i++) {
Tuple3<Object, Object, Object> stats = columnStats.get(i);
int startPos = 2 + 3 * i;
foldedRow.setField(startPos, stats.f0);
foldedRow.setField(startPos + 1, stats.f1);
foldedRow.setField(startPos + 2, stats.f2);
}
return foldedRow;
}).collect(Collectors.toList());
}
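/**
* Unwraps the nested min/max wrapper structs of a raw column-stats row into plain values of the
* column's logical type, returning a flat row with the same field layout as the input.
*/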
private static RowData unpackMinMaxVal(
RowData row,
LogicalType colType,
Map<LogicalType, AvroToRowDataConverters.AvroToRowDataConverter> converters) {
RowData minValueStruct = row.getRow(ORD_MIN_VAL, 1);
RowData maxValueStruct = row.getRow(ORD_MAX_VAL, 1);
checkState(minValueStruct != null && maxValueStruct != null,
"Invalid Column Stats record: either both min/max have to be null, or both have to be non-null");
Object minValue = tryUnpackNonNullVal(minValueStruct, colType, converters);
Object maxValue = tryUnpackNonNullVal(maxValueStruct, colType, converters);
// the column schema:
// |- file_name: string
// |- min_val: row
// |- max_val: row
// |- null_cnt: long
// |- val_cnt: long
// |- column_name: string
GenericRowData unpackedRow = new GenericRowData(row.getArity());
unpackedRow.setField(0, row.getString(0));
unpackedRow.setField(1, minValue);
unpackedRow.setField(2, maxValue);
unpackedRow.setField(3, row.getLong(3));
unpackedRow.setField(4, row.getLong(4));
unpackedRow.setField(5, row.getString(5));
return unpackedRow;
}
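/**
* Returns the first non-null field of the given wrapper struct converted into Flink's internal
* data format, or null if every field is null.
*/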
private static Object tryUnpackNonNullVal(
RowData rowData,
LogicalType colType,
Map<LogicalType, AvroToRowDataConverters.AvroToRowDataConverter> converters) {
for (int i = 0; i < rowData.getArity(); i++) {
// row data converted from avro is definitely generic.
Object nested = ((GenericRowData) rowData).getField(i);
if (nested != null) {
return doUnpack(nested, colType, converters);
}
}
return null;
}
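/**
* Converts a raw Avro-decoded value into Flink's internal representation for the given logical
* type, caching the created converter per type.
*/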
private static Object doUnpack(
Object rawVal,
LogicalType logicalType,
Map<LogicalType, AvroToRowDataConverters.AvroToRowDataConverter> converters) {
AvroToRowDataConverters.AvroToRowDataConverter converter =
converters.computeIfAbsent(logicalType, k -> AvroToRowDataConverters.createConverter(logicalType));
return converter.convert(rawVal);
}
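/**
* Fetches the Column Stats Index records for the given columns from the metadata table, looking
* them up by the Base64-encoded column name key prefixes, and deserializes the Avro payloads
* into {@link RowData}.
*/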
private static List<RowData> readColumnStatsIndexByColumns(
String basePath,
String[] targetColumns,
HoodieMetadataConfig metadataConfig) {
// Read Metadata Table's Column Stats Index into Flink's RowData list by
// - Fetching the records from CSI by key-prefixes (encoded column names)
// - Deserializing fetched records into [[RowData]]s
HoodieTableMetadata metadataTable = HoodieTableMetadata.create(
HoodieFlinkEngineContext.DEFAULT,
metadataConfig, basePath);
// TODO encoding should be done internally w/in HoodieBackedTableMetadata
List<String> encodedTargetColumnNames = Arrays.stream(targetColumns)
.map(colName -> new ColumnIndexID(colName).asBase64EncodedString()).collect(Collectors.toList());
HoodieData<HoodieRecord<HoodieMetadataPayload>> records =
metadataTable.getRecordsByKeyPrefixes(encodedTargetColumnNames, HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS, false);
AvroToRowDataConverters.AvroToRowDataConverter converter =
AvroToRowDataConverters.createRowConverter((RowType) METADATA_DATA_TYPE.getLogicalType());
return records.collectAsList().stream().parallel().map(record -> {
// schema and props are ignored for generating metadata record from the payload
// instead, the underlying file system, or bloom filter, or columns stats metadata (part of payload) are directly used
GenericRecord genericRecord;
try {
genericRecord = (GenericRecord) record.getData().getInsertValue(null, null).orElse(null);
} catch (IOException e) {
throw new HoodieException("Exception while getting insert value from metadata payload", e);
}
return (RowData) converter.convert(genericRecord);
}
).collect(Collectors.toList());
}
// -------------------------------------------------------------------------
// Utilities
// -------------------------------------------------------------------------
private static DataType getMetadataDataType() {
return AvroSchemaConverter.convertToDataType(HoodieMetadataRecord.SCHEMA$);
}
private static DataType getColStatsDataType() {
int pos = HoodieMetadataRecord.SCHEMA$.getField(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS).pos();
return METADATA_DATA_TYPE.getChildren().get(pos);
}
// the column schema:
// |- file_name: string
// |- min_val: row
// |- max_val: row
// |- null_cnt: long
// |- val_cnt: long
// |- column_name: string
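/**
* Resolves the positions of the column-stats fields w/in the col-stats row type, in the
* ORD_* order used by the projection in {@link #projectNestedColStatsColumns}.
*/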
private static int[] getColStatsTargetPos() {
RowType colStatsRowType = (RowType) COL_STATS_DATA_TYPE.getLogicalType();
return Stream.of(
HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME,
HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE,
HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE,
HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT,
HoodieMetadataPayload.COLUMN_STATS_FIELD_VALUE_COUNT,
HoodieMetadataPayload.COLUMN_STATS_FIELD_COLUMN_NAME)
.mapToInt(colStatsRowType::getFieldIndex)
.toArray();
}
}