All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hudi.source.stats.FileStatsIndex Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.source.stats;

import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.VisibleForTesting;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.common.util.collection.Tuple3;
import org.apache.hudi.common.util.hash.ColumnIndexID;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.metadata.HoodieMetadataPayload;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.hudi.source.prune.ColumnStatsProbe;
import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;
import org.apache.hudi.util.AvroToRowDataConverters;
import org.apache.hudi.util.DataTypeUtils;
import org.apache.hudi.util.FlinkClientUtil;
import org.apache.hudi.util.RowDataProjection;

import org.apache.avro.generic.GenericRecord;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

import static org.apache.hudi.common.util.ValidationUtils.checkState;
import static org.apache.hudi.source.stats.ColumnStatsSchemas.COL_STATS_DATA_TYPE;
import static org.apache.hudi.source.stats.ColumnStatsSchemas.COL_STATS_TARGET_POS;
import static org.apache.hudi.source.stats.ColumnStatsSchemas.METADATA_DATA_TYPE;
import static org.apache.hudi.source.stats.ColumnStatsSchemas.ORD_COL_NAME;
import static org.apache.hudi.source.stats.ColumnStatsSchemas.ORD_FILE_NAME;
import static org.apache.hudi.source.stats.ColumnStatsSchemas.ORD_MAX_VAL;
import static org.apache.hudi.source.stats.ColumnStatsSchemas.ORD_MIN_VAL;
import static org.apache.hudi.source.stats.ColumnStatsSchemas.ORD_NULL_CNT;
import static org.apache.hudi.source.stats.ColumnStatsSchemas.ORD_VAL_CNT;

/**
 * An index support implementation that leverages Column Stats Index to prune files,
 * including utilities for abstracting away heavy-lifting of interactions with the index,
 * providing convenient interfaces to read it, transpose, etc.
 */
public class FileStatsIndex implements ColumnStatsIndex {
  private static final long serialVersionUID = 1L;
  private static final Logger LOG = LoggerFactory.getLogger(FileStatsIndex.class);
  private final RowType rowType;
  private final String basePath;
  private final HoodieMetadataConfig metadataConfig;
  private HoodieTableMetadata metadataTable;

  public FileStatsIndex(
      String basePath,
      RowType rowType,
      HoodieMetadataConfig metadataConfig) {
    this.basePath = basePath;
    this.rowType = rowType;
    this.metadataConfig = metadataConfig;
  }

  @Override
  public String getIndexPartitionName() {
    return HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS;
  }

  public HoodieTableMetadata getMetadataTable() {
    // initialize the metadata table lazily
    if (this.metadataTable == null) {
      this.metadataTable = HoodieTableMetadata.create(
          HoodieFlinkEngineContext.DEFAULT,
          new HoodieHadoopStorage(basePath, FlinkClientUtil.getHadoopConf()),
          metadataConfig,
          basePath);
    }
    return this.metadataTable;
  }

  @Override
  public Set computeCandidateFiles(ColumnStatsProbe probe, List allFiles) {
    if (probe == null) {
      return null;
    }
    try {
      String[] targetColumns = probe.getReferencedCols();
      final List statsRows = readColumnStatsIndexByColumns(targetColumns);
      return candidatesInMetadataTable(probe, statsRows, allFiles);
    } catch (Throwable t) {
      LOG.warn("Read {} for data skipping error", getIndexPartitionName(), t);
      return null;
    }
  }

  @Override
  public Set computeCandidatePartitions(ColumnStatsProbe probe, List allPartitions) {
    throw new UnsupportedOperationException("This method is not supported by " + this.getClass().getSimpleName());
  }

  /**
   * Computes pruned list of candidates' names based on provided list of data filters.
   * conditions, by leveraging Metadata Table's Column Statistics index (hereon referred as ColStats for brevity)
   * bearing "min", "max", "num_nulls" statistics for all columns.
   *
   * 

NOTE: This method has to return complete set of the candidates, since only provided candidates will * ultimately be scanned as part of query execution. Hence, this method has to maintain the * invariant of conservatively including every candidate's name, that is NOT referenced in its index. * *

The {@code filters} must all be simple. * * @param probe The column stats probe built from push-down filters. * @param indexRows The raw column stats records. * @param oriCandidates The original candidates to be pruned. * * @return set of pruned (data-skipped) candidate names */ protected Set candidatesInMetadataTable( @Nullable ColumnStatsProbe probe, List indexRows, List oriCandidates) { if (probe == null) { return null; } String[] referencedCols = probe.getReferencedCols(); final Pair, String[]> colStatsTable = transposeColumnStatsIndex(indexRows, referencedCols); List transposedColStats = colStatsTable.getLeft(); String[] queryCols = colStatsTable.getRight(); if (queryCols.length == 0) { // the indexed columns have no intersection with the referenced columns, returns early return null; } RowType.RowField[] queryFields = DataTypeUtils.projectRowFields(rowType, queryCols); Set allIndexedFiles = transposedColStats.stream().parallel() .map(row -> row.getString(0).toString()) .collect(Collectors.toSet()); Set candidateFiles = transposedColStats.stream().parallel() .filter(row -> probe.test(row, queryFields)) .map(row -> row.getString(0).toString()) .collect(Collectors.toSet()); // NOTE: Col-Stats Index isn't guaranteed to have complete set of statistics for every // base-file: since it's bound to clustering, which could occur asynchronously // at arbitrary point in time, and is not likely to be touching all the base files. 
// // To close that gap, we manually compute the difference b/w all indexed (by col-stats-index) // files and all outstanding base-files, and make sure that all base files not // represented w/in the index are included in the output of this method oriCandidates.removeAll(allIndexedFiles); candidateFiles.addAll(oriCandidates); return candidateFiles; } private static List projectNestedColStatsColumns(List rows) { int pos = HoodieMetadataRecord.SCHEMA$.getField(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS).pos(); RowDataProjection projection = RowDataProjection.instanceV2((RowType) COL_STATS_DATA_TYPE.getLogicalType(), COL_STATS_TARGET_POS); return rows.stream().parallel() .map(row -> { RowData columnStatsField = row.getRow(pos, 9); return projection.project(columnStatsField); }).collect(Collectors.toList()); } /** * Transposes and converts the raw table format of the Column Stats Index representation, * where each row/record corresponds to individual (column, file) pair, into the table format * where each row corresponds to single file with statistic for individual columns collated * w/in such row: *

* Metadata Table Column Stats Index format: * *

   *  +---------------------------+------------+------------+------------+-------------+
   *  |        fileName           | columnName |  minValue  |  maxValue  |  num_nulls  |
   *  +---------------------------+------------+------------+------------+-------------+
   *  | one_base_file.parquet     |          A |          1 |         10 |           0 |
   *  | another_base_file.parquet |          A |        -10 |          0 |           5 |
   *  +---------------------------+------------+------------+------------+-------------+
   * 
*

* Returned table format * *

   *  +---------------------------+------------+------------+-------------+
   *  |          file             | A_minValue | A_maxValue | A_nullCount |
   *  +---------------------------+------------+------------+-------------+
   *  | one_base_file.parquet     |          1 |         10 |           0 |
   *  | another_base_file.parquet |        -10 |          0 |           5 |
   *  +---------------------------+------------+------------+-------------+
   * 
*

* NOTE: Column Stats Index might potentially contain statistics for many columns (if not all), while * query at hand might only be referencing a handful of those. As such, we collect all the * column references from the filtering expressions, and only transpose records corresponding to the * columns referenced in those * * @param colStats RowData list bearing raw Column Stats Index table * @param queryColumns target columns to be included into the final table * @return reshaped table according to the format outlined above */ @VisibleForTesting public Pair, String[]> transposeColumnStatsIndex(List colStats, String[] queryColumns) { Map tableFieldTypeMap = rowType.getFields().stream() .collect(Collectors.toMap(RowType.RowField::getName, RowType.RowField::getType)); // NOTE: We have to collect list of indexed columns to make sure we properly align the rows // w/in the transposed dataset: since some files might not have all the columns indexed // either due to the Column Stats Index config changes, schema evolution, etc. 
we have // to make sure that all the rows w/in transposed data-frame are properly padded (with null // values) for such file-column combinations Set indexedColumns = colStats.stream().map(row -> row.getString(ORD_COL_NAME) .toString()).collect(Collectors.toSet()); // NOTE: We're sorting the columns to make sure final index schema matches layout // of the transposed table TreeSet sortedTargetColumns = Arrays.stream(queryColumns).sorted() .filter(indexedColumns::contains) .collect(Collectors.toCollection(TreeSet::new)); final Map converters = new ConcurrentHashMap<>(); Map> fileNameToRows = colStats.stream().parallel() .filter(row -> sortedTargetColumns.contains(row.getString(ORD_COL_NAME).toString())) .map(row -> { if (row.isNullAt(ORD_MIN_VAL) && row.isNullAt(ORD_MAX_VAL)) { // Corresponding row could be null in either of the 2 cases // - Column contains only null values (in that case both min/max have to be nulls) // - This is a stubbed Column Stats record (used as a tombstone) return row; } else { String colName = row.getString(ORD_COL_NAME).toString(); LogicalType colType = tableFieldTypeMap.get(colName); return unpackMinMaxVal(row, colType, converters); } }).collect(Collectors.groupingBy(rowData -> rowData.getString(ORD_FILE_NAME))); return Pair.of(foldRowsByFiles(sortedTargetColumns, fileNameToRows), sortedTargetColumns.toArray(new String[0])); } private static List foldRowsByFiles( TreeSet sortedTargetColumns, Map> fileNameToRows) { return fileNameToRows.values().stream().parallel().map(rows -> { // Rows seq is always non-empty (otherwise it won't be grouped into) StringData fileName = rows.get(0).getString(ORD_FILE_NAME); long valueCount = rows.get(0).getLong(ORD_VAL_CNT); // To properly align individual rows (corresponding to a file) w/in the transposed projection, we need // to align existing column-stats for individual file with the list of expected ones for the // whole transposed projection (a superset of all files) Map columnRowsMap = rows.stream() 
.collect(Collectors.toMap(row -> row.getString(ORD_COL_NAME).toString(), row -> row)); SortedMap alignedColumnRowsMap = new TreeMap<>(); sortedTargetColumns.forEach(col -> alignedColumnRowsMap.put(col, columnRowsMap.get(col))); List columnStats = alignedColumnRowsMap.values().stream().map(row -> { if (row == null) { // NOTE: Since we're assuming missing column to essentially contain exclusively // null values, we set null-count to be equal to value-count (this behavior is // consistent with reading non-existent columns from Parquet) return Tuple3.of(null, null, valueCount); } else { GenericRowData gr = (GenericRowData) row; return Tuple3.of(gr.getField(ORD_MIN_VAL), gr.getField(ORD_MAX_VAL), gr.getField(ORD_NULL_CNT)); } }).collect(Collectors.toList()); GenericRowData foldedRow = new GenericRowData(2 + 3 * columnStats.size()); foldedRow.setField(0, fileName); foldedRow.setField(1, valueCount); for (int i = 0; i < columnStats.size(); i++) { Tuple3 stats = columnStats.get(i); int startPos = 2 + 3 * i; foldedRow.setField(startPos, stats.f0); foldedRow.setField(startPos + 1, stats.f1); foldedRow.setField(startPos + 2, stats.f2); } return foldedRow; }).collect(Collectors.toList()); } private static RowData unpackMinMaxVal( RowData row, LogicalType colType, Map converters) { RowData minValueStruct = row.getRow(ORD_MIN_VAL, 1); RowData maxValueStruct = row.getRow(ORD_MAX_VAL, 1); checkState(minValueStruct != null && maxValueStruct != null, "Invalid Column Stats record: either both min/max have to be null, or both have to be non-null"); Object minValue = tryUnpackNonNullVal(minValueStruct, colType, converters); Object maxValue = tryUnpackNonNullVal(maxValueStruct, colType, converters); // the column schema: // |- file_name: string // |- min_val: row // |- max_val: row // |- null_cnt: long // |- val_cnt: long // |- column_name: string GenericRowData unpackedRow = new GenericRowData(row.getArity()); unpackedRow.setField(0, row.getString(0)); unpackedRow.setField(1, 
minValue); unpackedRow.setField(2, maxValue); unpackedRow.setField(3, row.getLong(3)); unpackedRow.setField(4, row.getLong(4)); unpackedRow.setField(5, row.getString(5)); return unpackedRow; } private static Object tryUnpackNonNullVal( RowData rowData, LogicalType colType, Map converters) { for (int i = 0; i < rowData.getArity(); i++) { // row data converted from avro is definitely generic. Object nested = ((GenericRowData) rowData).getField(i); if (nested != null) { return doUnpack(nested, colType, converters); } } return null; } private static Object doUnpack( Object rawVal, LogicalType logicalType, Map converters) { AvroToRowDataConverters.AvroToRowDataConverter converter = converters.computeIfAbsent(logicalType, k -> AvroToRowDataConverters.createConverter(logicalType, true)); return converter.convert(rawVal); } @VisibleForTesting public List readColumnStatsIndexByColumns(String[] targetColumns) { // NOTE: If specific columns have been provided, we can considerably trim down amount of data fetched // by only fetching Column Stats Index records pertaining to the requested columns. 
// Otherwise, we fall back to read whole Column Stats Index ValidationUtils.checkArgument(targetColumns.length > 0, "Column stats is only valid when push down filters have referenced columns"); // Read Metadata Table's column stats Flink's RowData list by // - Fetching the records by key-prefixes (encoded column names) // - Deserializing fetched records into [[RowData]]s // TODO encoding should be done internally w/in HoodieBackedTableMetadata List encodedTargetColumnNames = Arrays.stream(targetColumns) .map(colName -> new ColumnIndexID(colName).asBase64EncodedString()).collect(Collectors.toList()); HoodieData> records = getMetadataTable().getRecordsByKeyPrefixes(encodedTargetColumnNames, getIndexPartitionName(), false); org.apache.hudi.util.AvroToRowDataConverters.AvroToRowDataConverter converter = AvroToRowDataConverters.createRowConverter((RowType) METADATA_DATA_TYPE.getLogicalType()); List rows = records.collectAsList().stream().parallel().map(record -> { // schema and props are ignored for generating metadata record from the payload // instead, the underlying file system, or bloom filter, or columns stats metadata (part of payload) are directly used GenericRecord genericRecord; try { genericRecord = (GenericRecord) record.getData().getInsertValue(null, null).orElse(null); } catch (IOException e) { throw new HoodieException("Exception while getting insert value from metadata payload"); } return (RowData) converter.convert(genericRecord); } ).collect(Collectors.toList()); return projectNestedColStatsColumns(rows); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy