All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.source.stats.ColumnStatsSchemas Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.source.stats;

import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.metadata.HoodieMetadataPayload;
import org.apache.hudi.util.AvroSchemaConverter;

import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;

import java.util.stream.Stream;

/**
 * Utility class for column stats schema.
 */
public class ColumnStatsSchemas {
  // -------------------------------------------------------------------------
  //  Utilities
  // -------------------------------------------------------------------------
  public static final DataType METADATA_DATA_TYPE = getMetadataDataType();
  public static final DataType COL_STATS_DATA_TYPE = getColStatsDataType();
  public static final int[] COL_STATS_TARGET_POS = getColStatsTargetPos();

  // the column schema:
  // |- file_name: string
  // |- min_val: row
  // |- max_val: row
  // |- null_cnt: long
  // |- val_cnt: long
  // |- column_name: string
  public static final int ORD_FILE_NAME = 0;
  public static final int ORD_MIN_VAL = 1;
  public static final int ORD_MAX_VAL = 2;
  public static final int ORD_NULL_CNT = 3;
  public static final int ORD_VAL_CNT = 4;
  public static final int ORD_COL_NAME = 5;

  private static DataType getMetadataDataType() {
    return AvroSchemaConverter.convertToDataType(HoodieMetadataRecord.SCHEMA$);
  }

  private static DataType getColStatsDataType() {
    int pos = HoodieMetadataRecord.SCHEMA$.getField(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS).pos();
    return METADATA_DATA_TYPE.getChildren().get(pos);
  }

  // the column schema:
  // |- file_name: string
  // |- min_val: row
  // |- max_val: row
  // |- null_cnt: long
  // |- val_cnt: long
  // |- column_name: string
  private static int[] getColStatsTargetPos() {
    RowType colStatsRowType = (RowType) COL_STATS_DATA_TYPE.getLogicalType();
    return Stream.of(
            HoodieMetadataPayload.COLUMN_STATS_FIELD_FILE_NAME,
            HoodieMetadataPayload.COLUMN_STATS_FIELD_MIN_VALUE,
            HoodieMetadataPayload.COLUMN_STATS_FIELD_MAX_VALUE,
            HoodieMetadataPayload.COLUMN_STATS_FIELD_NULL_COUNT,
            HoodieMetadataPayload.COLUMN_STATS_FIELD_VALUE_COUNT,
            HoodieMetadataPayload.COLUMN_STATS_FIELD_COLUMN_NAME)
        .mapToInt(colStatsRowType::getFieldIndex)
        .toArray();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy