All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.snowflake.ingest.streaming.internal.FileColumnProperties Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2024 Snowflake Computing Inc. All rights reserved.
 */

package net.snowflake.ingest.streaming.internal;

import static net.snowflake.ingest.streaming.internal.BinaryStringUtils.truncateBytesAsHex;
import static net.snowflake.ingest.utils.Constants.EP_NV_UNKNOWN;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.math.BigInteger;
import java.util.Objects;
import net.snowflake.ingest.utils.ErrorCode;
import net.snowflake.ingest.utils.SFException;
import org.apache.parquet.schema.LogicalTypeAnnotation;

/**
 * Audit register endpoint/FileColumnPropertyDTO property list.
 *
 * <p>Holds the per-column statistics (min/max values, null count, distinct values, ...) copied
 * out of a {@link RowBufferStats} instance. The getters carry Jackson annotations so that the
 * object can be serialized with the JSON property names declared on them.
 */
class FileColumnProperties {
  private int columnOrdinal;
  private Integer fieldId;
  private String minStrValue;

  private String maxStrValue;

  private String collation;

  private String minStrNonCollated;

  private String maxStrNonCollated;

  // 128-bit precision needed
  private BigInteger minIntValue;

  // 128-bit precision needed
  private BigInteger maxIntValue;

  // 64-bit precision
  private Double minRealValue;

  // 64-bit precision
  private Double maxRealValue;

  private long distinctValues;

  private long nullCount;

  // for elements in repeated columns
  private Long numberOfValues;

  // for binary or string columns
  private long maxLength;

  // Default value to use for min/max int when all data in the given column is NULL
  public static final BigInteger DEFAULT_MIN_MAX_INT_VAL_FOR_EP = BigInteger.valueOf(0);

  // Default value to use for min/max real when all data in the given column is NULL
  public static final Double DEFAULT_MIN_MAX_REAL_VAL_FOR_EP = 0d;

  // Default value to use for min/max string when all data in the given Iceberg column is NULL
  public static final String DEFAULT_MIN_MAX_STR_VAL_FOR_EP = "";

  /**
   * Builds the property list from the statistics collected for one column.
   *
   * @param stats statistics of the column; must not be null
   * @param setAllDefaultValues Whether to set defaults for ALL fields, or only some. BDEC sets it
   *     for all but iceberg does not.
   * @throws SFException with {@link ErrorCode#INTERNAL_ERROR} if {@code setAllDefaultValues} is
   *     false and the Parquet primitive type of the column is not handled by the switch below
   */
  FileColumnProperties(RowBufferStats stats, boolean setAllDefaultValues) {
    this.setColumnOrdinal(stats.getOrdinal());
    this.setFieldId(stats.getFieldId());
    this.setCollation(stats.getCollationDefinitionString());

    if (setAllDefaultValues) {
      /* Set every column to default value for FDN columns if the all row values are null */
      setIntValues(stats);
      setRealValues(stats);
      setStringValues(stats, false /* replaceNullWithEmptyString */);
    } else {
      /* Only set corresponding min/max stats to default value for Iceberg columns if the all row values are null */
      switch (stats.getPrimitiveType().getPrimitiveTypeName()) {
        case BOOLEAN:
        case INT32:
        case INT64:
          setIntValues(stats);
          break;

        case FLOAT:
        case DOUBLE:
          setRealValues(stats);
          break;

        case BINARY:
          setStringValues(stats, true /* replaceNullWithEmptyString */);
          break;

        case FIXED_LEN_BYTE_ARRAY:
          // A fixed-length byte array annotated as decimal is reported through the integer
          // min/max; any other fixed-length byte array goes through the string min/max.
          if (stats.getPrimitiveType().getLogicalTypeAnnotation()
              instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
            setIntValues(stats);
          } else {
            setStringValues(stats, true /* replaceNullWithEmptyString */);
          }
          break;

        default:
          throw new SFException(
              ErrorCode.INTERNAL_ERROR,
              "Unsupported Iceberg column type: "
                  + stats.getPrimitiveType().getPrimitiveTypeName());
      }
    }

    this.setMaxLength(stats.getCurrentMaxLength());
    this.setMaxStrNonCollated(null);
    this.setMinStrNonCollated(null);
    this.setNullCount(stats.getCurrentNullCount());
    this.setDistinctValues(stats.getDistinctValues());

    // numberOfValues stays null (and is therefore omitted from the JSON) when it is unknown
    if (stats.getNumberOfValues() != EP_NV_UNKNOWN) {
      this.setNumberOfValues(stats.getNumberOfValues());
    }
  }

  /**
   * Copies the integer min/max from {@code stats}, substituting {@link
   * #DEFAULT_MIN_MAX_INT_VAL_FOR_EP} for a null value.
   */
  private void setIntValues(RowBufferStats stats) {
    // Read each value once so the null check and the stored value cannot diverge
    final BigInteger currentMax = stats.getCurrentMaxIntValue();
    this.setMaxIntValue(currentMax == null ? DEFAULT_MIN_MAX_INT_VAL_FOR_EP : currentMax);

    final BigInteger currentMin = stats.getCurrentMinIntValue();
    this.setMinIntValue(currentMin == null ? DEFAULT_MIN_MAX_INT_VAL_FOR_EP : currentMin);
  }

  /**
   * Copies the real min/max from {@code stats}, substituting {@link
   * #DEFAULT_MIN_MAX_REAL_VAL_FOR_EP} for a null value.
   */
  private void setRealValues(RowBufferStats stats) {
    // Read each value once so the null check and the stored value cannot diverge
    final Double currentMax = stats.getCurrentMaxRealValue();
    this.setMaxRealValue(currentMax == null ? DEFAULT_MIN_MAX_REAL_VAL_FOR_EP : currentMax);

    final Double currentMin = stats.getCurrentMinRealValue();
    this.setMinRealValue(currentMin == null ? DEFAULT_MIN_MAX_REAL_VAL_FOR_EP : currentMin);
  }

  /**
   * Copies the string min/max from {@code stats} after passing them through {@code
   * truncateBytesAsHex}.
   *
   * @param stats statistics of the column
   * @param replaceNullWithEmptyString when true, a null min/max is stored as {@link
   *     #DEFAULT_MIN_MAX_STR_VAL_FOR_EP}; when false, the corresponding field is left untouched
   */
  private void setStringValues(RowBufferStats stats, boolean replaceNullWithEmptyString) {
    // current hex-encoded max value, truncated up to 32 bytes
    if (stats.getCurrentMaxStrValue() != null) {
      String truncatedAsHex =
          truncateBytesAsHex(stats.getCurrentMaxStrValue(), true /* truncateUp */);
      this.setMaxStrValue(truncatedAsHex);
    } else if (replaceNullWithEmptyString) {
      this.setMaxStrValue(DEFAULT_MIN_MAX_STR_VAL_FOR_EP);
    }

    // current hex-encoded min value, truncated down to 32 bytes
    if (stats.getCurrentMinStrValue() != null) {
      String truncatedAsHex =
          truncateBytesAsHex(stats.getCurrentMinStrValue(), false /* truncateUp */);
      this.setMinStrValue(truncatedAsHex);
    } else if (replaceNullWithEmptyString) {
      this.setMinStrValue(DEFAULT_MIN_MAX_STR_VAL_FOR_EP);
    }
  }

  // Note: the field is called columnOrdinal but is serialized under the name "columnId"
  @JsonProperty("columnId")
  public int getColumnOrdinal() {
    return columnOrdinal;
  }

  public void setColumnOrdinal(int columnOrdinal) {
    this.columnOrdinal = columnOrdinal;
  }

  // Left out of the serialized JSON when null
  @JsonProperty("fieldId")
  @JsonInclude(JsonInclude.Include.NON_NULL)
  public Integer getFieldId() {
    return fieldId;
  }

  public void setFieldId(Integer fieldId) {
    this.fieldId = fieldId;
  }

  // Annotation required in order to have package private fields serialized
  @JsonProperty("minStrValue")
  String getMinStrValue() {
    return minStrValue;
  }

  void setMinStrValue(String minStrValue) {
    this.minStrValue = minStrValue;
  }

  @JsonProperty("maxStrValue")
  String getMaxStrValue() {
    return maxStrValue;
  }

  void setMaxStrValue(String maxStrValue) {
    this.maxStrValue = maxStrValue;
  }

  @JsonProperty("minIntValue")
  BigInteger getMinIntValue() {
    return minIntValue;
  }

  void setMinIntValue(BigInteger minIntValue) {
    this.minIntValue = minIntValue;
  }

  @JsonProperty("maxIntValue")
  BigInteger getMaxIntValue() {
    return maxIntValue;
  }

  void setMaxIntValue(BigInteger maxIntValue) {
    this.maxIntValue = maxIntValue;
  }

  @JsonProperty("nullCount")
  long getNullCount() {
    return nullCount;
  }

  void setNullCount(long nullCount) {
    this.nullCount = nullCount;
  }

  @JsonProperty("minRealValue")
  Double getMinRealValue() {
    return minRealValue;
  }

  void setMinRealValue(Double minRealValue) {
    this.minRealValue = minRealValue;
  }

  @JsonProperty("maxRealValue")
  Double getMaxRealValue() {
    return maxRealValue;
  }

  void setMaxRealValue(Double maxRealValue) {
    this.maxRealValue = maxRealValue;
  }

  @JsonProperty("distinctValues")
  long getDistinctValues() {
    return distinctValues;
  }

  void setDistinctValues(long distinctValues) {
    this.distinctValues = distinctValues;
  }

  @JsonProperty("maxLength")
  long getMaxLength() {
    return maxLength;
  }

  void setMaxLength(long maxLength) {
    this.maxLength = maxLength;
  }

  @JsonProperty("collation")
  String getCollation() {
    return collation;
  }

  void setCollation(String collation) {
    this.collation = collation;
  }

  @JsonProperty("minStrNonCollated")
  String getMinStrNonCollated() {
    return minStrNonCollated;
  }

  void setMinStrNonCollated(String minStrNonCollated) {
    this.minStrNonCollated = minStrNonCollated;
  }

  @JsonProperty("maxStrNonCollated")
  String getMaxStrNonCollated() {
    return maxStrNonCollated;
  }

  void setMaxStrNonCollated(String maxStrNonCollated) {
    this.maxStrNonCollated = maxStrNonCollated;
  }

  // Left out of the serialized JSON when null
  @JsonProperty("numberOfValues")
  @JsonInclude(JsonInclude.Include.NON_NULL)
  Long getNumberOfValues() {
    return numberOfValues;
  }

  void setNumberOfValues(Long numberOfValues) {
    this.numberOfValues = numberOfValues;
  }

  /**
   * Returns a JSON-like rendering of this object. Only one min/max family is printed: the integer
   * one if {@code minIntValue} is set, otherwise the real one if {@code minRealValue} is set,
   * otherwise the string one together with collation and max length. Null strings are printed as
   * the quoted text {@code "null"}, so the result is meant for logs rather than for parsing.
   */
  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("{");
    sb.append("\"columnOrdinal\": ").append(columnOrdinal);
    sb.append(", \"fieldId\": ").append(fieldId);
    if (minIntValue != null) {
      sb.append(", \"minIntValue\": ").append(minIntValue);
      sb.append(", \"maxIntValue\": ").append(maxIntValue);
    } else if (minRealValue != null) {
      sb.append(", \"minRealValue\": ").append(minRealValue);
      sb.append(", \"maxRealValue\": ").append(maxRealValue);
    } else {
      // string
      sb.append(", \"minStrValue\": \"").append(minStrValue).append('"');
      sb.append(", \"maxStrValue\": \"").append(maxStrValue).append('"');
      sb.append(", \"minStrNonCollatedValue\": \"").append(minStrNonCollated).append('"');
      sb.append(", \"maxStrNonCollatedValue\": \"").append(maxStrNonCollated).append('"');
      sb.append(", \"collation\": \"").append(collation).append('"');
      sb.append(", \"maxLength\": ").append(maxLength);
    }
    sb.append(", \"distinctValues\": ").append(distinctValues);
    sb.append(", \"nullCount\": ").append(nullCount);
    sb.append(", \"numberOfValues\": ").append(numberOfValues);
    return sb.append('}').toString();
  }

  /**
   * Two instances are equal when every field matches, including {@code fieldId} and {@code
   * numberOfValues}.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    FileColumnProperties that = (FileColumnProperties) o;
    // Primitives are compared with == to avoid boxing; nullable references with Objects.equals
    return columnOrdinal == that.columnOrdinal
        && distinctValues == that.distinctValues
        && nullCount == that.nullCount
        && maxLength == that.maxLength
        && Objects.equals(fieldId, that.fieldId)
        && Objects.equals(minStrValue, that.minStrValue)
        && Objects.equals(maxStrValue, that.maxStrValue)
        && Objects.equals(collation, that.collation)
        && Objects.equals(minStrNonCollated, that.minStrNonCollated)
        && Objects.equals(maxStrNonCollated, that.maxStrNonCollated)
        && Objects.equals(minIntValue, that.minIntValue)
        && Objects.equals(maxIntValue, that.maxIntValue)
        && Objects.equals(minRealValue, that.minRealValue)
        && Objects.equals(maxRealValue, that.maxRealValue)
        && Objects.equals(numberOfValues, that.numberOfValues);
  }

  /** Hashes the same set of fields that {@link #equals(Object)} compares. */
  @Override
  public int hashCode() {
    return Objects.hash(
        columnOrdinal,
        fieldId,
        minStrValue,
        maxStrValue,
        collation,
        minStrNonCollated,
        maxStrNonCollated,
        minIntValue,
        maxIntValue,
        minRealValue,
        maxRealValue,
        distinctValues,
        nullCount,
        numberOfValues,
        maxLength);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy