All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.snowflake.ingest.streaming.internal.RowBufferStats Maven / Gradle / Ivy

There is a newer version: 3.0.1
Show newest version
/*
 * Copyright (c) 2021 Snowflake Computing Inc. All rights reserved.
 */

package net.snowflake.ingest.streaming.internal;

import static net.snowflake.ingest.utils.Constants.EP_NDV_UNKNOWN;

import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import net.snowflake.ingest.utils.ErrorCode;
import net.snowflake.ingest.utils.SFException;

/** Keeps track of the active EP stats, used to generate a file EP info */
class RowBufferStats {

  private final int ordinal;
  private byte[] currentMinStrValue;
  private byte[] currentMaxStrValue;
  private BigInteger currentMinIntValue;
  private BigInteger currentMaxIntValue;
  private Double currentMinRealValue;
  private Double currentMaxRealValue;
  private long currentNullCount;
  // for binary or string columns
  private long currentMaxLength;
  private final String collationDefinitionString;
  /** Display name is required for the registration endpoint */
  private final String columnDisplayName;

  /** Creates empty stats */
  RowBufferStats(String columnDisplayName, String collationDefinitionString, int ordinal) {
    this.columnDisplayName = columnDisplayName;
    this.collationDefinitionString = collationDefinitionString;
    this.ordinal = ordinal;
    reset();
  }

  RowBufferStats(String columnDisplayName) {
    this(columnDisplayName, null, -1);
  }

  void reset() {
    this.currentMaxStrValue = null;
    this.currentMinStrValue = null;
    this.currentMaxIntValue = null;
    this.currentMinIntValue = null;
    this.currentMaxRealValue = null;
    this.currentMinRealValue = null;
    this.currentNullCount = 0;
    this.currentMaxLength = 0;
  }

  /** Create new statistics for the same column, with all calculated values set to empty */
  RowBufferStats forkEmpty() {
    return new RowBufferStats(
        this.getColumnDisplayName(), this.getCollationDefinitionString(), this.getOrdinal());
  }

  // TODO performance test this vs in place update
  static RowBufferStats getCombinedStats(RowBufferStats left, RowBufferStats right) {
    if (!Objects.equals(left.getCollationDefinitionString(), right.collationDefinitionString)) {
      throw new SFException(
          ErrorCode.INVALID_COLLATION_STRING,
          "Tried to combine stats for different collations",
          String.format(
              "left=%s, right=%s",
              left.getCollationDefinitionString(), right.getCollationDefinitionString()));
    }
    RowBufferStats combined =
        new RowBufferStats(
            left.columnDisplayName, left.getCollationDefinitionString(), left.getOrdinal());

    if (left.currentMinIntValue != null) {
      combined.addIntValue(left.currentMinIntValue);
      combined.addIntValue(left.currentMaxIntValue);
    }

    if (right.currentMinIntValue != null) {
      combined.addIntValue(right.currentMinIntValue);
      combined.addIntValue(right.currentMaxIntValue);
    }

    if (left.currentMinStrValue != null) {
      combined.addBinaryValue(left.currentMinStrValue);
      combined.addBinaryValue(left.currentMaxStrValue);
    }

    if (right.currentMinStrValue != null) {
      combined.addBinaryValue(right.currentMinStrValue);
      combined.addBinaryValue(right.currentMaxStrValue);
    }

    if (left.currentMinRealValue != null) {
      combined.addRealValue(left.currentMinRealValue);
      combined.addRealValue(left.currentMaxRealValue);
    }

    if (right.currentMinRealValue != null) {
      combined.addRealValue(right.currentMinRealValue);
      combined.addRealValue(right.currentMaxRealValue);
    }

    combined.currentNullCount = left.currentNullCount + right.currentNullCount;
    combined.currentMaxLength = Math.max(left.currentMaxLength, right.currentMaxLength);

    return combined;
  }

  void addStrValue(String value) {
    addBinaryValue(value.getBytes(StandardCharsets.UTF_8));
  }

  void addBinaryValue(byte[] valueBytes) {
    this.setCurrentMaxLength(valueBytes.length);

    // Check if new min/max string
    if (this.currentMinStrValue == null) {
      this.currentMinStrValue = valueBytes;
      this.currentMaxStrValue = valueBytes;
    } else {
      // Check if the input is less than the existing min
      if (compareUnsigned(currentMinStrValue, valueBytes) > 0) {
        this.currentMinStrValue = valueBytes;
        // Check if the input is more than the existing max
      } else if (compareUnsigned(currentMaxStrValue, valueBytes) < 0) {
        this.currentMaxStrValue = valueBytes;
      }
    }
  }

  byte[] getCurrentMinStrValue() {
    return currentMinStrValue;
  }

  byte[] getCurrentMaxStrValue() {
    return currentMaxStrValue;
  }

  void addIntValue(BigInteger value) {
    // Set new min/max value
    if (this.currentMinIntValue == null) {
      this.currentMinIntValue = value;
      this.currentMaxIntValue = value;
    } else if (this.currentMinIntValue.compareTo(value) > 0) {
      this.currentMinIntValue = value;
    } else if (this.currentMaxIntValue.compareTo(value) < 0) {
      this.currentMaxIntValue = value;
    }
  }

  BigInteger getCurrentMinIntValue() {
    return currentMinIntValue;
  }

  BigInteger getCurrentMaxIntValue() {
    return currentMaxIntValue;
  }

  void addRealValue(Double value) {
    // Set new min/max value
    if (this.currentMinRealValue == null) {
      this.currentMinRealValue = value;
      this.currentMaxRealValue = value;
    } else if (this.currentMinRealValue.compareTo(value) > 0) {
      this.currentMinRealValue = value;
    } else if (this.currentMaxRealValue.compareTo(value) < 0) {
      this.currentMaxRealValue = value;
    }
  }

  Double getCurrentMinRealValue() {
    return currentMinRealValue;
  }

  Double getCurrentMaxRealValue() {
    return currentMaxRealValue;
  }

  void incCurrentNullCount() {
    this.currentNullCount += 1;
  }

  long getCurrentNullCount() {
    return currentNullCount;
  }

  void setCurrentMaxLength(long currentMaxLength) {
    if (currentMaxLength > this.currentMaxLength) {
      this.currentMaxLength = currentMaxLength;
    }
  }

  long getCurrentMaxLength() {
    return currentMaxLength;
  }

  /**
   * Returns the number of distinct values (NDV). A value of -1 means the number is unknown
   *
   * @return -1 indicating the NDV is unknown
   */
  long getDistinctValues() {
    return EP_NDV_UNKNOWN;
  }

  String getCollationDefinitionString() {
    return collationDefinitionString;
  }

  String getColumnDisplayName() {
    return columnDisplayName;
  }

  public int getOrdinal() {
    return ordinal;
  }

  /**
   * Compares two byte arrays lexicographically. If the two arrays share a common prefix then the
   * lexicographic comparison is the result of comparing two elements, as if by Byte.compare(byte,
   * byte), at an index within the respective arrays that is the prefix length. Otherwise, one array
   * is a proper prefix of the other and, lexicographic comparison is the result of comparing the
   * two array lengths.
   *
   * @param a the first array to compare
   * @param b the second array to compare
   * @return the value 0 if the first and second array are equal and contain the same elements in
   *     the same order; a value less than 0 if the first array is lexicographically less than the
   *     second array; and a value greater than 0 if the first array is lexicographically greater
   *     than the second array
   */
  static int compareUnsigned(byte[] a, byte[] b) {
    if (a == b) return 0;

    for (int mismatchIdx = 0; mismatchIdx < Math.min(a.length, b.length); mismatchIdx++) {
      if (a[mismatchIdx] != b[mismatchIdx]) {
        return Byte.toUnsignedInt(a[mismatchIdx]) - Byte.toUnsignedInt(b[mismatchIdx]);
      }
    }

    return a.length - b.length;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy