parquet.column.statistics.Statistics Maven / Gradle / Ivy
/**
* Copyright 2012 Twitter, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package parquet.column.statistics;
import parquet.column.UnknownColumnTypeException;
import parquet.io.api.Binary;
import parquet.schema.PrimitiveType.PrimitiveTypeName;
import java.util.Arrays;
/**
* Statistics class to keep track of statistics in parquet pages and column chunks
*
* @author Katya Gonina
*/
public abstract class Statistics {
private boolean firstValueAccountedFor;
private long num_nulls;
public Statistics() {
firstValueAccountedFor = false;
num_nulls = 0;
}
/**
* Returns the typed statistics object based on the passed type parameter
* @param type PrimitiveTypeName type of the column
* @return instance of a typed statistics class
*/
public static Statistics getStatsBasedOnType(PrimitiveTypeName type) {
switch(type) {
case INT32:
return new IntStatistics();
case INT64:
return new LongStatistics();
case FLOAT:
return new FloatStatistics();
case DOUBLE:
return new DoubleStatistics();
case BOOLEAN:
return new BooleanStatistics();
case BINARY:
return new BinaryStatistics();
case INT96:
return new BinaryStatistics();
case FIXED_LEN_BYTE_ARRAY:
return new BinaryStatistics();
default:
throw new UnknownColumnTypeException(type);
}
}
/**
* updates statistics min and max using the passed value
* @param value value to use to update min and max
*/
public void updateStats(int value) {
throw new UnsupportedOperationException();
}
/**
* updates statistics min and max using the passed value
* @param value value to use to update min and max
*/
public void updateStats(long value) {
throw new UnsupportedOperationException();
}
/**
* updates statistics min and max using the passed value
* @param value value to use to update min and max
*/
public void updateStats(float value) {
throw new UnsupportedOperationException();
}
/**
* updates statistics min and max using the passed value
* @param value value to use to update min and max
*/
public void updateStats(double value) {
throw new UnsupportedOperationException();
}
/**
* updates statistics min and max using the passed value
* @param value value to use to update min and max
*/
public void updateStats(boolean value) {
throw new UnsupportedOperationException();
}
/**
* updates statistics min and max using the passed value
* @param value value to use to update min and max
*/
public void updateStats(Binary value) {
throw new UnsupportedOperationException();
}
/**
* Equality comparison method to compare two statistics objects.
* @param stats Statistics object to compare against
* @return true if objects are equal, false otherwise
*/
public boolean equals(Statistics stats) {
return Arrays.equals(stats.getMaxBytes(), this.getMaxBytes()) &&
Arrays.equals(stats.getMinBytes(), this.getMinBytes()) &&
stats.getNumNulls() == this.getNumNulls();
}
/**
* Hash code for the statistics object
* @return hash code int
*/
public int hashCode() {
return 31 * Arrays.hashCode(getMaxBytes()) + 17 * Arrays.hashCode(getMinBytes()) + Long.valueOf(this.getNumNulls()).hashCode();
}
/**
* Method to merge this statistics object with the object passed
* as parameter. Merging keeps the smallest of min values, largest of max
* values and combines the number of null counts.
* @param stats Statistics object to merge with
*/
public void mergeStatistics(Statistics stats) {
if (stats.isEmpty()) return;
if (this.getClass() == stats.getClass()) {
incrementNumNulls(stats.getNumNulls());
mergeStatisticsMinMax(stats);
} else {
throw new StatisticsClassException(this.getClass().toString(), stats.getClass().toString());
}
}
/**
* Abstract method to merge this statistics min and max with the values
* of the parameter object. Does not do any checks, only called internally.
* @param stats Statistics object to merge with
*/
abstract protected void mergeStatisticsMinMax(Statistics stats);
/**
* Abstract method to set min and max values from byte arrays.
* @param minBytes byte array to set the min value to
* @param maxBytes byte array to set the max value to
*/
abstract public void setMinMaxFromBytes(byte[] minBytes, byte[] maxBytes);
/**
* Abstract method to return the max value as a byte array
* @return byte array corresponding to the max value
*/
abstract public byte[] getMaxBytes();
/**
* Abstract method to return the min value as a byte array
* @return byte array corresponding to the min value
*/
abstract public byte[] getMinBytes();
/**
* toString() to display min, max, num_nulls in a string
*/
abstract public String toString();
/**
* Increments the null count by one
*/
public void incrementNumNulls() {
num_nulls++ ;
}
/**
* Increments the null count by the parameter value
* @param increment value to increment the null count by
*/
public void incrementNumNulls(long increment) {
num_nulls += increment ;
}
/**
* Returns the null count
* @return null count
*/
public long getNumNulls() {
return num_nulls;
}
/**
* Sets the number of nulls to the parameter value
* @param nulls null count to set the count to
*/
public void setNumNulls(long nulls) {
num_nulls = nulls;
}
/**
* Returns a boolean specifying if the Statistics object is empty,
* i.e does not contain valid statistics for the page/column yet
* @return true if object is empty, false otherwise
*/
public boolean isEmpty() {
return !firstValueAccountedFor;
}
protected void markAsNotEmpty() {
firstValueAccountedFor = true;
}
}