io.trino.parquet.reader.decoders.ValueDecoders Maven / Gradle / Ivy
Show all versions of trino-parquet Show documentation
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.parquet.reader.decoders;
import io.airlift.slice.Slice;
import io.trino.parquet.ParquetEncoding;
import io.trino.parquet.PrimitiveField;
import io.trino.parquet.reader.SimpleSliceInputStream;
import io.trino.parquet.reader.flat.BinaryBuffer;
import io.trino.spi.TrinoException;
import io.trino.spi.type.CharType;
import io.trino.spi.type.DecimalConversions;
import io.trino.spi.type.DecimalType;
import io.trino.spi.type.Decimals;
import io.trino.spi.type.Int128;
import io.trino.spi.type.TimeType;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.TimestampWithTimeZoneType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.values.ValuesReader;
import org.apache.parquet.io.ParquetDecodingException;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import org.joda.time.DateTimeZone;
import static com.google.common.base.Preconditions.checkArgument;
import static io.trino.parquet.ParquetEncoding.BYTE_STREAM_SPLIT;
import static io.trino.parquet.ParquetEncoding.DELTA_BYTE_ARRAY;
import static io.trino.parquet.ParquetEncoding.PLAIN;
import static io.trino.parquet.ParquetReaderUtils.toByteExact;
import static io.trino.parquet.ParquetReaderUtils.toShortExact;
import static io.trino.parquet.ParquetTypeUtils.checkBytesFitInShortDecimal;
import static io.trino.parquet.ParquetTypeUtils.getShortDecimalValue;
import static io.trino.parquet.ValuesType.VALUES;
import static io.trino.parquet.reader.decoders.ApacheParquetValueDecoders.BooleanApacheParquetValueDecoder;
import static io.trino.parquet.reader.decoders.ApacheParquetValueDecoders.DoubleApacheParquetValueDecoder;
import static io.trino.parquet.reader.decoders.ApacheParquetValueDecoders.FloatApacheParquetValueDecoder;
import static io.trino.parquet.reader.decoders.BooleanPlainValueDecoders.createBooleanPlainValueDecoder;
import static io.trino.parquet.reader.decoders.DeltaBinaryPackedDecoders.DeltaBinaryPackedByteDecoder;
import static io.trino.parquet.reader.decoders.DeltaBinaryPackedDecoders.DeltaBinaryPackedIntDecoder;
import static io.trino.parquet.reader.decoders.DeltaBinaryPackedDecoders.DeltaBinaryPackedLongDecoder;
import static io.trino.parquet.reader.decoders.DeltaBinaryPackedDecoders.DeltaBinaryPackedShortDecoder;
import static io.trino.parquet.reader.decoders.DeltaByteArrayDecoders.BinaryDeltaByteArrayDecoder;
import static io.trino.parquet.reader.decoders.DeltaByteArrayDecoders.BoundedVarcharDeltaByteArrayDecoder;
import static io.trino.parquet.reader.decoders.DeltaByteArrayDecoders.CharDeltaByteArrayDecoder;
import static io.trino.parquet.reader.decoders.DeltaLengthByteArrayDecoders.BinaryDeltaLengthDecoder;
import static io.trino.parquet.reader.decoders.DeltaLengthByteArrayDecoders.BoundedVarcharDeltaLengthDecoder;
import static io.trino.parquet.reader.decoders.DeltaLengthByteArrayDecoders.CharDeltaLengthDecoder;
import static io.trino.parquet.reader.decoders.PlainByteArrayDecoders.BinaryPlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainByteArrayDecoders.BoundedVarcharPlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainByteArrayDecoders.CharPlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainValueDecoders.FixedLengthPlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainValueDecoders.Int96TimestampPlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainValueDecoders.IntPlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainValueDecoders.IntToBytePlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainValueDecoders.IntToShortPlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainValueDecoders.LongDecimalPlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainValueDecoders.LongPlainValueDecoder;
import static io.trino.parquet.reader.decoders.PlainValueDecoders.ShortDecimalFixedLengthByteArrayDecoder;
import static io.trino.parquet.reader.decoders.PlainValueDecoders.UuidPlainValueDecoder;
import static io.trino.spi.StandardErrorCode.INVALID_CAST_ARGUMENT;
import static io.trino.spi.block.Fixed12Block.decodeFixed12First;
import static io.trino.spi.block.Fixed12Block.decodeFixed12Second;
import static io.trino.spi.block.Fixed12Block.encodeFixed12;
import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone;
import static io.trino.spi.type.Decimals.longTenToNth;
import static io.trino.spi.type.Decimals.overflows;
import static io.trino.spi.type.Decimals.rescale;
import static io.trino.spi.type.TimeZoneKey.UTC_KEY;
import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND;
import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_SECOND;
import static io.trino.spi.type.Timestamps.MILLISECONDS_PER_SECOND;
import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MICROSECOND;
import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MILLISECOND;
import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_DAY;
import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MICROSECOND;
import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MILLISECOND;
import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_NANOSECOND;
import static io.trino.spi.type.Timestamps.round;
import static java.lang.Math.floorDiv;
import static java.lang.Math.floorMod;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
/**
* This class provides an API for creating value decoders for given fields and encodings.
*
* This class is intended to replace most of the logic contained in the ParquetEncoding enum.
*/
public final class ValueDecoders
{
// Field whose Parquet descriptor and Trino type are consulted when choosing a decoder
private final PrimitiveField field;
// Forwarded to the PLAIN and RLE boolean decoders when they are created
private final boolean vectorizedDecodingEnabled;

/**
 * Creates a decoder factory for {@code field} with vectorized decoding disabled.
 *
 * @param field the field to create decoders for, must not be null
 */
public ValueDecoders(PrimitiveField field)
{
    this(field, false);
}

/**
 * Creates a decoder factory for {@code field}.
 *
 * @param field the field to create decoders for, must not be null
 * @param vectorizedDecodingEnabled flag handed to the decoders that accept it
 * @throws NullPointerException if {@code field} is null
 */
public ValueDecoders(PrimitiveField field, boolean vectorizedDecodingEnabled)
{
    requireNonNull(field, "field is null");
    this.field = field;
    this.vectorizedDecodingEnabled = vectorizedDecodingEnabled;
}
/**
 * Creates the decoder used for double values.
 * <p>
 * PLAIN pages are read with {@code LongPlainValueDecoder}; BYTE_STREAM_SPLIT pages are read
 * through the Apache Parquet reader returned by {@code getApacheParquetReader}.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getDoubleDecoder(ParquetEncoding encoding)
{
    if (encoding == PLAIN) {
        return new LongPlainValueDecoder();
    }
    if (encoding == BYTE_STREAM_SPLIT) {
        return new DoubleApacheParquetValueDecoder(getApacheParquetReader(encoding));
    }
    throw wrongEncoding(encoding);
}
/**
 * Creates the decoder used for real (float) values.
 * <p>
 * PLAIN pages are read with {@code IntPlainValueDecoder}; BYTE_STREAM_SPLIT pages are read
 * through the Apache Parquet reader returned by {@code getApacheParquetReader}.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getRealDecoder(ParquetEncoding encoding)
{
    if (encoding == PLAIN) {
        return new IntPlainValueDecoder();
    }
    if (encoding == BYTE_STREAM_SPLIT) {
        return new FloatApacheParquetValueDecoder(getApacheParquetReader(encoding));
    }
    throw wrongEncoding(encoding);
}
/**
 * Creates a short decimal decoder, dispatching on the Parquet physical type of the field.
 * <p>
 * The column must carry a decimal logical type annotation. INT64, INT32,
 * FIXED_LEN_BYTE_ARRAY and BINARY are supported; any other physical type results in
 * the exception built by {@code wrongEncoding}.
 *
 * @throws IllegalArgumentException if the column is not annotated as a decimal
 */
public ValueDecoder getShortDecimalDecoder(ParquetEncoding encoding)
{
    PrimitiveType parquetType = field.getDescriptor().getPrimitiveType();
    boolean annotatedAsDecimal = parquetType.getLogicalTypeAnnotation() instanceof DecimalLogicalTypeAnnotation;
    checkArgument(annotatedAsDecimal, "Column %s is not annotated as a decimal", field);
    switch (parquetType.getPrimitiveTypeName()) {
        case INT64:
            return getLongDecoder(encoding);
        case INT32:
            return getInt32ToLongDecoder(encoding);
        case FIXED_LEN_BYTE_ARRAY:
            return getFixedWidthShortDecimalDecoder(encoding);
        case BINARY:
            return getBinaryShortDecimalDecoder(encoding);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a long decimal decoder, dispatching on the Parquet physical type of the field.
 * <p>
 * Only FIXED_LEN_BYTE_ARRAY and BINARY are supported; any other physical type results in
 * the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getLongDecimalDecoder(ParquetEncoding encoding)
{
    PrimitiveType parquetType = field.getDescriptor().getPrimitiveType();
    switch (parquetType.getPrimitiveTypeName()) {
        case FIXED_LEN_BYTE_ARRAY:
            return getFixedWidthLongDecimalDecoder(encoding);
        case BINARY:
            return getBinaryLongDecimalDecoder(encoding);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a UUID decoder for PLAIN or DELTA_BYTE_ARRAY encoded pages.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getUuidDecoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            return new UuidPlainValueDecoder();
        case DELTA_BYTE_ARRAY:
            return getDeltaUuidDecoder(encoding);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a long decoder for PLAIN or DELTA_BINARY_PACKED encoded pages.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getLongDecoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            return new LongPlainValueDecoder();
        case DELTA_BINARY_PACKED:
            return new DeltaBinaryPackedLongDecoder();
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates an int decoder, dispatching on the Parquet physical type of the field.
 * <p>
 * INT64 columns go through {@code getInt64ToIntDecoder}, INT32 columns through
 * {@code getInt32Decoder}; any other physical type results in the exception built by
 * {@code wrongEncoding}.
 */
public ValueDecoder getIntDecoder(ParquetEncoding encoding)
{
    PrimitiveType parquetType = field.getDescriptor().getPrimitiveType();
    switch (parquetType.getPrimitiveTypeName()) {
        case INT64:
            return getInt64ToIntDecoder(encoding);
        case INT32:
            return getInt32Decoder(encoding);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a short decoder, dispatching on the Parquet physical type of the field.
 * <p>
 * INT64 columns go through {@code getInt64ToShortDecoder}, INT32 columns through
 * {@code getInt32ToShortDecoder}; any other physical type results in the exception
 * built by {@code wrongEncoding}.
 */
public ValueDecoder getShortDecoder(ParquetEncoding encoding)
{
    PrimitiveType parquetType = field.getDescriptor().getPrimitiveType();
    switch (parquetType.getPrimitiveTypeName()) {
        case INT64:
            return getInt64ToShortDecoder(encoding);
        case INT32:
            return getInt32ToShortDecoder(encoding);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a byte decoder, dispatching on the Parquet physical type of the field.
 * <p>
 * INT64 columns go through {@code getInt64ToByteDecoder}, INT32 columns through
 * {@code getInt32ToByteDecoder}; any other physical type results in the exception
 * built by {@code wrongEncoding}.
 */
public ValueDecoder getByteDecoder(ParquetEncoding encoding)
{
    PrimitiveType parquetType = field.getDescriptor().getPrimitiveType();
    switch (parquetType.getPrimitiveTypeName()) {
        case INT64:
            return getInt64ToByteDecoder(encoding);
        case INT32:
            return getInt32ToByteDecoder(encoding);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a boolean decoder for PLAIN, RLE or BIT_PACKED encoded pages.
 * <p>
 * The PLAIN and RLE decoders receive the {@code vectorizedDecodingEnabled} flag of this factory.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getBooleanDecoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            return createBooleanPlainValueDecoder(vectorizedDecodingEnabled);
        case RLE:
            return new RleBitPackingHybridBooleanDecoder(vectorizedDecodingEnabled);
        case BIT_PACKED:
            // BIT_PACKED is a deprecated encoding which should not be used anymore as per
            // https://github.com/apache/parquet-format/blob/master/Encodings.md#bit-packed-deprecated-bit_packed--4
            // An unoptimized decoder for this encoding is provided here for compatibility with old files or non-compliant writers
            return new BooleanApacheParquetValueDecoder(getApacheParquetReader(encoding));
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates the decoder for INT96 timestamps, which is only available for PLAIN encoding.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getInt96TimestampDecoder(ParquetEncoding encoding)
{
    if (encoding != PLAIN) {
        throw wrongEncoding(encoding);
    }
    // INT96 type has been deprecated as per https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0
    // However, this encoding is still commonly encountered in parquet files.
    return new Int96TimestampPlainValueDecoder();
}
/**
 * Creates a short decimal decoder for fixed width columns.
 * <p>
 * PLAIN pages use {@code ShortDecimalFixedLengthByteArrayDecoder} built from the column descriptor;
 * DELTA_BYTE_ARRAY pages use {@code getDeltaFixedWidthShortDecimalDecoder}.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getFixedWidthShortDecimalDecoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            return new ShortDecimalFixedLengthByteArrayDecoder(field.getDescriptor());
        case DELTA_BYTE_ARRAY:
            return getDeltaFixedWidthShortDecimalDecoder(encoding);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a long decimal decoder for fixed width columns.
 * <p>
 * PLAIN pages use {@code LongDecimalPlainValueDecoder} sized by the column's type length;
 * DELTA_BYTE_ARRAY pages use {@code getDeltaFixedWidthLongDecimalDecoder}.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getFixedWidthLongDecimalDecoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            int typeLength = field.getDescriptor().getPrimitiveType().getTypeLength();
            return new LongDecimalPlainValueDecoder(typeLength);
        case DELTA_BYTE_ARRAY:
            return getDeltaFixedWidthLongDecimalDecoder(encoding);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a binary decoder for fixed width columns.
 * <p>
 * PLAIN pages use {@code FixedLengthPlainValueDecoder} sized by the column's type length;
 * DELTA_BYTE_ARRAY pages use {@code BinaryDeltaByteArrayDecoder}.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getFixedWidthBinaryDecoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            int typeLength = field.getDescriptor().getPrimitiveType().getTypeLength();
            return new FixedLengthPlainValueDecoder(typeLength);
        case DELTA_BYTE_ARRAY:
            return new BinaryDeltaByteArrayDecoder();
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a binary decoder for a field whose Trino type is a bounded varchar.
 * <p>
 * Supports PLAIN, DELTA_LENGTH_BYTE_ARRAY and DELTA_BYTE_ARRAY; any other encoding results in
 * the exception built by {@code wrongEncoding}.
 *
 * @throws IllegalArgumentException if the Trino type is not a bounded varchar
 */
public ValueDecoder getBoundedVarcharBinaryDecoder(ParquetEncoding encoding)
{
    Type trinoType = field.getType();
    boolean boundedVarchar = trinoType instanceof VarcharType candidate && !candidate.isUnbounded();
    checkArgument(boundedVarchar, "Trino type %s is not a bounded varchar", trinoType);
    // The cast cannot fail: the check above guarantees a VarcharType
    VarcharType varcharType = (VarcharType) trinoType;
    switch (encoding) {
        case PLAIN:
            return new BoundedVarcharPlainValueDecoder(varcharType);
        case DELTA_LENGTH_BYTE_ARRAY:
            return new BoundedVarcharDeltaLengthDecoder(varcharType);
        case DELTA_BYTE_ARRAY:
            return new BoundedVarcharDeltaByteArrayDecoder(varcharType);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a binary decoder for a field whose Trino type is a char.
 * <p>
 * Supports PLAIN, DELTA_LENGTH_BYTE_ARRAY and DELTA_BYTE_ARRAY; any other encoding results in
 * the exception built by {@code wrongEncoding}.
 *
 * @throws IllegalArgumentException if the Trino type is not a char
 */
public ValueDecoder getCharBinaryDecoder(ParquetEncoding encoding)
{
    Type trinoType = field.getType();
    checkArgument(trinoType instanceof CharType, "Trino type %s is not a char", trinoType);
    // The cast cannot fail: the check above guarantees a CharType
    CharType charType = (CharType) trinoType;
    switch (encoding) {
        case PLAIN:
            return new CharPlainValueDecoder(charType);
        case DELTA_LENGTH_BYTE_ARRAY:
            return new CharDeltaLengthDecoder(charType);
        case DELTA_BYTE_ARRAY:
            return new CharDeltaByteArrayDecoder(charType);
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a binary decoder for PLAIN, DELTA_LENGTH_BYTE_ARRAY or DELTA_BYTE_ARRAY encoded pages.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getBinaryDecoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            return new BinaryPlainValueDecoder();
        case DELTA_LENGTH_BYTE_ARRAY:
            return new BinaryDeltaLengthDecoder();
        case DELTA_BYTE_ARRAY:
            return new BinaryDeltaByteArrayDecoder();
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates an int decoder for PLAIN or DELTA_BINARY_PACKED encoded pages.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
public ValueDecoder getInt32Decoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            return new IntPlainValueDecoder();
        case DELTA_BINARY_PACKED:
            return new DeltaBinaryPackedIntDecoder();
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a short decoder for PLAIN or DELTA_BINARY_PACKED encoded pages.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
private ValueDecoder getInt32ToShortDecoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            return new IntToShortPlainValueDecoder();
        case DELTA_BINARY_PACKED:
            return new DeltaBinaryPackedShortDecoder();
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a byte decoder for PLAIN or DELTA_BINARY_PACKED encoded pages.
 * Any other encoding results in the exception built by {@code wrongEncoding}.
 */
private ValueDecoder getInt32ToByteDecoder(ParquetEncoding encoding)
{
    switch (encoding) {
        case PLAIN:
            return new IntToBytePlainValueDecoder();
        case DELTA_BINARY_PACKED:
            return new DeltaBinaryPackedByteDecoder();
        default:
            throw wrongEncoding(encoding);
    }
}
/**
 * Creates a decoder that reads values with the long decoder for {@code encoding} and
 * multiplies each decoded value in place by {@code PICOSECONDS_PER_MICROSECOND}.
 */
public ValueDecoder getTimeMicrosDecoder(ParquetEncoding encoding)
{
    return new InlineTransformDecoder<>(
            getLongDecoder(encoding),
            (values, offset, length) -> {
                int end = offset + length;
                for (int position = offset; position < end; position++) {
                    long micros = values[position];
                    values[position] = micros * PICOSECONDS_PER_MICROSECOND;
                }
            });
}
/**
 * Creates a decoder that reads millisecond values through {@code getInt32ToLongDecoder}
 * and converts them in place to picoseconds.
 * <p>
 * When the precision of the field's {@code TimeType} is below 3, each value is first rounded
 * to that precision and the result is taken modulo {@code PICOSECONDS_PER_DAY}.
 */
public ValueDecoder getTimeMillisDecoder(ParquetEncoding encoding)
{
    int precision = ((TimeType) field.getType()).getPrecision();
    if (precision >= 3) {
        // Millisecond precision is retained, plain unit conversion is enough
        return new InlineTransformDecoder<>(
                getInt32ToLongDecoder(encoding),
                (values, offset, length) -> {
                    int end = offset + length;
                    for (int position = offset; position < end; position++) {
                        values[position] = values[position] * PICOSECONDS_PER_MILLISECOND;
                    }
                });
    }

    int digitsToRound = 3 - precision;
    return new InlineTransformDecoder<>(
            getInt32ToLongDecoder(encoding),
            (values, offset, length) -> {
                // decoded values are millis, round to lower precision and convert to picos
                // modulo PICOSECONDS_PER_DAY is applied for the case when a value is rounded up to PICOSECONDS_PER_DAY
                int end = offset + length;
                for (int position = offset; position < end; position++) {
                    long roundedMillis = round(values[position], digitsToRound);
                    values[position] = (roundedMillis * PICOSECONDS_PER_MILLISECOND) % PICOSECONDS_PER_DAY;
                }
            });
}
/**
 * Creates a decoder that converts INT96 timestamps into short timestamps (epoch micros).
 * <p>
 * Each value is read as an (epochSeconds, nanosOfSecond) pair. Unless {@code timeZone} is
 * {@code DateTimeZone.UTC}, the seconds are shifted with {@code convertUTCToLocal}. The nanos are
 * rounded to the precision of the field's timestamp type before being added as micros.
 *
 * @throws IllegalArgumentException if the Trino type of the field is not a short timestamp
 */
public ValueDecoder getInt96ToShortTimestampDecoder(ParquetEncoding encoding, DateTimeZone timeZone)
{
    checkArgument(
            field.getType() instanceof TimestampType timestampType && timestampType.isShort(),
            "Trino type %s is not a short timestamp",
            field.getType());
    int precision = ((TimestampType) field.getType()).getPrecision();
    int digitsToRound = 9 - precision;
    ValueDecoder delegate = getInt96TimestampDecoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(long[] values, int offset, int length)
        {
            // Three ints per decoded INT96 value
            int[] fixed12Values = new int[length * 3];
            delegate.read(fixed12Values, 0, length);
            boolean utc = timeZone == DateTimeZone.UTC;
            for (int position = 0; position < length; position++) {
                long epochSeconds = decodeFixed12First(fixed12Values, position);
                long epochMicros = utc
                        ? epochSeconds * MICROSECONDS_PER_SECOND
                        : timeZone.convertUTCToLocal(epochSeconds * MILLISECONDS_PER_SECOND) * MICROSECONDS_PER_MILLISECOND;
                int roundedNanos = (int) round(decodeFixed12Second(fixed12Values, position), digitsToRound);
                values[offset + position] = epochMicros + roundedNanos / NANOSECONDS_PER_MICROSECOND;
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder that rewrites INT96 timestamps in place as long timestamps,
 * i.e. (epochMicros, picosOfMicro) pairs.
 * <p>
 * Unless {@code timeZone} is {@code DateTimeZone.UTC}, the seconds are shifted with
 * {@code convertUTCToLocal}. For precisions below 9 the nanos are rounded to that precision.
 *
 * @throws IllegalArgumentException if the Trino type of the field is not a long timestamp
 */
public ValueDecoder getInt96ToLongTimestampDecoder(ParquetEncoding encoding, DateTimeZone timeZone)
{
    checkArgument(
            field.getType() instanceof TimestampType timestampType && !timestampType.isShort(),
            "Trino type %s is not a long timestamp",
            field.getType());
    int precision = ((TimestampType) field.getType()).getPrecision();
    return new InlineTransformDecoder<>(
            getInt96TimestampDecoder(encoding),
            (values, offset, length) -> {
                int end = offset + length;
                for (int position = offset; position < end; position++) {
                    long seconds = decodeFixed12First(values, position);
                    int nanos = decodeFixed12Second(values, position);
                    if (timeZone != DateTimeZone.UTC) {
                        long localMillis = timeZone.convertUTCToLocal(seconds * MILLISECONDS_PER_SECOND);
                        seconds = localMillis / MILLISECONDS_PER_SECOND;
                    }
                    if (precision < 9) {
                        nanos = (int) round(nanos, 9 - precision);
                    }
                    long epochMicros = seconds * MICROSECONDS_PER_SECOND + (nanos / NANOSECONDS_PER_MICROSECOND);
                    int picosOfMicro = (nanos % NANOSECONDS_PER_MICROSECOND) * PICOSECONDS_PER_NANOSECOND;
                    encodeFixed12(epochMicros, picosOfMicro, values, position);
                }
            });
}
/**
 * Creates a decoder that converts INT96 timestamps into short timestamps with time zone.
 * <p>
 * Each (epochSeconds, nanosOfSecond) pair is reduced to epoch millis, with the nanos divided
 * down to whole millis, and packed together with {@code UTC_KEY}.
 *
 * @throws IllegalArgumentException if the Trino type of the field is not a short timestamp with time zone
 */
public ValueDecoder getInt96ToShortTimestampWithTimeZoneDecoder(ParquetEncoding encoding)
{
    checkArgument(
            field.getType() instanceof TimestampWithTimeZoneType timestampWithTimeZoneType && timestampWithTimeZoneType.isShort(),
            "Trino type %s is not a short timestamp with timezone",
            field.getType());
    ValueDecoder delegate = getInt96TimestampDecoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(long[] values, int offset, int length)
        {
            // Three ints per decoded INT96 value
            int[] fixed12Values = new int[length * 3];
            delegate.read(fixed12Values, 0, length);
            for (int position = 0; position < length; position++) {
                long seconds = decodeFixed12First(fixed12Values, position);
                int nanos = decodeFixed12Second(fixed12Values, position);
                long millisOfSecond = nanos / NANOSECONDS_PER_MILLISECOND;
                long utcMillis = seconds * MILLISECONDS_PER_SECOND + millisOfSecond;
                values[offset + position] = packDateTimeWithZone(utcMillis, UTC_KEY);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder that rewrites INT96 timestamps in place as long timestamps with time zone,
 * i.e. (epoch millis packed with {@code UTC_KEY}, picosOfMilli) pairs.
 * <p>
 * For precisions below 9 the nanos are rounded to that precision first.
 *
 * @throws IllegalArgumentException if the Trino type of the field is not a long timestamp with time zone
 */
public ValueDecoder getInt96ToLongTimestampWithTimeZoneDecoder(ParquetEncoding encoding)
{
    checkArgument(
            field.getType() instanceof TimestampWithTimeZoneType timestampType && !timestampType.isShort(),
            "Trino type %s is not a long timestamp",
            field.getType());
    int precision = ((TimestampWithTimeZoneType) field.getType()).getPrecision();
    return new InlineTransformDecoder<>(
            getInt96TimestampDecoder(encoding),
            (values, offset, length) -> {
                int end = offset + length;
                for (int position = offset; position < end; position++) {
                    long seconds = decodeFixed12First(values, position);
                    int nanos = decodeFixed12Second(values, position);
                    if (precision < 9) {
                        nanos = (int) round(nanos, 9 - precision);
                    }
                    long utcMillis = seconds * MILLISECONDS_PER_SECOND + (nanos / NANOSECONDS_PER_MILLISECOND);
                    long packedMillisUtc = packDateTimeWithZone(utcMillis, UTC_KEY);
                    int picosOfMilli = (nanos % NANOSECONDS_PER_MILLISECOND) * PICOSECONDS_PER_NANOSECOND;
                    encodeFixed12(packedMillisUtc, picosOfMilli, values, position);
                }
            });
}
/**
 * Creates a decoder that converts epoch millis in place into short timestamps (epoch micros).
 * <p>
 * For precisions below 3 the millis are rounded to that precision first. Unless {@code timeZone}
 * is {@code DateTimeZone.UTC}, the millis are shifted with {@code convertUTCToLocal}.
 *
 * @throws IllegalArgumentException if the Trino type of the field is not a short timestamp
 */
public ValueDecoder getInt64TimestampMillisToShortTimestampDecoder(ParquetEncoding encoding, DateTimeZone timeZone)
{
    checkArgument(
            field.getType() instanceof TimestampType timestampType && timestampType.isShort(),
            "Trino type %s is not a short timestamp",
            field.getType());
    int precision = ((TimestampType) field.getType()).getPrecision();
    ValueDecoder valueDecoder = getLongDecoder(encoding);
    boolean utc = timeZone == DateTimeZone.UTC;
    if (precision >= 3) {
        return new InlineTransformDecoder<>(
                valueDecoder,
                (values, offset, length) -> {
                    // decoded values are epochMillis, convert to epochMicros
                    int end = offset + length;
                    for (int position = offset; position < end; position++) {
                        long millis = utc ? values[position] : timeZone.convertUTCToLocal(values[position]);
                        values[position] = millis * MICROSECONDS_PER_MILLISECOND;
                    }
                });
    }

    int digitsToRound = 3 - precision;
    return new InlineTransformDecoder<>(
            valueDecoder,
            (values, offset, length) -> {
                // decoded values are epochMillis, round to lower precision and convert to epochMicros
                int end = offset + length;
                for (int position = offset; position < end; position++) {
                    long roundedMillis = round(values[position], digitsToRound);
                    long millis = utc ? roundedMillis : timeZone.convertUTCToLocal(roundedMillis);
                    values[position] = millis * MICROSECONDS_PER_MILLISECOND;
                }
            });
}
/**
 * Creates a decoder that converts epoch millis in place into short timestamps with time zone,
 * i.e. millis packed with {@code UTC_KEY}.
 * <p>
 * For precisions below 3 the millis are rounded to that precision before packing.
 *
 * @throws IllegalArgumentException if the Trino type of the field is not a short timestamp with time zone
 */
public ValueDecoder getInt64TimestampMillsToShortTimestampWithTimeZoneDecoder(ParquetEncoding encoding)
{
    checkArgument(
            field.getType() instanceof TimestampWithTimeZoneType timestampWithTimeZoneType && timestampWithTimeZoneType.isShort(),
            "Trino type %s is not a short timestamp",
            field.getType());
    int precision = ((TimestampWithTimeZoneType) field.getType()).getPrecision();
    ValueDecoder valueDecoder = getLongDecoder(encoding);
    if (precision >= 3) {
        return new InlineTransformDecoder<>(
                valueDecoder,
                (values, offset, length) -> {
                    // decoded values are epochMillis, convert to packed millis utc value
                    int end = offset + length;
                    for (int position = offset; position < end; position++) {
                        values[position] = packDateTimeWithZone(values[position], UTC_KEY);
                    }
                });
    }

    int digitsToRound = 3 - precision;
    return new InlineTransformDecoder<>(
            valueDecoder,
            (values, offset, length) -> {
                // decoded values are epochMillis, round to lower precision and convert to packed millis utc value
                int end = offset + length;
                for (int position = offset; position < end; position++) {
                    long roundedMillis = round(values[position], digitsToRound);
                    values[position] = packDateTimeWithZone(roundedMillis, UTC_KEY);
                }
            });
}
/**
 * Creates a decoder that converts epoch micros in place into short timestamps (epoch micros).
 * <p>
 * For precision 6 no rounding is needed: with {@code DateTimeZone.UTC} the plain long decoder is
 * returned as is, otherwise values are only shifted to local time. For lower precisions values are
 * rounded to the requested precision and then, unless the zone is UTC, shifted to local time.
 * The shift is applied to the whole-millisecond part via {@code convertUTCToLocal} and the
 * sub-millisecond micros are added back afterwards.
 *
 * @param encoding encoding of the page, forwarded to {@code getLongDecoder}
 * @param timeZone zone used to convert UTC values to local time
 * @throws IllegalArgumentException if the Trino type of the field is not a short timestamp
 */
public ValueDecoder getInt64TimestampMicrosToShortTimestampDecoder(ParquetEncoding encoding, DateTimeZone timeZone)
{
    checkArgument(
            field.getType() instanceof TimestampType timestampType && timestampType.isShort(),
            "Trino type %s is not a short timestamp",
            field.getType());
    int precision = ((TimestampType) field.getType()).getPrecision();
    ValueDecoder valueDecoder = getLongDecoder(encoding);
    if (precision == 6) {
        if (timeZone == DateTimeZone.UTC) {
            return valueDecoder;
        }
        // Previously this decoder was constructed but never returned, so execution always fell
        // through to the generic rounding decoder below
        return new InlineTransformDecoder<>(
                valueDecoder,
                (values, offset, length) -> {
                    for (int i = offset; i < offset + length; i++) {
                        long epochMicros = values[i];
                        long localMillis = timeZone.convertUTCToLocal(floorDiv(epochMicros, MICROSECONDS_PER_MILLISECOND));
                        values[i] = (localMillis * MICROSECONDS_PER_MILLISECOND) + floorMod(epochMicros, MICROSECONDS_PER_MILLISECOND);
                    }
                });
    }
    return new InlineTransformDecoder<>(
            valueDecoder,
            (values, offset, length) -> {
                // decoded values are epochMicros, round to lower precision
                for (int i = offset; i < offset + length; i++) {
                    long epochMicros = round(values[i], 6 - precision);
                    if (timeZone == DateTimeZone.UTC) {
                        values[i] = epochMicros;
                    }
                    else {
                        long localMillis = timeZone.convertUTCToLocal(floorDiv(epochMicros, MICROSECONDS_PER_MILLISECOND));
                        values[i] = (localMillis * MICROSECONDS_PER_MILLISECOND) + floorMod(epochMicros, MICROSECONDS_PER_MILLISECOND);
                    }
                }
            });
}
/**
 * Creates a decoder that converts epoch micros in place into short timestamps with time zone.
 * <p>
 * Each value is rounded to the precision of the field's type, divided down to millis and
 * packed together with {@code UTC_KEY}.
 *
 * @throws IllegalArgumentException if the Trino type of the field is not a short timestamp with time zone
 */
public ValueDecoder getInt64TimestampMicrosToShortTimestampWithTimeZoneDecoder(ParquetEncoding encoding)
{
    checkArgument(
            field.getType() instanceof TimestampWithTimeZoneType timestampWithTimeZoneType && timestampWithTimeZoneType.isShort(),
            "Trino type %s is not a short timestamp",
            field.getType());
    int precision = ((TimestampWithTimeZoneType) field.getType()).getPrecision();
    int digitsToRound = 6 - precision;
    return new InlineTransformDecoder<>(
            getLongDecoder(encoding),
            (values, offset, length) -> {
                // decoded values are epochMicros, round to lower precision and convert to packed millis utc value
                int end = offset + length;
                for (int position = offset; position < end; position++) {
                    long roundedMicros = round(values[position], digitsToRound);
                    values[position] = packDateTimeWithZone(roundedMicros / MICROSECONDS_PER_MILLISECOND, UTC_KEY);
                }
            });
}
/**
 * Creates a decoder that converts epoch nanos in place into short timestamps (epoch micros).
 * <p>
 * Each value is rounded to the precision of the field's type. With {@code DateTimeZone.UTC} the
 * result is divided down to micros; otherwise the whole-millisecond part is shifted with
 * {@code convertUTCToLocal} and the remaining micros are added back.
 *
 * @throws IllegalArgumentException if the Trino type of the field is not a short timestamp
 */
public ValueDecoder getInt64TimestampNanosToShortTimestampDecoder(ParquetEncoding encoding, DateTimeZone timeZone)
{
    checkArgument(
            field.getType() instanceof TimestampType timestampType && timestampType.isShort(),
            "Trino type %s is not a short timestamp",
            field.getType());
    int precision = ((TimestampType) field.getType()).getPrecision();
    int digitsToRound = 9 - precision;
    return new InlineTransformDecoder<>(
            getLongDecoder(encoding),
            (values, offset, length) -> {
                // decoded values are epochNanos, round to lower precision and convert to epochMicros
                int end = offset + length;
                for (int position = offset; position < end; position++) {
                    long roundedNanos = round(values[position], digitsToRound);
                    if (timeZone == DateTimeZone.UTC) {
                        values[position] = roundedNanos / NANOSECONDS_PER_MICROSECOND;
                        continue;
                    }
                    long localMillis = timeZone.convertUTCToLocal(floorDiv(roundedNanos, NANOSECONDS_PER_MILLISECOND));
                    long microsOfMilli = floorDiv(floorMod(roundedNanos, NANOSECONDS_PER_MILLISECOND), NANOSECONDS_PER_MICROSECOND);
                    values[position] = (localMillis * MICROSECONDS_PER_MILLISECOND) + microsOfMilli;
                }
            });
}
/**
 * Creates a decoder that converts epoch millis into long timestamps, written as
 * (epochMicros, 0) pairs via {@code encodeFixed12}.
 * <p>
 * Unless {@code timeZone} is {@code DateTimeZone.UTC}, the millis are shifted with
 * {@code convertUTCToLocal} before conversion.
 */
public ValueDecoder getInt64TimestampMillisToLongTimestampDecoder(ParquetEncoding encoding, DateTimeZone timeZone)
{
    ValueDecoder delegate = getLongDecoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(int[] values, int offset, int length)
        {
            long[] epochMillisBuffer = new long[length];
            delegate.read(epochMillisBuffer, 0, length);
            boolean utc = timeZone == DateTimeZone.UTC;
            // decoded values are epochMillis, convert to epochMicros
            for (int position = 0; position < length; position++) {
                long millis = utc
                        ? epochMillisBuffer[position]
                        : timeZone.convertUTCToLocal(epochMillisBuffer[position]);
                encodeFixed12(millis * MICROSECONDS_PER_MILLISECOND, 0, values, position + offset);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder that converts epoch micros into long timestamps, written as
 * (epochMicros, 0) pairs via {@code encodeFixed12}.
 * <p>
 * Unless {@code timeZone} is {@code DateTimeZone.UTC}, the whole-millisecond part is shifted
 * with {@code convertUTCToLocal} and the sub-millisecond micros are added back.
 */
public ValueDecoder getInt64TimestampMicrosToLongTimestampDecoder(ParquetEncoding encoding, DateTimeZone timeZone)
{
    ValueDecoder delegate = getLongDecoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(int[] values, int offset, int length)
        {
            long[] epochMicrosBuffer = new long[length];
            delegate.read(epochMicrosBuffer, 0, length);
            boolean utc = timeZone == DateTimeZone.UTC;
            // decoded values are epochMicros
            for (int position = 0; position < length; position++) {
                long micros = epochMicrosBuffer[position];
                if (!utc) {
                    long localMillis = timeZone.convertUTCToLocal(floorDiv(micros, MICROSECONDS_PER_MILLISECOND));
                    micros = (localMillis * MICROSECONDS_PER_MILLISECOND) + floorMod(micros, MICROSECONDS_PER_MILLISECOND);
                }
                encodeFixed12(micros, 0, values, position + offset);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder that converts epoch micros into long timestamps with time zone, written as
 * (epoch millis packed with {@code UTC_KEY}, picosOfMilli) pairs via {@code encodeFixed12}.
 */
public ValueDecoder getInt64TimestampMicrosToLongTimestampWithTimeZoneDecoder(ParquetEncoding encoding)
{
    ValueDecoder delegate = getLongDecoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(int[] values, int offset, int length)
        {
            long[] epochMicrosBuffer = new long[length];
            delegate.read(epochMicrosBuffer, 0, length);
            // decoded values are epochMicros, convert to (packed epochMillisUtc, picosOfMilli)
            for (int position = 0; position < length; position++) {
                long micros = epochMicrosBuffer[position];
                long packedMillisUtc = packDateTimeWithZone(floorDiv(micros, MICROSECONDS_PER_MILLISECOND), UTC_KEY);
                int picosOfMilli = floorMod(micros, MICROSECONDS_PER_MILLISECOND) * PICOSECONDS_PER_MICROSECOND;
                encodeFixed12(packedMillisUtc, picosOfMilli, values, position + offset);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder that converts epoch nanos into long timestamps, written as
 * (epochMicros, picos) pairs via {@code encodeFixed12}.
 * <p>
 * Unless {@code timeZone} is {@code DateTimeZone.UTC}, the whole-millisecond part is shifted
 * with {@code convertUTCToLocal} and the remaining micros are added back.
 */
public ValueDecoder getInt64TimestampNanosToLongTimestampDecoder(ParquetEncoding encoding, DateTimeZone timeZone)
{
    ValueDecoder delegate = getLongDecoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(int[] values, int offset, int length)
        {
            long[] epochNanosBuffer = new long[length];
            delegate.read(epochNanosBuffer, 0, length);
            boolean utc = timeZone == DateTimeZone.UTC;
            // decoded values are epochNanos, convert to (epochMicros, picosOfNanos)
            for (int position = 0; position < length; position++) {
                long nanos = epochNanosBuffer[position];
                int picosOfNanos = floorMod(nanos, NANOSECONDS_PER_MICROSECOND) * PICOSECONDS_PER_NANOSECOND;
                long epochMicros;
                if (utc) {
                    epochMicros = floorDiv(nanos, NANOSECONDS_PER_MICROSECOND);
                }
                else {
                    long localMillis = timeZone.convertUTCToLocal(floorDiv(nanos, NANOSECONDS_PER_MILLISECOND));
                    long microsFromNanos = floorMod(nanos, NANOSECONDS_PER_MILLISECOND) / NANOSECONDS_PER_MICROSECOND;
                    epochMicros = (localMillis * MICROSECONDS_PER_MILLISECOND) + microsFromNanos;
                }
                encodeFixed12(epochMicros, picosOfNanos, values, position + offset);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder that reads float bit patterns through {@code getRealDecoder} and
 * stores each value widened to a double, as the double's long bit pattern.
 */
public ValueDecoder getFloatToDoubleDecoder(ParquetEncoding encoding)
{
    ValueDecoder delegate = getRealDecoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(long[] values, int offset, int length)
        {
            int[] floatBits = new int[length];
            delegate.read(floatBits, 0, length);
            for (int position = 0; position < length; position++) {
                float floatValue = Float.intBitsToFloat(floatBits[position]);
                values[offset + position] = Double.doubleToLongBits(floatValue);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a long decimal decoder by wrapping the binary decoder for {@code encoding}
 * in a {@code BinaryToLongDecimalTransformDecoder}.
 */
public ValueDecoder getBinaryLongDecimalDecoder(ParquetEncoding encoding)
{
    ValueDecoder binaryDecoder = getBinaryDecoder(encoding);
    return new BinaryToLongDecimalTransformDecoder(binaryDecoder);
}
/**
 * Creates a long decimal decoder for DELTA_BYTE_ARRAY encoded fixed width columns by wrapping
 * a {@code BinaryDeltaByteArrayDecoder} in a {@code BinaryToLongDecimalTransformDecoder}.
 *
 * @throws IllegalArgumentException if {@code encoding} is not DELTA_BYTE_ARRAY, or the column is
 * not annotated as a decimal with precision above {@code Decimals.MAX_SHORT_PRECISION}
 */
public ValueDecoder getDeltaFixedWidthLongDecimalDecoder(ParquetEncoding encoding)
{
    checkArgument(encoding.equals(DELTA_BYTE_ARRAY), "encoding %s is not DELTA_BYTE_ARRAY", encoding);
    ColumnDescriptor descriptor = field.getDescriptor();
    LogicalTypeAnnotation annotation = descriptor.getPrimitiveType().getLogicalTypeAnnotation();
    boolean longDecimal = annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation
            && decimalAnnotation.getPrecision() > Decimals.MAX_SHORT_PRECISION;
    checkArgument(longDecimal, "Column %s is not a long decimal", descriptor);
    ValueDecoder binaryDecoder = new BinaryDeltaByteArrayDecoder();
    return new BinaryToLongDecimalTransformDecoder(binaryDecoder);
}
/**
 * Creates a short decimal decoder for variable length binary columns.
 * <p>
 * Values are read through the binary decoder for {@code encoding}; each one is converted with
 * {@code getShortDecimalValue}. A value longer than 8 bytes fails the read with a
 * {@code ParquetDecodingException}.
 */
public ValueDecoder getBinaryShortDecimalDecoder(ParquetEncoding encoding)
{
    ValueDecoder delegate = getBinaryDecoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(long[] values, int offset, int length)
        {
            BinaryBuffer binaryValues = new BinaryBuffer(length);
            delegate.read(binaryValues, 0, length);
            int[] valueOffsets = binaryValues.getOffsets();
            byte[] bytes = binaryValues.asSlice().byteArray();
            for (int position = 0; position < length; position++) {
                int start = valueOffsets[position];
                int size = valueOffsets[position + 1] - start;
                if (size > 8) {
                    throw new ParquetDecodingException("Unable to read BINARY type decimal of size " + size + " as a short decimal");
                }
                // No need for checkBytesFitInShortDecimal as the standard requires variable binary decimals
                // to be stored in minimum possible number of bytes
                values[offset + position] = getShortDecimalValue(bytes, start, size);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a short decimal decoder for DELTA_BYTE_ARRAY encoded fixed width columns.
 * <p>
 * When the type length exceeds {@code Long.BYTES}, only the trailing {@code Long.BYTES} bytes of
 * each value are converted; {@code checkBytesFitInShortDecimal} is called on every value
 * with the number of leading bytes that are skipped.
 *
 * @throws IllegalArgumentException if {@code encoding} is not DELTA_BYTE_ARRAY, the column is not
 * annotated as a decimal with precision up to {@code Decimals.MAX_SHORT_PRECISION}, or the type
 * length is outside 1-16
 */
public ValueDecoder getDeltaFixedWidthShortDecimalDecoder(ParquetEncoding encoding)
{
    checkArgument(encoding.equals(DELTA_BYTE_ARRAY), "encoding %s is not DELTA_BYTE_ARRAY", encoding);
    ColumnDescriptor descriptor = field.getDescriptor();
    LogicalTypeAnnotation annotation = descriptor.getPrimitiveType().getLogicalTypeAnnotation();
    boolean shortDecimal = annotation instanceof DecimalLogicalTypeAnnotation decimalAnnotation
            && decimalAnnotation.getPrecision() <= Decimals.MAX_SHORT_PRECISION;
    checkArgument(shortDecimal, "Column %s is not a short decimal", descriptor);
    int typeLength = descriptor.getPrimitiveType().getTypeLength();
    checkArgument(typeLength > 0 && typeLength <= 16, "Expected column %s to have type length in range (1-16)", descriptor);
    // Each position in FIXED_LEN_BYTE_ARRAY has fixed length, so these are the same for every value
    int skippedBytes = Math.max(typeLength - Long.BYTES, 0);
    int usedBytes = Math.min(typeLength, Long.BYTES);
    return new ValueDecoder<>()
    {
        private final ValueDecoder delegate = new BinaryDeltaByteArrayDecoder();

        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(long[] values, int offset, int length)
        {
            BinaryBuffer binaryValues = new BinaryBuffer(length);
            delegate.read(binaryValues, 0, length);
            byte[] bytes = binaryValues.asSlice().byteArray();
            int[] valueOffsets = binaryValues.getOffsets();
            for (int position = 0; position < length; position++) {
                int start = valueOffsets[position];
                checkBytesFitInShortDecimal(bytes, start, skippedBytes, descriptor);
                values[offset + position] = getShortDecimalValue(bytes, start + skippedBytes, usedBytes);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder producing long decimals rescaled from the precision and scale declared by the
 * column's decimal annotation to the precision and scale of the field's {@link DecimalType}.
 * <p>
 * Output values occupy two consecutive {@code long}s each (high word followed by low word).
 * When the annotated precision fits a short decimal, values are decoded as single longs and
 * widened; otherwise they are decoded as long decimals and rescaled in place.
 *
 * @param encoding encoding of the underlying values, forwarded to the source decoder
 * @return decoder writing two {@code long}s per value
 */
public ValueDecoder<long[]> getRescaledLongDecimalDecoder(ParquetEncoding encoding)
{
    DecimalType decimalType = (DecimalType) field.getType();
    DecimalLogicalTypeAnnotation decimalAnnotation = (DecimalLogicalTypeAnnotation) field.getDescriptor().getPrimitiveType().getLogicalTypeAnnotation();
    // Precision and scale do not change between reads, so resolve them once
    int sourcePrecision = decimalAnnotation.getPrecision();
    int sourceScale = decimalAnnotation.getScale();
    int targetPrecision = decimalType.getPrecision();
    int targetScale = decimalType.getScale();

    if (sourcePrecision <= Decimals.MAX_SHORT_PRECISION) {
        ValueDecoder<long[]> delegate = getShortDecimalDecoder(encoding);
        return new ValueDecoder<>()
        {
            @Override
            public void init(SimpleSliceInputStream input)
            {
                delegate.init(input);
            }

            @Override
            public void read(long[] values, int offset, int length)
            {
                // Source values take one long each while the output takes two,
                // so they are staged in a temporary buffer
                long[] buffer = new long[length];
                delegate.read(buffer, 0, length);
                for (int i = 0; i < length; i++) {
                    Int128 rescaled = DecimalConversions.shortToLongCast(
                            buffer[i],
                            sourcePrecision,
                            sourceScale,
                            targetPrecision,
                            targetScale);
                    int outputIndex = 2 * (offset + i);
                    values[outputIndex] = rescaled.getHigh();
                    values[outputIndex + 1] = rescaled.getLow();
                }
            }

            @Override
            public void skip(int n)
            {
                delegate.skip(n);
            }
        };
    }

    return new InlineTransformDecoder<>(
            getLongDecimalDecoder(encoding),
            (values, offset, length) -> {
                // Source and output share the two-longs-per-value layout, so rescale in place
                int endOffset = (offset + length) * 2;
                for (int currentOffset = offset * 2; currentOffset < endOffset; currentOffset += 2) {
                    Int128 rescaled = DecimalConversions.longToLongCast(
                            Int128.valueOf(values[currentOffset], values[currentOffset + 1]),
                            sourcePrecision,
                            sourceScale,
                            targetPrecision,
                            targetScale);
                    values[currentOffset] = rescaled.getHigh();
                    values[currentOffset + 1] = rescaled.getLow();
                }
            });
}
/**
 * Creates a decoder producing short decimals (one {@code long} per value) rescaled from the precision
 * and scale declared by the column's decimal annotation to those of the field's {@link DecimalType}.
 * <p>
 * When the annotated precision fits a short decimal, values are decoded as longs and rescaled in
 * place; otherwise they are decoded as long decimals (two longs per value) and narrowed.
 *
 * @param encoding encoding of the underlying values, forwarded to the source decoder
 * @return decoder writing one {@code long} per value
 */
public ValueDecoder<long[]> getRescaledShortDecimalDecoder(ParquetEncoding encoding)
{
    DecimalType decimalType = (DecimalType) field.getType();
    DecimalLogicalTypeAnnotation decimalAnnotation = (DecimalLogicalTypeAnnotation) field.getDescriptor().getPrimitiveType().getLogicalTypeAnnotation();
    // Precision and scale do not change between reads, so resolve them once
    int sourcePrecision = decimalAnnotation.getPrecision();
    int sourceScale = decimalAnnotation.getScale();
    int targetPrecision = decimalType.getPrecision();
    int targetScale = decimalType.getScale();

    if (sourcePrecision <= Decimals.MAX_SHORT_PRECISION) {
        // Power of ten covering the scale difference; half of it is passed alongside for the cast
        long rescale = longTenToNth(Math.abs(targetScale - sourceScale));
        long halfRescale = rescale / 2;
        return new InlineTransformDecoder<>(
                getShortDecimalDecoder(encoding),
                (values, offset, length) -> {
                    int end = offset + length;
                    for (int i = offset; i < end; i++) {
                        values[i] = DecimalConversions.shortToShortCast(
                                values[i],
                                sourcePrecision,
                                sourceScale,
                                targetPrecision,
                                targetScale,
                                rescale,
                                halfRescale);
                    }
                });
    }

    ValueDecoder<long[]> delegate = getLongDecimalDecoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(long[] values, int offset, int length)
        {
            // Source values take two longs each while the output takes one,
            // so they are staged in a temporary buffer
            long[] buffer = new long[2 * length];
            delegate.read(buffer, 0, length);
            for (int i = 0; i < length; i++) {
                values[offset + i] = DecimalConversions.longToShortCast(
                        Int128.valueOf(buffer[2 * i], buffer[2 * i + 1]),
                        sourcePrecision,
                        sourceScale,
                        targetPrecision,
                        targetScale);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder that reads INT32 values and exposes them as short decimals of the field's
 * {@link DecimalType}, rescaling each value from scale 0 to the target scale.
 *
 * @param encoding encoding of the underlying INT32 values
 * @return decoder writing one {@code long} per value; its {@code read} throws {@link TrinoException}
 * with {@code INVALID_CAST_ARGUMENT} when a value overflows the target precision
 */
public ValueDecoder<long[]> getInt32ToShortDecimalDecoder(ParquetEncoding encoding)
{
    DecimalType decimalType = (DecimalType) field.getType();
    // Target precision and scale do not change between reads, so resolve them once
    int precision = decimalType.getPrecision();
    int scale = decimalType.getScale();
    ValueDecoder<int[]> delegate = getInt32Decoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(long[] values, int offset, int length)
        {
            int[] buffer = new int[length];
            delegate.read(buffer, 0, length);
            for (int i = 0; i < length; i++) {
                int value = buffer[i];
                if (overflows(value, precision)) {
                    throw new TrinoException(
                            INVALID_CAST_ARGUMENT,
                            format("Cannot read parquet INT32 value '%s' as DECIMAL(%s, %s)", value, precision, scale));
                }
                values[i + offset] = rescale(value, 0, scale);
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder that reads INT32 values and widens each one to a {@code long}.
 *
 * @param encoding encoding of the underlying INT32 values
 * @return decoder writing one {@code long} per value
 */
public ValueDecoder<long[]> getInt32ToLongDecoder(ParquetEncoding encoding)
{
    ValueDecoder<int[]> delegate = getInt32Decoder(encoding);
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(long[] values, int offset, int length)
        {
            int[] buffer = new int[length];
            delegate.read(buffer, 0, length);
            for (int i = 0; i < length; i++) {
                // Implicit int -> long widening
                values[i + offset] = buffer[i];
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Creates a decoder that reads values with the long decoder and narrows each one to an {@code int}
 * via {@code LongToIntTransformDecoder}.
 *
 * @param encoding encoding of the underlying values
 * @return decoder writing one {@code int} per value
 */
public ValueDecoder<int[]> getInt64ToIntDecoder(ParquetEncoding encoding)
{
    return new LongToIntTransformDecoder(getLongDecoder(encoding));
}
/**
 * Creates a decoder that reads values with the short decimal decoder and narrows each one to an
 * {@code int} via {@code LongToIntTransformDecoder}.
 *
 * @param encoding encoding of the underlying values
 * @return decoder writing one {@code int} per value
 */
public ValueDecoder<int[]> getShortDecimalToIntDecoder(ParquetEncoding encoding)
{
    return new LongToIntTransformDecoder(getShortDecimalDecoder(encoding));
}
/**
 * Creates a decoder that reads values with the long decoder and narrows each one to a {@code short}
 * via {@code LongToShortTransformDecoder}.
 *
 * @param encoding encoding of the underlying values
 * @return decoder writing one {@code short} per value
 */
public ValueDecoder<short[]> getInt64ToShortDecoder(ParquetEncoding encoding)
{
    return new LongToShortTransformDecoder(getLongDecoder(encoding));
}
/**
 * Creates a decoder that reads values with the short decimal decoder and narrows each one to a
 * {@code short} via {@code LongToShortTransformDecoder}.
 *
 * @param encoding encoding of the underlying values
 * @return decoder writing one {@code short} per value
 */
public ValueDecoder<short[]> getShortDecimalToShortDecoder(ParquetEncoding encoding)
{
    return new LongToShortTransformDecoder(getShortDecimalDecoder(encoding));
}
/**
 * Creates a decoder that reads values with the long decoder and narrows each one to a {@code byte}
 * via {@code LongToByteTransformDecoder}.
 *
 * @param encoding encoding of the underlying values
 * @return decoder writing one {@code byte} per value
 */
public ValueDecoder<byte[]> getInt64ToByteDecoder(ParquetEncoding encoding)
{
    return new LongToByteTransformDecoder(getLongDecoder(encoding));
}
/**
 * Creates a decoder that reads values with the short decimal decoder and narrows each one to a
 * {@code byte} via {@code LongToByteTransformDecoder}.
 *
 * @param encoding encoding of the underlying values
 * @return decoder writing one {@code byte} per value
 */
public ValueDecoder<byte[]> getShortDecimalToByteDecoder(ParquetEncoding encoding)
{
    return new LongToByteTransformDecoder(getShortDecimalDecoder(encoding));
}
/**
 * Creates a decoder for UUID values encoded with {@code DELTA_BYTE_ARRAY}.
 * <p>
 * Values are decoded into a {@link BinaryBuffer} by a {@code BinaryDeltaByteArrayDecoder} and the
 * resulting bytes are then consumed as two consecutive longs per value.
 *
 * @param encoding must be {@code DELTA_BYTE_ARRAY}
 * @return decoder writing two {@code long}s per value
 * @throws IllegalArgumentException if the encoding is not {@code DELTA_BYTE_ARRAY}
 */
public ValueDecoder<long[]> getDeltaUuidDecoder(ParquetEncoding encoding)
{
    checkArgument(encoding.equals(DELTA_BYTE_ARRAY), "encoding %s is not DELTA_BYTE_ARRAY", encoding);
    ValueDecoder<BinaryBuffer> delegate = new BinaryDeltaByteArrayDecoder();
    return new ValueDecoder<>()
    {
        @Override
        public void init(SimpleSliceInputStream input)
        {
            delegate.init(input);
        }

        @Override
        public void read(long[] values, int offset, int length)
        {
            BinaryBuffer buffer = new BinaryBuffer(length);
            delegate.read(buffer, 0, length);
            // The decoded bytes are read back-to-back without consulting per-value offsets;
            // this relies on every decoded value being exactly 16 bytes, which is not checked here
            SimpleSliceInputStream binaryInput = new SimpleSliceInputStream(buffer.asSlice());
            int endOffset = (offset + length) * 2;
            for (int outputOffset = offset * 2; outputOffset < endOffset; outputOffset += 2) {
                values[outputOffset] = binaryInput.readLong();
                values[outputOffset + 1] = binaryInput.readLong();
            }
        }

        @Override
        public void skip(int n)
        {
            delegate.skip(n);
        }
    };
}
/**
 * Adapts a decoder of {@code long} values into a decoder of {@code int} values.
 * Each value is converted with {@code toIntExact}, so values outside the {@code int}
 * range are rejected by that call rather than truncated.
 */
private static class LongToIntTransformDecoder
        implements ValueDecoder<int[]>
{
    private final ValueDecoder<long[]> delegate;

    private LongToIntTransformDecoder(ValueDecoder<long[]> delegate)
    {
        this.delegate = requireNonNull(delegate, "delegate is null");
    }

    @Override
    public void init(SimpleSliceInputStream input)
    {
        delegate.init(input);
    }

    @Override
    public void read(int[] values, int offset, int length)
    {
        // The delegate produces longs, so decode into a temporary buffer and narrow afterwards
        long[] buffer = new long[length];
        delegate.read(buffer, 0, length);
        for (int i = 0; i < length; i++) {
            values[offset + i] = toIntExact(buffer[i]);
        }
    }

    @Override
    public void skip(int n)
    {
        delegate.skip(n);
    }
}
/**
 * Adapts a decoder of {@code long} values into a decoder of {@code short} values.
 * Each value is converted with {@code toShortExact}.
 */
private static class LongToShortTransformDecoder
        implements ValueDecoder<short[]>
{
    private final ValueDecoder<long[]> delegate;

    private LongToShortTransformDecoder(ValueDecoder<long[]> delegate)
    {
        this.delegate = requireNonNull(delegate, "delegate is null");
    }

    @Override
    public void init(SimpleSliceInputStream input)
    {
        delegate.init(input);
    }

    @Override
    public void read(short[] values, int offset, int length)
    {
        // The delegate produces longs, so decode into a temporary buffer and narrow afterwards
        long[] buffer = new long[length];
        delegate.read(buffer, 0, length);
        for (int i = 0; i < length; i++) {
            values[offset + i] = toShortExact(buffer[i]);
        }
    }

    @Override
    public void skip(int n)
    {
        delegate.skip(n);
    }
}
/**
 * Adapts a decoder of {@code long} values into a decoder of {@code byte} values.
 * Each value is converted with {@code toByteExact}.
 */
private static class LongToByteTransformDecoder
        implements ValueDecoder<byte[]>
{
    private final ValueDecoder<long[]> delegate;

    private LongToByteTransformDecoder(ValueDecoder<long[]> delegate)
    {
        this.delegate = requireNonNull(delegate, "delegate is null");
    }

    @Override
    public void init(SimpleSliceInputStream input)
    {
        delegate.init(input);
    }

    @Override
    public void read(byte[] values, int offset, int length)
    {
        // The delegate produces longs, so decode into a temporary buffer and narrow afterwards
        long[] buffer = new long[length];
        delegate.read(buffer, 0, length);
        for (int i = 0; i < length; i++) {
            values[offset + i] = toByteExact(buffer[i]);
        }
    }

    @Override
    public void skip(int n)
    {
        delegate.skip(n);
    }
}
/**
 * Adapts a decoder of variable-length binary values into a decoder of long decimals.
 * Each binary value is converted with {@link Int128#fromBigEndian} and written as two
 * consecutive {@code long}s (high word followed by low word).
 */
private static class BinaryToLongDecimalTransformDecoder
        implements ValueDecoder<long[]>
{
    private final ValueDecoder<BinaryBuffer> delegate;

    private BinaryToLongDecimalTransformDecoder(ValueDecoder<BinaryBuffer> delegate)
    {
        this.delegate = requireNonNull(delegate, "delegate is null");
    }

    @Override
    public void init(SimpleSliceInputStream input)
    {
        delegate.init(input);
    }

    @Override
    public void read(long[] values, int offset, int length)
    {
        BinaryBuffer buffer = new BinaryBuffer(length);
        delegate.read(buffer, 0, length);
        int[] offsets = buffer.getOffsets();
        Slice binaryInput = buffer.asSlice();
        for (int i = 0; i < length; i++) {
            // offsets[i] .. offsets[i + 1] delimits the bytes of the i-th value
            int positionOffset = offsets[i];
            int positionLength = offsets[i + 1] - positionOffset;
            Int128 value = Int128.fromBigEndian(binaryInput.getBytes(positionOffset, positionLength));
            int outputIndex = 2 * (offset + i);
            values[outputIndex] = value.getHigh();
            values[outputIndex + 1] = value.getLow();
        }
    }

    @Override
    public void skip(int n)
    {
        delegate.skip(n);
    }
}
/**
 * Decoder that reads values with a wrapped decoder and then applies a transform to the
 * freshly written range of the same output buffer.
 *
 * @param <T> type of the output buffer shared by the wrapped decoder and the transform
 */
private static class InlineTransformDecoder<T>
        implements ValueDecoder<T>
{
    private final ValueDecoder<T> valueDecoder;
    private final TypeTransform<T> typeTransform;

    private InlineTransformDecoder(ValueDecoder<T> valueDecoder, TypeTransform<T> typeTransform)
    {
        this.valueDecoder = requireNonNull(valueDecoder, "valueDecoder is null");
        this.typeTransform = requireNonNull(typeTransform, "typeTransform is null");
    }

    @Override
    public void init(SimpleSliceInputStream input)
    {
        valueDecoder.init(input);
    }

    @Override
    public void read(T values, int offset, int length)
    {
        // Decode first, then transform exactly the range that was just written
        valueDecoder.read(values, offset, length);
        typeTransform.process(values, offset, length);
    }

    @Override
    public void skip(int n)
    {
        valueDecoder.skip(n);
    }
}

/**
 * In-place transformation applied to a range of a decoded values buffer.
 *
 * @param <T> type of the values buffer
 */
@FunctionalInterface
private interface TypeTransform<T>
{
    /**
     * Transforms {@code length} values of {@code values} starting at {@code offset}.
     */
    void process(T values, int offset, int length);
}
/**
 * Obtains a {@link ValuesReader} for this field's column from the given encoding,
 * requesting a reader for {@code VALUES}.
 */
private ValuesReader getApacheParquetReader(ParquetEncoding encoding)
{
    ColumnDescriptor columnDescriptor = field.getDescriptor();
    return encoding.getValuesReader(columnDescriptor, VALUES);
}
/**
 * Builds (but does not throw) the exception reporting that the given encoding
 * is not applicable to this field's column.
 */
private IllegalArgumentException wrongEncoding(ParquetEncoding encoding)
{
    String message = "Wrong encoding " + encoding + " for column " + field.getDescriptor();
    return new IllegalArgumentException(message);
}
}