org.apache.iceberg.spark.data.SparkValueReaders Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of iceberg-spark Show documentation
Show all versions of iceberg-spark Show documentation
A table format for huge analytic datasets
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.spark.data;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.avro.io.Decoder;
import org.apache.avro.util.Utf8;
import org.apache.iceberg.avro.ValueReader;
import org.apache.iceberg.avro.ValueReaders;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.sql.catalyst.util.ArrayBasedMapData;
import org.apache.spark.sql.catalyst.util.ArrayData;
import org.apache.spark.sql.catalyst.util.GenericArrayData;
import org.apache.spark.sql.types.Decimal;
import org.apache.spark.unsafe.types.UTF8String;
public class SparkValueReaders {
// Not instantiable: this class only exposes static factory methods.
private SparkValueReaders() {
}
/**
 * Returns the shared reader that decodes strings into Spark {@link UTF8String} values.
 *
 * @return the singleton {@code StringReader} instance
 */
static ValueReader<UTF8String> strings() {
  return StringReader.INSTANCE;
}
/**
 * Creates a reader that decodes an enum index and returns the matching symbol.
 *
 * @param symbols enum symbols, positioned by the index the decoder reports
 * @return a new {@code EnumReader} over {@code symbols}
 */
static ValueReader<UTF8String> enums(List<String> symbols) {
  return new EnumReader(symbols);
}
/**
 * Returns the shared reader that decodes a 16-byte fixed value into a UUID string.
 *
 * @return the singleton {@code UUIDReader} instance
 */
static ValueReader<UTF8String> uuids() {
  return UUIDReader.INSTANCE;
}
/**
 * Creates a reader that builds a Spark {@link Decimal} from an unscaled byte value.
 *
 * @param unscaledReader reader producing the bytes of the unscaled integer value
 * @param scale scale applied to the unscaled value
 * @return a new {@code DecimalReader}
 */
static ValueReader<Decimal> decimal(ValueReader<byte[]> unscaledReader, int scale) {
  return new DecimalReader(unscaledReader, scale);
}
static ValueReader array(ValueReader> elementReader) {
return new ArrayReader(elementReader);
}
static ValueReader arrayMap(ValueReader> keyReader,
ValueReader> valueReader) {
return new ArrayMapReader(keyReader, valueReader);
}
static ValueReader map(ValueReader> keyReader, ValueReader> valueReader) {
return new MapReader(keyReader, valueReader);
}
static ValueReader struct(List> readers, Types.StructType struct,
Map idToConstant) {
return new StructReader(readers, struct, idToConstant);
}
/**
 * Reads a string from a {@link Decoder} and returns it as a Spark {@link UTF8String}.
 */
private static class StringReader implements ValueReader<UTF8String> {
  private static final StringReader INSTANCE = new StringReader();

  private StringReader() {
  }

  /**
   * Decodes the next string.
   *
   * @param decoder source of the encoded string
   * @param reuse an optional previous value; when it is a {@link UTF8String}, a copy of its
   *              bytes is wrapped in the {@link Utf8} handed to the decoder
   * @return the decoded string
   * @throws IOException if the decoder fails to read
   */
  @Override
  public UTF8String read(Decoder decoder, Object reuse) throws IOException {
    // use the decoder's readString(Utf8) method because it may be a resolving decoder
    Utf8 utf8 = null;
    if (reuse instanceof UTF8String) {
      utf8 = new Utf8(((UTF8String) reuse).getBytes());
    }

    Utf8 string = decoder.readString(utf8);
    // only the first getByteLength() bytes of the Utf8's array hold the decoded value
    return UTF8String.fromBytes(string.getBytes(), 0, string.getByteLength());
  }
}
/**
 * Reads an enum index from a {@link Decoder} and returns the corresponding symbol as a
 * {@link UTF8String}.
 */
private static class EnumReader implements ValueReader<UTF8String> {
  private final UTF8String[] symbols;

  /**
   * Converts every symbol to a {@link UTF8String} once, so reads only index into the array.
   *
   * @param symbols enum symbols, positioned by the index the decoder reports
   */
  private EnumReader(List<String> symbols) {
    this.symbols = new UTF8String[symbols.size()];
    for (int i = 0; i < this.symbols.length; i += 1) {
      this.symbols[i] = UTF8String.fromBytes(symbols.get(i).getBytes(StandardCharsets.UTF_8));
    }
  }

  /**
   * Decodes the next enum value.
   *
   * @param decoder source of the encoded enum index
   * @param ignore unused
   * @return the pre-converted symbol at the decoded index
   * @throws IOException if the decoder fails to read
   */
  @Override
  public UTF8String read(Decoder decoder, Object ignore) throws IOException {
    int index = decoder.readEnum();
    return symbols[index];
  }
}
/**
 * Reads a 16-byte fixed value from a {@link Decoder} and returns the UUID it encodes, formatted
 * as a {@link UTF8String}.
 */
private static class UUIDReader implements ValueReader<UTF8String> {
  // one 16-byte big-endian scratch buffer per thread, reused across reads
  private static final ThreadLocal<ByteBuffer> BUFFER = ThreadLocal.withInitial(() -> {
    ByteBuffer buffer = ByteBuffer.allocate(16);
    buffer.order(ByteOrder.BIG_ENDIAN);
    return buffer;
  });

  private static final UUIDReader INSTANCE = new UUIDReader();

  private UUIDReader() {
  }

  /**
   * Decodes the next UUID.
   *
   * @param decoder source of the 16 encoded bytes
   * @param reuse unused
   * @return the canonical string form of the decoded UUID
   * @throws IOException if the decoder fails to read
   */
  @Override
  public UTF8String read(Decoder decoder, Object reuse) throws IOException {
    ByteBuffer buffer = BUFFER.get();
    // reset the position left behind by the getLong() calls of the previous read
    buffer.rewind();

    decoder.readFixed(buffer.array(), 0, 16);
    long mostSigBits = buffer.getLong();
    long leastSigBits = buffer.getLong();

    return UTF8String.fromString(new UUID(mostSigBits, leastSigBits).toString());
  }
}
/**
 * Reads an unscaled integer as bytes and combines it with a fixed scale to produce a Spark
 * {@link Decimal}.
 */
private static class DecimalReader implements ValueReader<Decimal> {
  private final ValueReader<byte[]> bytesReader;
  private final int scale;

  /**
   * @param bytesReader reader producing the bytes of the unscaled value
   * @param scale scale applied to every decoded value
   */
  private DecimalReader(ValueReader<byte[]> bytesReader, int scale) {
    this.bytesReader = bytesReader;
    this.scale = scale;
  }

  /**
   * Decodes the next decimal.
   *
   * @param decoder source of the encoded unscaled value
   * @param reuse unused; the byte reader is always called with {@code null}
   * @return the decoded decimal
   * @throws IOException if the underlying reader fails
   */
  @Override
  public Decimal read(Decoder decoder, Object reuse) throws IOException {
    byte[] bytes = bytesReader.read(decoder, null);
    // BigInteger(byte[]) interprets the bytes as a big-endian two's-complement integer
    return Decimal.apply(new BigDecimal(new BigInteger(bytes), scale));
  }
}
private static class ArrayReader implements ValueReader {
private final ValueReader> elementReader;
private final List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy