All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.spark.data.SparkValueReaders Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.spark.data;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import org.apache.avro.io.Decoder;
import org.apache.avro.util.Utf8;
import org.apache.iceberg.avro.ValueReader;
import org.apache.iceberg.avro.ValueReaders;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.UUIDUtil;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.sql.catalyst.util.ArrayBasedMapData;
import org.apache.spark.sql.catalyst.util.ArrayData;
import org.apache.spark.sql.catalyst.util.GenericArrayData;
import org.apache.spark.sql.types.Decimal;
import org.apache.spark.unsafe.types.UTF8String;

public class SparkValueReaders {

  private SparkValueReaders() {}

  static ValueReader strings() {
    return StringReader.INSTANCE;
  }

  static ValueReader enums(List symbols) {
    return new EnumReader(symbols);
  }

  static ValueReader uuids() {
    return UUIDReader.INSTANCE;
  }

  static ValueReader decimal(ValueReader unscaledReader, int scale) {
    return new DecimalReader(unscaledReader, scale);
  }

  static ValueReader array(ValueReader elementReader) {
    return new ArrayReader(elementReader);
  }

  static ValueReader arrayMap(
      ValueReader keyReader, ValueReader valueReader) {
    return new ArrayMapReader(keyReader, valueReader);
  }

  static ValueReader map(ValueReader keyReader, ValueReader valueReader) {
    return new MapReader(keyReader, valueReader);
  }

  static ValueReader struct(
      List> readers, Types.StructType struct, Map idToConstant) {
    return new StructReader(readers, struct, idToConstant);
  }

  private static class StringReader implements ValueReader {
    private static final StringReader INSTANCE = new StringReader();

    private StringReader() {}

    @Override
    public UTF8String read(Decoder decoder, Object reuse) throws IOException {
      // use the decoder's readString(Utf8) method because it may be a resolving decoder
      Utf8 utf8 = null;
      if (reuse instanceof UTF8String) {
        utf8 = new Utf8(((UTF8String) reuse).getBytes());
      }

      Utf8 string = decoder.readString(utf8);
      return UTF8String.fromBytes(string.getBytes(), 0, string.getByteLength());
    }
  }

  private static class EnumReader implements ValueReader {
    private final UTF8String[] symbols;

    private EnumReader(List symbols) {
      this.symbols = new UTF8String[symbols.size()];
      for (int i = 0; i < this.symbols.length; i += 1) {
        this.symbols[i] = UTF8String.fromBytes(symbols.get(i).getBytes(StandardCharsets.UTF_8));
      }
    }

    @Override
    public UTF8String read(Decoder decoder, Object ignore) throws IOException {
      int index = decoder.readEnum();
      return symbols[index];
    }
  }

  private static class UUIDReader implements ValueReader {
    private static final ThreadLocal BUFFER =
        ThreadLocal.withInitial(
            () -> {
              ByteBuffer buffer = ByteBuffer.allocate(16);
              buffer.order(ByteOrder.BIG_ENDIAN);
              return buffer;
            });

    private static final UUIDReader INSTANCE = new UUIDReader();

    private UUIDReader() {}

    @Override
    @SuppressWarnings("ByteBufferBackingArray")
    public UTF8String read(Decoder decoder, Object reuse) throws IOException {
      ByteBuffer buffer = BUFFER.get();
      buffer.rewind();

      decoder.readFixed(buffer.array(), 0, 16);

      return UTF8String.fromString(UUIDUtil.convert(buffer).toString());
    }
  }

  private static class DecimalReader implements ValueReader {
    private final ValueReader bytesReader;
    private final int scale;

    private DecimalReader(ValueReader bytesReader, int scale) {
      this.bytesReader = bytesReader;
      this.scale = scale;
    }

    @Override
    public Decimal read(Decoder decoder, Object reuse) throws IOException {
      byte[] bytes = bytesReader.read(decoder, null);
      return Decimal.apply(new BigDecimal(new BigInteger(bytes), scale));
    }
  }

  private static class ArrayReader implements ValueReader {
    private final ValueReader elementReader;
    private final List reusedList = Lists.newArrayList();

    private ArrayReader(ValueReader elementReader) {
      this.elementReader = elementReader;
    }

    @Override
    public GenericArrayData read(Decoder decoder, Object reuse) throws IOException {
      reusedList.clear();
      long chunkLength = decoder.readArrayStart();

      while (chunkLength > 0) {
        for (int i = 0; i < chunkLength; i += 1) {
          reusedList.add(elementReader.read(decoder, null));
        }

        chunkLength = decoder.arrayNext();
      }

      // this will convert the list to an array so it is okay to reuse the list
      return new GenericArrayData(reusedList.toArray());
    }
  }

  private static class ArrayMapReader implements ValueReader {
    private final ValueReader keyReader;
    private final ValueReader valueReader;

    private final List reusedKeyList = Lists.newArrayList();
    private final List reusedValueList = Lists.newArrayList();

    private ArrayMapReader(ValueReader keyReader, ValueReader valueReader) {
      this.keyReader = keyReader;
      this.valueReader = valueReader;
    }

    @Override
    public ArrayBasedMapData read(Decoder decoder, Object reuse) throws IOException {
      reusedKeyList.clear();
      reusedValueList.clear();

      long chunkLength = decoder.readArrayStart();

      while (chunkLength > 0) {
        for (int i = 0; i < chunkLength; i += 1) {
          reusedKeyList.add(keyReader.read(decoder, null));
          reusedValueList.add(valueReader.read(decoder, null));
        }

        chunkLength = decoder.arrayNext();
      }

      return new ArrayBasedMapData(
          new GenericArrayData(reusedKeyList.toArray()),
          new GenericArrayData(reusedValueList.toArray()));
    }
  }

  private static class MapReader implements ValueReader {
    private final ValueReader keyReader;
    private final ValueReader valueReader;

    private final List reusedKeyList = Lists.newArrayList();
    private final List reusedValueList = Lists.newArrayList();

    private MapReader(ValueReader keyReader, ValueReader valueReader) {
      this.keyReader = keyReader;
      this.valueReader = valueReader;
    }

    @Override
    public ArrayBasedMapData read(Decoder decoder, Object reuse) throws IOException {
      reusedKeyList.clear();
      reusedValueList.clear();

      long chunkLength = decoder.readMapStart();

      while (chunkLength > 0) {
        for (int i = 0; i < chunkLength; i += 1) {
          reusedKeyList.add(keyReader.read(decoder, null));
          reusedValueList.add(valueReader.read(decoder, null));
        }

        chunkLength = decoder.mapNext();
      }

      return new ArrayBasedMapData(
          new GenericArrayData(reusedKeyList.toArray()),
          new GenericArrayData(reusedValueList.toArray()));
    }
  }

  static class StructReader extends ValueReaders.StructReader {
    private final int numFields;

    protected StructReader(
        List> readers, Types.StructType struct, Map idToConstant) {
      super(readers, struct, idToConstant);
      this.numFields = readers.size();
    }

    @Override
    protected InternalRow reuseOrCreate(Object reuse) {
      if (reuse instanceof GenericInternalRow
          && ((GenericInternalRow) reuse).numFields() == numFields) {
        return (InternalRow) reuse;
      }
      return new GenericInternalRow(numFields);
    }

    @Override
    protected Object get(InternalRow struct, int pos) {
      return null;
    }

    @Override
    protected void set(InternalRow struct, int pos, Object value) {
      if (value != null) {
        struct.update(pos, value);
      } else {
        struct.setNullAt(pos);
      }
    }
  }
}