All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.spark.data.SparkValueWriters Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.spark.data;

import java.io.IOException;
import java.lang.reflect.Array;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
import java.util.UUID;
import org.apache.avro.io.Encoder;
import org.apache.avro.util.Utf8;
import org.apache.iceberg.avro.ValueWriter;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.util.DecimalUtil;
import org.apache.iceberg.util.UUIDUtil;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.util.ArrayData;
import org.apache.spark.sql.catalyst.util.MapData;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.Decimal;
import org.apache.spark.unsafe.types.UTF8String;

public class SparkValueWriters {

  private SparkValueWriters() {}

  static ValueWriter strings() {
    return StringWriter.INSTANCE;
  }

  static ValueWriter uuids() {
    return UUIDWriter.INSTANCE;
  }

  static ValueWriter decimal(int precision, int scale) {
    return new DecimalWriter(precision, scale);
  }

  static  ValueWriter array(ValueWriter elementWriter, DataType elementType) {
    return new ArrayWriter<>(elementWriter, elementType);
  }

  static  ValueWriter arrayMap(
      ValueWriter keyWriter, DataType keyType, ValueWriter valueWriter, DataType valueType) {
    return new ArrayMapWriter<>(keyWriter, keyType, valueWriter, valueType);
  }

  static  ValueWriter map(
      ValueWriter keyWriter, DataType keyType, ValueWriter valueWriter, DataType valueType) {
    return new MapWriter<>(keyWriter, keyType, valueWriter, valueType);
  }

  static ValueWriter struct(List> writers, List types) {
    return new StructWriter(writers, types);
  }

  private static class StringWriter implements ValueWriter {
    private static final StringWriter INSTANCE = new StringWriter();

    private StringWriter() {}

    @Override
    public void write(UTF8String s, Encoder encoder) throws IOException {
      // use getBytes because it may return the backing byte array if available.
      // otherwise, it copies to a new byte array, which is still cheaper than Avro
      // calling toString, which incurs encoding costs
      encoder.writeString(new Utf8(s.getBytes()));
    }
  }

  private static class UUIDWriter implements ValueWriter {
    private static final ThreadLocal BUFFER =
        ThreadLocal.withInitial(
            () -> {
              ByteBuffer buffer = ByteBuffer.allocate(16);
              buffer.order(ByteOrder.BIG_ENDIAN);
              return buffer;
            });

    private static final UUIDWriter INSTANCE = new UUIDWriter();

    private UUIDWriter() {}

    @Override
    @SuppressWarnings("ByteBufferBackingArray")
    public void write(UTF8String s, Encoder encoder) throws IOException {
      // TODO: direct conversion from string to byte buffer
      UUID uuid = UUID.fromString(s.toString());
      // calling array() is safe because the buffer is always allocated by the thread-local
      encoder.writeFixed(UUIDUtil.convertToByteBuffer(uuid, BUFFER.get()).array());
    }
  }

  private static class DecimalWriter implements ValueWriter {
    private final int precision;
    private final int scale;
    private final ThreadLocal bytes;

    private DecimalWriter(int precision, int scale) {
      this.precision = precision;
      this.scale = scale;
      this.bytes =
          ThreadLocal.withInitial(() -> new byte[TypeUtil.decimalRequiredBytes(precision)]);
    }

    @Override
    public void write(Decimal d, Encoder encoder) throws IOException {
      encoder.writeFixed(
          DecimalUtil.toReusedFixLengthBytes(precision, scale, d.toJavaBigDecimal(), bytes.get()));
    }
  }

  private static class ArrayWriter implements ValueWriter {
    private final ValueWriter elementWriter;
    private final DataType elementType;

    private ArrayWriter(ValueWriter elementWriter, DataType elementType) {
      this.elementWriter = elementWriter;
      this.elementType = elementType;
    }

    @Override
    @SuppressWarnings("unchecked")
    public void write(ArrayData array, Encoder encoder) throws IOException {
      encoder.writeArrayStart();
      int numElements = array.numElements();
      encoder.setItemCount(numElements);
      for (int i = 0; i < numElements; i += 1) {
        encoder.startItem();
        elementWriter.write((T) array.get(i, elementType), encoder);
      }
      encoder.writeArrayEnd();
    }
  }

  private static class ArrayMapWriter implements ValueWriter {
    private final ValueWriter keyWriter;
    private final ValueWriter valueWriter;
    private final DataType keyType;
    private final DataType valueType;

    private ArrayMapWriter(
        ValueWriter keyWriter,
        DataType keyType,
        ValueWriter valueWriter,
        DataType valueType) {
      this.keyWriter = keyWriter;
      this.keyType = keyType;
      this.valueWriter = valueWriter;
      this.valueType = valueType;
    }

    @Override
    @SuppressWarnings("unchecked")
    public void write(MapData map, Encoder encoder) throws IOException {
      encoder.writeArrayStart();
      int numElements = map.numElements();
      encoder.setItemCount(numElements);
      ArrayData keyArray = map.keyArray();
      ArrayData valueArray = map.valueArray();
      for (int i = 0; i < numElements; i += 1) {
        encoder.startItem();
        keyWriter.write((K) keyArray.get(i, keyType), encoder);
        valueWriter.write((V) valueArray.get(i, valueType), encoder);
      }
      encoder.writeArrayEnd();
    }
  }

  private static class MapWriter implements ValueWriter {
    private final ValueWriter keyWriter;
    private final ValueWriter valueWriter;
    private final DataType keyType;
    private final DataType valueType;

    private MapWriter(
        ValueWriter keyWriter,
        DataType keyType,
        ValueWriter valueWriter,
        DataType valueType) {
      this.keyWriter = keyWriter;
      this.keyType = keyType;
      this.valueWriter = valueWriter;
      this.valueType = valueType;
    }

    @Override
    @SuppressWarnings("unchecked")
    public void write(MapData map, Encoder encoder) throws IOException {
      encoder.writeMapStart();
      int numElements = map.numElements();
      encoder.setItemCount(numElements);
      ArrayData keyArray = map.keyArray();
      ArrayData valueArray = map.valueArray();
      for (int i = 0; i < numElements; i += 1) {
        encoder.startItem();
        keyWriter.write((K) keyArray.get(i, keyType), encoder);
        valueWriter.write((V) valueArray.get(i, valueType), encoder);
      }
      encoder.writeMapEnd();
    }
  }

  static class StructWriter implements ValueWriter {
    private final ValueWriter[] writers;
    private final DataType[] types;

    @SuppressWarnings("unchecked")
    private StructWriter(List> writers, List types) {
      this.writers = (ValueWriter[]) Array.newInstance(ValueWriter.class, writers.size());
      this.types = new DataType[writers.size()];
      for (int i = 0; i < writers.size(); i += 1) {
        this.writers[i] = writers.get(i);
        this.types[i] = types.get(i);
      }
    }

    ValueWriter[] writers() {
      return writers;
    }

    @Override
    public void write(InternalRow row, Encoder encoder) throws IOException {
      for (int i = 0; i < types.length; i += 1) {
        if (row.isNullAt(i)) {
          writers[i].write(null, encoder);
        } else {
          write(row, i, writers[i], encoder);
        }
      }
    }

    @SuppressWarnings("unchecked")
    private  void write(InternalRow row, int pos, ValueWriter writer, Encoder encoder)
        throws IOException {
      writer.write((T) row.get(pos, types[pos]), encoder);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy