All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.parquet.column.values.bytestreamsplit.ByteStreamSplitValuesWriter Maven / Gradle / Ivy

There is a newer version: 1.15.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.column.values.bytestreamsplit;

import org.apache.parquet.bytes.ByteBufferAllocator;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.bytes.BytesUtils;
import org.apache.parquet.bytes.CapacityByteArrayOutputStream;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.values.ValuesWriter;
import org.apache.parquet.io.ParquetEncodingException;
import org.apache.parquet.io.api.Binary;

public abstract class ByteStreamSplitValuesWriter extends ValuesWriter {

  protected final int numStreams;
  protected final int elementSizeInBytes;
  private final CapacityByteArrayOutputStream[] byteStreams;

  public ByteStreamSplitValuesWriter(
      int elementSizeInBytes, int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
    if (elementSizeInBytes <= 0) {
      throw new ParquetEncodingException(String.format("Element byte size is invalid: %d", elementSizeInBytes));
    }
    this.numStreams = elementSizeInBytes;
    this.elementSizeInBytes = elementSizeInBytes;
    this.byteStreams = new CapacityByteArrayOutputStream[elementSizeInBytes];

    // Round-up the capacity hint.
    final int capacityPerStream = (pageSize + this.numStreams - 1) / this.numStreams;
    final int initialCapacityPerStream = (initialCapacity + this.numStreams - 1) / this.numStreams;
    for (int i = 0; i < this.numStreams; ++i) {
      this.byteStreams[i] =
          new CapacityByteArrayOutputStream(initialCapacityPerStream, capacityPerStream, allocator);
    }
  }

  @Override
  public long getBufferedSize() {
    long totalSize = 0;
    for (CapacityByteArrayOutputStream stream : this.byteStreams) {
      totalSize += stream.size();
    }
    return totalSize;
  }

  @Override
  public BytesInput getBytes() {
    BytesInput[] allInputs = new BytesInput[this.numStreams];
    for (int i = 0; i < this.numStreams; ++i) {
      allInputs[i] = BytesInput.from(this.byteStreams[i]);
    }
    return BytesInput.concat(allInputs);
  }

  @Override
  public Encoding getEncoding() {
    return Encoding.BYTE_STREAM_SPLIT;
  }

  @Override
  public void reset() {
    for (CapacityByteArrayOutputStream stream : this.byteStreams) {
      stream.reset();
    }
  }

  @Override
  public void close() {
    for (CapacityByteArrayOutputStream stream : byteStreams) {
      stream.close();
    }
  }

  protected void scatterBytes(byte[] bytes) {
    if (bytes.length != this.numStreams) {
      throw new ParquetEncodingException(String.format(
          "Number of bytes doesn't match the number of streams. Num butes: %d, Num streams: %d",
          bytes.length, this.numStreams));
    }
    for (int i = 0; i < bytes.length; ++i) {
      this.byteStreams[i].write(bytes[i]);
    }
  }

  @Override
  public long getAllocatedSize() {
    long totalCapacity = 0;
    for (CapacityByteArrayOutputStream stream : byteStreams) {
      totalCapacity += stream.getCapacity();
    }
    return totalCapacity;
  }

  public static class FloatByteStreamSplitValuesWriter extends ByteStreamSplitValuesWriter {

    public FloatByteStreamSplitValuesWriter(int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
      super(Float.BYTES, initialCapacity, pageSize, allocator);
    }

    @Override
    public void writeFloat(float v) {
      super.scatterBytes(BytesUtils.intToBytes(Float.floatToIntBits(v)));
    }

    @Override
    public String memUsageString(String prefix) {
      return String.format("%s FloatByteStreamSplitWriter %d bytes", prefix, getAllocatedSize());
    }
  }

  public static class DoubleByteStreamSplitValuesWriter extends ByteStreamSplitValuesWriter {

    public DoubleByteStreamSplitValuesWriter(int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
      super(Double.BYTES, initialCapacity, pageSize, allocator);
    }

    @Override
    public void writeDouble(double v) {
      super.scatterBytes(BytesUtils.longToBytes(Double.doubleToLongBits(v)));
    }

    @Override
    public String memUsageString(String prefix) {
      return String.format("%s DoubleByteStreamSplitWriter %d bytes", prefix, getAllocatedSize());
    }
  }

  public static class IntegerByteStreamSplitValuesWriter extends ByteStreamSplitValuesWriter {
    public IntegerByteStreamSplitValuesWriter(int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
      super(4, initialCapacity, pageSize, allocator);
    }

    @Override
    public void writeInteger(int v) {
      super.scatterBytes(BytesUtils.intToBytes(v));
    }

    @Override
    public String memUsageString(String prefix) {
      return String.format("%s IntegerByteStreamSplitWriter %d bytes", prefix, getAllocatedSize());
    }
  }

  public static class LongByteStreamSplitValuesWriter extends ByteStreamSplitValuesWriter {
    public LongByteStreamSplitValuesWriter(int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
      super(8, initialCapacity, pageSize, allocator);
    }

    @Override
    public void writeLong(long v) {
      super.scatterBytes(BytesUtils.longToBytes(v));
    }

    @Override
    public String memUsageString(String prefix) {
      return String.format("%s LongByteStreamSplitWriter %d bytes", prefix, getAllocatedSize());
    }
  }

  public static class FixedLenByteArrayByteStreamSplitValuesWriter extends ByteStreamSplitValuesWriter {
    private final int length;

    public FixedLenByteArrayByteStreamSplitValuesWriter(
        int length, int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
      super(length, initialCapacity, pageSize, allocator);
      this.length = length;
    }

    @Override
    public final void writeBytes(Binary v) {
      assert (v.length() == length)
          : ("Fixed Binary size " + v.length() + " does not match field type length " + length);
      super.scatterBytes(v.getBytesUnsafe());
    }

    @Override
    public String memUsageString(String prefix) {
      return String.format(
          "%s FixedLenByteArrayByteStreamSplitValuesWriter %d bytes", prefix, getAllocatedSize());
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy