All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.expressions.codegen;

import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.array.ByteArrayMethods;

/**
 * A helper class to manage the data buffer for an unsafe row.  The data buffer can grow and
 * automatically re-point the unsafe row to it.
 *
 * This class can be used to build a one-pass unsafe row writing program, i.e. data will be written
 * to the data buffer directly and no extra copy is needed.  There should be only one instance of
 * this class per writing program, so that the memory segment/data buffer can be reused.  Note that
 * for each incoming record, we should call `reset` on the BufferHolder instance before writing the
 * record, so that the data buffer is reused.
 */
final class BufferHolder {

  /** Largest backing-array length this holder will ever allocate. */
  private static final int ARRAY_MAX = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH;

  // buffer is guaranteed to be word-aligned since UnsafeRow assumes each field is word-aligned.
  private byte[] buffer;

  // Absolute write position (includes Platform.BYTE_ARRAY_OFFSET). It starts at the base offset,
  // so totalSize() is 0 until reset() moves it past the fixed-size region.
  private int cursor = Platform.BYTE_ARRAY_OFFSET;

  private final UnsafeRow row;

  // Bytes taken by the null bitset plus one 8-byte slot per field.
  private final int fixedSize;

  /**
   * Creates a holder for {@code row} with 64 bytes of initial room beyond the fixed-size region.
   */
  BufferHolder(UnsafeRow row) {
    this(row, 64);
  }

  /**
   * Creates a holder for {@code row}, allocates the backing array and points the row at it.
   *
   * @param row the unsafe row whose data this holder manages
   * @param initialSize number of bytes to reserve in addition to the fixed-size region
   * @throws UnsupportedOperationException if the row has so many fields that the fixed-size
   *         region plus {@code initialSize} would not fit within the array length limit
   */
  BufferHolder(UnsafeRow row, int initialSize) {
    final int numFields = row.numFields();
    final int bitsetWidthInBytes = UnsafeRow.calculateBitSetWidthInBytes(numFields);

    // Expressed as a division so the comparison itself cannot overflow for large field counts.
    final int maxFields = (ARRAY_MAX - initialSize - bitsetWidthInBytes) / 8;
    if (numFields > maxFields) {
      throw new UnsupportedOperationException(
        "Cannot create BufferHolder for input UnsafeRow because there are " +
          "too many fields (number of fields: " + numFields + ")");
    }

    this.row = row;
    this.fixedSize = bitsetWidthInBytes + 8 * numFields;

    final int unroundedSize = fixedSize + initialSize;
    this.buffer = new byte[ByteArrayMethods.roundNumberOfBytesToNearestWord(unroundedSize)];
    this.row.pointTo(buffer, buffer.length);
  }

  /**
   * Ensures there is room for at least {@code neededSize} more bytes after the bytes already
   * written. When the current array is too small, a larger one is allocated, the written bytes
   * are copied over and the row is re-pointed to the new array.
   *
   * @param neededSize number of additional bytes required; must not be negative
   * @throws IllegalArgumentException if {@code neededSize} is negative, or if the size after
   *         growing would exceed the array length limit
   */
  void grow(int neededSize) {
    if (neededSize < 0) {
      throw new IllegalArgumentException(
        "Cannot grow BufferHolder by size " + neededSize + " because the size is negative");
    }

    final int used = totalSize();
    // Subtraction form keeps the limit check free of integer overflow.
    if (neededSize > ARRAY_MAX - used) {
      throw new IllegalArgumentException(
        "Cannot grow BufferHolder by size " + neededSize + " because the size after growing " +
          "exceeds size limitation " + ARRAY_MAX);
    }

    final int required = used + neededSize;
    if (required <= buffer.length) {
      // Common case: the re-used buffer is already large enough.
      return;
    }

    // Double the required size to amortize future growth, capped at the array length limit.
    final int target;
    if (required < ARRAY_MAX / 2) {
      target = required * 2;
    } else {
      target = ARRAY_MAX;
    }

    final byte[] grown = new byte[ByteArrayMethods.roundNumberOfBytesToNearestWord(target)];
    Platform.copyMemory(
      buffer, Platform.BYTE_ARRAY_OFFSET, grown, Platform.BYTE_ARRAY_OFFSET, used);
    buffer = grown;
    row.pointTo(grown, grown.length);
  }

  /** Returns the current backing array; the reference changes whenever {@link #grow} reallocates. */
  byte[] getBuffer() {
    return buffer;
  }

  /** Returns the absolute write position, which includes {@code Platform.BYTE_ARRAY_OFFSET}. */
  int getCursor() {
    return cursor;
  }

  /** Advances the write position by {@code val} bytes; no bounds check is performed here. */
  void increaseCursor(int val) {
    cursor = cursor + val;
  }

  /** Moves the write position to just past the fixed-size region, ready for a new record. */
  void reset() {
    cursor = fixedSize + Platform.BYTE_ARRAY_OFFSET;
  }

  /** Returns the number of bytes between the start of the array data and the write position. */
  int totalSize() {
    return cursor - Platform.BYTE_ARRAY_OFFSET;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy