All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.arrow.vector.BitVector Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.arrow.vector;

import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.complex.impl.BitReaderImpl;
import org.apache.arrow.vector.complex.reader.FieldReader;
import org.apache.arrow.vector.holders.BitHolder;
import org.apache.arrow.vector.holders.NullableBitHolder;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.util.OversizedAllocationException;
import org.apache.arrow.vector.util.TransferPair;

import io.netty.buffer.ArrowBuf;

/**
 * BitVector implements a fixed width (1 bit) vector of
 * boolean values which could be null. Each value in the vector corresponds
 * to a single bit in the underlying data stream backing the vector.
 */
public final class BitVector extends BaseFixedWidthVector {

  private static final int HASH_CODE_FOR_ZERO = 17;

  private static final int HASH_CODE_FOR_ONE = 19;

  private final FieldReader reader;

  /**
   * Instantiate a BitVector. This doesn't allocate any memory for
   * the data in vector.
   *
   * @param name      name of the vector
   * @param allocator allocator for memory management.
   */
  public BitVector(String name, BufferAllocator allocator) {
    this(name, FieldType.nullable(MinorType.BIT.getType()), allocator);
  }

  /**
   * Instantiate a BitVector. This doesn't allocate any memory for
   * the data in vector.
   *
   * @param name      name of the vector
   * @param fieldType type of Field materialized by this vector
   * @param allocator allocator for memory management.
   */
  public BitVector(String name, FieldType fieldType, BufferAllocator allocator) {
    this(new Field(name, fieldType, null), allocator);
  }

  /**
   * Instantiate a BitVector. This doesn't allocate any memory for
   * the data in vector.
   *
   * @param field the Field materialized by this vector
   * @param allocator allocator for memory management.
   */
  public BitVector(Field field, BufferAllocator allocator) {
    super(field, allocator,0);
    reader = new BitReaderImpl(BitVector.this);
  }

  /**
   * Get a reader that supports reading values from this vector.
   *
   * @return Field Reader for this vector
   */
  @Override
  public FieldReader getReader() {
    return reader;
  }

  /**
   * Get minor type for this vector. The vector holds values belonging
   * to a particular type.
   *
   * @return {@link org.apache.arrow.vector.types.Types.MinorType}
   */
  @Override
  public MinorType getMinorType() {
    return MinorType.BIT;
  }

  /**
   * Sets the desired value capacity for the vector. This function doesn't
   * allocate any memory for the vector.
   *
   * @param valueCount desired number of elements in the vector
   */
  @Override
  public void setInitialCapacity(int valueCount) {
    final int size = getValidityBufferSizeFromCount(valueCount);
    if (size * 2 > MAX_ALLOCATION_SIZE) {
      throw new OversizedAllocationException("Requested amount of memory is more than max allowed");
    }
    lastValueCapacity = valueCount;
  }

  /**
   * Get the current value capacity for the vector.
   *
   * @return number of elements that vector can hold.
   */
  @Override
  public int getValueCapacity() {
    return capAtMaxInt(validityBuffer.capacity() * 8);
  }

  /**
   * Get the potential buffer size for a particular number of records.
   *
   * @param count desired number of elements in the vector
   * @return estimated size of underlying buffers if the vector holds
   *         a given number of elements
   */
  @Override
  public int getBufferSizeFor(final int count) {
    if (count == 0) {
      return 0;
    }
    return 2 * getValidityBufferSizeFromCount(count);
  }

  /**
   * Get the size (number of bytes) of underlying buffers used by this vector.
   *
   * @return size of underlying buffers.
   */
  @Override
  public int getBufferSize() {
    return getBufferSizeFor(valueCount);
  }

  /**
   * Slice this vector at desired index and length and transfer the
   * corresponding data to the target vector.
   *
   * @param startIndex start position of the split in source vector.
   * @param length     length of the split.
   * @param target     destination vector
   */
  public void splitAndTransferTo(int startIndex, int length, BaseFixedWidthVector target) {
    Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
        "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
    compareTypes(target, "splitAndTransferTo");
    target.clear();
    target.validityBuffer = splitAndTransferBuffer(startIndex, length, target,
            validityBuffer, target.validityBuffer);
    target.valueBuffer = splitAndTransferBuffer(startIndex, length, target,
            valueBuffer, target.valueBuffer);

    target.setValueCount(length);
  }

  private ArrowBuf splitAndTransferBuffer(
      int startIndex,
      int length,
      BaseFixedWidthVector target,
      ArrowBuf sourceBuffer,
      ArrowBuf destBuffer) {
    int firstByteSource = BitVectorHelper.byteIndex(startIndex);
    int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
    int byteSizeTarget = getValidityBufferSizeFromCount(length);
    int offset = startIndex % 8;

    if (length > 0) {
      if (offset == 0) {
        /* slice */
        if (destBuffer != null) {
          destBuffer.getReferenceManager().release();
        }
        destBuffer = sourceBuffer.slice(firstByteSource, byteSizeTarget);
        destBuffer.getReferenceManager().retain(1);
      } else {
        /* Copy data
         * When the first bit starts from the middle of a byte (offset != 0),
         * copy data from src BitVector.
         * Each byte in the target is composed by a part in i-th byte,
         * another part in (i+1)-th byte.
         */
        destBuffer = allocator.buffer(byteSizeTarget);
        destBuffer.readerIndex(0);
        destBuffer.setZero(0, destBuffer.capacity());

        for (int i = 0; i < byteSizeTarget - 1; i++) {
          byte b1 = BitVectorHelper.getBitsFromCurrentByte(sourceBuffer, firstByteSource + i, offset);
          byte b2 = BitVectorHelper.getBitsFromNextByte(sourceBuffer, firstByteSource + i + 1, offset);

          destBuffer.setByte(i, (b1 + b2));
        }

        /* Copying the last piece is done in the following manner:
         * if the source vector has 1 or more bytes remaining, we copy
         * the last piece as a byte formed by shifting data
         * from the current byte and the next byte.
         *
         * if the source vector has no more bytes remaining
         * (we are at the last byte), we copy the last piece as a byte
         * by shifting data from the current byte.
         */
        if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
          byte b1 = BitVectorHelper.getBitsFromCurrentByte(sourceBuffer,
                  firstByteSource + byteSizeTarget - 1, offset);
          byte b2 = BitVectorHelper.getBitsFromNextByte(sourceBuffer,
                  firstByteSource + byteSizeTarget, offset);

          destBuffer.setByte(byteSizeTarget - 1, b1 + b2);
        } else {
          byte b1 = BitVectorHelper.getBitsFromCurrentByte(sourceBuffer,
                  firstByteSource + byteSizeTarget - 1, offset);
          destBuffer.setByte(byteSizeTarget - 1, b1);
        }
      }
    }

    return destBuffer;
  }


  /*----------------------------------------------------------------*
   |                                                                |
   |          vector value retrieval methods                        |
   |                                                                |
   *----------------------------------------------------------------*/

  private int getBit(int index) {
    final int byteIndex = index >> 3;
    final byte b = valueBuffer.getByte(byteIndex);
    final int bitIndex = index & 7;
    return (b >> bitIndex) & 0x01;
  }

  /**
   * Get the element at the given index from the vector.
   *
   * @param index position of element
   * @return element at given index
   */
  public int get(int index) throws IllegalStateException {
    if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
      throw new IllegalStateException("Value at index is null");
    }
    return getBit(index);
  }

  /**
   * Get the element at the given index from the vector and
   * sets the state in holder. If element at given index
   * is null, holder.isSet will be zero.
   *
   * @param index position of element
   */
  public void get(int index, NullableBitHolder holder) {
    if (isSet(index) == 0) {
      holder.isSet = 0;
      return;
    }
    holder.isSet = 1;
    holder.value = getBit(index);
  }

  /**
   * Same as {@link #get(int)}.
   *
   * @param index position of element
   * @return element at given index
   */
  public Boolean getObject(int index) {
    if (isSet(index) == 0) {
      return null;
    } else {
      return new Boolean(getBit(index) != 0);
    }
  }

  /**
   * Copy a cell value from a particular index in source vector to a particular
   * position in this vector.
   *
   * @param fromIndex position to copy from in source vector
   * @param thisIndex position to copy to in this vector
   * @param from      source vector
   */
  @Override
  public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
    Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
    boolean fromIsSet = BitVectorHelper.get(from.getValidityBuffer(), fromIndex) != 0;
    if (fromIsSet) {
      BitVectorHelper.setBit(validityBuffer, thisIndex);
      BitVectorHelper.setValidityBit(valueBuffer, thisIndex, ((BitVector) from).getBit(fromIndex));
    } else {
      BitVectorHelper.unsetBit(validityBuffer, thisIndex);
    }
  }

  /*----------------------------------------------------------------*
   |                                                                |
   |          vector value setter methods                           |
   |                                                                |
   *----------------------------------------------------------------*/


  /**
   * Set the element at the given index to the given value.
   *
   * @param index position of element
   * @param value value of element
   */
  public void set(int index, int value) {
    BitVectorHelper.setBit(validityBuffer, index);
    if (value != 0) {
      BitVectorHelper.setBit(valueBuffer, index);
    } else {
      BitVectorHelper.unsetBit(valueBuffer, index);
    }
  }

  /**
   * Set the element at the given index to the value set in data holder.
   * If the value in holder is not indicated as set, element in the
   * at the given index will be null.
   *
   * @param index  position of element
   * @param holder nullable data holder for value of element
   */
  public void set(int index, NullableBitHolder holder) throws IllegalArgumentException {
    if (holder.isSet < 0) {
      throw new IllegalArgumentException();
    } else if (holder.isSet > 0) {
      BitVectorHelper.setBit(validityBuffer, index);
      if (holder.value != 0) {
        BitVectorHelper.setBit(valueBuffer, index);
      } else {
        BitVectorHelper.unsetBit(valueBuffer, index);
      }
    } else {
      BitVectorHelper.unsetBit(validityBuffer, index);
    }
  }

  /**
   * Set the element at the given index to the value set in data holder.
   *
   * @param index  position of element
   * @param holder data holder for value of element
   */
  public void set(int index, BitHolder holder) {
    BitVectorHelper.setBit(validityBuffer, index);
    if (holder.value != 0) {
      BitVectorHelper.setBit(valueBuffer, index);
    } else {
      BitVectorHelper.unsetBit(valueBuffer, index);
    }
  }

  /**
   * Same as {@link #set(int, int)} except that it handles the
   * case when index is greater than or equal to existing
   * value capacity {@link #getValueCapacity()}.
   *
   * @param index position of element
   * @param value value of element
   */
  public void setSafe(int index, int value) {
    handleSafe(index);
    set(index, value);
  }

  /**
   * Same as {@link #set(int, NullableBitHolder)} except that it handles the
   * case when index is greater than or equal to existing
   * value capacity {@link #getValueCapacity()}.
   *
   * @param index  position of element
   * @param holder nullable data holder for value of element
   */
  public void setSafe(int index, NullableBitHolder holder) throws IllegalArgumentException {
    handleSafe(index);
    set(index, holder);
  }

  /**
   * Same as {@link #set(int, BitHolder)} except that it handles the
   * case when index is greater than or equal to existing
   * value capacity {@link #getValueCapacity()}.
   *
   * @param index  position of element
   * @param holder data holder for value of element
   */
  public void setSafe(int index, BitHolder holder) {
    handleSafe(index);
    set(index, holder);
  }

  /**
   * Store the given value at a particular position in the vector. isSet indicates
   * whether the value is NULL or not.
   *
   * @param index position of the new value
   * @param isSet 0 for NULL value, 1 otherwise
   * @param value element value
   */
  public void set(int index, int isSet, int value) {
    if (isSet > 0) {
      set(index, value);
    } else {
      BitVectorHelper.unsetBit(validityBuffer, index);
    }
  }

  /**
   * Same as {@link #set(int, int, int)} except that it handles the case
   * when index is greater than or equal to current value capacity of the
   * vector.
   *
   * @param index position of the new value
   * @param isSet 0 for NULL value, 1 otherwise
   * @param value element value
   */
  public void setSafe(int index, int isSet, int value) {
    handleSafe(index);
    set(index, isSet, value);
  }

  /**
   * Set the element at the given index to one.
   *
   * @param index position of element
   */
  public void setToOne(int index) {
    BitVectorHelper.setBit(validityBuffer, index);
    BitVectorHelper.setBit(valueBuffer, index);
  }

  /**
   * Same as {@link #setToOne(int)} except that it handles the case when
   * index is greater than or equal to current value capacity of the vector.
   *
   * @param index position of the element
   */
  public void setSafeToOne(int index) {
    handleSafe(index);
    setToOne(index);
  }

  @Override
  public ArrowBufPointer getDataPointer(int index) {
    throw new UnsupportedOperationException();
  }

  @Override
  public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
    throw new UnsupportedOperationException();
  }

  @Override
  public int hashCode(int index) {
    if (isNull(index)) {
      return ArrowBufPointer.NULL_HASH_CODE;
    } else {
      if (get(index) == 0) {
        return HASH_CODE_FOR_ZERO;
      } else {
        return HASH_CODE_FOR_ONE;
      }
    }
  }

  @Override
  public int hashCode(int index, ArrowBufHasher hasher) {
    return hashCode(index);
  }

  /**
   * Set count bits to 1 in data starting at firstBitIndex.
   *
   * @param firstBitIndex the index of the first bit to set
   * @param count         the number of bits to set
   */
  public void setRangeToOne(int firstBitIndex, int count) {
    int startByteIndex = BitVectorHelper.byteIndex(firstBitIndex);
    final int lastBitIndex = firstBitIndex + count;
    final int endByteIndex = BitVectorHelper.byteIndex(lastBitIndex);
    final int startByteBitIndex = BitVectorHelper.bitIndex(firstBitIndex);
    final int endBytebitIndex = BitVectorHelper.bitIndex(lastBitIndex);
    if (count < 8 && startByteIndex == endByteIndex) {
      // handles the case where we don't have a first and a last byte
      byte bitMask = 0;
      for (int i = startByteBitIndex; i < endBytebitIndex; ++i) {
        bitMask |= (byte) (1L << i);
      }
      BitVectorHelper.setBitMaskedByte(validityBuffer, startByteIndex, bitMask);
      BitVectorHelper.setBitMaskedByte(valueBuffer, startByteIndex, bitMask);
    } else {
      // fill in first byte (if it's not full)
      if (startByteBitIndex != 0) {
        final byte bitMask = (byte) (0xFFL << startByteBitIndex);
        BitVectorHelper.setBitMaskedByte(validityBuffer, startByteIndex, bitMask);
        BitVectorHelper.setBitMaskedByte(valueBuffer, startByteIndex, bitMask);
        ++startByteIndex;
      }

      // fill in one full byte at a time
      validityBuffer.setOne(startByteIndex, endByteIndex - startByteIndex);
      valueBuffer.setOne(startByteIndex, endByteIndex - startByteIndex);

      // fill in the last byte (if it's not full)
      if (endBytebitIndex != 0) {
        final int byteIndex = BitVectorHelper.byteIndex(lastBitIndex - endBytebitIndex);
        final byte bitMask = (byte) (0xFFL >>> ((8 - endBytebitIndex) & 7));
        BitVectorHelper.setBitMaskedByte(validityBuffer, byteIndex, bitMask);
        BitVectorHelper.setBitMaskedByte(valueBuffer, byteIndex, bitMask);
      }
    }
  }


  /*----------------------------------------------------------------*
   |                                                                |
   |                      vector transfer                           |
   |                                                                |
   *----------------------------------------------------------------*/


  /**
   * Construct a TransferPair comprising of this and a target vector of
   * the same type.
   *
   * @param ref       name of the target vector
   * @param allocator allocator for the target vector
   * @return {@link TransferPair}
   */
  @Override
  public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
    return new TransferImpl(ref, allocator);
  }

  /**
   * Construct a TransferPair with a desired target vector of the same type.
   *
   * @param to target vector
   * @return {@link TransferPair}
   */
  @Override
  public TransferPair makeTransferPair(ValueVector to) {
    return new TransferImpl((BitVector) to);
  }

  private class TransferImpl implements TransferPair {
    BitVector to;

    public TransferImpl(String ref, BufferAllocator allocator) {
      to = new BitVector(ref, field.getFieldType(), allocator);
    }

    public TransferImpl(BitVector to) {
      this.to = to;
    }

    @Override
    public BitVector getTo() {
      return to;
    }

    @Override
    public void transfer() {
      transferTo(to);
    }

    @Override
    public void splitAndTransfer(int startIndex, int length) {
      splitAndTransferTo(startIndex, length, to);
    }

    @Override
    public void copyValueSafe(int fromIndex, int toIndex) {
      to.copyFromSafe(fromIndex, toIndex, BitVector.this);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy