All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.fury.format.row.binary.BinaryRow Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2023 The Fury Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.fury.format.row.binary;

import static io.fury.util.Preconditions.checkArgument;

import com.google.common.base.Preconditions;
import io.fury.format.row.Row;
import io.fury.format.type.DataTypes;
import io.fury.memory.BitUtils;
import io.fury.memory.MemoryBuffer;
import io.fury.memory.MemoryUtils;
import java.math.BigDecimal;
import java.util.HashMap;
import java.util.Map;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;

/**
 * A binary implementation of {@link Row} backed by binary buffer instead of java objects.
 *
 * 
    *
  • Validity Bit Set Bitmap Region (1 bit/field) for tracking null values. Primitive type is * always considered to be not null. Set bit to 1 indicate the value is not null, Set bit to 0 * indicate null *
  • Fixed-Length 8-byte Values Region. if field isn't aligned, read any length-gt-1 value may * need read multi times cache. *
  • Variable-Length Data Section *
* *

Equality comparison and hashing of rows can be performed on raw bytes since if two rows are * identical so should be their bit-wise representation. * *

    * BinaryRow is inspired by Apache Spark tungsten UnsafeRow, the differences are *
  • Use arrow schema to describe meta. *
  • String support latin/utf16/utf8 encoding. *
  • Decimal use arrow decimal format. *
  • Variable-size field can be inline in fixed-size region if small enough. *
  • Allow skip padding by generate Row using aot to put offsets in generated code. *
  • The implementation support java/C++/python/etc.. *
  • Support adding fields without breaking compatibility *
*/ public class BinaryRow extends UnsafeTrait implements Row { private final Schema schema; private final int numFields; private final int bitmapWidthInBytes; private MemoryBuffer buffer; private int baseOffset; private int sizeInBytes; public BinaryRow(Schema schema) { this.schema = schema; this.numFields = schema.getFields().size(); Preconditions.checkArgument(numFields > 0); this.bitmapWidthInBytes = BitUtils.calculateBitmapWidthInBytes(numFields); } public void pointTo(MemoryBuffer buffer, int offset, int sizeInBytes) { this.buffer = buffer; this.baseOffset = offset; this.sizeInBytes = sizeInBytes; } @Override public Schema getSchema() { return schema; } @Override public int numFields() { return numFields; } public int getSizeInBytes() { return sizeInBytes; } @Override public int getBaseOffset() { return baseOffset; } @Override public MemoryBuffer getBuffer() { return buffer; } @Override public int getOffset(int ordinal) { return baseOffset + bitmapWidthInBytes + (ordinal << 3); // ordinal * 8 = (ordinal << 3) } @Override public void assertIndexIsValid(int index) { assert index >= 0 : "index (" + index + ") should >= 0"; checkArgument(index < numFields, "index (%d) should < %d", index, numFields); } @Override public boolean isNullAt(int ordinal) { return BitUtils.isSet(buffer, baseOffset, ordinal); } @Override public boolean anyNull() { return BitUtils.anySet(buffer, baseOffset, bitmapWidthInBytes); } @Override public void setNullAt(int ordinal) { assertIndexIsValid(ordinal); BitUtils.set(buffer, baseOffset, ordinal); assert DataTypes.getTypeWidth(schema.getFields().get(ordinal).getType()) > 0 : "field[ " + ordinal + " " + schema.getFields().get(ordinal).getType() + " ] " + "must be fixed-width"; // To preserve row equality, zero out the value when setting the column to null. // Since this row does not currently support updates to variable-length values, we don't // have to worry about zeroing out that data. buffer.putLong(getOffset(ordinal), 0); } public void setNotNullAt(int ordinal) { assertIndexIsValid(ordinal); BitUtils.unset(buffer, baseOffset, ordinal); } @Override public BigDecimal getDecimal(int ordinal) { return getDecimal(ordinal, (ArrowType.Decimal) schema.getFields().get(ordinal).getType()); } @Override public BinaryRow getStruct(int ordinal) { return getStruct(ordinal, schema.getFields().get(ordinal)); } @Override public BinaryArray getArray(int ordinal) { return getArray(ordinal, schema.getFields().get(ordinal)); } @Override public BinaryMap getMap(int ordinal) { return getMap(ordinal, schema.getFields().get(ordinal)); } @Override public Row copy() { MemoryBuffer copyBuf = MemoryUtils.buffer(sizeInBytes); buffer.copyTo(baseOffset, copyBuf, 0, sizeInBytes); BinaryRow copyRow = new BinaryRow(schema); copyRow.pointTo(copyBuf, 0, sizeInBytes); return copyRow; } @Override public String toString() { if (buffer == null) { return "null"; } else { StringBuilder build = new StringBuilder("{"); for (int i = 0; i < numFields; i++) { if (i != 0) { build.append(", "); } Field field = schema.getFields().get(i); build.append(field.getName()).append("="); if (isNullAt(i)) { build.append("null"); } else { build.append(get(i, field)); } } build.append("}"); return build.toString(); } } public String toDebugString() { if (buffer == null) { return "null"; } else { StringBuilder build = new StringBuilder(); for (int i = 0; i < bitmapWidthInBytes + 8 * numFields; i += 8) { if (i != 0) { build.append(','); } build.append(Long.toHexString(buffer.getLong(baseOffset + i))); } return build.toString(); } } public Map toMap() { Map map = new HashMap<>(); for (int i = 0; i < numFields; i++) { Field field = schema.getFields().get(i); map.put(field.getName(), get(i, field)); } return map; } public byte[] toBytes() { return buffer.getBytes(baseOffset, sizeInBytes); } /** * If it is a fixed-length field, we can call this BinaryRow's setXX method for in-place updates. * If it is variable-length field, can't use this method, because the underlying data is stored * continuously. */ public static boolean isFixedLength(ArrowType type) { return DataTypes.getTypeWidth(type) > 0; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy