All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.dinky.shaded.paimon.data.BinaryArray Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.data;

import org.dinky.shaded.paimon.annotation.Public;
import org.dinky.shaded.paimon.memory.MemorySegment;
import org.dinky.shaded.paimon.memory.MemorySegmentUtils;
import org.dinky.shaded.paimon.types.DataType;

import java.lang.reflect.Array;

import static org.dinky.shaded.paimon.memory.MemorySegment.UNSAFE;

/**
 * A binary implementation of {@link InternalArray} which is backed by {@link MemorySegment}s.
 *
 * 

For fields that hold fixed-length primitive types, such as long, double or int, they are * stored compacted in bytes, just like the original java array. * *

The binary layout of {@link BinaryArray}: * *

 * [size(int)] + [null bits(4-byte word boundaries)] + [values or offset&length] + [variable length part].
 * 
* * @since 0.4.0 */ @Public public final class BinaryArray extends BinarySection implements InternalArray, DataSetters { private static final long serialVersionUID = 1L; /** Offset for Arrays. */ private static final int BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class); private static final int BOOLEAN_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(boolean[].class); private static final int SHORT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(short[].class); private static final int INT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(int[].class); private static final int LONG_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(long[].class); private static final int FLOAT_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(float[].class); private static final int DOUBLE_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(double[].class); public static int calculateHeaderInBytes(int numFields) { return 4 + ((numFields + 31) / 32) * 4; } /** * It store real value when type is primitive. It store the length and offset of variable-length * part when type is string, map, etc. */ public static int calculateFixLengthPartSize(DataType type) { // ordered by type root definition switch (type.getTypeRoot()) { case BOOLEAN: case TINYINT: return 1; case CHAR: case VARCHAR: case BINARY: case VARBINARY: case DECIMAL: case BIGINT: case DOUBLE: case TIMESTAMP_WITHOUT_TIME_ZONE: case TIMESTAMP_WITH_LOCAL_TIME_ZONE: case ARRAY: case MULTISET: case MAP: case ROW: // long and double are 8 bytes; // otherwise it stores the length and offset of the variable-length part for types // such as is string, map, etc. return 8; case SMALLINT: return 2; case INTEGER: case FLOAT: case DATE: case TIME_WITHOUT_TIME_ZONE: return 4; default: throw new IllegalArgumentException("Unsupported type: " + type); } } // The number of elements in this array private transient int size; /** The position to start storing array elements. 
*/ private transient int elementOffset; public BinaryArray() {} private void assertIndexIsValid(int ordinal) { assert ordinal >= 0 : "ordinal (" + ordinal + ") should >= 0"; assert ordinal < size : "ordinal (" + ordinal + ") should < " + size; } private int getElementOffset(int ordinal, int elementSize) { return elementOffset + ordinal * elementSize; } @Override public int size() { return size; } @Override public void pointTo(MemorySegment[] segments, int offset, int sizeInBytes) { // Read the number of elements from the first 4 bytes. final int size = MemorySegmentUtils.getInt(segments, offset); assert size >= 0 : "size (" + size + ") should >= 0"; this.size = size; this.segments = segments; this.offset = offset; this.sizeInBytes = sizeInBytes; this.elementOffset = offset + calculateHeaderInBytes(this.size); } @Override public boolean isNullAt(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.bitGet(segments, offset + 4, pos); } @Override public void setNullAt(int pos) { assertIndexIsValid(pos); MemorySegmentUtils.bitSet(segments, offset + 4, pos); } public void setNotNullAt(int pos) { assertIndexIsValid(pos); MemorySegmentUtils.bitUnSet(segments, offset + 4, pos); } @Override public long getLong(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.getLong(segments, getElementOffset(pos, 8)); } @Override public void setLong(int pos, long value) { assertIndexIsValid(pos); setNotNullAt(pos); MemorySegmentUtils.setLong(segments, getElementOffset(pos, 8), value); } public void setNullLong(int pos) { assertIndexIsValid(pos); MemorySegmentUtils.bitSet(segments, offset + 4, pos); MemorySegmentUtils.setLong(segments, getElementOffset(pos, 8), 0L); } @Override public int getInt(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.getInt(segments, getElementOffset(pos, 4)); } @Override public void setInt(int pos, int value) { assertIndexIsValid(pos); setNotNullAt(pos); MemorySegmentUtils.setInt(segments, getElementOffset(pos, 4), value); } 
public void setNullInt(int pos) { assertIndexIsValid(pos); MemorySegmentUtils.bitSet(segments, offset + 4, pos); MemorySegmentUtils.setInt(segments, getElementOffset(pos, 4), 0); } @Override public BinaryString getString(int pos) { assertIndexIsValid(pos); int fieldOffset = getElementOffset(pos, 8); final long offsetAndSize = MemorySegmentUtils.getLong(segments, fieldOffset); return MemorySegmentUtils.readBinaryString(segments, offset, fieldOffset, offsetAndSize); } @Override public Decimal getDecimal(int pos, int precision, int scale) { assertIndexIsValid(pos); if (Decimal.isCompact(precision)) { return Decimal.fromUnscaledLong( MemorySegmentUtils.getLong(segments, getElementOffset(pos, 8)), precision, scale); } int fieldOffset = getElementOffset(pos, 8); final long offsetAndSize = MemorySegmentUtils.getLong(segments, fieldOffset); return MemorySegmentUtils.readDecimal(segments, offset, offsetAndSize, precision, scale); } @Override public Timestamp getTimestamp(int pos, int precision) { assertIndexIsValid(pos); if (Timestamp.isCompact(precision)) { return Timestamp.fromEpochMillis( MemorySegmentUtils.getLong(segments, getElementOffset(pos, 8))); } int fieldOffset = getElementOffset(pos, 8); final long offsetAndNanoOfMilli = MemorySegmentUtils.getLong(segments, fieldOffset); return MemorySegmentUtils.readTimestampData(segments, offset, offsetAndNanoOfMilli); } @Override public byte[] getBinary(int pos) { assertIndexIsValid(pos); int fieldOffset = getElementOffset(pos, 8); final long offsetAndSize = MemorySegmentUtils.getLong(segments, fieldOffset); return MemorySegmentUtils.readBinary(segments, offset, fieldOffset, offsetAndSize); } @Override public InternalArray getArray(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.readArrayData(segments, offset, getLong(pos)); } @Override public InternalMap getMap(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.readMapData(segments, offset, getLong(pos)); } @Override public InternalRow getRow(int 
pos, int numFields) { assertIndexIsValid(pos); int fieldOffset = getElementOffset(pos, 8); final long offsetAndSize = MemorySegmentUtils.getLong(segments, fieldOffset); return MemorySegmentUtils.readRowData(segments, numFields, offset, offsetAndSize); } @Override public boolean getBoolean(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.getBoolean(segments, getElementOffset(pos, 1)); } @Override public void setBoolean(int pos, boolean value) { assertIndexIsValid(pos); setNotNullAt(pos); MemorySegmentUtils.setBoolean(segments, getElementOffset(pos, 1), value); } public void setNullBoolean(int pos) { assertIndexIsValid(pos); MemorySegmentUtils.bitSet(segments, offset + 4, pos); MemorySegmentUtils.setBoolean(segments, getElementOffset(pos, 1), false); } @Override public byte getByte(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.getByte(segments, getElementOffset(pos, 1)); } @Override public void setByte(int pos, byte value) { assertIndexIsValid(pos); setNotNullAt(pos); MemorySegmentUtils.setByte(segments, getElementOffset(pos, 1), value); } public void setNullByte(int pos) { assertIndexIsValid(pos); MemorySegmentUtils.bitSet(segments, offset + 4, pos); MemorySegmentUtils.setByte(segments, getElementOffset(pos, 1), (byte) 0); } @Override public short getShort(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.getShort(segments, getElementOffset(pos, 2)); } @Override public void setShort(int pos, short value) { assertIndexIsValid(pos); setNotNullAt(pos); MemorySegmentUtils.setShort(segments, getElementOffset(pos, 2), value); } public void setNullShort(int pos) { assertIndexIsValid(pos); MemorySegmentUtils.bitSet(segments, offset + 4, pos); MemorySegmentUtils.setShort(segments, getElementOffset(pos, 2), (short) 0); } @Override public float getFloat(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.getFloat(segments, getElementOffset(pos, 4)); } @Override public void setFloat(int pos, float value) { 
assertIndexIsValid(pos); setNotNullAt(pos); MemorySegmentUtils.setFloat(segments, getElementOffset(pos, 4), value); } public void setNullFloat(int pos) { assertIndexIsValid(pos); MemorySegmentUtils.bitSet(segments, offset + 4, pos); MemorySegmentUtils.setFloat(segments, getElementOffset(pos, 4), 0F); } @Override public double getDouble(int pos) { assertIndexIsValid(pos); return MemorySegmentUtils.getDouble(segments, getElementOffset(pos, 8)); } @Override public void setDouble(int pos, double value) { assertIndexIsValid(pos); setNotNullAt(pos); MemorySegmentUtils.setDouble(segments, getElementOffset(pos, 8), value); } public void setNullDouble(int pos) { assertIndexIsValid(pos); MemorySegmentUtils.bitSet(segments, offset + 4, pos); MemorySegmentUtils.setDouble(segments, getElementOffset(pos, 8), 0.0); } @Override public void setDecimal(int pos, Decimal value, int precision) { assertIndexIsValid(pos); if (Decimal.isCompact(precision)) { // compact format setLong(pos, value.toUnscaledLong()); } else { int fieldOffset = getElementOffset(pos, 8); int cursor = (int) (MemorySegmentUtils.getLong(segments, fieldOffset) >>> 32); assert cursor > 0 : "invalid cursor " + cursor; // zero-out the bytes MemorySegmentUtils.setLong(segments, offset + cursor, 0L); MemorySegmentUtils.setLong(segments, offset + cursor + 8, 0L); if (value == null) { setNullAt(pos); // keep the offset for future update MemorySegmentUtils.setLong(segments, fieldOffset, ((long) cursor) << 32); } else { byte[] bytes = value.toUnscaledBytes(); assert (bytes.length <= 16); // Write the bytes to the variable length portion. 
MemorySegmentUtils.copyFromBytes(segments, offset + cursor, bytes, 0, bytes.length); setLong(pos, ((long) cursor << 32) | ((long) bytes.length)); } } } @Override public void setTimestamp(int pos, Timestamp value, int precision) { assertIndexIsValid(pos); if (Timestamp.isCompact(precision)) { setLong(pos, value.getMillisecond()); } else { int fieldOffset = getElementOffset(pos, 8); int cursor = (int) (MemorySegmentUtils.getLong(segments, fieldOffset) >>> 32); assert cursor > 0 : "invalid cursor " + cursor; if (value == null) { setNullAt(pos); // zero-out the bytes MemorySegmentUtils.setLong(segments, offset + cursor, 0L); // keep the offset for future update MemorySegmentUtils.setLong(segments, fieldOffset, ((long) cursor) << 32); } else { // write millisecond to the variable length portion. MemorySegmentUtils.setLong(segments, offset + cursor, value.getMillisecond()); // write nanoOfMillisecond to the fixed-length portion. setLong(pos, ((long) cursor << 32) | (long) value.getNanoOfMillisecond()); } } } public boolean anyNull() { for (int i = offset + 4; i < elementOffset; i += 4) { if (MemorySegmentUtils.getInt(segments, i) != 0) { return true; } } return false; } private void checkNoNull() { if (anyNull()) { throw new RuntimeException("Primitive array must not contain a null value."); } } @Override public boolean[] toBooleanArray() { checkNoNull(); boolean[] values = new boolean[size]; MemorySegmentUtils.copyToUnsafe( segments, elementOffset, values, BOOLEAN_ARRAY_OFFSET, size); return values; } @Override public byte[] toByteArray() { checkNoNull(); byte[] values = new byte[size]; MemorySegmentUtils.copyToUnsafe( segments, elementOffset, values, BYTE_ARRAY_BASE_OFFSET, size); return values; } @Override public short[] toShortArray() { checkNoNull(); short[] values = new short[size]; MemorySegmentUtils.copyToUnsafe( segments, elementOffset, values, SHORT_ARRAY_OFFSET, size * 2); return values; } @Override public int[] toIntArray() { checkNoNull(); int[] values = new 
int[size]; MemorySegmentUtils.copyToUnsafe( segments, elementOffset, values, INT_ARRAY_OFFSET, size * 4); return values; } @Override public long[] toLongArray() { checkNoNull(); long[] values = new long[size]; MemorySegmentUtils.copyToUnsafe( segments, elementOffset, values, LONG_ARRAY_OFFSET, size * 8); return values; } @Override public float[] toFloatArray() { checkNoNull(); float[] values = new float[size]; MemorySegmentUtils.copyToUnsafe( segments, elementOffset, values, FLOAT_ARRAY_OFFSET, size * 4); return values; } @Override public double[] toDoubleArray() { checkNoNull(); double[] values = new double[size]; MemorySegmentUtils.copyToUnsafe( segments, elementOffset, values, DOUBLE_ARRAY_OFFSET, size * 8); return values; } @SuppressWarnings("unchecked") public T[] toObjectArray(DataType elementType) { Class elementClass = (Class) InternalRow.getDataClass(elementType); InternalArray.ElementGetter elementGetter = InternalArray.createElementGetter(elementType); T[] values = (T[]) Array.newInstance(elementClass, size); for (int i = 0; i < size; i++) { if (!isNullAt(i)) { values[i] = (T) elementGetter.getElementOrNull(this, i); } } return values; } public BinaryArray copy() { return copy(new BinaryArray()); } public BinaryArray copy(BinaryArray reuse) { byte[] bytes = MemorySegmentUtils.copyToBytes(segments, offset, sizeInBytes); reuse.pointTo(MemorySegment.wrap(bytes), 0, sizeInBytes); return reuse; } @Override public int hashCode() { return MemorySegmentUtils.hashByWords(segments, offset, sizeInBytes); } // ------------------------------------------------------------------------------------------ // Construction Utilities // ------------------------------------------------------------------------------------------ public static BinaryArray fromPrimitiveArray(boolean[] arr) { return fromPrimitiveArray(arr, BOOLEAN_ARRAY_OFFSET, arr.length, 1); } public static BinaryArray fromPrimitiveArray(byte[] arr) { return fromPrimitiveArray(arr, BYTE_ARRAY_BASE_OFFSET, 
arr.length, 1); } public static BinaryArray fromPrimitiveArray(short[] arr) { return fromPrimitiveArray(arr, SHORT_ARRAY_OFFSET, arr.length, 2); } public static BinaryArray fromPrimitiveArray(int[] arr) { return fromPrimitiveArray(arr, INT_ARRAY_OFFSET, arr.length, 4); } public static BinaryArray fromPrimitiveArray(long[] arr) { return fromPrimitiveArray(arr, LONG_ARRAY_OFFSET, arr.length, 8); } public static BinaryArray fromPrimitiveArray(float[] arr) { return fromPrimitiveArray(arr, FLOAT_ARRAY_OFFSET, arr.length, 4); } public static BinaryArray fromPrimitiveArray(double[] arr) { return fromPrimitiveArray(arr, DOUBLE_ARRAY_OFFSET, arr.length, 8); } private static BinaryArray fromPrimitiveArray( Object arr, int offset, int length, int elementSize) { final long headerInBytes = calculateHeaderInBytes(length); final long valueRegionInBytes = elementSize * length; // must align by 8 bytes long totalSizeInLongs = (headerInBytes + valueRegionInBytes + 7) / 8; if (totalSizeInLongs > Integer.MAX_VALUE / 8) { throw new UnsupportedOperationException( "Cannot convert this array to unsafe format as " + "it's too big."); } long totalSize = totalSizeInLongs * 8; final byte[] data = new byte[(int) totalSize]; UNSAFE.putInt(data, (long) BYTE_ARRAY_BASE_OFFSET, length); UNSAFE.copyMemory( arr, offset, data, BYTE_ARRAY_BASE_OFFSET + headerInBytes, valueRegionInBytes); BinaryArray result = new BinaryArray(); result.pointTo(MemorySegment.wrap(data), 0, (int) totalSize); return result; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy