All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dinky.shaded.paimon.data.serializer.RowCompactedSerializer Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.data.serializer;

import org.dinky.shaded.paimon.annotation.VisibleForTesting;
import org.dinky.shaded.paimon.data.BinaryArray;
import org.dinky.shaded.paimon.data.BinaryMap;
import org.dinky.shaded.paimon.data.BinaryString;
import org.dinky.shaded.paimon.data.Decimal;
import org.dinky.shaded.paimon.data.GenericRow;
import org.dinky.shaded.paimon.data.InternalArray;
import org.dinky.shaded.paimon.data.InternalMap;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.data.InternalRow.FieldGetter;
import org.dinky.shaded.paimon.data.Timestamp;
import org.dinky.shaded.paimon.io.DataInputView;
import org.dinky.shaded.paimon.io.DataOutputView;
import org.dinky.shaded.paimon.memory.MemorySegment;
import org.dinky.shaded.paimon.types.DataType;
import org.dinky.shaded.paimon.types.RowKind;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.utils.VarLengthIntUtils;

import javax.annotation.Nullable;

import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Objects;

import static org.dinky.shaded.paimon.data.BinaryRow.HEADER_SIZE_IN_BITS;
import static org.dinky.shaded.paimon.memory.MemorySegmentUtils.bitGet;
import static org.dinky.shaded.paimon.memory.MemorySegmentUtils.bitSet;
import static org.dinky.shaded.paimon.types.DataTypeChecks.getPrecision;
import static org.dinky.shaded.paimon.types.DataTypeChecks.getScale;
import static org.dinky.shaded.paimon.utils.Preconditions.checkArgument;

/** A {@link Serializer} for {@link InternalRow} using compacted binary. */
public class RowCompactedSerializer implements Serializer {

    private static final long serialVersionUID = 1L;

    private final FieldGetter[] getters;
    private final FieldWriter[] writers;
    private final FieldReader[] readers;
    private final RowType rowType;

    @Nullable private RowWriter rowWriter;

    @Nullable private RowReader rowReader;

    public static int calculateBitSetInBytes(int arity) {
        return (arity + 7 + HEADER_SIZE_IN_BITS) / 8;
    }

    public RowCompactedSerializer(RowType rowType) {
        this.getters = new FieldGetter[rowType.getFieldCount()];
        this.writers = new FieldWriter[rowType.getFieldCount()];
        this.readers = new FieldReader[rowType.getFieldCount()];
        for (int i = 0; i < rowType.getFieldCount(); i++) {
            DataType type = rowType.getTypeAt(i);
            getters[i] = InternalRow.createFieldGetter(type, i);
            writers[i] = createFieldWriter(type);
            readers[i] = createFieldReader(type);
        }
        this.rowType = rowType;
    }

    @VisibleForTesting
    RowType rowType() {
        return rowType;
    }

    @Override
    public Serializer duplicate() {
        return new RowCompactedSerializer(rowType);
    }

    @Override
    public InternalRow copy(InternalRow from) {
        return deserialize(serializeToBytes(from));
    }

    @Override
    public void serialize(InternalRow record, DataOutputView target) throws IOException {
        byte[] bytes = serializeToBytes(record);
        VarLengthIntUtils.encodeInt(target, bytes.length);
        target.write(bytes);
    }

    @Override
    public InternalRow deserialize(DataInputView source) throws IOException {
        int len = VarLengthIntUtils.decodeInt(source);
        byte[] bytes = new byte[len];
        source.readFully(bytes);
        return deserialize(bytes);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        RowCompactedSerializer that = (RowCompactedSerializer) o;
        return Objects.equals(rowType, that.rowType);
    }

    @Override
    public int hashCode() {
        return Objects.hash(rowType);
    }

    public byte[] serializeToBytes(InternalRow record) {
        if (rowWriter == null) {
            rowWriter = new RowWriter(calculateBitSetInBytes(getters.length));
        }
        rowWriter.reset();
        rowWriter.writeRowKind(record.getRowKind());
        for (int i = 0; i < getters.length; i++) {
            Object field = getters[i].getFieldOrNull(record);
            if (field == null) {
                rowWriter.setNullAt(i);
            } else {
                writers[i].writeField(rowWriter, i, field);
            }
        }
        return rowWriter.copyBuffer();
    }

    public InternalRow deserialize(byte[] bytes) {
        if (rowReader == null) {
            rowReader = new RowReader(calculateBitSetInBytes(getters.length));
        }
        rowReader.pointTo(bytes);
        GenericRow row = new GenericRow(readers.length);
        row.setRowKind(rowReader.readRowKind());
        for (int i = 0; i < readers.length; i++) {
            row.setField(i, rowReader.isNullAt(i) ? null : readers[i].readField(rowReader, i));
        }
        return row;
    }

    private static FieldWriter createFieldWriter(DataType fieldType) {
        final FieldWriter fieldWriter;
        switch (fieldType.getTypeRoot()) {
            case CHAR:
            case VARCHAR:
                fieldWriter = (writer, pos, value) -> writer.writeString((BinaryString) value);
                break;
            case BOOLEAN:
                fieldWriter = (writer, pos, value) -> writer.writeBoolean((boolean) value);
                break;
            case BINARY:
            case VARBINARY:
                fieldWriter = (writer, pos, value) -> writer.writeBinary((byte[]) value);
                break;
            case DECIMAL:
                final int decimalPrecision = getPrecision(fieldType);
                fieldWriter =
                        (writer, pos, value) ->
                                writer.writeDecimal((Decimal) value, decimalPrecision);
                break;
            case TINYINT:
                fieldWriter = (writer, pos, value) -> writer.writeByte((byte) value);
                break;
            case SMALLINT:
                fieldWriter = (writer, pos, value) -> writer.writeShort((short) value);
                break;
            case INTEGER:
            case DATE:
            case TIME_WITHOUT_TIME_ZONE:
                fieldWriter = (writer, pos, value) -> writer.writeInt((int) value);
                break;
            case BIGINT:
                fieldWriter = (writer, pos, value) -> writer.writeLong((long) value);
                break;
            case FLOAT:
                fieldWriter = (writer, pos, value) -> writer.writeFloat((float) value);
                break;
            case DOUBLE:
                fieldWriter = (writer, pos, value) -> writer.writeDouble((double) value);
                break;
            case TIMESTAMP_WITHOUT_TIME_ZONE:
            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                final int timestampPrecision = getPrecision(fieldType);
                fieldWriter =
                        (writer, pos, value) ->
                                writer.writeTimestamp((Timestamp) value, timestampPrecision);
                break;
            case ARRAY:
                Serializer arraySerializer = InternalSerializers.create(fieldType);
                fieldWriter =
                        (writer, pos, value) ->
                                writer.writeArray(
                                        (InternalArray) value,
                                        (InternalArraySerializer) arraySerializer);
                break;
            case MULTISET:
            case MAP:
                Serializer mapSerializer = InternalSerializers.create(fieldType);
                fieldWriter =
                        (writer, pos, value) ->
                                writer.writeMap(
                                        (InternalMap) value, (InternalMapSerializer) mapSerializer);
                break;
            case ROW:
                RowCompactedSerializer rowSerializer =
                        new RowCompactedSerializer((RowType) fieldType);
                fieldWriter =
                        (writer, pos, value) -> writer.writeRow((InternalRow) value, rowSerializer);
                break;
            default:
                throw new IllegalArgumentException();
        }

        if (!fieldType.isNullable()) {
            return fieldWriter;
        }
        return (writer, pos, value) -> {
            if (value == null) {
                writer.setNullAt(pos);
            } else {
                fieldWriter.writeField(writer, pos, value);
            }
        };
    }

    private static FieldReader createFieldReader(DataType fieldType) {
        final FieldReader fieldReader;
        // ordered by type root definition
        switch (fieldType.getTypeRoot()) {
            case CHAR:
            case VARCHAR:
                fieldReader = (reader, pos) -> reader.readString();
                break;
            case BOOLEAN:
                fieldReader = (reader, pos) -> reader.readBoolean();
                break;
            case BINARY:
            case VARBINARY:
                fieldReader = (reader, pos) -> reader.readBinary();
                break;
            case DECIMAL:
                final int decimalPrecision = getPrecision(fieldType);
                final int decimalScale = getScale(fieldType);
                fieldReader = (reader, pos) -> reader.readDecimal(decimalPrecision, decimalScale);
                break;
            case TINYINT:
                fieldReader = (reader, pos) -> reader.readByte();
                break;
            case SMALLINT:
                fieldReader = (reader, pos) -> reader.readShort();
                break;
            case INTEGER:
            case DATE:
            case TIME_WITHOUT_TIME_ZONE:
                fieldReader = (reader, pos) -> reader.readInt();
                break;
            case BIGINT:
                fieldReader = (reader, pos) -> reader.readLong();
                break;
            case FLOAT:
                fieldReader = (reader, pos) -> reader.readFloat();
                break;
            case DOUBLE:
                fieldReader = (reader, pos) -> reader.readDouble();
                break;
            case TIMESTAMP_WITHOUT_TIME_ZONE:
            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                final int timestampPrecision = getPrecision(fieldType);
                fieldReader = (reader, pos) -> reader.readTimestamp(timestampPrecision);
                break;
            case ARRAY:
                fieldReader = (reader, pos) -> reader.readArray();
                break;
            case MULTISET:
            case MAP:
                fieldReader = (reader, pos) -> reader.readMap();
                break;
            case ROW:
                RowCompactedSerializer serializer = new RowCompactedSerializer((RowType) fieldType);
                fieldReader = (reader, pos) -> reader.readRow(serializer);
                break;
            default:
                throw new IllegalArgumentException();
        }
        if (!fieldType.isNullable()) {
            return fieldReader;
        }
        return (reader, pos) -> {
            if (reader.isNullAt(pos)) {
                return null;
            }
            return fieldReader.readField(reader, pos);
        };
    }

    private interface FieldWriter extends Serializable {
        void writeField(RowWriter writer, int pos, Object value);
    }

    private interface FieldReader extends Serializable {
        Object readField(RowReader reader, int pos);
    }

    private static class RowWriter {

        // Including RowKind and null bits.
        private final int headerSizeInBytes;

        private byte[] buffer;
        private MemorySegment segment;
        private int position;

        private RowWriter(int headerSizeInBytes) {
            this.headerSizeInBytes = headerSizeInBytes;
            setBuffer(new byte[Math.max(64, headerSizeInBytes)]);
            this.position = headerSizeInBytes;
        }

        private void reset() {
            this.position = headerSizeInBytes;
            for (int i = 0; i < headerSizeInBytes; i++) {
                buffer[i] = 0;
            }
        }

        private void writeRowKind(RowKind kind) {
            this.buffer[0] = kind.toByteValue();
        }

        private void setNullAt(int pos) {
            bitSet(segment, 0, pos + HEADER_SIZE_IN_BITS);
        }

        private void writeBoolean(boolean value) {
            ensureCapacity(1);
            segment.putBoolean(position++, value);
        }

        private void writeByte(byte value) {
            ensureCapacity(1);
            segment.put(position++, value);
        }

        private void writeShort(short value) {
            ensureCapacity(2);
            segment.putShort(position, value);
            position += 2;
        }

        private void writeInt(int value) {
            ensureCapacity(4);
            segment.putInt(position, value);
            position += 4;
        }

        private void writeLong(long value) {
            ensureCapacity(8);
            segment.putLong(position, value);
            position += 8;
        }

        private void writeFloat(float value) {
            ensureCapacity(4);
            segment.putFloat(position, value);
            position += 4;
        }

        private void writeDouble(double value) {
            ensureCapacity(8);
            segment.putDouble(position, value);
            position += 8;
        }

        private void writeString(BinaryString value) {
            writeSegments(value.getSegments(), value.getOffset(), value.getSizeInBytes());
        }

        private void writeDecimal(Decimal value, int precision) {
            if (Decimal.isCompact(precision)) {
                writeLong(value.toUnscaledLong());
            } else {
                writeBinary(value.toUnscaledBytes());
            }
        }

        private void writeTimestamp(Timestamp value, int precision) {
            if (Timestamp.isCompact(precision)) {
                writeLong(value.getMillisecond());
            } else {
                writeLong(value.getMillisecond());
                writeUnsignedInt(value.getNanoOfMillisecond());
            }
        }

        private void writeUnsignedInt(int value) {
            checkArgument(value >= 0);
            ensureCapacity(5);
            int len = VarLengthIntUtils.encodeInt(buffer, position, value);
            position += len;
        }

        private void writeArray(InternalArray value, InternalArraySerializer serializer) {
            BinaryArray binary = serializer.toBinaryArray(value);
            writeSegments(binary.getSegments(), binary.getOffset(), binary.getSizeInBytes());
        }

        private void writeMap(InternalMap value, InternalMapSerializer serializer) {
            BinaryMap binary = serializer.toBinaryMap(value);
            writeSegments(binary.getSegments(), binary.getOffset(), binary.getSizeInBytes());
        }

        private void writeRow(InternalRow value, RowCompactedSerializer serializer) {
            writeBinary(serializer.serializeToBytes(value));
        }

        private byte[] copyBuffer() {
            return Arrays.copyOf(buffer, position);
        }

        private void setBuffer(byte[] buffer) {
            this.buffer = buffer;
            this.segment = MemorySegment.wrap(buffer);
        }

        private void ensureCapacity(int size) {
            if (buffer.length - position < size) {
                grow(size);
            }
        }

        private void grow(int minCapacityAdd) {
            int newLen = Math.max(this.buffer.length * 2, this.buffer.length + minCapacityAdd);
            setBuffer(Arrays.copyOf(this.buffer, newLen));
        }

        private void writeBinary(byte[] value) {
            writeUnsignedInt(value.length);
            ensureCapacity(value.length);
            System.arraycopy(value, 0, buffer, position, value.length);
            position += value.length;
        }

        private void write(MemorySegment segment, int off, int len) {
            ensureCapacity(len);
            segment.get(off, this.buffer, this.position, len);
            this.position += len;
        }

        private void writeSegments(MemorySegment[] segments, int off, int len) {
            writeUnsignedInt(len);
            if (len + off <= segments[0].size()) {
                write(segments[0], off, len);
            } else {
                write(segments, off, len);
            }
        }

        private void write(MemorySegment[] segments, int off, int len) {
            ensureCapacity(len);
            int toWrite = len;
            int fromOffset = off;
            int toOffset = this.position;
            for (MemorySegment sourceSegment : segments) {
                int remain = sourceSegment.size() - fromOffset;
                if (remain > 0) {
                    int localToWrite = Math.min(remain, toWrite);
                    sourceSegment.get(fromOffset, buffer, toOffset, localToWrite);
                    toWrite -= localToWrite;
                    toOffset += localToWrite;
                    fromOffset = 0;
                } else {
                    fromOffset -= sourceSegment.size();
                }
            }
            this.position += len;
        }
    }

    private static class RowReader {

        // Including RowKind and null bits.
        private final int headerSizeInBytes;

        private MemorySegment segment;
        private MemorySegment[] segments;
        private int position;

        private RowReader(int headerSizeInBytes) {
            this.headerSizeInBytes = headerSizeInBytes;
        }

        private void pointTo(byte[] bytes) {
            this.segment = MemorySegment.wrap(bytes);
            this.segments = new MemorySegment[] {segment};
            this.position = headerSizeInBytes;
        }

        private RowKind readRowKind() {
            return RowKind.fromByteValue(segment.get(0));
        }

        private boolean isNullAt(int pos) {
            return bitGet(segment, 0, pos + HEADER_SIZE_IN_BITS);
        }

        private boolean readBoolean() {
            return segment.getBoolean(position++);
        }

        private byte readByte() {
            return segment.get(position++);
        }

        private short readShort() {
            short value = segment.getShort(position);
            position += 2;
            return value;
        }

        private int readInt() {
            int value = segment.getInt(position);
            position += 4;
            return value;
        }

        private long readLong() {
            long value = segment.getLong(position);
            position += 8;
            return value;
        }

        private float readFloat() {
            float value = segment.getFloat(position);
            position += 4;
            return value;
        }

        private double readDouble() {
            double value = segment.getDouble(position);
            position += 8;
            return value;
        }

        private BinaryString readString() {
            int length = readUnsignedInt();
            BinaryString string = BinaryString.fromAddress(segments, position, length);
            position += length;
            return string;
        }

        private int readUnsignedInt() {
            for (int offset = 0, result = 0; offset < 32; offset += 7) {
                int b = readByte();
                result |= (b & 0x7F) << offset;
                if ((b & 0x80) == 0) {
                    return result;
                }
            }
            throw new Error("Malformed integer.");
        }

        private Decimal readDecimal(int precision, int scale) {
            return Decimal.isCompact(precision)
                    ? Decimal.fromUnscaledLong(readLong(), precision, scale)
                    : Decimal.fromUnscaledBytes(readBinary(), precision, scale);
        }

        private Timestamp readTimestamp(int precision) {
            if (Timestamp.isCompact(precision)) {
                return Timestamp.fromEpochMillis(readLong());
            }
            long milliseconds = readLong();
            int nanosOfMillisecond = readUnsignedInt();
            return Timestamp.fromEpochMillis(milliseconds, nanosOfMillisecond);
        }

        private byte[] readBinary() {
            int length = readUnsignedInt();
            byte[] bytes = new byte[length];
            segment.get(position, bytes, 0, length);
            position += length;
            return bytes;
        }

        private InternalArray readArray() {
            BinaryArray value = new BinaryArray();
            int length = readUnsignedInt();
            value.pointTo(segments, position, length);
            position += length;
            return value;
        }

        private InternalMap readMap() {
            BinaryMap value = new BinaryMap();
            int length = readUnsignedInt();
            value.pointTo(segments, position, length);
            position += length;
            return value;
        }

        private InternalRow readRow(RowCompactedSerializer serializer) {
            byte[] bytes = readBinary();
            return serializer.deserialize(bytes);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy