com.clickzetta.platform.operator.OperationsEncoder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of clickzetta-java Show documentation
Show all versions of clickzetta-java Show documentation
The Java SDK for ClickZetta's Lakehouse
package com.clickzetta.platform.operator;
import com.clickzetta.platform.common.ColumnSchema;
import com.clickzetta.platform.common.Schema;
import com.clickzetta.platform.common.Type;
import com.google.protobuf.ByteString;
import com.google.protobuf.UnsafeByteOperations;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.CompositeByteBuf;
import io.netty.buffer.Unpooled;
import org.apache.kudu.RowOperations;
import scala.Tuple2;
import java.nio.ByteBuffer;
import java.util.BitSet;
import java.util.List;
/**
 * Encodes a batch of write operations into two {@code RowOperationsPB} messages: one laid out
 * against the table schema and one laid out against the table's key schema.
 *
 * <p>Each encoded row consists of one operation-type byte, the "column is set" bitset, the null
 * bitset (only when the target schema has nullable columns) and then the cell payload.
 * Fixed-size cells are copied from the row's backing array. STRING / BINARY / VARCHAR cells are
 * written as two little-endian longs (offset into the indirect data, length) while their bytes
 * are appended to the indirect buffer.
 */
public class OperationsEncoder {
    private static final MemoryAllocator allocator = new MemoryAllocator();

    /** Mutable encoding state (row buffer plus indirect var-length buffer) for one schema. */
    private static class PbEncode {
        public Schema schema;
        public ByteBuf rows;
        public CompositeByteBuf indirect;
        public int indirectIndex = 0;
        public long indirectWrittenBytes;

        /**
         * Allocates the buffers needed to encode {@code numOperations} rows of {@code schema}.
         *
         * @param schema        schema the rows are laid out against
         * @param numOperations number of rows that will be encoded
         */
        public void init(Schema schema, int numOperations) {
            this.schema = schema;
            final int columnBitSetSize = Bytes.getBitSetSize(schema.getColumnCount());
            // 1 byte for the operation type + fixed row data + the "column is set" bitset.
            int sizePerRow = 1 + schema.getRowSize() + columnBitSetSize;
            if (schema.hasNullableColumns()) {
                // A second bitset of the same size carries the null flags.
                sizePerRow += columnBitSetSize;
            }
            // Rows are written with the little-endian (…LE) ByteBuf accessors.
            this.rows = allocator.heapBuffer(sizePerRow * numOperations);
            int totalVarLength = schema.getVarLengthColumnCount() * numOperations;
            if (totalVarLength != 0) {
                this.indirect = allocator.compositeHeapBuffer(totalVarLength);
            }
        }

        /**
         * Builds the protobuf message from everything encoded so far and releases the buffers,
         * whether or not building succeeds.
         *
         * @return the message holding the row bytes and, when any var-length cell was written,
         *         the concatenated indirect data
         * @throws IllegalStateException if the indirect data does not fit into a single array
         */
        public RowOperations.RowOperationsPB toPB() {
            try {
                RowOperations.RowOperationsPB.Builder rowOpsBuilder =
                        RowOperations.RowOperationsPB.newBuilder();
                rowOpsBuilder.setRows(ByteString.copyFrom(rows.nioBuffer()));
                if (indirectIndex > 0 && indirect != null) {
                    // Fail loudly instead of silently truncating the long to an int.
                    if (indirectWrittenBytes > Integer.MAX_VALUE) {
                        throw new IllegalStateException(
                                "indirect data too large to encode: " + indirectWrittenBytes + " bytes");
                    }
                    byte[] indirectData = new byte[(int) indirectWrittenBytes];
                    int offset = 0;
                    for (int i = 0; i < indirectIndex; i++) {
                        ByteBuf component = indirect.component(i);
                        if (component != null) {
                            int bbSize = component.readableBytes();
                            component.readBytes(indirectData, offset, bbSize);
                            offset += bbSize;
                        }
                    }
                    // indirectData is never touched again, so wrapping without a copy is safe.
                    rowOpsBuilder.setIndirectData(UnsafeByteOperations.unsafeWrap(indirectData));
                }
                return rowOpsBuilder.build();
            } finally {
                release();
            }
        }

        /**
         * Releases the buffers held by this encoder. Safe to call repeatedly and before
         * {@link #init}: each buffer reference is cleared before it is released, so a second
         * call is a no-op.
         */
        public void release() {
            CompositeByteBuf indirectBuf = indirect;
            ByteBuf rowsBuf = rows;
            indirect = null;
            rows = null;
            try {
                if (indirectBuf != null) {
                    indirectBuf.release();
                }
            } finally {
                if (rowsBuf != null) {
                    rowsBuf.release();
                }
            }
        }
    }

    /** Returns whether cells of {@code type} are stored out-of-line in the indirect buffer. */
    private static boolean isVarLength(Type type) {
        return type == Type.STRING || type == Type.BINARY || type == Type.VARCHAR;
    }

    /**
     * Writes the var-length cell {@code colIdx} of {@code row}: an (offset, length) pair of
     * little-endian longs goes into the row buffer and the cell bytes are appended to the
     * indirect buffer of {@code target}.
     *
     * @param row    row that owns the cell
     * @param colIdx index of the column in the row's own schema
     * @param target encoder state to append to
     */
    private static void writeVarLengthCell(PartialRow row, int colIdx, PbEncode target) {
        ByteBuffer varLengthData = row.getVarLengthData()
                .get(row.getSchema().getVarLengthColumnOffsetToAllVarCharColumns(colIdx));
        // Rewind to the buffer's mark before measuring and wrapping it.
        varLengthData.reset();
        target.rows.writeLongLE(target.indirectWrittenBytes);
        int bbSize = varLengthData.remaining();
        target.rows.writeLongLE(bbSize);
        target.indirect.addComponent(target.indirectIndex++, Unpooled.wrappedBuffer(varLengthData));
        target.indirectWrittenBytes += bbSize;
    }

    /**
     * Appends one row, laid out against {@code encode.schema}, to {@code encode}.
     *
     * <p>For DELETE / DELETE_IGNORE only the primary-key columns are encoded. The row's bitsets
     * are copied first, so clearing the non-key bits does not modify the row itself.
     *
     * @param row    row to encode
     * @param encode encoder state to append to
     * @param type   operation type written as the first byte of the encoded row
     */
    private static void encodeRow(PartialRow row, PbEncode encode, WriteOperation.ChangeType type) {
        final Schema schema = encode.schema;
        final int schemaColumnCount = schema.getColumnCount();
        final boolean hasNullable = schema.hasNullableColumns();

        BitSet columnsBitSet = row.getColumnsBitSet();
        BitSet nullsBitSet = row.getNullsBitSet();
        int columnCount = row.maxChangedIndex + 1;

        if (type == WriteOperation.ChangeType.DELETE || type == WriteOperation.ChangeType.DELETE_IGNORE) {
            // Deep copy through the byte form, so the row's own bitsets stay untouched.
            columnsBitSet = Bytes.toBitSet(
                    Bytes.fromBitSet(columnsBitSet, schemaColumnCount), 0, schemaColumnCount);
            if (hasNullable) {
                nullsBitSet = Bytes.toBitSet(
                        Bytes.fromBitSet(nullsBitSet, schemaColumnCount), 0, schemaColumnCount);
            }
            columnCount = row.getSchema().getPrimaryKeyColumnCount();
            // Drop every bit that refers to a non-key column.
            final int keyColumnCount = schema.getPrimaryKeyColumnCount();
            columnsBitSet.clear(keyColumnCount, columnsBitSet.size());
            if (hasNullable) {
                nullsBitSet.clear(keyColumnCount, nullsBitSet.size());
            }
        }

        encode.rows.writeByte(type.toEncodedByte());
        encode.rows.writeBytes(Bytes.fromBitSet(columnsBitSet, schemaColumnCount));
        if (hasNullable) {
            encode.rows.writeBytes(Bytes.fromBitSet(nullsBitSet, schemaColumnCount));
        }

        byte[] rowData = row.getRowAlloc();
        int currentRowOffset = 0;
        for (int colIdx = 0; colIdx < columnCount; colIdx++) {
            ColumnSchema col = schema.getColumnByIndex(colIdx);
            if (row.isSet(colIdx) && !row.isSetToNull(colIdx)) {
                if (isVarLength(col.getType())) {
                    writeVarLengthCell(row, colIdx, encode);
                } else {
                    encode.rows.writeBytes(rowData, currentRowOffset, col.getTypeSize());
                }
            }
            // The offset advances for every column, whether or not its cell was written.
            currentRowOffset += col.getTypeSize();
        }
    }

    /**
     * Appends the key-schema projection of {@code row} to {@code keyEncode}. Each column of the
     * key schema is looked up by name in the row's own schema.
     *
     * @param row       row to project
     * @param keyEncode encoder state for the key schema
     * @param keyType   currently unused; the encoded operation type is always SPLIT_ROWS
     */
    private static void encodeKeyRow(PartialRow row, PbEncode keyEncode, WriteOperation.ChangeType keyType) {
        final Schema keySchema = keyEncode.schema;
        BitSet columnsBitSet = row.getColumnsBitSet();
        BitSet nullsBitSet = row.getNullsBitSet();

        keyEncode.rows.writeByte(WriteOperation.ChangeType.SPLIT_ROWS.toEncodedByte());

        BitSet keyColumnsBitSet = new BitSet(keySchema.getColumnCount());
        BitSet keyNullsBitSet = new BitSet(keySchema.getColumnCount());

        // Resolve every key column against the row schema once; both passes below reuse it.
        final int keyColumnTotal = keySchema.getColumns().size();
        final ColumnSchema[] rowColumns = new ColumnSchema[keyColumnTotal];
        final int[] rowColumnIndexes = new int[keyColumnTotal];
        for (int keyColIdx = 0; keyColIdx < keyColumnTotal; keyColIdx++) {
            ColumnSchema keyCol = keySchema.getColumnByIndex(keyColIdx);
            ColumnSchema col = row.getSchema().getColumn(keyCol.getName());
            int colIdx = row.getSchema().getColumnIndex(col.getName());
            rowColumns[keyColIdx] = col;
            rowColumnIndexes[keyColIdx] = colIdx;
            if (columnsBitSet.get(colIdx)) {
                keyColumnsBitSet.set(keyColIdx);
            }
            if (row.getSchema().hasNullableColumns() && nullsBitSet.get(colIdx)
                    && keySchema.hasNullableColumns()) {
                keyNullsBitSet.set(keyColIdx);
            }
        }

        keyEncode.rows.writeBytes(Bytes.fromBitSet(keyColumnsBitSet, keySchema.getColumnCount()));
        if (keySchema.hasNullableColumns()) {
            keyEncode.rows.writeBytes(Bytes.fromBitSet(keyNullsBitSet, keySchema.getColumnCount()));
        }

        // Encode the key row data.
        byte[] rowData = row.getRowAlloc();
        for (int keyColIdx = 0; keyColIdx < keyColumnTotal; keyColIdx++) {
            ColumnSchema col = rowColumns[keyColIdx];
            int colIdx = rowColumnIndexes[keyColIdx];
            if (row.isSet(colIdx) && !row.isSetToNull(colIdx)) {
                if (isVarLength(col.getType())) {
                    writeVarLengthCell(row, colIdx, keyEncode);
                } else {
                    keyEncode.rows.writeBytes(
                            rowData, row.getSchema().getColumnOffset(colIdx), col.getTypeSize());
                }
            }
        }
    }

    /**
     * Encodes {@code operations} against the schema and the key schema of the table bound to
     * the first operation's row.
     *
     * <p>The encoding buffers are released on every path, including when encoding fails.
     *
     * @param operations operations to encode
     * @return a pair (rows encoded with the table schema, rows encoded with the key schema), or
     *         {@code null} when {@code operations} is {@code null} or empty
     */
    public static Tuple2<RowOperations.RowOperationsPB, RowOperations.RowOperationsPB> encodeOperations(
            List<? extends WriteOperation> operations) {
        if (operations == null || operations.isEmpty()) {
            return null;
        }
        AbstractRow firstRow = operations.get(0).getRow();
        PbEncode schemaEncode = new PbEncode();
        PbEncode keySchemaEncode = new PbEncode();
        try {
            schemaEncode.init(firstRow.getTable().getSchema(), operations.size());
            keySchemaEncode.init(firstRow.getTable().getKeySchema(), operations.size());
            // Key column operations are encoded as SPLIT_ROWS.
            for (WriteOperation operation : operations) {
                AbstractRow bindingRow = operation.getRow();
                encodeRow(bindingRow.getPartialRow(), schemaEncode, bindingRow.type());
                encodeKeyRow(bindingRow.getPartialRow(), keySchemaEncode, bindingRow.type());
            }
            RowOperations.RowOperationsPB rowsPb = schemaEncode.toPB();
            RowOperations.RowOperationsPB keyRowsPb = keySchemaEncode.toPB();
            return new Tuple2<>(rowsPb, keyRowsPb);
        } finally {
            // No-op after a successful toPB(); on failure this frees whatever is still held.
            schemaEncode.release();
            keySchemaEncode.release();
        }
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy