All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.clickzetta.platform.operator.OperationsEncoder Maven / Gradle / Ivy

There is a newer version: 2.0.0
Show newest version
package com.clickzetta.platform.operator;

import com.clickzetta.platform.common.ColumnSchema;
import com.clickzetta.platform.common.Schema;
import com.clickzetta.platform.common.Type;
import com.google.protobuf.ByteString;
import com.google.protobuf.UnsafeByteOperations;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.CompositeByteBuf;
import io.netty.buffer.Unpooled;
import org.apache.kudu.RowOperations;
import scala.Tuple2;

import java.nio.ByteBuffer;
import java.util.BitSet;
import java.util.List;

public class OperationsEncoder {
  private static final MemoryAllocator allocator = new MemoryAllocator();

  private static class PbEncode {
    public Schema schema;
    public ByteBuf rows;
    public CompositeByteBuf indirect;
    public int indirectIndex = 0;
    public long indirectWrittenBytes;

    public void init(Schema schema, int numOperations) {
      this.schema = schema;

      final int columnBitSetSize = Bytes.getBitSetSize(schema.getColumnCount());
      int sizePerRow = 1 + schema.getRowSize() + columnBitSetSize;
      if (schema.hasNullableColumns()) {
        sizePerRow += columnBitSetSize;
      }

      // when encode rows. we use  ByteOrder.LITTLE_ENDIAN to encode.
      this.rows = allocator.heapBuffer(sizePerRow * numOperations);
      int totalVarLength = schema.getVarLengthColumnCount() * numOperations;
      if (totalVarLength != 0) {
        this.indirect = allocator.compositeHeapBuffer(totalVarLength);
      }
    }

    public RowOperations.RowOperationsPB toPB() {
      try {
        RowOperations.RowOperationsPB.Builder rowOpsBuilder = RowOperations.RowOperationsPB.newBuilder();

        rowOpsBuilder.setRows(ByteString.copyFrom(rows.nioBuffer()));
        if (indirectIndex > 0 && indirect != null) {
          byte[] indirectData = new byte[(int) indirectWrittenBytes];
          int offset = 0;
          for (int i = 0; i < indirectIndex; i++) {
            if (indirect.component(i) != null) {
              int bbSize = indirect.component(i).readableBytes();
              indirect.component(i).readBytes(indirectData, offset, bbSize);
              offset += bbSize;
            }
          }
          rowOpsBuilder.setIndirectData(UnsafeByteOperations.unsafeWrap(indirectData));
        }
        RowOperations.RowOperationsPB pb = rowOpsBuilder.build();
        return pb;
      } finally {
        if (indirect != null) {
          indirect.release();
        }
        if (rows != null) {
          rows.release();
        }
      }
    }
  }

  private static void encodeRow(PartialRow row, PbEncode encode, WriteOperation.ChangeType type) {
    BitSet columnsBitSet = row.getColumnsBitSet();
    BitSet nullsBitSet = row.getNullsBitSet();

    // deep copy.
    if (type == WriteOperation.ChangeType.DELETE || type == WriteOperation.ChangeType.DELETE_IGNORE) {
      byte[] colBytes = Bytes.fromBitSet(columnsBitSet, encode.schema.getColumnCount());
      columnsBitSet = Bytes.toBitSet(colBytes, 0, encode.schema.getColumnCount());
      if (encode.schema.hasNullableColumns()) {
        byte[] nullBytes = Bytes.fromBitSet(nullsBitSet, encode.schema.getColumnCount());
        nullsBitSet = Bytes.toBitSet(nullBytes, 0, encode.schema.getColumnCount());
      }
    }

    int columnCount = row.maxChangedIndex + 1;
    if (type == WriteOperation.ChangeType.DELETE || type == WriteOperation.ChangeType.DELETE_IGNORE) {
      columnCount = row.getSchema().getPrimaryKeyColumnCount();
      columnsBitSet.clear(encode.schema.getPrimaryKeyColumnCount(), columnsBitSet.size());
      if (encode.schema.hasNullableColumns()) {
        nullsBitSet.clear(encode.schema.getPrimaryKeyColumnCount(), nullsBitSet.size());
      }
    }

    encode.rows.writeByte(type.toEncodedByte());
    encode.rows.writeBytes(Bytes.fromBitSet(columnsBitSet, encode.schema.getColumnCount()));
    if (encode.schema.hasNullableColumns()) {
      encode.rows.writeBytes(Bytes.fromBitSet(nullsBitSet, encode.schema.getColumnCount()));
    }

    byte[] rowData = row.getRowAlloc();
    int currentRowOffset = 0;
    for (int colIdx = 0; colIdx < columnCount; colIdx++) {
      ColumnSchema col = encode.schema.getColumnByIndex(colIdx);
      if (row.isSet(colIdx) && !row.isSetToNull(colIdx)) {
        if (col.getType() == Type.STRING || col.getType() == Type.BINARY ||
            col.getType() == Type.VARCHAR) {
          ByteBuffer varLengthData = row.getVarLengthData()
              .get(row.getSchema().getVarLengthColumnOffsetToAllVarCharColumns(colIdx));
          varLengthData.reset();
          encode.rows.writeLongLE(encode.indirectWrittenBytes);
          int bbSize = varLengthData.remaining();
          encode.rows.writeLongLE(bbSize);
          encode.indirect.addComponent(encode.indirectIndex++, Unpooled.wrappedBuffer(varLengthData));
          encode.indirectWrittenBytes += bbSize;
        } else {
          encode.rows.writeBytes(rowData, currentRowOffset, col.getTypeSize());
        }
      }
      currentRowOffset += col.getTypeSize();
    }
  }

  private static void encodeKeyRow(PartialRow row, PbEncode keyEncode, WriteOperation.ChangeType keyType) {
    BitSet columnsBitSet = row.getColumnsBitSet();
    BitSet nullsBitSet = row.getNullsBitSet();

    keyEncode.rows.writeByte(WriteOperation.ChangeType.SPLIT_ROWS.toEncodedByte());
    BitSet keyColumnsBitSet = new BitSet(keyEncode.schema.getColumnCount());
    BitSet keyNullsBitSet = new BitSet(keyEncode.schema.getColumnCount());
    for (int keyColIdx = 0; keyColIdx < keyEncode.schema.getColumns().size(); keyColIdx++) {
      ColumnSchema keyCol = keyEncode.schema.getColumnByIndex(keyColIdx);
      ColumnSchema col = row.getSchema().getColumn(keyCol.getName());
      int colIdx = row.getSchema().getColumnIndex(col.getName());
      if (columnsBitSet.get(colIdx)) {
        keyColumnsBitSet.set(keyColIdx);
      }
      if (row.getSchema().hasNullableColumns() && nullsBitSet.get(colIdx) && keyEncode.schema.hasNullableColumns()) {
        keyNullsBitSet.set(keyColIdx);
      }
    }

    keyEncode.rows.writeBytes(Bytes.fromBitSet(keyColumnsBitSet, keyEncode.schema.getColumnCount()));
    if (keyEncode.schema.hasNullableColumns()) {
      keyEncode.rows.writeBytes(Bytes.fromBitSet(keyNullsBitSet, keyEncode.schema.getColumnCount()));
    }

    // encode key row data.
    byte[] rowData = row.getRowAlloc();
    for (int keyColIdx = 0; keyColIdx < keyEncode.schema.getColumns().size(); keyColIdx++) {
      ColumnSchema keyCol = keyEncode.schema.getColumnByIndex(keyColIdx);
      ColumnSchema col = row.getSchema().getColumn(keyCol.getName());
      int colIdx = row.getSchema().getColumnIndex(col.getName());
      if (row.isSet(colIdx) && !row.isSetToNull(colIdx)) {
        if (col.getType() == Type.STRING || col.getType() == Type.BINARY ||
            col.getType() == Type.VARCHAR) {
          ByteBuffer varLengthData = row.getVarLengthData()
              .get(row.getSchema().getVarLengthColumnOffsetToAllVarCharColumns(colIdx));
          varLengthData.reset();
          keyEncode.rows.writeLongLE(keyEncode.indirectWrittenBytes);
          int bbSize = varLengthData.remaining();
          keyEncode.rows.writeLongLE(bbSize);
          keyEncode.indirect.addComponent(keyEncode.indirectIndex++, Unpooled.wrappedBuffer(varLengthData));
          keyEncode.indirectWrittenBytes += bbSize;
        } else {
          keyEncode.rows.writeBytes(rowData, row.getSchema().getColumnOffset(colIdx), col.getTypeSize());
        }
      }
    }
  }

  public static Tuple2 encodeOperations(List operations) {
    if (operations == null || operations.isEmpty()) {
      return null;
    }
    AbstractRow abstractRow = operations.get(0).getRow();
    PbEncode schemaEncode = new PbEncode();
    schemaEncode.init(abstractRow.getTable().getSchema(), operations.size());

    PbEncode keySchemaEncode = new PbEncode();
    keySchemaEncode.init(abstractRow.getTable().getKeySchema(), operations.size());

    // use SPLIT_ROWS to encode Key Column Operations.
    for (WriteOperation operation : operations) {
      AbstractRow bindingRow = operation.getRow();
      encodeRow(bindingRow.getPartialRow(), schemaEncode, bindingRow.type());
      encodeKeyRow(bindingRow.getPartialRow(), keySchemaEncode, bindingRow.type());
    }
    return new Tuple2<>(schemaEncode.toPB(), keySchemaEncode.toPB());
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy