All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.clickzetta.platform.operator.KeyEncoder Maven / Gradle / Ivy

There is a newer version: 2.0.0
Show newest version
package com.clickzetta.platform.operator;

import com.clickzetta.platform.client.PartitionSchema;
import com.clickzetta.platform.common.ColumnSchema;
import com.clickzetta.platform.common.Schema;
import com.clickzetta.platform.common.Type;
import com.clickzetta.platform.util.ByteVec;
import com.clickzetta.platform.util.DateUtil;
import com.clickzetta.platform.util.DecimalUtil;
import com.clickzetta.platform.util.Pair;
import com.google.common.primitives.Ints;
import com.google.common.primitives.UnsignedLongs;
import com.sangupta.murmur.Murmur2;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class KeyEncoder {

  private static final BigInteger MIN_VALUE_128 = BigInteger.valueOf(-2).pow(127);

  private KeyEncoder() {
  }

  public static byte[] encodePrimaryKey(final PartialRow row) {
    ByteVec buf = ByteVec.create();
    final Schema schema = row.getSchema();
    for (int columnIdx = 0; columnIdx < schema.getPrimaryKeyColumnCount(); columnIdx++) {
      final boolean isLast = columnIdx + 1 == schema.getPrimaryKeyColumnCount();
      encodeColumn(row, columnIdx, isLast, buf);
    }
    return buf.toArray();
  }


  public static int getHashBucket(PartialRow row, PartitionSchema.HashBucketSchema hashSchema) {
    ByteVec buf = ByteVec.create();
    encodeColumns(row, hashSchema.getColumnIds(), buf);
    long hash = Murmur2.hash64(buf.data(), buf.len(), hashSchema.getSeed());
    return (int) UnsignedLongs.remainder(hash, hashSchema.getNumBuckets());
  }

  public static byte[] encodePartitionKey(PartialRow row, PartitionSchema partitionSchema) {
    ByteVec buf = ByteVec.create();
    if (!partitionSchema.getHashBucketSchemas().isEmpty()) {
      for (final PartitionSchema.HashBucketSchema hashSchema : partitionSchema.getHashBucketSchemas()) {
        encodeHashBucket(getHashBucket(row, hashSchema), buf);
      }
    }

    encodeColumns(row, partitionSchema.getRangeSchema().getColumnIds(), buf);
    return buf.toArray();
  }


  public static byte[] encodeRangePartitionKey(PartialRow row,
                                               PartitionSchema.RangeSchema rangeSchema) {
    ByteVec buf = ByteVec.create();
    encodeColumns(row, rangeSchema.getColumnIds(), buf);
    return buf.toArray();
  }


  private static void encodeColumns(PartialRow row, List columnIds, ByteVec buf) {
    for (int i = 0; i < columnIds.size(); i++) {
      boolean isLast = i + 1 == columnIds.size();
      encodeColumn(row, row.getSchema().getColumnIndex(columnIds.get(i)), isLast, buf);
    }
  }


  private static void encodeColumn(PartialRow row,
                                   int columnIdx,
                                   boolean isLast,
                                   ByteVec buf) {
    final Schema schema = row.getSchema();
    final ColumnSchema column = schema.getColumnByIndex(columnIdx);
    if (!row.isSet(columnIdx)) {
      throw new IllegalStateException(String.format("Primary key column %s is not set",
                                                    column.getName()));
    }
    final Type type = column.getType();
    if (type == Type.STRING || type == Type.BINARY ||
        type == Type.VARCHAR) {
      encodeBinary(row.getVarLengthData().get(columnIdx), isLast, buf);
    } else {
      encodeSignedInt(row.getRowAlloc(),
                      schema.getColumnOffset(columnIdx),
                      column.getTypeSize(),
                      buf);
    }
  }

  private static void encodeBinary(ByteBuffer value, boolean isLast, ByteVec buf) {
    value.reset();

    while (value.hasRemaining()) {
      byte currentByte = value.get();
      buf.push(currentByte);
      if (!isLast && currentByte == 0x00) {
        buf.push((byte) 0x01);
      }
    }

    if (!isLast) {
      buf.push((byte) 0x00);
      buf.push((byte) 0x00);
    }
  }

  private static void encodeSignedInt(byte[] value,
                                      int offset,
                                      int len,
                                      ByteVec buf) {
    byte lastByte = value[offset + (len - 1)];
    lastByte = Bytes.xorLeftMostBit(lastByte);
    buf.push(lastByte);
    for (int i = len - 2; i >= 0; i--) {
      buf.push(value[offset + i]);
    }
  }


  public static void encodeHashBucket(int bucket, ByteVec buf) {
    buf.append(Ints.toByteArray(bucket));
  }


  public static PartialRow decodePrimaryKey(Schema schema, byte[] key) {
    PartialRow row = schema.newPartialRow();
    ByteBuffer buf = ByteBuffer.wrap(key);
    buf.order(ByteOrder.BIG_ENDIAN);

    for (int idx = 0; idx < schema.getPrimaryKeyColumnCount(); idx++) {
      decodeColumn(buf, row, idx, idx + 1 == schema.getPrimaryKeyColumnCount());
    }

    if (buf.hasRemaining()) {
      throw new IllegalArgumentException("Unable to decode all primary key bytes");
    }
    return row;
  }

  public static Pair, PartialRow> decodePartitionKey(Schema schema,
                                                                   PartitionSchema partitionSchema,
                                                                   byte[] key) {
    ByteBuffer buf = ByteBuffer.wrap(key);
    buf.order(ByteOrder.BIG_ENDIAN);

    List buckets = new ArrayList<>();

    for (int i = 0; i < partitionSchema.getHashBucketSchemas().size(); i++) {
      if (buf.hasRemaining()) {
        buckets.add(buf.getInt());
      } else {
        buckets.add(0);
      }
    }

    return new Pair<>(buckets, decodeRangePartitionKey(schema, partitionSchema, buf));
  }


  public static PartialRow decodeRangePartitionKey(Schema schema,
                                                   PartitionSchema partitionSchema,
                                                   byte[] key) {
    ByteBuffer buf = ByteBuffer.wrap(key);
    buf.order(ByteOrder.BIG_ENDIAN);
    return decodeRangePartitionKey(schema, partitionSchema, buf);
  }


  private static PartialRow decodeRangePartitionKey(Schema schema,
                                                    PartitionSchema partitionSchema,
                                                    ByteBuffer buf) {
    PartialRow row = schema.newPartialRow();
    Iterator rangeIds = partitionSchema.getRangeSchema().getColumnIds().iterator();
    while (rangeIds.hasNext()) {
      int idx = schema.getColumnIndex(rangeIds.next());
      if (buf.hasRemaining()) {
        decodeColumn(buf, row, idx, !rangeIds.hasNext());
      } else {
        row.setMin(idx);
      }
    }

    if (buf.hasRemaining()) {
      throw new IllegalArgumentException("Unable to decode all partition key bytes");
    }
    return row;
  }


  private static void decodeColumn(ByteBuffer buf, PartialRow row, int idx, boolean isLast) {
    Schema schema = row.getSchema();
    ColumnSchema column = schema.getColumnByIndex(idx);
    switch (column.getType()) {
      case INT8:
        row.addByte(idx, (byte) (buf.get() ^ Byte.MIN_VALUE));
        break;
      case INT16:
        row.addShort(idx, (short) (buf.getShort() ^ Short.MIN_VALUE));
        break;
      case DATE: {
        int days = buf.getInt() ^ Integer.MIN_VALUE;
        row.addDate(idx, DateUtil.epochDaysToSqlDate(days));
        break;
      }
      case INT32:
        row.addInt(idx, buf.getInt() ^ Integer.MIN_VALUE);
        break;
      case INT64:
      case UNIXTIME_MICROS:
        row.addLong(idx, buf.getLong() ^ Long.MIN_VALUE);
        break;
      case BINARY: {
        byte[] binary = decodeBinaryColumn(buf, isLast);
        row.addBinary(idx, binary);
        break;
      }
      case VARCHAR: {
        byte[] binary = decodeBinaryColumn(buf, isLast);
        row.addVarchar(idx, new String(binary, StandardCharsets.UTF_8));
        break;
      }
      case STRING: {
        byte[] binary = decodeBinaryColumn(buf, isLast);
        row.addStringUtf8(idx, binary);
        break;
      }
      case DECIMAL: {
        int scale = column.getTypeAttributes().getScale();
        int size = column.getTypeSize();
        switch (size) {
          case  DecimalUtil.DECIMAL32_SIZE:
            int intVal = buf.getInt() ^ Integer.MIN_VALUE;
            row.addDecimal(idx, BigDecimal.valueOf(intVal, scale));
            break;
          case DecimalUtil.DECIMAL64_SIZE:
            long longVal = buf.getLong() ^ Long.MIN_VALUE;
            row.addDecimal(idx, BigDecimal.valueOf(longVal, scale));
            break;
          case DecimalUtil.DECIMAL128_SIZE:
            byte[] bytes = new byte[size];
            buf.get(bytes);
            BigInteger bigIntVal = new BigInteger(bytes).xor(MIN_VALUE_128);
            row.addDecimal(idx, new BigDecimal(bigIntVal, scale));
            break;
          default:
            throw new IllegalArgumentException("Unsupported decimal type size: " + size);
        }
        break;
      }
      default:
        throw new IllegalArgumentException(String.format(
            "The column type %s is not a valid key component type",
            schema.getColumnByIndex(idx).getType()));
    }
  }

  private static byte[] decodeBinaryColumn(ByteBuffer key, boolean isLast) {
    if (isLast) {
      byte[] bytes = Arrays.copyOfRange(key.array(),
                                        key.arrayOffset() + key.position(),
                                        key.arrayOffset() + key.limit());
      key.position(key.limit());
      return bytes;
    }

    ByteVec buf = ByteVec.withCapacity(key.remaining());
    for (int i = key.position(); i < key.limit(); i++) {
      if (key.get(i) == 0) {
        switch (key.get(i + 1)) {
          case 0: {
            buf.append(key.array(),
                       key.arrayOffset() + key.position(),
                       i - key.position());
            key.position(i + 2);
            return buf.toArray();
          }
          case 1: {
            buf.append(key.array(),
                       key.arrayOffset() + key.position(),
                       i + 1 - key.position());
            i++;
            key.position(i + 1);
            break;
          }
          default: throw new IllegalArgumentException("Unexpected binary sequence");
        }
      }
    }

    buf.append(key.array(),
               key.arrayOffset() + key.position(),
               key.remaining());
    key.position(key.limit());
    return buf.toArray();
  }

  public static String formatPartitionKeyRange(Schema schema,
                                               PartitionSchema partitionSchema,
                                               byte[] lowerBound,
                                               byte[] upperBound) {
    if (partitionSchema.getRangeSchema().getColumnIds().isEmpty() &&
        partitionSchema.getHashBucketSchemas().isEmpty()) {
      assert lowerBound.length == 0 && upperBound.length == 0;
      return "";
    }

    Pair, PartialRow> lower = decodePartitionKey(schema, partitionSchema, lowerBound);
    Pair, PartialRow> upper = decodePartitionKey(schema, partitionSchema, upperBound);

    StringBuilder sb = new StringBuilder();

    List hashBuckets = lower.getFirst();
    if (!hashBuckets.isEmpty()) {
      sb.append("hash-partition-buckets: ");
      sb.append(hashBuckets);
    }

    if (!partitionSchema.getRangeSchema().getColumnIds().isEmpty()) {
      if (!hashBuckets.isEmpty()) {
        sb.append(", ");
      }

      List idxs = new ArrayList<>();
      for (int id : partitionSchema.getRangeSchema().getColumnIds()) {
        idxs.add(schema.getColumnIndex(id));
      }

      sb.append("range-partition: [");
      if (lowerBound.length > 4 * hashBuckets.size()) {
        sb.append('(');
        lower.getSecond().appendDebugString(idxs, sb);
        sb.append(')');
      } else {
        sb.append("");
      }
      sb.append(", ");
      if (upperBound.length > 4 * hashBuckets.size()) {
        sb.append('(');
        upper.getSecond().appendDebugString(idxs, sb);
        sb.append(')');
      } else {
        sb.append("");
      }
      sb.append(')');
    }
    return sb.toString();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy