All Downloads are FREE. Search and download functionalities are using the official Maven repository.

kinesis4cats.producer.Record.scala Maven / Gradle / Ivy

There is a newer version: 0.0.32
Show newest version
/*
 * Copyright 2023-2023 etspaceman
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package kinesis4cats.producer

import java.math.BigInteger
import java.nio.charset.StandardCharsets

import cats.Eq
import cats.syntax.all._
import com.google.protobuf.ByteString

import kinesis4cats.Utils
import kinesis4cats.models.ShardId
import kinesis4cats.protobuf.messages

/** A single record handled by the producer.
  *
  * NOTE: `data` is an `Array[Byte]`, so the `equals`/`hashCode` generated for
  * this case class compare it by reference. Use the `Eq[Record]` instance in
  * the companion object when element-wise comparison is required.
  *
  * @param data
  *   Payload bytes of the record
  * @param partitionKey
  *   Partition key; its UTF-8 encoded length counts towards the payload size
  * @param explicitHashKey
  *   Optional explicit hash key. When present it is expected to be a base-10
  *   integer between 0 and `Record.unit128Max` (inclusive)
  * @param sequenceNumberForOrdering
  *   Optional sequence number; not inspected by any validation in this class
  */
final case class Record(
    data: Array[Byte],
    partitionKey: String,
    explicitHashKey: Option[String] = None,
    sequenceNumberForOrdering: Option[String] = None
) {

  // Encoded once up front: both the bytes and their length are reused by the
  // size calculations below and by the companion object.
  private val partitionKeyBytes = partitionKey.getBytes(StandardCharsets.UTF_8)
  private val partitionKeyLength = partitionKeyBytes.length

  /** Size of the record: UTF-8 partition key length plus data length. */
  private[kinesis4cats] val payloadSize: Int =
    partitionKeyLength + data.length

  /** True when [[payloadSize]] does not exceed `payloadSizeLimit`. */
  private[kinesis4cats] def isValidPayloadSize(
      payloadSizeLimit: Int
  ): Boolean =
    payloadSize <= payloadSizeLimit

  /** True when the UTF-8 encoded partition key length lies within
    * `[partitionKeyMin, partitionKeyMax]` (both bounds inclusive).
    */
  private[kinesis4cats] def isValidPartitionKey(
      partitionKeyMin: Int,
      partitionKeyMax: Int
  ): Boolean =
    partitionKeyLength <= partitionKeyMax && partitionKeyMin <= partitionKeyLength

  /** True when no explicit hash key is set, or when the configured one parses
    * as a base-10 integer in the range `[0, Record.unit128Max]`.
    *
    * A key that is not a parsable integer is reported as invalid (`false`)
    * instead of letting the `NumberFormatException` escape from a validation
    * check.
    */
  private[kinesis4cats] def isValidExplicitHashKey: Boolean =
    explicitHashKey.forall { ehk =>
      try {
        val value = BigInt(ehk)
        value.signum >= 0 && value <= Record.unit128Max
      } catch {
        // BigInt(String) rejects anything that is not a decimal integer.
        case _: NumberFormatException => false
      }
    }

  /** True when the payload size, partition key length and explicit hash key
    * all pass their individual checks. Checks are evaluated in that order and
    * short-circuit on the first failure.
    */
  private[kinesis4cats] def isValid(
      payloadSizeLimit: Int,
      partitionKeyMin: Int,
      partitionKeyMax: Int
  ): Boolean =
    isValidPayloadSize(payloadSizeLimit) &&
      isValidPartitionKey(partitionKeyMin, partitionKeyMax) &&
      isValidExplicitHashKey
}

object Record {

  /** Value-based equality for [[Record]]: `data` is compared element by
    * element, the remaining fields through their own `Eq` instances.
    */
  implicit val recordEq: Eq[Record] = (left, right) =>
    left.partitionKey === right.partitionKey &&
      left.explicitHashKey === right.explicitHashKey &&
      left.sequenceNumberForOrdering === right.sequenceNumberForOrdering &&
      left.data.sameElements(right.data)

  // Largest unsigned 128-bit value (2^128 - 1); used as the inclusive upper
  // bound when an explicit hash key is validated.
  private val unit128Max = (BigInt(1) << 128) - 1

  // See https://github.com/awslabs/kinesis-aggregation/blob/2.0.3/java/KinesisAggregatorV2/src/main/java/com/amazonaws/kinesis/agg/AggRecord.java#L280
  // Counts how many bits are needed to represent a positive `value` (the
  // position of its highest set bit) and adds that to `bitsNeeded`.
  // Values <= 0 contribute no bits, so `bitsNeeded` is returned unchanged.
  private def getBitsNeeded(value: Int, bitsNeeded: Int = 0): Int =
    if (value <= 0) bitsNeeded
    else bitsNeeded + (Integer.SIZE - Integer.numberOfLeadingZeros(value))

  // See https://github.com/awslabs/kinesis-aggregation/blob/2.0.3/java/KinesisAggregatorV2/src/main/java/com/amazonaws/kinesis/agg/AggRecord.java#L280
  // Number of bytes used to encode `value` as a varint.
  private def calculateVarIntSize(value: Int): Int = {
    // Zero is treated as needing a single bit, hence a single byte.
    val bits = if (value == 0) 1 else getBitsNeeded(value)
    // Seven bits are counted per byte; round up to whole bytes.
    (bits + 6) / 7
  }

  /** A [[Record]] paired with the shard it is predicted to land on. */
  private[kinesis4cats] final case class WithShard(
      record: Record,
      predictedShard: ShardId
  ) {

    // See https://github.com/awslabs/kinesis-aggregation/blob/2.0.3/java/KinesisAggregatorV2/src/main/java/com/amazonaws/kinesis/agg/AggRecord.java#L467
    /** The record's explicit hash key when one is set; otherwise the first 16
      * bytes of the MD5 digest of the partition key bytes, read as an
      * unsigned big-endian integer and rendered in base 10.
      */
    def getExplicitHashKey: String = record.explicitHashKey.getOrElse {
      val digest = Utils.md5(record.partitionKeyBytes)

      // Most significant byte first: shift what was accumulated so far by one
      // byte and append the next byte as an unsigned value.
      (0 until 16)
        .foldLeft(BigInt(BigInteger.ZERO)) { (acc, idx) =>
          (acc << 8) + BigInt(digest(idx).toInt & 0xff)
        }
        .toString(10)
    }

    /** Builds the [[AggregationEntry]] for this record. A key that is already
      * present in the corresponding table reuses its index; a new key gets
      * the current table size as its index.
      */
    def asAggregationEntry(
        currentPartitionKeys: Map[String, Int],
        currentExplicitHashKeys: Map[String, Int]
    ): AggregationEntry = {
      val hashKey = getExplicitHashKey
      val pkIndex =
        currentPartitionKeys.getOrElse(
          record.partitionKey,
          currentPartitionKeys.size
        )
      val ehkIndex =
        currentExplicitHashKeys.getOrElse(
          hashKey,
          currentExplicitHashKeys.size
        )

      AggregationEntry(record, hashKey, pkIndex, ehkIndex)
    }

    /** Size this record would add to an aggregate that currently holds the
      * given key tables.
      */
    def aggregatedPayloadSize(
        currentPartitionKeys: Map[String, Int],
        currentExplicitHashKeys: Map[String, Int]
    ): Int = {
      val entry =
        asAggregationEntry(currentPartitionKeys, currentExplicitHashKeys)
      entry.aggregatedPayloadSize(currentPartitionKeys, currentExplicitHashKeys)
    }
  }

  private[kinesis4cats] object WithShard {

    /** Pairs `record` with `predictedShard`, falling back to a shard id of
      * "DEFAULT" when no prediction is available.
      */
    def fromOption(
        record: Record,
        predictedShard: Option[ShardId]
    ): WithShard = {
      val shard = predictedShard.getOrElse(ShardId("DEFAULT"))
      WithShard(record, shard)
    }
  }

  /** A record together with its resolved explicit hash key and its indices in
    * the partition key and explicit hash key tables.
    */
  private[kinesis4cats] final case class AggregationEntry(
      record: Record,
      explicitHashKey: String,
      partitionKeyTableIndex: Int,
      explicitHashKeyTableIndex: Int
  ) {

    // See https://github.com/awslabs/kinesis-aggregation/blob/2.0.3/java/KinesisAggregatorV2/src/main/java/com/amazonaws/kinesis/agg/AggRecord.java#L221
    /** Number of bytes this entry adds to an aggregate whose key tables are
      * `currentPartitionKeys` and `currentExplicitHashKeys`. Keys already
      * present in a table add nothing for the table itself.
      */
    def aggregatedPayloadSize(
        currentPartitionKeys: Map[String, Int],
        currentExplicitHashKeys: Map[String, Int]
    ): Int = {
      // One leading byte (presumably the protobuf field tag - confirm against
      // the referenced AggRecord.java), a varint length prefix, then the
      // content itself.
      def lengthDelimited(length: Int): Int =
        1 + Record.calculateVarIntSize(length) + length

      // One leading byte plus the varint-encoded table index.
      def indexField(index: Int): Int =
        Record.calculateVarIntSize(index) + 1

      val partitionKeyTableSize =
        if (currentPartitionKeys.contains(record.partitionKey)) 0
        else lengthDelimited(record.partitionKeyLength)

      val explicitHashKeyTableSize =
        if (currentExplicitHashKeys.contains(explicitHashKey)) 0
        else
          lengthDelimited(
            explicitHashKey.getBytes(StandardCharsets.UTF_8).length
          )

      val partitionKeyIndex = currentPartitionKeys.getOrElse(
        record.partitionKey,
        currentPartitionKeys.size
      )
      val explicitHashKeyIndex = currentExplicitHashKeys.getOrElse(
        explicitHashKey,
        currentExplicitHashKeys.size
      )

      // Body of the inner record: both table indices followed by the data.
      val innerRecordBody =
        indexField(partitionKeyIndex) +
          indexField(explicitHashKeyIndex) +
          lengthDelimited(record.data.length)

      partitionKeyTableSize +
        explicitHashKeyTableSize +
        lengthDelimited(innerRecordBody)
    }

    /** Converts this entry into its protobuf `messages.Record` form. */
    def asEntry: messages.Record = {
      val pkIndex = partitionKeyTableIndex.toLong
      val ehkIndex = explicitHashKeyTableIndex.toLong
      messages.Record(pkIndex, Some(ehkIndex), ByteString.copyFrom(record.data))
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy