All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.azure.cosmos.spark.ChangeFeedOffset.scala Maven / Gradle / Ivy

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.spark

import com.azure.cosmos.spark.ChangeFeedOffset.{IdPropertyName, InputPartitionsPropertyName, StatePropertyName, V1Identifier}
import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper}
import com.fasterxml.jackson.databind.node.ArrayNode
import org.apache.spark.sql.connector.read.streaming.{Offset, PartitionOffset}

import scala.collection.mutable.ArrayBuffer

// scalastyle:off underscore.import
import scala.collection.JavaConverters._
// scalastyle:on underscore.import

/**
 * Change feed offset that extends both Spark's `Offset` and `PartitionOffset`.
 *
 * The JSON form always carries an identifier property and the change feed state; when
 * input partitions are present they are appended as a JSON array.
 *
 * @param changeFeedState state string, written into the JSON as-is (it is not escaped here)
 * @param inputPartitions optional input partitions to embed in the serialized offset
 */
private[cosmos] case class ChangeFeedOffset
(
  changeFeedState: String,
  inputPartitions: Option[Array[CosmosInputPartition]]
) extends Offset
  with Serializable
  with PartitionOffset {

  // Lazily built on the first json() call and marked @transient, so the text is
  // recomputed from the constructor arguments rather than stored as a regular field.
  @transient private lazy val jsonPersisted = {
    // Empty when there are no input partitions, otherwise `,"partitions":[...]`.
    val partitionsFragment = inputPartitions.fold("") { partitions =>
      val serializedPartitions = String.join(",", partitions.map(_.json()).toSeq.asJava)
      raw""","$InputPartitionsPropertyName":[$serializedPartitions]"""
    }

    raw"""{"$IdPropertyName":"$V1Identifier","$StatePropertyName":"$changeFeedState"$partitionsFragment}"""
  }

  /** Returns the JSON representation of this offset. */
  override def json(): String = jsonPersisted
}

/**
 * Property names, version identifiers and JSON parsing for [[ChangeFeedOffset]].
 */
private[cosmos] object ChangeFeedOffset {
  private val IdPropertyName: String = "id"
  private val StatePropertyName: String = "state"
  private val InputPartitionsPropertyName: String = "partitions"
  val V1Identifier: String = "spark.cosmos.changeFeed.offset.v1"
  val V1AlternateIdentifier: String = "azure_cosmos_spark.com.azure.cosmos.spark.changeFeed.offset.v1"
  private val objectMapper = new ObjectMapper()

  /**
   * Parses the JSON representation of an offset.
   *
   * @param json serialized offset text
   * @return the offset holding the state and, if a "partitions" array is present, its partitions
   * @throws IllegalArgumentException if the parsed document fails the checks in `isValidJson`
   */
  def fromJson(json: String): ChangeFeedOffset = {
    val root = objectMapper.readTree(json)
    if (!isValidJson(root)) {
      throw new IllegalArgumentException(s"Unable to deserialize offset '$json'.")
    }

    // The input partitions travel inside the offset so that the latest LSN does not have to be
    // calculated a second time: it is needed to determine the endOffset/latestOffset, so the
    // partitions are computed at that point and handed to planInputPartitions via the end offset.
    val partitions: Option[Array[CosmosInputPartition]] =
      Option(root.get(InputPartitionsPropertyName)).collect {
        case partitionNodes: ArrayNode if partitionNodes.isArray =>
          val parsed = ArrayBuffer.empty[CosmosInputPartition]
          partitionNodes.elements().asScala.foreach { node =>
            parsed += CosmosInputPartition.fromJson(node)
          }
          parsed.toArray
      }

    ChangeFeedOffset(root.get(StatePropertyName).asText, partitions)
  }

  /**
   * Checks that the node is a JSON object whose id is one of the two known identifiers
   * and whose state property is a non-empty text value.
   */
  private[this] def isValidJson(parsedNode: JsonNode): Boolean = {
    def hasKnownIdentifier(node: JsonNode): Boolean =
      Option(node.get(IdPropertyName)).exists { idNode =>
        val id = idNode.asText("")
        id == V1Identifier || id == V1AlternateIdentifier
      }

    def hasNonEmptyTextState(node: JsonNode): Boolean =
      Option(node.get(StatePropertyName)).exists { stateNode =>
        stateNode.isTextual && stateNode.asText("") != ""
      }

    Option(parsedNode).exists { node =>
      node.isObject && hasKnownIdentifier(node) && hasNonEmptyTextState(node)
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy