All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.azure.cosmos.spark.udf.CreateSpark2ContinuationsFromChangeFeedOffset.scala Maven / Gradle / Ivy

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.azure.cosmos.spark.udf

import com.azure.cosmos.implementation.SparkBridgeImplementationInternal
import com.azure.cosmos.implementation.SparkBridgeImplementationInternal.rangeToNormalizedRange
import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState
import com.azure.cosmos.implementation.query.CompositeContinuationToken
import com.azure.cosmos.spark.{ChangeFeedOffset, CosmosClientCache, CosmosClientCacheItem, CosmosClientConfiguration, CosmosConfig, CosmosContainerConfig, CosmosReadConfig, Loan}
import com.azure.cosmos.{CosmosAsyncClient, SparkBridgeInternal}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.api.java.UDF2

import scala.collection.mutable

@SerialVersionUID(1L)
class CreateSpark2ContinuationsFromChangeFeedOffset extends UDF2[Map[String, String], String, Map[Int, Long]] {
  override def call
  (
    userProvidedConfig: Map[String, String],
    changeFeedOffset: String
  ): Map[Int, Long] = {

    val effectiveUserConfig = CosmosConfig.getEffectiveConfig(None, None, userProvidedConfig)
    val readConfig = CosmosReadConfig.parseCosmosReadConfig(effectiveUserConfig)
    val cosmosClientConfig = CosmosClientConfiguration(
      effectiveUserConfig,
      useEventualConsistency = readConfig.forceEventualConsistency,
      CosmosClientConfiguration.getSparkEnvironmentInfo(SparkSession.getActiveSession))

    val cosmosContainerConfig: CosmosContainerConfig =
      CosmosContainerConfig.parseCosmosContainerConfig(effectiveUserConfig, None, None)

    Loan(
      List[Option[CosmosClientCacheItem]](
        Some(CosmosClientCache(
          cosmosClientConfig,
          None,
          s"UDF CreateSpark2ContinuationsFromChangeFeedOffset"
        ))
      ))
      .to(cosmosClientCacheItems => {
        createSpark2ContinuationsFromChangeFeedOffset(
          cosmosClientCacheItems.head.get.cosmosClient,
          cosmosContainerConfig.database,
          cosmosContainerConfig.container,
          changeFeedOffset
        )
      })
  }

  private[this] def createSpark2ContinuationsFromChangeFeedOffset
  (
    client: CosmosAsyncClient,
    databaseName: String,
    containerName: String,
    offsetJson: String
  ): Map[Int, Long] = {

    val effectiveOffsetJson = if (offsetJson.indexOf("\n") == 2 && offsetJson.size > 2) {
      offsetJson.substring(3)
    } else {
      offsetJson
    }
    val offset: ChangeFeedOffset = ChangeFeedOffset.fromJson(effectiveOffsetJson)

    val container = client
      .getDatabase(databaseName)
      .getContainer(containerName)

    val expectedContainerResourceId = SparkBridgeInternal
      .getContainerPropertiesFromCollectionCache(container)
      .getResourceId

    val pkRanges = SparkBridgeInternal
      .getPartitionKeyRanges(container)

    val lsnsByPkRangeId = mutable.Map[Int, Long]()

    pkRanges
      .foreach(pkRange => {
        val normalizedRange = rangeToNormalizedRange(pkRange.toRange)

        val effectiveChangeFeedState = ChangeFeedState
          .fromString(
            SparkBridgeImplementationInternal
              .extractChangeFeedStateForRange(offset.changeFeedState, normalizedRange)
          )

        val containerResourceId = effectiveChangeFeedState.getContainerRid

        if (!expectedContainerResourceId.equalsIgnoreCase(containerResourceId)) {
          throw new IllegalArgumentException(
            s"The provided change feed offset is for a different container (either completely different container " +
              s"or container with same name but after being deleted and recreated). Name:$containerName " +
              s"Expected ResourceId: $expectedContainerResourceId, " +
              s"Actual ResourceId: $containerResourceId"
          )
        }

        var minLsn: Option[CompositeContinuationToken] = None

        effectiveChangeFeedState
          .extractContinuationTokens()
          .forEach(token => {

            if (minLsn.isEmpty) {
              minLsn = Some(token)
            } else if (SparkBridgeImplementationInternal.toLsn(token.getToken) <
              SparkBridgeImplementationInternal.toLsn(minLsn.get.getToken)) {
              minLsn = Some(token)
            }
          })

        if (minLsn.isDefined) {
          lsnsByPkRangeId.put(
            pkRange.getId.toInt,
            Math.max(0, SparkBridgeImplementationInternal.toLsn(minLsn.get.getToken)))
        }
      })

    lsnsByPkRangeId.toMap
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy