All downloads are free. Search and download functionality uses the official Maven repository.

com.azure.cosmos.spark.TransientIOErrorsRetryingReadManyIterator.scala Maven / Gradle / Ivy

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.azure.cosmos.spark

import com.azure.cosmos.{CosmosAsyncContainer, CosmosEndToEndOperationLatencyPolicyConfigBuilder}
import com.azure.cosmos.implementation.{ImplementationBridgeHelpers, OperationCancelledException}
import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple
import com.azure.cosmos.models.{CosmosItemIdentity, CosmosReadManyRequestOptions}
import com.azure.cosmos.spark.diagnostics.BasicLoggingTrait

import java.time.Duration
import java.util.concurrent.{ExecutorService, SynchronousQueue, ThreadPoolExecutor, TimeUnit, TimeoutException}
import scala.concurrent.{Await, ExecutionContext, Future}

// scalastyle:off underscore.import
import scala.collection.JavaConverters._
// scalastyle:on underscore.import

/**
 * Buffered iterator that resolves a stream of item identities through readMany calls against a
 * Cosmos container, one batch of at most `pageSize` identities at a time.
 *
 * Each batch is fetched through `TransientErrorsRetryPolicy.executeWithRetry`, and every fetch is
 * bounded both by an end-to-end latency policy set on the request options and by a local wait
 * timeout that is 5 seconds longer than that end-to-end timeout.
 *
 * @param container                   container the readMany calls are issued against
 * @param readManyFilterList          identities of the items to read
 * @param queryOptions                request options; the end-to-end latency policy is set on them
 * @param pageSize                    maximum number of identities sent per readMany call
 * @param operationContextAndListener optional listener notified for every processed response
 * @param classType                   class of the rows produced by this iterator
 * @tparam TSparkRow type of the rows produced by this iterator
 */
private[spark] class TransientIOErrorsRetryingReadManyIterator[TSparkRow]
(
  val container: CosmosAsyncContainer,
  val readManyFilterList: Iterator[CosmosItemIdentity],
  val queryOptions: CosmosReadManyRequestOptions,
  val pageSize: Int,
  val operationContextAndListener: Option[OperationContextAndListenerTuple],
  val classType: Class[TSparkRow]
) extends BufferedIterator[TSparkRow] with BasicLoggingTrait with AutoCloseable {

  // Upper bound for waiting on a single page - slightly longer than the end-to-end timeout so
  // that the end-to-end timeout normally fires first.
  private val maxPageRetrievalTimeout = scala.concurrent.duration.FiniteDuration(
    5 + CosmosConstants.readOperationEndToEndTimeoutInSeconds,
    scala.concurrent.duration.SECONDS)
  private val queryOptionsWithEnd2EndTimeout = queryOptions.setCosmosEndToEndOperationLatencyPolicyConfig(
    new CosmosEndToEndOperationLatencyPolicyConfigBuilder(
        java.time.Duration.ofSeconds(CosmosConstants.readOperationEndToEndTimeoutInSeconds)
      )
      .enable(true)
      .build
  )
  // Items of the most recently fetched non-empty page; None when no page is buffered.
  private[spark] var currentItemIterator: Option[BufferedIterator[TSparkRow]] = None
  private val readManyFilterBatchIterator = readManyFilterList.grouped(pageSize)

  /**
   * Checks whether more records exist. Answers from the buffered page when possible, otherwise
   * fetches further batches - which can trigger I/O operations and retries.
   */
  override def hasNext: Boolean = hasBufferedNext || hasNextInternal

  /**
   * Fetches readMany batches until one yields at least one record or no batch is left - this will
   * potentially trigger I/O operations and retries.
   *
   * @return true (more records exist), false (no more records exist)
   */
  @scala.annotation.tailrec
  private def hasNextInternal: Boolean = {
    if (!readManyFilterBatchIterator.hasNext) {
      false
    } else {
      // fetch items for the next readMany filter batch
      val readManyFilterBatch = readManyFilterBatchIterator.next()
      val outcome: Option[Boolean] =
        TransientErrorsRetryPolicy.executeWithRetry(
          () => hasNextInternalCore(readManyFilterBatch),
          statusResetFuncBetweenRetry = Some(() => { currentItemIterator = None })
        )

      outcome match {
        case Some(moreRecordsExist) => moreRecordsExist
        // the batch produced an empty page - continue with the next batch
        case None => hasNextInternal
      }
    }
  }

  /**
   * Executes a single readMany call for the given batch and buffers the resulting page.
   *
   * @param readManyFilterBatch identities to read in this call
   * @return Some(true) when the page contains records, None when the page was empty (the caller
   *         should continue with the next batch)
   * @throws OperationCancelledException when the end-to-end timeout was hit, or when waiting for
   *                                     the page exceeded the local page retrieval timeout
   */
  private def hasNextInternalCore(readManyFilterBatch: List[CosmosItemIdentity]): Option[Boolean] = {
    val feedResponse = try {
      Await.result(
        Future {
          ImplementationBridgeHelpers
            .CosmosAsyncContainerHelper
            .getCosmosAsyncContainerAccessor
            .readMany(container, readManyFilterBatch.asJava, queryOptionsWithEnd2EndTimeout, classType)
            .block()
        }(TransientIOErrorsRetryingReadManyIterator.executionContext),
        maxPageRetrievalTimeout)
    } catch {
      case endToEndTimeoutException: OperationCancelledException =>
        val message = s"End-to-end timeout hit when trying to retrieve the next page. Filter: " +
          s"$readManyFilterBatch, Context: $operationContextString"

        logError(message, throwable = endToEndTimeoutException)

        throw endToEndTimeoutException
      case timeoutException: TimeoutException =>
        val message = s"Attempting to retrieve the next page timed out. Filter: " +
          s"$readManyFilterBatch, Context: $operationContextString"

        logError(message, timeoutException)

        // Surface the local wait timeout with the same exception type as the end-to-end
        // timeout, keeping the stack trace of the original timeout.
        val exception = new OperationCancelledException(
          message,
          null
        )
        exception.setStackTrace(timeoutException.getStackTrace)
        throw exception
    }

    operationContextAndListener.foreach { contextAndListener =>
      contextAndListener.getOperationListener.feedResponseProcessedListener(
        contextAndListener.getOperationContext,
        feedResponse)
    }

    val iteratorCandidate = feedResponse.getResults.iterator().asScala.buffered

    if (iteratorCandidate.hasNext) {
      currentItemIterator = Some(iteratorCandidate)
      Some(true)
    } else {
      // empty page interleaved
      // need to attempt to get the next FeedResponse to determine whether more records exist
      None
    }
  }

  /** Text form of the operation context for log messages; "n/a" when none is available. */
  private def operationContextString: String =
    operationContextAndListener
      .flatMap(contextAndListener => Option(contextAndListener.getOperationContext))
      .map(_.toString)
      .getOrElse("n/a")

  /** Checks the buffered page only (never triggers I/O) and drops it once it is exhausted. */
  private def hasBufferedNext: Boolean = {
    val pageHasMoreItems = currentItemIterator.exists(_.hasNext)
    if (!pageHasMoreItems) {
      currentItemIterator = None
    }
    pageHasMoreItems
  }

  /**
   * Returns the buffered page positioned on the next record. Goes through hasNext so that the
   * next non-empty page is loaded first when nothing is buffered.
   *
   * @throws NoSuchElementException when no more records exist
   */
  private def pageWithNextRecord(operation: String): BufferedIterator[TSparkRow] = {
    val page = if (hasNext) currentItemIterator else None
    page.getOrElse(throw new NoSuchElementException(s"$operation on empty iterator"))
  }

  /**
   * Returns the next record and advances the iterator.
   *
   * @throws NoSuchElementException when no more records exist
   */
  override def next(): TSparkRow = {
    pageWithNextRecord("next").next()
  }

  /**
   * Returns the next record without advancing the iterator.
   *
   * @throws NoSuchElementException when no more records exist
   */
  override def head(): TSparkRow = {
    pageWithNextRecord("head").head
  }

  /** No-op. */
  override def close(): Unit = {}
}

/**
 * Holds the thread pool, and the execution context built on it, that the readMany iterator uses
 * for its blocking page retrievals.
 */
private object TransientIOErrorsRetryingReadManyIterator extends BasicLoggingTrait {
  private val maxConcurrency = SparkUtils.getNumberOfHostCPUCores

  /** Pool with a fixed number of threads - one per host CPU core as reported by SparkUtils. */
  val executorService: ExecutorService = {
    // A synchronous queue has no internal capacity at all (not even one slot),
    // so a task is only accepted when a worker thread can take it over directly.
    val handOffQueue = new SynchronousQueue[Runnable]()
    val threadFactory = SparkUtils.daemonThreadFactory()
    // When every worker thread is busy the submitting thread runs the task itself.
    // This is a simple feedback mechanism which slows down the rate at which new tasks are submitted.
    val saturationPolicy = new ThreadPoolExecutor.CallerRunsPolicy()

    new ThreadPoolExecutor(
      maxConcurrency, // core pool size
      maxConcurrency, // maximum pool size
      0L,
      TimeUnit.MILLISECONDS,
      handOffQueue,
      threadFactory,
      saturationPolicy
    )
  }

  /** Execution context that schedules work on [[executorService]]. */
  val executionContext: scala.concurrent.ExecutionContextExecutorService =
    ExecutionContext.fromExecutorService(executorService)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy