/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.streaming

import _root_.kafka.common.TopicAndPartition
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.DirectKafkaInputDStream
import streaming.core.compositor.spark.streaming.ck.{DirectKafkaRecoverSource, TestInputStreamRecoverSource}
import streaming.core.strategy.platform.{RuntimeOperator, SparkStreamingRuntime}

import scala.collection.mutable.ArrayBuffer
/**
 * Runtime hooks into a [[SparkStreamingRuntime]]: snapshots and restores the
 * mutable state of input DStreams (e.g. direct-Kafka offsets) via reflection.
 *
 * 5/3/16 WilliamZhu([email protected])
 */
class SparkStreamingOperator(_ssr: SparkStreamingRuntime) extends RuntimeOperator {

  val ssr = _ssr
  val ssc = ssr.streamingContext

  // Id of the input DStream registered at `index` in the DStream graph.
  def inputStreamId(index: Int) = {
    ssc.graph.getInputStreams()(index).id
  }

  def directKafkaRecoverSource = {
    new DirectKafkaRecoverSource(this)
  }

  def testInputRecoverSource = {
    new TestInputStreamRecoverSource(this)
  }

  def inputDStreams = {
    ssc.graph.getInputStreams()
  }

  def inputTrackerMeta(time: Time) = {
    ssc.scheduler.inputInfoTracker.getInfo(time)
  }

  // Set of ids of all input streams that are direct-Kafka streams.
  def directKafkaDStreamsMap = {
    inputDStreams.filter { is =>
      is.isInstanceOf[DirectKafkaInputDStream[_, _, _, _, _]]
    }.map(f => f.id).toSet
  }

  // The streaming job can stop safely once no batch times are pending.
  def isStreamingCanStop() = {
    ssc.scheduler.getPendingTimes().isEmpty
  }
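
  /**
   * Snapshots the mutable state of every supported input stream: the
   * `currentOffsets` map of a direct-Kafka stream, or the `currentOffset`
   * counter of a [[TestInputStream]]. Returns a map keyed by input stream id.
   */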
  def snapShotInputStreamState() = {
    val buffer = new ArrayBuffer[(Int, Any)]()
    ssc.graph.getInputStreams().foreach { inputDStream =>
      inputDStream match {
        case dkid: DirectKafkaInputDStream[_, _, _, _, _] =>
          // `currentOffsets` is not public, so read it reflectively.
          val field = classOf[DirectKafkaInputDStream[_, _, _, _, _]].getDeclaredField("currentOffsets")
          field.setAccessible(true)
          val currentOffsets = field.get(dkid).asInstanceOf[Map[TopicAndPartition, Long]]
          buffer += ((dkid.id, currentOffsets))
        case test: TestInputStream[_] =>
          val field = classOf[TestInputStream[_]].getDeclaredField("currentOffset")
          field.setAccessible(true)
          val currentOffset = field.get(test).asInstanceOf[Int]
          buffer += ((test.id, currentOffset))
        case _ =>
      }
    }
    buffer.toMap
  }
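
  /**
   * Restores a previously snapshotted state onto the input stream with the
   * given id, writing the offset field back via reflection.
   */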
  def setInputStreamState(inputId: Int, state: Any) = {
    val inputDStream = ssc.graph.getInputStreams().filter(f => f.id == inputId).head
    inputDStream match {
      case dkid: DirectKafkaInputDStream[_, _, _, _, _] =>
        val field = classOf[DirectKafkaInputDStream[_, _, _, _, _]].getDeclaredField("currentOffsets")
        field.setAccessible(true)
        field.set(dkid, state)
      case test: TestInputStream[_] =>
        val field = classOf[TestInputStream[_]].getDeclaredField("currentOffset")
        field.setAccessible(true)
        field.set(test, state)
      case _ =>
    }
  }
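
  /**
   * Same as above, for callers that already hold the [[InputDStream]]
   * reference.
   */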
  def setInputStreamState(inputDStream: InputDStream[(String, String)], state: Any) = {
    inputDStream match {
      case dkid: DirectKafkaInputDStream[_, _, _, _, _] =>
        val field = classOf[DirectKafkaInputDStream[_, _, _, _, _]].getDeclaredField("currentOffsets")
        field.setAccessible(true)
        field.set(dkid, state)
      case test: TestInputStream[_] =>
        val field = classOf[TestInputStream[_]].getDeclaredField("currentOffset")
        field.setAccessible(true)
        field.set(test, state)
      case _ =>
    }
  }
}
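
// A minimal usage sketch (hypothetical driver code; `runtime` is an assumed,
// already-configured SparkStreamingRuntime, not part of this file):
//
//   val operator = new SparkStreamingOperator(runtime)
//   val snapshot = operator.snapShotInputStreamState()
//   // persist `snapshot` somewhere durable, then after a restart:
//   snapshot.foreach { case (inputId, state) =>
//     operator.setInputStreamState(inputId, state)
//   }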