
com.github.catalystcode.streaming-reddit_2.11.0.0.2.source-code.RedditDemoSpark.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of streaming-reddit_2.11 Show documentation
Show all versions of streaming-reddit_2.11 Show documentation
A library for reading public web postings from Reddit using Spark Streaming.
The newest version!
import com.github.catalystcode.fortis.spark.streaming.reddit.{RedditAuth, RedditUtils}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Demo driver: creates a Reddit page stream via `RedditUtils.createPageStream`,
 * prefixes every element with "Post: " and prints the result.
 *
 * @param auth authentication settings passed through unchanged to
 *             `RedditUtils.createPageStream`
 */
class RedditDemoSpark(auth: RedditAuth) {

  /**
   * Builds the Spark contexts, wires up the stream for the keyword
   * "healthcare", starts it and blocks until the streaming context terminates.
   *
   * The streaming context, together with its underlying `SparkContext`, is
   * always stopped before this method returns or propagates an exception, so
   * a failure during setup, start-up or while the job is running does not
   * leave the contexts alive.
   */
  def run(): Unit = {
    // set up the spark context and streams
    val conf = new SparkConf()
      .setAppName("Reddit Application")
      .setIfMissing("spark.master", "local[*]") // only used when no master was supplied externally
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(1))

    try {
      val keywordSet = List("healthcare")
      RedditUtils
        .createPageStream(auth, keywordSet, ssc, pollingPeriodInSeconds = 10)
        .map(x => s"Post: ${x}")
        .print()

      // run forever
      ssc.start()
      ssc.awaitTermination()
    } finally {
      // Release both contexts on every exit path. Non-graceful stop: when we
      // get here because of an error there is no point waiting for pending
      // batches. NOTE(review): per the Spark docs, stop() on an already
      // stopped context is a no-op apart from stopping the SparkContext.
      ssc.stop(stopSparkContext = true, stopGracefully = false)
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy