org.streampipes.wrapper.spark.SparkRuntime
/*
* Copyright 2018 FZI Forschungszentrum Informatik
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.streampipes.wrapper.spark;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.spark.SparkConf;
import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import org.streampipes.model.SpDataStream;
import org.streampipes.model.base.InvocableStreamPipesEntity;
import org.streampipes.model.grounding.KafkaTransportProtocol;
import org.streampipes.wrapper.spark.converter.JsonToMapFormat;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
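/**
 * Abstract base runtime for executing a StreamPipes pipeline element on Spark Streaming.
 * It holds the Kafka consumer configuration derived from the {@link SparkDeploymentConfig}
 * and, when configured to run locally, creates the {@link JavaStreamingContext} itself.
 * (Documentation added to this excerpt; not part of the original source.)
 */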
public abstract class SparkRuntime<I extends InvocableStreamPipesEntity> implements Runnable, Serializable {

  private static final long serialVersionUID = 1L;

  protected final SparkDeploymentConfig deploymentConfig;
  protected Thread thread;
  protected SparkAppHandle appHandle;
  protected SparkLauncher launcher;
  protected JavaStreamingContext streamingContext;
  protected I graph;
  protected Map<String, Object> kafkaParams;
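
  /**
   * Creates the runtime for the given invocation graph and deployment configuration and
   * pre-populates the Kafka consumer/producer parameters used to read the input stream.
   * (Comment added to this excerpt; not part of the original source.)
   */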
  public SparkRuntime(I graph, SparkDeploymentConfig deploymentConfig) {
    this.graph = graph;
    this.deploymentConfig = deploymentConfig;

    kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", this.deploymentConfig.getKafkaHost());
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", StringDeserializer.class);
    kafkaParams.put("key.serializer", StringSerializer.class);
    kafkaParams.put("value.serializer", StringSerializer.class);
    kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream");
    kafkaParams.put("auto.offset.reset", "latest");
    kafkaParams.put("enable.auto.commit", false);
  }
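
  /**
   * Starts the pipeline element. When {@code runLocal} is set in the deployment
   * configuration, a local {@link JavaStreamingContext} is created with the configured
   * master, application name and batch duration; the non-local path presumably submits
   * the job through the {@link SparkLauncher} field, but it is not shown in this excerpt.
   * (Comment added to this excerpt; not part of the original source.)
   */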
  public boolean startExecution() {
    if (this.deploymentConfig.isRunLocal()) {
      try {
        SparkConf conf = new SparkConf().setAppName(this.deploymentConfig.getAppName())
            .setMaster(this.deploymentConfig.getSparkHost());
        streamingContext = new JavaStreamingContext(conf, new Duration(this.deploymentConfig.getSparkBatchDuration()));
        JavaDStream