// Artifact: net.ibizsys.dataflow.spark.eai.SparkPSSysDataSyncAgentEngine (Maven / Gradle / Ivy — newest version)
package net.ibizsys.dataflow.spark.eai;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.streaming.DataStreamReader;
import org.apache.spark.sql.streaming.DataStreamWriter;
import net.ibizsys.dataflow.core.eai.PSSysDataSyncAgentEngineBase;
import net.ibizsys.model.PSModelEnums.DataSyncAgentDir;
import net.ibizsys.model.PSModelEnums.DataSyncAgentType;
import net.ibizsys.model.engine.PSModelEngineException;
import net.ibizsys.model.engine.util.IAction;
import net.ibizsys.model.res.IPSSysDataSyncAgent;
/**
 * Spark implementation of a system data-sync agent engine.
 *
 * <p>Bridges an ibiz data-sync agent model ({@code IPSSysDataSyncAgent}) to Spark
 * Structured Streaming sources/sinks. Currently only the KAFKA agent type is
 * supported: the reader subscribes to the agent's default topic, and the writer
 * publishes to it. The configured sync direction (IN / OUT / INOUT) gates which
 * side is available.
 */
public class SparkPSSysDataSyncAgentEngine extends PSSysDataSyncAgentEngineBase implements ISparkPSSysDataSyncAgentEngine {

	/**
	 * Builds a {@link DataStreamReader} for this agent, if the agent is
	 * configured for input (direction IN or INOUT).
	 *
	 * @param sparkSession the active Spark session used to create the reader
	 * @return a configured, not-yet-loaded {@link DataStreamReader}
	 * @throws PSModelEngineException if the agent's sync direction does not
	 *         permit input, or if the underlying action fails
	 */
	@Override
	public DataStreamReader getDataStreamReader(SparkSession sparkSession) {
		// Only agents whose direction is IN or INOUT may act as a data source.
		if (DataSyncAgentDir.IN.value.equalsIgnoreCase(this.getPSSysDataSyncAgent().getSyncDir())
				|| DataSyncAgentDir.INOUT.value.equalsIgnoreCase(this.getPSSysDataSyncAgent().getSyncDir())) {
			return this.executeAction("获取数据源读取对象", new IAction() {
				@Override
				public DataStreamReader execute(Object[] args) throws Throwable {
					return onGetDataStreamReader(sparkSession);
				}
			}, DataStreamReader.class);
		}
		// No format specifiers were present, so the String.format wrapper was
		// redundant (and a hazard if the message ever contains '%').
		throw new PSModelEngineException(this, "未启用输入代理功能");
	}

	/**
	 * Creates the concrete stream reader for the agent type.
	 *
	 * <p>Only {@code KAFKA} is handled: the reader is pointed at the agent's
	 * service URL (bootstrap servers), subscribed to the default topic, and
	 * tagged with the configured consumer group id.
	 *
	 * @param sparkSession the active Spark session
	 * @return a Kafka-format {@link DataStreamReader}
	 * @throws Throwable if the agent type is unsupported
	 */
	protected DataStreamReader onGetDataStreamReader(SparkSession sparkSession) throws Throwable {
		if (DataSyncAgentType.KAFKA.value.equalsIgnoreCase(this.getAgentType())) {
			return sparkSession
					.readStream()
					.format("kafka")
					.option("kafka.bootstrap.servers", getServiceUrl())
					.option("subscribe", this.getDefaultTopic())
					.option("kafka.group.id", this.getGroupId()); // consumer group
		}
		throw new Exception(String.format("未支持代理类型[%1$s]", this.getAgentType()));
	}

	/**
	 * Builds a {@link DataStreamWriter} for the given dataset, if the agent is
	 * configured for output (direction OUT or INOUT).
	 *
	 * <p>Note: for a non-streaming dataset the write is executed eagerly and
	 * {@code null} is returned (see {@link #onGetDataStreamWriter}); callers
	 * must tolerate a {@code null} result in that case.
	 *
	 * @param sparkSession the active Spark session
	 * @param dataset      the dataset to publish
	 * @param cls          the row/entity class associated with the dataset
	 * @return a configured {@link DataStreamWriter}, or {@code null} when the
	 *         dataset is non-streaming and was written immediately
	 * @throws PSModelEngineException if the agent's sync direction does not
	 *         permit output, or if the underlying action fails
	 */
	@Override
	public DataStreamWriter getDataStreamWriter(SparkSession sparkSession, Dataset dataset, Class cls) {
		// Only agents whose direction is OUT or INOUT may act as a data sink.
		if (DataSyncAgentDir.OUT.value.equalsIgnoreCase(this.getPSSysDataSyncAgent().getSyncDir())
				|| DataSyncAgentDir.INOUT.value.equalsIgnoreCase(this.getPSSysDataSyncAgent().getSyncDir())) {
			return this.executeAction("获取数据源写入对象", new IAction() {
				@Override
				public DataStreamWriter execute(Object[] args) throws Throwable {
					return onGetDataStreamWriter(sparkSession, dataset, cls);
				}
			}, DataStreamWriter.class);
		}
		// No-arg String.format removed; literal passed directly.
		throw new PSModelEngineException(this, "未启用输出功能");
	}

	/**
	 * Creates the concrete stream writer for the agent type.
	 *
	 * <p>Only {@code KAFKA} is handled. A streaming dataset yields a Kafka
	 * {@link DataStreamWriter} for the caller to start; a batch dataset is
	 * written to Kafka immediately via {@code dataset.write().save()} and
	 * {@code null} is returned.
	 *
	 * @param sparkSession the active Spark session
	 * @param dataset      the dataset to publish
	 * @param cls          the row/entity class associated with the dataset
	 * @return a Kafka-format {@link DataStreamWriter}, or {@code null} for a
	 *         batch dataset (already written)
	 * @throws Throwable if the agent type is unsupported
	 */
	protected DataStreamWriter onGetDataStreamWriter(SparkSession sparkSession, Dataset dataset, Class cls) throws Throwable {
		if (DataSyncAgentType.KAFKA.value.equalsIgnoreCase(this.getAgentType())) {
			if (dataset.isStreaming()) {
				return dataset.writeStream()
						.format("kafka")
						.option("kafka.bootstrap.servers", getServiceUrl())
						.option("topic", this.getDefaultTopic());
				// .option("checkpointLocation", "/path/to/checkpoint") // checkpoint directory
			} else {
				// Batch dataset: perform the write eagerly; there is no
				// streaming writer to hand back, so return null.
				dataset.write()
						.format("kafka")
						.option("kafka.bootstrap.servers", getServiceUrl())
						.option("topic", this.getDefaultTopic())
						.save();
				return null;
			}
		}
		throw new Exception(String.format("未支持代理类型[%1$s]", this.getAgentType()));
	}
}