com.datastax.insight.ml.spark.data.dataset.DataSetWriter Maven / Gradle / Ivy
The newest version!
package com.datastax.insight.ml.spark.data.dataset;
import com.datastax.insight.core.entity.Cache;
import com.datastax.insight.core.entity.DBSource;
import com.datastax.insight.spec.Operator;
import com.datastax.insight.core.service.PersistService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import java.util.Properties;
/**
 * Operator exposing static helpers that persist a Spark {@link Dataset} either to a
 * file-system path or to a JDBC table.
 */
public class DataSetWriter implements Operator {

    /**
     * Saves the dataset as a single merged file at {@code path}, with the
     * {@code header} writer option enabled.
     *
     * @param dataset dataset to persist
     * @param format  Spark output format passed to {@link DataFrameWriter#format(String)}
     * @param mode    Spark save mode passed to {@link DataFrameWriter#mode(String)}
     * @param path    destination path of the merged file
     * @throws Exception if the write, the merge or the result bookkeeping fails
     * @see #save(Dataset, String, String, String, boolean)
     */
    public static void save(Dataset<?> dataset, String format, String mode, String path) throws Exception {
        save(dataset, format, mode, path, true);
    }

    /**
     * Saves the dataset as a single merged file at {@code path}.
     *
     * <p>The dataset is first written to a temporary directory located under the working
     * directory of the file system resolved from {@code path}. Anything already present at
     * {@code path} is then deleted (regardless of {@code mode}, which only applies to the
     * temporary write), the temporary output is merged into {@code path}, and finally the
     * path is reported through {@link PersistService} together with the current flow id.
     *
     * <p>The temporary directory is removed whether or not the operation succeeds.
     *
     * @param dataset    dataset to persist
     * @param format     Spark output format passed to {@link DataFrameWriter#format(String)}
     * @param mode       Spark save mode used for the temporary write
     * @param path       destination path of the merged file
     * @param withHeader when {@code true} the {@code header} writer option is set to
     *                   {@code true}; otherwise the option is left unset
     * @throws IOException if the merge reports failure
     * @throws Exception   if the write, the merge or the result bookkeeping fails otherwise
     */
    public static void save(Dataset<?> dataset, String format, String mode, String path, boolean withHeader) throws Exception {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI(path), conf);
        Path tempDir = new Path(hdfs.getWorkingDirectory(), "temp_" + System.currentTimeMillis());
        Path target = new Path(path);

        try {
            newWriter(dataset, withHeader).format(format).mode(mode).save(tempDir.toString());

            if (hdfs.exists(target)) {
                hdfs.delete(target, true);
            }

            // deleteSource is false on purpose: the temporary directory is removed in the
            // finally block so that it is cleaned up on every exit path, not only on success.
            boolean merged = FileUtil.copyMerge(hdfs, tempDir, hdfs, target, false, conf, null);
            if (!merged) {
                // Do not record a flow result for a file that was never produced.
                throw new IOException("Could not merge " + tempDir + " into " + path);
            }
        } finally {
            deleteQuietly(hdfs, tempDir);
        }

        PersistService.invoke("com.datastax.insight.agent.dao.InsightDAO",
                "saveFlowResult",
                new String[]{Long.class.getTypeName(), String.class.getTypeName()},
                new Object[]{PersistService.getFlowId(), path});
    }

    /**
     * Writes the dataset directly to {@code path} using Spark's writer, without merging
     * the output and without recording a flow result.
     *
     * @param dataset    dataset to persist
     * @param format     Spark output format passed to {@link DataFrameWriter#format(String)}
     * @param mode       Spark save mode passed to {@link DataFrameWriter#mode(String)}
     * @param path       destination path
     * @param withHeader when {@code true} the {@code header} writer option is set to
     *                   {@code true}; otherwise the option is left unset
     * @throws Exception if the write fails
     */
    public static void write(Dataset<?> dataset, String format, String mode, String path, boolean withHeader) throws Exception {
        newWriter(dataset, withHeader).format(format).mode(mode).save(path);
    }

    /**
     * Writes the dataset to a JDBC table described by {@code dbSource}.
     *
     * <p>Does nothing when {@code dbSource} is {@code null}. Connection properties
     * ({@code driver}, {@code user}, {@code password}) whose value is {@code null} are
     * omitted, because {@link Properties} rejects {@code null} values.
     *
     * @param dataset  dataset to persist
     * @param mode     Spark save mode passed to {@link DataFrameWriter#mode(String)}
     * @param dbSource connection description; may be {@code null}
     * @param table    name of the destination table
     */
    public static void jdbc(Dataset<?> dataset, String mode, DBSource dbSource, String table) {
        if (dbSource == null) {
            return;
        }
        Properties properties = new Properties();
        putIfPresent(properties, "driver", dbSource.getDriver());
        putIfPresent(properties, "user", dbSource.getUser());
        putIfPresent(properties, "password", dbSource.getPassword());
        dataset.write().mode(mode).jdbc(dbSource.getUrl(), table, properties);
    }

    /**
     * Writes the dataset to a JDBC table of the DB source whose id equals {@code dbID}.
     *
     * <p>Does nothing when no matching DB source is found in the cache.
     *
     * @param dataset dataset to persist
     * @param mode    Spark save mode passed to {@link DataFrameWriter#mode(String)}
     * @param dbID    decimal id of the DB source to look up
     * @param table   name of the destination table
     */
    public static void jdbc(Dataset<?> dataset, String mode, String dbID, String table) {
        jdbc(dataset, mode, getDBSource(dbID), table);
    }

    /**
     * Looks up a DB source by id in the list cached under the key {@code "dbsources"}.
     *
     * @param id decimal id of the DB source
     * @return the first entry whose id matches, or {@code null} when the cache holds no
     *         list, the list is empty, or no entry matches
     * @throws NumberFormatException if the list is non-empty and {@code id} is not a
     *                               parsable integer
     */
    private static DBSource getDBSource(String id) {
        // The cache hands back an untyped object; the element type is what this class stores/reads.
        @SuppressWarnings("unchecked")
        List<DBSource> dbSourceList = (List<DBSource>) Cache.getCache("dbsources");
        if (dbSourceList == null || dbSourceList.isEmpty()) {
            return null;
        }
        // Parse once instead of once per element.
        int wantedId = Integer.parseInt(id);
        return dbSourceList.stream()
                .filter(d -> d.getId() == wantedId)
                .findFirst()
                .orElse(null);
    }

    /** Creates a writer for the dataset, enabling the {@code header} option when requested. */
    private static DataFrameWriter<?> newWriter(Dataset<?> dataset, boolean withHeader) {
        DataFrameWriter<?> writer = dataset.write();
        if (withHeader) {
            writer.option("header", true);
        }
        return writer;
    }

    /** Stores {@code value} under {@code key} unless it is {@code null}. */
    private static void putIfPresent(Properties properties, String key, Object value) {
        if (value != null) {
            properties.put(key, value);
        }
    }

    /** Recursively deletes {@code dir}, ignoring I/O failures. */
    private static void deleteQuietly(FileSystem fs, Path dir) {
        try {
            fs.delete(dir, true);
        } catch (IOException ignored) {
            // Best-effort cleanup: a failure here must not mask the outcome of the save itself.
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy