All downloads are free. Search and download functionality uses the official Maven repository.

com.datastax.insight.ml.spark.data.dataset.DataSetWriter Maven / Gradle / Ivy

The newest version!
package com.datastax.insight.ml.spark.data.dataset;

import com.datastax.insight.core.entity.Cache;
import com.datastax.insight.core.entity.DBSource;
import com.datastax.insight.spec.Operator;
import com.datastax.insight.core.service.PersistService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;

import java.net.URI;
import java.util.List;
import java.util.Properties;

/**
 * Utility operator for persisting Spark {@link Dataset}s, either to files on an
 * HDFS-compatible filesystem (optionally merged into a single output file) or to
 * a JDBC database table.
 */
public class DataSetWriter implements Operator {

    /**
     * Saves the dataset to {@code path} as a single merged file, including a header row.
     *
     * @see #save(Dataset, String, String, String, boolean)
     */
    public static void save(Dataset<?> dataset, String format, String mode, String path) throws Exception {
        save(dataset, format, mode, path, true);
    }

    /**
     * Writes the dataset to a temporary directory, then merges the Spark part files
     * into a single file at {@code path}, replacing any existing file there. The
     * result location is recorded afterwards via {@link PersistService}.
     *
     * @param dataset    dataset to persist
     * @param format     Spark output format (e.g. "csv", "parquet")
     * @param mode       Spark save mode (e.g. "overwrite", "append")
     * @param path       target file path; any existing file or directory here is deleted
     * @param withHeader whether to emit a header row (honored by formats such as CSV)
     * @throws Exception on URI, filesystem, or write failures
     */
    public static void save(Dataset<?> dataset, String format, String mode, String path,
                            boolean withHeader) throws Exception {
        Configuration conf = new Configuration();
        // FileSystem.get returns a cached, shared instance; do not close it here.
        FileSystem hdfs = FileSystem.get(new URI(path), conf);
        Path target = new Path(path);
        Path temp = new Path(hdfs.getWorkingDirectory(), "temp_" + System.currentTimeMillis());

        DataFrameWriter<?> writer = dataset.write();
        if (withHeader) {
            writer.option("header", true);
        }
        writer.format(format).mode(mode).save(temp.toString());

        try {
            if (hdfs.exists(target)) {
                hdfs.delete(target, true);
            }
            // Merge Spark's part files into one file; deleteSource=true makes
            // copyMerge remove the temp directory on success.
            FileUtil.copyMerge(hdfs, temp, hdfs, target, true, conf, null);
        } finally {
            // Make sure the temp directory does not leak if the merge failed.
            if (hdfs.exists(temp)) {
                hdfs.delete(temp, true);
            }
        }

        PersistService.invoke("com.datastax.insight.agent.dao.InsightDAO",
                "saveFlowResult",
                new String[]{Long.class.getTypeName(), String.class.getTypeName()},
                new Object[]{PersistService.getFlowId(), path});
    }

    /**
     * Writes the dataset directly via Spark, producing a directory of part files at
     * {@code path} — unlike {@link #save}, no single-file merge is performed.
     *
     * @param dataset    dataset to persist
     * @param format     Spark output format
     * @param mode       Spark save mode
     * @param path       output directory path
     * @param withHeader whether to emit a header row
     * @throws Exception on write failures
     */
    public static void write(Dataset<?> dataset, String format, String mode, String path,
                             boolean withHeader) throws Exception {
        DataFrameWriter<?> writer = dataset.write();
        if (withHeader) {
            writer.option("header", true);
        }
        writer.format(format).mode(mode).save(path);
    }

    /**
     * Writes the dataset to a JDBC table using the given connection definition.
     * Silently does nothing when {@code dbSource} is null (best-effort behavior
     * preserved for existing callers).
     *
     * @param dataset  dataset to persist
     * @param mode     Spark save mode
     * @param dbSource connection definition (driver, url, credentials); may be null
     * @param table    target table name
     */
    public static void jdbc(Dataset<?> dataset, String mode, DBSource dbSource, String table) {
        if (dbSource != null) {
            Properties properties = new Properties();
            properties.put("driver", dbSource.getDriver());
            properties.put("user", dbSource.getUser());
            properties.put("password", dbSource.getPassword());
            dataset.write().mode(mode).jdbc(dbSource.getUrl(), table, properties);
        }
    }

    /**
     * Writes the dataset to a JDBC table, resolving the connection from the cached
     * data-source list by id. No-op when the id is unknown or the cache is absent.
     *
     * @param dataset dataset to persist
     * @param mode    Spark save mode
     * @param dbID    numeric id of the cached {@link DBSource}, as a string
     * @param table   target table name
     */
    public static void jdbc(Dataset<?> dataset, String mode, String dbID, String table) {
        jdbc(dataset, mode, getDBSource(dbID), table);
    }

    /**
     * Looks up a {@link DBSource} by id in the "dbsources" cache.
     *
     * @param id numeric id as a string
     * @return the matching source, or null when the cache is absent or has no match
     * @throws NumberFormatException if {@code id} is not a valid integer
     */
    private static DBSource getDBSource(String id) {
        // The cache stores an untyped list; this cast is unchecked by necessity.
        @SuppressWarnings("unchecked")
        List<DBSource> dbSourceList = (List<DBSource>) Cache.getCache("dbsources");
        if (dbSourceList == null) {
            return null;
        }
        int numericId = Integer.parseInt(id); // parse once, not per element
        return dbSourceList.stream()
                .filter(d -> d.getId() == numericId)
                .findFirst()
                .orElse(null);
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy