All downloads are free. The search and download features use the official Maven repository.

com.datastax.insight.ml.spark.data.DatumLoader Maven / Gradle / Ivy

package com.datastax.insight.ml.spark.data;

import com.datastax.insight.core.entity.Cache;
import com.datastax.insight.core.entity.Datum;
import com.datastax.insight.spec.Operator;
import com.datastax.insight.ml.spark.data.dataset.DataSetLoader;
import com.datastax.util.io.FileUtil;
import com.google.common.base.Strings;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

import javax.validation.constraints.NotNull;
import java.io.*;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Resolves cached {@link Datum} entries to their paths and loads their contents,
 * either through {@link DataSetLoader} or as plain UTF-8 text.
 *
 * Created by huangping on 17-3-8.
 */
public class DatumLoader implements Operator {

    /** Key passed to {@link Cache#getCache} to obtain the list of datum entries. */
    private static final String DATUM_CACHE_KEY = "datum";

    /** Terminator appended after every line by {@link #readFileString(String)}. */
    private static final String LINE_TERMINATOR = "\r\n";

    /** Local directory into which {@code hdfs:} files are copied before being read. */
    private static final String LOCAL_TMP_DIR = "/tmp/";

    /**
     * Returns the path of the cached datum with the given id.
     *
     * @param id datum id, compared against {@code String.valueOf(datum.getId())}
     * @return the datum's non-empty path
     * @throws IllegalArgumentException if the cache holds no datum with this id and a
     *         non-empty path (previously this surfaced as a bare NullPointerException)
     */
    public static String loadDatum(String id) {
        Datum datum = getDatum(id);
        if (datum == null) {
            throw new IllegalArgumentException("No datum with a non-empty path found for id: " + id);
        }
        return datum.getPath();
    }

    /**
     * Resolves the datum's path and loads it through {@link DataSetLoader#load}.
     *
     * NOTE(review): the raw {@code Dataset} return type is kept to leave the signature
     * untouched; presumably {@code Dataset<Row>} - confirm against DataSetLoader.load.
     *
     * @param id     datum id, resolved with {@link #loadDatum(String)}
     * @param format format argument forwarded to the loader
     * @param header header argument forwarded to the loader
     * @param sep    separator argument forwarded to the loader
     * @return whatever {@code DataSetLoader.load} returns
     * @throws IllegalArgumentException if the id cannot be resolved
     */
    public static Dataset loadDatum(@NotNull String id, @NotNull String format, Boolean header, String sep) {
        String path = loadDatum(id);
        // The 'true' flag presumably marks the path as a datum path - TODO confirm in DataSetLoader.
        return DataSetLoader.load(format, true, path, header, sep);
    }

    /**
     * Loads the given path through {@link DataSetLoader#load}, passing {@code false}
     * where the datum-based overload passes {@code true}.
     *
     * @param path   path forwarded to the loader as-is
     * @param format format argument forwarded to the loader
     * @param header header argument forwarded to the loader
     * @param sep    separator argument forwarded to the loader
     * @return whatever {@code DataSetLoader.load} returns
     */
    public static Dataset loadFile(@NotNull String path, @NotNull String format, Boolean header, String sep) {
        return DataSetLoader.load(format, false, path, header, sep);
    }

    /**
     * Reads the text content of the datum with the given id. When the path returned by
     * {@code DataSetLoader.getDataPath} starts with {@code hdfs:}, the file is first
     * copied to {@code /tmp/<id>} and read from there.
     *
     * @param id datum id
     * @return the file's lines, each followed by {@code "\r\n"}; empty if nothing could be read
     * @throws IllegalArgumentException if the id cannot be resolved
     */
    public static String loadFile(@NotNull String id) {
        String dataPath = DataSetLoader.getDataPath(loadDatum(id));
        if (dataPath.startsWith("hdfs:")) {
            String localCopy = LOCAL_TMP_DIR + id;
            Hdfs2Local(dataPath, localCopy);
            return readFileString(localCopy);
        }
        return readFileString(dataPath);
    }

    /**
     * Variant of {@link #loadFile(String)} that reads the resolved path directly,
     * without the {@code hdfs:} copy step.
     *
     * @param id datum id
     * @return the file's lines, each followed by {@code "\r\n"}; empty if nothing could be read
     * @throws IllegalArgumentException if the id cannot be resolved
     */
    public static String loadFile2(@NotNull String id) {
        String dataPath = DataSetLoader.getDataPath(loadDatum(id));
        return readFileString(dataPath);
    }

    /**
     * Finds the first cached datum whose id matches and whose path is non-empty.
     *
     * @param id datum id, compared against {@code String.valueOf(datum.getId())}
     * @return the matching datum, or {@code null} if the cache entry is missing or has no match
     */
    private static Datum getDatum(String id) {
        // The cache is untyped; entries under this key are assumed to be Datum instances,
        // which is what the filter below requires.
        @SuppressWarnings("unchecked")
        List<Datum> cached = (List<Datum>) Cache.getCache(DATUM_CACHE_KEY);
        if (cached == null) {
            return null;
        }
        return cached.stream()
                .filter(d -> String.valueOf(d.getId()).equals(id) && !Strings.isNullOrEmpty(d.getPath()))
                .findFirst()
                .orElse(null);
    }

    /**
     * Reads a text source and joins its lines, appending {@code "\r\n"} after every line
     * (including the last one).
     *
     * @param path file path or classpath resource name, see {@link #readFileText(String)}
     * @return the joined text; empty if nothing could be read
     */
    public static String readFileString(String path) {
        StringBuilder joined = new StringBuilder();
        for (String line : readFileText(path)) {
            joined.append(line).append(LINE_TERMINATOR);
        }
        return joined.toString();
    }

    /**
     * Reads all lines of a UTF-8 text source. The path is tried as a file first; if no
     * such file exists it is looked up as a classpath resource relative to {@link FileUtil}.
     *
     * This method is best-effort: it never throws. On failure the error is printed to
     * stderr and the lines read so far (possibly none) are returned.
     *
     * @param path file path or classpath resource name
     * @return a mutable list of the lines that could be read
     */
    public static List<String> readFileText(String path) {
        List<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(openTextSource(path), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            // Kept as stderr output: callers rely on this method not throwing.
            e.printStackTrace();
        }
        return lines;
    }

    /**
     * Opens the file at {@code path}, or the classpath resource of that name when no such file exists.
     *
     * @throws IOException if the path is null, or neither a file nor a resource can be opened
     */
    private static InputStream openTextSource(String path) throws IOException {
        if (path == null) {
            throw new FileNotFoundException("Cannot read text: path is null");
        }
        if (new File(path).exists()) {
            return new FileInputStream(path);
        }
        InputStream resource = FileUtil.class.getResourceAsStream(path);
        if (resource == null) {
            throw new FileNotFoundException("No file or classpath resource found at: " + path);
        }
        return resource;
    }

    /**
     * Copies {@code src} to the local path {@code dst} using the Hadoop {@link FileSystem}
     * selected by the source URI. Best-effort: an {@link IOException} is printed to
     * stderr and otherwise ignored, so the destination may be missing afterwards.
     *
     * @param src source URI, e.g. an {@code hdfs:} location
     * @param dst local destination path
     */
    public static void Hdfs2Local(String src, String dst) {
        try {
            // Not closed here: FileSystem.get may hand back a cached instance shared with
            // other users of the same URI/configuration.
            FileSystem sourceFs = FileSystem.get(URI.create(src), new Configuration());
            sourceFs.copyToLocalFile(new Path(src), new Path(dst));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy