// com.datastax.insight.ml.spark.data.DatumLoader (Maven / Gradle / Ivy)
package com.datastax.insight.ml.spark.data;
import com.datastax.insight.core.entity.Cache;
import com.datastax.insight.core.entity.Datum;
import com.datastax.insight.ml.spark.data.dataset.DataSetLoader;
import com.datastax.insight.spec.Operator;
import com.datastax.util.io.FileUtil;
import com.google.common.base.Strings;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import javax.validation.constraints.NotNull;
import java.io.*;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
/**
* Created by huangping on 17-3-8.
*/
public class DatumLoader implements Operator {
public static String loadDatum(String id) {
return getDatum(id).getPath();
}
public static Dataset loadDatum(@NotNull String id, @NotNull String format, Boolean header, String sep) {
String path = loadDatum(id);
return DataSetLoader.load(format, true,path, header, sep);
}
public static Dataset loadFile(@NotNull String path, @NotNull String format, Boolean header, String sep) {
return DataSetLoader.load(format, false,path, header, sep);
}
public static String loadFile(@NotNull String id) {
String path = loadDatum(id);
String absolutePath = DataSetLoader.getDataPath(path);
String tmpPath = "/tmp/" + id;
if (absolutePath.startsWith("hdfs:")) {
Hdfs2Local(absolutePath,tmpPath);
return readFileString(tmpPath);
}
return readFileString(absolutePath);
}
public static String loadFile2(@NotNull String id) {
String path = loadDatum(id);
String absolutePath = DataSetLoader.getDataPath(path);
return readFileString(absolutePath);
}
private static Datum getDatum(String id) {
List data = (List) Cache.getCache("datum");
if(data != null) {
return data.stream()
.filter(d->String.valueOf(d.getId()).equals(id) && !Strings.isNullOrEmpty(d.getPath()))
.findFirst()
.orElse(null);
}
return null;
}
public static String readFileString(String path) {
StringBuffer sb = new StringBuffer();
Iterator var2 = readFileText(path).iterator();
while(var2.hasNext()) {
String line = (String)var2.next();
sb.append(line + "\r\n");
}
return sb.toString();
}
public static List readFileText(String path) {
BufferedReader bufread = null;
ArrayList texts = new ArrayList();
try {
File file = new File(path);
if (file.exists()) {
InputStreamReader isr = new InputStreamReader(new FileInputStream(path), "UTF-8");
bufread = new BufferedReader(isr);
} else {
InputStream in = FileUtil.class.getResourceAsStream(path);
bufread = new BufferedReader(new InputStreamReader(in, "UTF-8"));
}
try {
String read;
try {
while((read = bufread.readLine()) != null) {
texts.add(read);
}
} catch (IOException var10) {
var10.printStackTrace();
}
} finally {
if (bufread != null) {
bufread.close();
}
}
} catch (Exception var12) {
var12.printStackTrace();
}
return texts;
}
public static void Hdfs2Local(String src, String dst) {
try {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(src), conf);
Path srcPath = new Path(src);
Path dstPath = new Path(dst);
fs.copyToLocalFile(srcPath, dstPath);
} catch (IOException e) {
e.printStackTrace();
}
}
}