// com.datastax.data.exploration.common.File2DataTable (Maven / Gradle / Ivy)
package com.datastax.data.exploration.common;
import com.csvreader.CsvReader;
import com.datastax.data.exploration.biz.datatable.DataTable;
import com.datastax.data.exploration.biz.datatable.DataType;
import com.datastax.data.exploration.util.Consts;
import org.javatuples.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
/**
 * Static helpers that build a {@link DataTable} for a data set: the column
 * schema comes from {@code DataOperator.readTitleAndType(id)} and the rows are
 * read from a delimited file with {@link CsvReader}.
 *
 * <p>Every loader returns {@code null} when the schema cannot be read or the
 * data file does not exist, so callers must null-check the result.
 */
public class File2DataTable {

    private static final Logger logger = LoggerFactory.getLogger(File2DataTable.class);

    /**
     * Reads the complete data set produced by sampling.
     *
     * @param id data set id
     * @return the populated table, or {@code null} if the schema or the data file is unavailable
     */
    public static DataTable entireTable(String id) {
        return table(id, FileUrl.entireData(id));
    }

    /**
     * Reads the exact (accurate) data set produced by sampling.
     *
     * @param id data set id
     * @return the populated table, or {@code null} if the schema or the data file is unavailable
     */
    public static DataTable exactTable(String id) {
        return table(id, FileUrl.exactData(id));
    }

    /**
     * Reads the error data set produced by sampling.
     *
     * @param id data set id
     * @return the populated table, or {@code null} if the schema or the data file is unavailable
     */
    public static DataTable errorTable(String id) {
        return table(id, FileUrl.errorData(id));
    }

    /**
     * Builds the schema for {@code id} and appends every record of
     * {@code filePath} to it as a row.
     *
     * @param id       data set id
     * @param filePath path of the data file to load
     * @return the populated table, or {@code null} if the schema or the data file is unavailable
     */
    private static DataTable table(String id, String filePath) {
        DataTable table = tableSchema(id);
        if (table == null) {
            return null;
        }
        List<String[]> data = data(filePath);
        if (data == null) {
            return null;
        }
        // The first record is NOT dropped here: the former header removal
        // (data.remove(0)) is disabled, so all records become rows.
        try {
            for (String[] line : data) {
                table.getRows().add(line);
            }
        } catch (Exception e) {
            // Best effort: rows added before the failure stay in the table.
            logger.error(e.getMessage(), e);
        }
        return table;
    }

    /**
     * Returns an empty table that carries only the column definitions of the data set.
     *
     * @param id data set id; also used as the table name
     * @return a table with one column per title/type pair, or {@code null}
     *         when {@code DataOperator.readTitleAndType(id)} returns {@code null}
     */
    public static DataTable tableSchema(String id) {
        DataTable table = new DataTable();
        table.setTableName(id);
        Pair<String[], DataType[]> headers = DataOperator.readTitleAndType(id);
        if (headers == null) {
            return null;
        }
        String[] titles = headers.getValue0();
        DataType[] dataTypes = headers.getValue1();
        // Titles and types are paired by index.
        for (int i = 0; i < titles.length; i++) {
            table.getColumns().add(titles[i], dataTypes[i]);
        }
        return table;
    }

    /**
     * Reads all records of a delimited file, using {@code Consts.COA} as the
     * delimiter and UTF-8 as the charset.
     *
     * @param filePath path of the data file
     * @return the records in file order; {@code null} if the file does not
     *         exist. If reading fails part-way, the error is logged and the
     *         records read so far are returned.
     */
    private static List<String[]> data(String filePath) {
        File file = new File(filePath);
        List<String[]> data = new ArrayList<>();
        try {
            if (!file.exists()) {
                logger.info("{} data file is not exist!", file.getCanonicalPath());
                return null;
            }
            CsvReader reader = new CsvReader(filePath, Consts.COA, Charset.forName("UTF-8"));
            try {
                while (reader.readRecord()) {
                    data.add(reader.getValues());
                }
            } finally {
                // Release the underlying file handle even when a read fails.
                reader.close();
            }
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
        return data;
    }
}