All downloads are free. Search and download functionality uses the official Maven repository.

com.datastax.data.exploration.common.File2DataTable Maven / Gradle / Ivy

package com.datastax.data.exploration.common;


import com.csvreader.CsvReader;
import com.datastax.data.exploration.biz.datatable.DataTable;
import com.datastax.data.exploration.biz.datatable.DataType;
import com.datastax.data.exploration.util.Consts;
import org.javatuples.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers that load a data set's delimited data file into a {@link DataTable}.
 *
 * <p>Column definitions come from {@link DataOperator#readTitleAndType(String)}; row
 * values are read from the file located through {@link FileUrl}. Every loader returns
 * {@code null} when either the schema or the data file is unavailable.
 */
public class File2DataTable {

    private static final Logger logger = LoggerFactory.getLogger(File2DataTable.class);

    /**
     * Reads the complete sampled data set.
     *
     * @param id data set id
     * @return the populated table, or {@code null} if the schema or data file is missing
     */
    public static DataTable entireTable(String id) {
        return table(id, FileUrl.entireData(id));
    }

    /**
     * Reads the "exact" (valid) part of the sampled data set.
     *
     * @param id data set id
     * @return the populated table, or {@code null} if the schema or data file is missing
     */
    public static DataTable exactTable(String id) {
        return table(id, FileUrl.exactData(id));
    }

    /**
     * Reads the "error" part of the sampled data set.
     *
     * @param id data set id
     * @return the populated table, or {@code null} if the schema or data file is missing
     */
    public static DataTable errorTable(String id) {
        return table(id, FileUrl.errorData(id));
    }

    /**
     * Builds a table from the schema of {@code id} and the rows stored at {@code filePath}.
     *
     * @param id       data set id, used to resolve the column schema
     * @param filePath path of the data file to load
     * @return the populated table, or {@code null} if the schema or data file is missing
     */
    private static DataTable table(String id, String filePath) {
        DataTable table = tableSchema(id);
        if (table == null) {
            return null;
        }
        List<String[]> rows = data(filePath);
        if (rows == null) {
            return null;
        }
        // NOTE(review): the original had header-row removal (rows.remove(0)) commented out;
        // presumably the data files carry no header line - confirm against the writer side.
        try {
            for (String[] row : rows) {
                table.getRows().add(row);
            }
        } catch (Exception e) {
            // Best effort: keep whatever rows were added before the failure.
            logger.error(e.getMessage(), e);
        }
        return table;
    }

    /**
     * Creates an empty table whose columns match the stored schema of the data set.
     *
     * @param id data set id; also used as the table name
     * @return a table with columns but no rows, or {@code null} if no header
     *         information is available for {@code id}
     */
    public static DataTable tableSchema(String id) {
        DataTable table = new DataTable();
        table.setTableName(id);
        Pair<String[], DataType[]> headers = DataOperator.readTitleAndType(id);
        if (headers == null) {
            return null;
        }
        String[] titles = headers.getValue0();
        DataType[] dataTypes = headers.getValue1();

        for (int i = 0; i < titles.length; i++) {
            table.getColumns().add(titles[i], dataTypes[i]);
        }
        return table;
    }

    /**
     * Reads every record of the delimited file at {@code filePath}.
     *
     * @param filePath path of the data file
     * @return one {@code String[]} per record; {@code null} if the file does not exist.
     *         If reading fails part-way, the error is logged and the records read so
     *         far are returned.
     */
    private static List<String[]> data(String filePath) {
        File file = new File(filePath);
        List<String[]> records = new ArrayList<>();
        CsvReader reader = null;
        try {
            if (!file.exists()) {
                logger.info("{} data file is not exist!", file.getCanonicalPath());
                return null;
            }
            reader = new CsvReader(filePath, Consts.COA, Charset.forName("UTF-8"));
            while (reader.readRecord()) {
                records.add(reader.getValues());
            }
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        } finally {
            // Close in finally so the file handle is released even when a read fails.
            if (reader != null) {
                reader.close();
            }
        }
        return records;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy