All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.quickstart.service.SimpleHiveTestDataCreator Maven / Gradle / Ivy

The newest version!
package com.quickstart.service;

import com.quickstart.dao.JdbcTemplate;
import com.quickstart.dao.JdbcTemplateFactory;
import com.quickstart.model.ColumnMeta;
import com.quickstart.model.JdbcConfig;
import com.quickstart.model.TableMeta;
import lombok.extern.slf4j.Slf4j;

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

@Slf4j
public class SimpleHiveTestDataCreator {
    private JdbcConfig jdbcConfig;
    private JdbcTemplate jdbcTemplate;


    public SimpleHiveTestDataCreator(JdbcConfig jdbcConfig) {
        this.jdbcConfig = jdbcConfig;
        JdbcTemplateFactory.setConfigs(jdbcConfig);
        jdbcTemplate = JdbcTemplateFactory.getJdbcTemplate(jdbcConfig.getName());
    }

    public void createDBIfNotExist(String schema) throws SQLException {
        jdbcTemplate.createDBIfNotExist(schema);
    }

    public void dropTable(String schema, String tableName) throws SQLException {
        String sql = "drop table " + schema + "." + tableName;
        log.info(sql);
        jdbcTemplate.execute(sql);
    }

    public void createTab(String schema, TableMeta tableMeta) throws SQLException {
        List cols = tableMeta.getColumnMetaList().stream().map(e -> String.format(" %s %s comment '%s'", e.getName(), e.getType(), e.getRemark())).collect(Collectors.toList());

        ColumnMeta partitionCol = tableMeta.getPartitionCol();
        String createSql = String.format("create table %s.%s(%s) partitioned by ( %s %s ) stored as parquet tblproperties ('parquet.compression'='snappy') ",
                schema, tableMeta.getName(),
                String.join(",", cols),
                partitionCol.getName(), partitionCol.getType()
        );
        log.info("createSql={}", createSql);
        jdbcTemplate.execute(createSql);
    }

    public void createTestData(String schema, String tabName, String pkCol, int num, int batch, String partition, String etlDT, boolean reCreatBaseTab, List unCludColList) throws SQLException {

        // 检测hive表是否存在
        // 加载表结构
        TableMeta tabMeta = jdbcTemplate.getTabMeta(schema, tabName);
        if (tabMeta.getName() == null) {
            log.error("can not found tab:{}", tabName);
            return;
        }
        TableMeta baseTestTabMeta = jdbcTemplate.getTabMeta(schema, "base_test");
        if (baseTestTabMeta.getName() == null) {
            reCreatBaseTab = true;
        }
        if (reCreatBaseTab) {
            createBaseTestTab(schema);
        }
        createTestData(schema, tabName, pkCol, num, batch, partition, etlDT, unCludColList);
        // 检测表数据量
        String countSql = String.format("select count(1) from %s.%s where etl_dt = '%s'", schema, tabName, etlDT);
        long count = jdbcTemplate.count(countSql);
        log.info("count:{}", count);
        jdbcTemplate.close();

    }

    public void createBaseTestTab(String schema) throws SQLException {
        log.info("start recreatBaseTab:{}.base_test", schema);
        //加载基表
        List baseTabDatas = new ArrayList<>();
        for (int i = 0; i < 100; i++) {
            baseTabDatas.add(String.format(" select 1 as col_1 "));
        }
        jdbcTemplate.execute(String.format("drop table %s.base_test", schema));
        String baseTabCreatSql = String.format("create table %s.base_test as select col_1 from (%s) temp", schema, String.join("union all", baseTabDatas));
        jdbcTemplate.execute(baseTabCreatSql);
    }

    public void createTestData(String schema, String tabName, String pkCol, int num, int batch, String partition, String etlDT, List unCludColList) throws SQLException {
        List cols = jdbcTemplate.getAllCols(schema, tabName);
        // 拼接制造数据的sql
        String overwriteSqlTemplat = "insert overwrite table %s.%s partition(%s = '%s') select %s from (select %s from %s) T group by %s limit %s";
        List randomColVals = new ArrayList<>(cols.size());
        List selCols = new ArrayList<>(cols.size());

        for (ColumnMeta cm : cols) {
            if (unCludColList.contains(cm.getName())) {
                continue;
            }
            if (cm.getType().equalsIgnoreCase("string")) {
                randomColVals.add(String.format("concat('" + cm.getName() + "_',ceiling(rand()*%s)) as %s", cm.getName().equalsIgnoreCase(pkCol) ? Integer.MAX_VALUE : 100, cm.getName()));
            } else {
                randomColVals.add(String.format("ceiling(rand()*%s) as %s", Integer.MAX_VALUE, cm.getName()));
            }
            selCols.add("max(" + cm.getName() + ")");
        }

        List baseTabs = new ArrayList<>(num);
        for (int i = 0; i < num; i++) {
            String subSql = "";
            if (i > 0) {
                subSql = " join ";
            }
            //增大数据量,以防重复数据导致最后的结果条数不对
            int size = (int) (batch * 1.5);
            subSql = subSql + String.format("(select '' from %s.base_test limit %s) base_test_%s", schema, size, i);
            if (i > 0) {
                subSql += " on 1=1 ";
            }
            baseTabs.add(subSql);
        }
        String overwriteSql = String.format(overwriteSqlTemplat, schema, tabName, partition, etlDT,
                String.join(",", selCols),
                String.join(",", randomColVals),
                String.join("", baseTabs),
                pkCol, num * batch);
        log.info("overwriteSql={}", overwriteSql);
        // 执行sql
        jdbcTemplate.execute(overwriteSql);
    }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy