package com.neko233.toolchain.ripplex;

import com.neko233.toolchain.common.base.CollectionUtils233;
import com.neko233.toolchain.common.reflect.ReflectUtils233;
import com.neko233.toolchain.ripplex.caculator.Transformer;
import com.neko233.toolchain.ripplex.config.MeasureConfig;
import com.neko233.toolchain.ripplex.constant.AggregateType;
import com.neko233.toolchain.ripplex.orm.Map2InstanceOrm;
import com.neko233.toolchain.ripplex.strategy.merge.MergeStrategy;
import lombok.extern.slf4j.Slf4j;

import java.lang.reflect.Field;
import java.util.*;
import java.util.function.BiFunction;
import java.util.stream.Collectors;

/**
 * Dimension/Measure Theory + ETL Thought
 *
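 * <p>
 * Usage sketch (illustrative only; {@code PlayerStat}, {@code rawStatList} and the column
 * names are hypothetical, and the construction of the MeasureConfig is assumed):
 * <pre>{@code
 * MeasureConfig config = ...; // maps each measure column to an AggregateType and an output column
 * List<PlayerStat> result = RippleX.builder()
 *         .data(rawStatList)                      // raw rows to aggregate
 *         .dimensionColumnNames("region", "job")  // group-by (dimension) columns
 *         .measureConfig(config)                  // how each measure column is merged
 *         .returnType(PlayerStat.class)           // schema of the aggregated rows
 *         .build();
 * }</pre>
 *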
 * @author SolarisNeko on 2022-02-22
 **/
@Slf4j
public class RippleX {

    private static final String DELIMITER = ",";

    // MetaData
    private Class<?> schema;
    // data
    private List<?> dataList;
    // input
    private MeasureConfig measureConfig;
    private List<String> groupFieldNameList;
    private List<String> excludeFieldNameList;
    private List<String> allFieldNameList;
    // output
    private List<String> aggregateFieldNameList;
//    private ArrayList keepColumnNames;

    /**
     * 2. Calculate Needed Data
     */
    private int fieldNameSize = 0;
    private String CURRENT_GROUP_BY_KEY = null;
    /**
     * groupByKey : AggregateDataMap< FieldName: String, value: Object >
     */
    private Map<String, Map<String, Object>> aggregateMapCache = new HashMap<>();


    private RippleX() {
    }

    public static RippleX builder() {
        return new RippleX();
    }

    public <T> RippleX data(List<T> dataList) {
        this.dataList = dataList;
        return this;
    }

    /**
     * Field aggregation: the operation to apply to each measure field (column -> aggregate operation map).
     */
    public RippleX measureConfig(MeasureConfig measureConfig) {
        this.measureConfig = measureConfig;
        return this;
    }

    public RippleX dimensionColumnNames(String... groupColumnNames) {
        this.groupFieldNameList = Arrays.asList(groupColumnNames);
        return this;
    }

    public RippleX dimensionColumnNames(List<String> groupColumnNames) {
        this.groupFieldNameList = groupColumnNames;
        return this;
    }

    public RippleX excludeColumnNames(String... excludeColumnNames) {
        this.excludeFieldNameList = Arrays.asList(excludeColumnNames);
        return this;
    }

    public RippleX excludeColumnNames(List<String> excludeColumnNames) {
        this.excludeFieldNameList = excludeColumnNames;
        return this;
    }

    public RippleX returnType(Class<?> schemaClass) {
        this.schema = schemaClass;
        return this;
    }

    /**
     * build
     *
     * @return the List of results after group-by aggregation
     */
    public <T> List<T> build() {
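        // Pipeline (as implemented below): validate the schema class, derive the field /
        // measure column lists from its declared fields, group the rows by their dimension
        // values, merge the measures within each group, then map each aggregated row back
        // to an instance of the schema class.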
        checkSchema();
        // 1. set config options
        boolean isConfigSuccess = rememberAndSetConfigOptions();
        if (!isConfigSuccess) {
            return new ArrayList<>();
        }

        List<Map<String, Object>> aggregateDataMapList = getAggregateDataMapList();
        // orm
        return (List<T>) Map2InstanceOrm.orm(aggregateDataMapList, schema);

    }

    private List<Map<String, Object>> getAggregateDataMapList() {
        List<Map<String, Object>> dataMapList = dataList.stream()
                .map(obj -> Transformer.transformObject2Map(obj, groupFieldNameList, aggregateFieldNameList))
                .collect(Collectors.toList());

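        // Group rows by a composite key: the dimension values joined with DELIMITER.
        // Illustrative example: dimensions [region, job] and a row {region=asia, job=mage}
        // produce the key "asia,mage".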
        Map<String, List<Map<String, Object>>> groupByDimensionValueMap = dataMapList.stream()
                .collect(Collectors.groupingBy(map -> {
                    List<String> valueStringList = new ArrayList<>();
                    for (String groupColumnName : groupFieldNameList) {
                        String valueString = String.valueOf(map.get(groupColumnName));
                        valueStringList.add(valueString);
                    }
                    return String.join(DELIMITER, valueStringList);
                }));

        // TODO rolling calculation of measures
        return groupByDimensionValueMap.values().stream()
                .map(v -> {
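                    // Fold every row of this group into a single aggregate map: for each measure
                    // column, look up its AggregateType, choose a MergeStrategy for the value's
                    // runtime type, and merge the value into the configured output column.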
                    Map<String, Object> aggMap = new HashMap<>();
                    // all rows in v belong to the same group
                    for (Map<String, Object> dataMap : v) {
                        // measure column name
                        for (String aggColumnName : aggregateFieldNameList) {
                            AggregateType aggregateType = measureConfig.getAggregateType(aggColumnName);
                            if (aggregateType == null) {
                                continue;
                            }
                            // value to roll into the aggregate
                            Object aggValue = dataMap.get(aggColumnName);
                            if (aggValue == null) {
                                continue;
                            }
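                            // Pick the merge strategy for this aggregate type, then the concrete
                            // merge (BiFunction) for the runtime type of the value being aggregated.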
                            MergeStrategy strategy = MergeStrategy.choose(aggregateType);
                            BiFunction<Object, Object, Object> mergeBiFunction = strategy
                                    .getMergeBiFunction(aggValue.getClass());

                            // input -> Merge Algorithm -> output
                            String outputColumnName = measureConfig.getOutputColumnName(aggColumnName);
                            if (aggregateType == AggregateType.COUNT) {
                                // special case: COUNT produces 2 outputs: col -> col, and count(col) -> count
                                if (aggColumnName.equals(outputColumnName)) {
                                    // keep
                                    aggMap.merge(outputColumnName, 1, mergeBiFunction);
                                } else {
                                    aggMap.merge(aggColumnName, aggValue, (v1, v2) -> v1);
                                    // count
                                    aggMap.merge(outputColumnName, 1, mergeBiFunction);
                                }
                            } else if (aggregateType == AggregateType.KEEP_FIRST) {
                                aggMap.merge(aggColumnName, aggValue, (v1, v2) -> v1);
                            } else {
                                aggMap.merge(outputColumnName, aggValue, mergeBiFunction);
                            }
                        }
                    }
                    return aggMap;
                })
                .collect(Collectors.toList());
    }

    private boolean rememberAndSetConfigOptions() {
        List<Field> allColumns = ReflectUtils233.getAllFieldsRecursive(schema);
        if (CollectionUtils233.isEmpty(allColumns)) {
            return false;
        }

        // 1 all field
        allFieldNameList = allColumns.stream().map(Field::getName).collect(Collectors.toList());
        fieldNameSize = allFieldNameList.size();

        // 2 aggregate fields
        aggregateFieldNameList = new ArrayList<>(allFieldNameList);
        // TODO whether to keep the group-by information
//        aggColumnNameList.removeAll(groupColumnNames);
        aggregateFieldNameList.removeAll(Optional.ofNullable(excludeFieldNameList).orElse(new ArrayList<>()));

        // 3
//        List keepColumnNames = new ArrayList<>(aClassAllColumnName);
//        keepColumnNames.removeAll(aggColumnNameList);
//        keepColumnNames.removeAll(Optional.ofNullable(excludeColumnList).orElse(new ArrayList<>()));
//        for (String keepColumnName : keepColumnNames) {
//            measureConfig.set(keepColumnName, AggregateType.KEEP_FIRST, keepColumnName);
//        }
        return true;
    }

    private void checkSchema() {
        if ("Object".equals(schema.getSimpleName())) {
            throw new RuntimeException("Object can't be a schema because it has no fields.");
        }
    }

    /**
     * Try to fetch the already-existing group-by aggregate map from the cache.
     *
     * @param data data
     * @return the cached aggregate map for this data's group-by key, or null if absent
     */
    private Map<String, Object> getMapFromCacheByObjectValues(Object data) {
        List<String> valueStrings = getColumnValueStrList(data, groupFieldNameList);
        CURRENT_GROUP_BY_KEY = String.join(DELIMITER, valueStrings);
        return aggregateMapCache.get(CURRENT_GROUP_BY_KEY);
    }

    private List<String> getColumnValueStrList(Object data, List<String> columnList) {
        List<String> valueStrings = new ArrayList<>();
        for (String col : columnList) {
            Object valueByField = ReflectUtils233.getValueByField(data, col);
            valueStrings.add(String.valueOf(valueByField));
        }
        return valueStrings;
    }


}