All downloads are FREE. The search and download functions use the official Maven repository.
Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
com.neko233.toolchain.ripplex.RippleX Maven / Gradle / Ivy
package com.neko233.toolchain.ripplex;
import com.neko233.toolchain.common.base.CollectionUtils233;
import com.neko233.toolchain.common.reflect.ReflectUtils233;
import com.neko233.toolchain.ripplex.caculator.Transformer;
import com.neko233.toolchain.ripplex.config.MeasureConfig;
import com.neko233.toolchain.ripplex.constant.AggregateType;
import com.neko233.toolchain.ripplex.orm.Map2InstanceOrm;
import com.neko233.toolchain.ripplex.strategy.merge.MergeStrategy;
import lombok.extern.slf4j.Slf4j;
import java.lang.reflect.Field;
import java.util.*;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
/**
* Dimension/Measure Theory + ETL Thought
*
* @author SolarisNeko on 2022-02-22
**/
@Slf4j
public class RippleX {
private static final String DELIMITER = ",";
// MetaData
private Class> schema;
// data
private List> dataList;
// input
private MeasureConfig measureConfig;
private List groupFieldNameList;
private List excludeFieldNameList;
private List allFieldNameList;
// output
private List aggregateFieldNameList;
// private ArrayList keepColumnNames;
/**
* 2. Calculate Needed Data
*/
private int fieldNameSize = 0;
private String CURRENT_GROUP_BY_KEY = null;
/**
* groupByKey : AggregateDataMap< FieldName: String, value: Object >
*/
private Map> aggregateMapCache = new HashMap<>();
private RippleX() {
}
public static RippleX builder() {
return new RippleX();
}
public RippleX data(List dataList) {
this.dataList = dataList;
return this;
}
/**
* 字段的聚合,使用的操作,Map
*/
public RippleX measureConfig(MeasureConfig measureConfig) {
this.measureConfig = measureConfig;
return this;
}
public RippleX dimensionColumnNames(String... groupColumnNames) {
this.groupFieldNameList = Arrays.asList(groupColumnNames);
return this;
}
public RippleX dimensionColumnNames(List groupColumnNames) {
this.groupFieldNameList = groupColumnNames;
return this;
}
public RippleX excludeColumnNames(String... excludeColumnNames) {
this.excludeFieldNameList = Arrays.asList(excludeColumnNames);
return this;
}
public RippleX excludeColumnNames(List excludeColumnNames) {
this.excludeFieldNameList = excludeColumnNames;
return this;
}
public RippleX returnType(Class> schemaClass) {
this.schema = schemaClass;
return this;
}
/**
* build
*
* @return 构建出分组计算后的 List
*/
public List build() {
checkSchema();
// 1. set config options
boolean isConfigSuccess = rememberAndSetConfigOptions();
if (!isConfigSuccess) {
return new ArrayList<>();
}
List> aggregateDataMapList = getAggregateDataMapList();
// orm
return (List) Map2InstanceOrm.orm(aggregateDataMapList, schema);
}
private List> getAggregateDataMapList() {
List> dataMapList = dataList.stream()
.map(obj -> Transformer.transformObject2Map(obj, groupFieldNameList, aggregateFieldNameList))
.collect(Collectors.toList());
Map>> groupByDimensionValueMap = dataMapList.stream()
.collect(Collectors.groupingBy(map -> {
List valueStringList = new ArrayList<>();
for (String groupColumnName : groupFieldNameList) {
String valueString = String.valueOf(map.get(groupColumnName));
valueStringList.add(valueString);
}
return String.join(DELIMITER, valueStringList);
}));
// TODO 指标滚动计算
return groupByDimensionValueMap.values().stream()
.map(v -> {
Map aggMap = new HashMap<>();
// 必定是同一组
for (Map dataMap : v) {
// 指标名
for (String aggColumnName : aggregateFieldNameList) {
AggregateType aggregateType = measureConfig.getAggregateType(aggColumnName);
if (aggregateType == null) {
continue;
}
// 滚动计算的值
Object aggValue = dataMap.get(aggColumnName);
if (aggValue == null) {
continue;
}
MergeStrategy strategy = MergeStrategy.choose(aggregateType);
BiFunction super Object, ? super Object, ?> mergeBiFunction = strategy
.getMergeBiFunction(aggValue.getClass());
// input -> Merge Algorithm -> output
String outputColumnName = measureConfig.getOutputColumnName(aggColumnName);
if (aggregateType == AggregateType.COUNT) {
// 特殊逻辑, COUNT 会产出 2 个数据, 分别是 col -> col, count(job) -> count
if (aggColumnName.equals(outputColumnName)) {
// keep
aggMap.merge(outputColumnName, 1, mergeBiFunction);
} else {
aggMap.merge(aggColumnName, aggValue, (v1, v2) -> v1);
// count
aggMap.merge(outputColumnName, 1, mergeBiFunction);
}
} else if (aggregateType == AggregateType.KEEP_FIRST) {
aggMap.merge(aggColumnName, aggValue, (v1, v2) -> v1);
} else {
aggMap.merge(outputColumnName, aggValue, mergeBiFunction);
}
}
}
return aggMap;
})
.collect(Collectors.toList());
}
private boolean rememberAndSetConfigOptions() {
List allColumns = ReflectUtils233.getAllFieldsRecursive(schema);
if (CollectionUtils233.isEmpty(allColumns)) {
return false;
}
// 1 all field
allFieldNameList = allColumns.stream().map(Field::getName).collect(Collectors.toList());
fieldNameSize = allFieldNameList.size();
// 2 aggregate fields
aggregateFieldNameList = new ArrayList<>(allFieldNameList);
// TODO 是否保留 group By 信息
// aggColumnNameList.removeAll(groupColumnNames);
aggregateFieldNameList.removeAll(Optional.ofNullable(excludeFieldNameList).orElse(new ArrayList<>()));
// 3
// List keepColumnNames = new ArrayList<>(aClassAllColumnName);
// keepColumnNames.removeAll(aggColumnNameList);
// keepColumnNames.removeAll(Optional.ofNullable(excludeColumnList).orElse(new ArrayList<>()));
// for (String keepColumnName : keepColumnNames) {
// measureConfig.set(keepColumnName, AggregateType.KEEP_FIRST, keepColumnName);
// }
return true;
}
private void checkSchema() {
if ("Object".equals(schema.getSimpleName())) {
throw new RuntimeException("Object can't be a schema because it have no fields.");
}
}
/**
* 尝试获取已经存在的 GroupByMap
*
* @param data data
* @return
*/
private Map getMapFromCacheByObjectValues(Object data) {
List valueStrings = getColumnValueStrList(data, groupFieldNameList);
CURRENT_GROUP_BY_KEY = String.join(DELIMITER, valueStrings);
return aggregateMapCache.get(CURRENT_GROUP_BY_KEY);
}
private List getColumnValueStrList(Object data, List columnList) {
List valueStrings = new ArrayList<>();
for (String col : columnList) {
Object valueByField = ReflectUtils233.getValueByField(data, col);
valueStrings.add(String.valueOf(valueByField));
}
return valueStrings;
}
}