// com.datastax.data.exploration.biz.stat.Pareto (Maven / Gradle / Ivy artifact listing)
// The newest version.
package com.datastax.data.exploration.biz.stat;
import com.alibaba.fastjson.JSONObject;
import com.datastax.data.exploration.biz.datatable.DataTable;
import com.datastax.data.exploration.biz.datatable.column.NomialColumn;
import com.datastax.data.exploration.common.File2DataTable;
import com.datastax.data.exploration.util.Consts;
import org.javatuples.Pair;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* Pareto chart: builds the bar and cumulative-line data series for display.
*/
public class Pareto {
/**
 * Computes the Pareto chart series for a dataset and returns the data to display.
 *
 * <p>One JSON object is produced per group, in the order delivered by
 * {@code sortGroup} (count descending). Each object has three entries:
 * <ul>
 *   <li>{@code "group"} - the group value;</li>
 *   <li>{@code "barVal"} - a {@code long[2]} with the bar heights;</li>
 *   <li>{@code "lineVal"} - a {@code long[2]} with the running (cumulative) totals.</li>
 * </ul>
 * When {@code valSub} equals {@code Consts.NONE}, slot 0 of both arrays is the
 * placeholder {@code -1} and slot 1 carries the group count / running group total.
 * Otherwise slot 0 carries the sub-item count / running sub-item total and slot 1
 * carries the whole-group count / running whole-group total.
 *
 * @param id     dataset id
 * @param group  grouping attribute; the column of this name is cast to {@code NomialColumn}
 * @param val    statistic attribute (only forwarded to {@code sortGroup} here)
 * @param valSub sub-item; {@code Consts.NONE} selects the variant without a sub-item breakdown
 * @return the Pareto chart display data, one JSON object per group
 */
public static List<JSONObject> operator(String id, String group, String val, String valSub) {
    DataTable table = File2DataTable.exactTable(id);
    List<Pair<Long, String>> pairs = sortGroup(table, group, val, valSub);

    // Both the mode check and the per-group totals are loop-invariant, so resolve them once.
    // The totals are only looked up when they will actually be read (sub-item mode, non-empty result).
    boolean withoutSubItem = Consts.NONE.equals(valSub);
    Map<String, Long> groupTotals = (withoutSubItem || pairs.isEmpty())
            ? null
            : ((NomialColumn) table.getColumns().getColumn(group)).aggCount();

    long lineGrpCount = 0;
    long lineGrpSubCount = 0;
    List<JSONObject> list = new ArrayList<>();
    for (Pair<Long, String> pair : pairs) {
        JSONObject json = new JSONObject();
        long count = pair.getValue0();
        if (withoutSubItem) {
            lineGrpCount += count;
            // -1 marks the unused sub-item slot.
            json.put("barVal", new long[]{-1, count});
            json.put("lineVal", new long[]{-1, lineGrpCount});
        } else {
            long barGrpCount = groupTotals.get(pair.getValue1());
            lineGrpSubCount += count;
            lineGrpCount += barGrpCount;
            json.put("barVal", new long[]{count, barGrpCount});
            json.put("lineVal", new long[]{lineGrpSubCount, lineGrpCount});
        }
        json.put("group", pair.getValue1());
        list.add(json);
    }
    return list;
}
private static List> sortGroup(DataTable table, String group, String val, String valSub) {
List> pairs = new ArrayList<>();
if (Consts.NONE.equals(valSub)) {
Map aggCount = ((NomialColumn) table.getColumns().getColumn(group)).aggCount();
aggCount.forEach((k, v) -> pairs.add(new Pair<>(v, k)));
} else {
Map aggCount = aggSubCount(table, group, val, valSub);
aggCount.forEach((k, v) -> {
if(v != null){
pairs.add(new Pair<>(v, k));
}
});
}
return pairs.stream().sorted((p1, p2) -> p2.getValue0().compareTo(p1.getValue0())).collect(Collectors.toList());
}
private static Map aggSubCount(DataTable table, String group, String val, String valueSub) {
Map map = new HashMap<>();
table.getRows().aggList(group).forEach((k, v) -> {
List