All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This may take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
com.datastax.data.exploration.biz.stat.Pareto Maven / Gradle / Ivy
package com.datastax.data.exploration.biz.stat;
import com.alibaba.fastjson.JSONObject;
import com.datastax.data.exploration.biz.datatable.DataTable;
import com.datastax.data.exploration.biz.datatable.column.NomialColumn;
import com.datastax.data.exploration.common.File2DataTable;
import com.datastax.data.exploration.util.Consts;
import org.javatuples.Pair;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* 帕累托图
*/
public class Pareto {
/**
* 计算帕累托图,并返回展示的数据
*
* @param id 数据集id
* @param group 分组属性
* @param val 统计属性
* @param valSub 分项
* @return 饼图的展示数据
*/
public static List operator(String id, String group, String val, String valSub) {
DataTable table = File2DataTable.exactTable(id);
List> pairs = sortGroup(table, group, val, valSub);
long lineGrpCount = 0;
long lineGrpSubCount = 0;
List list = new ArrayList<>();
for (Pair pair : pairs) {
JSONObject json = new JSONObject();
long count = pair.getValue0();
if (Consts.NONE.equals(valSub)) {
lineGrpCount += count;
json.put("barVal", new long[]{-1, count});
json.put("lineVal", new long[]{-1, lineGrpCount});
} else {
long barGrpCount = ((NomialColumn) table.getColumns().getColumn(group)).aggCount().get(pair.getValue1());
;
lineGrpSubCount += count;
lineGrpCount += barGrpCount;
json.put("barVal", new long[]{count, barGrpCount});
json.put("lineVal", new long[]{lineGrpSubCount, lineGrpCount});
}
json.put("group", pair.getValue1());
list.add(json);
}
return list;
}
private static List> sortGroup(DataTable table, String group, String val, String valSub) {
List> pairs = new ArrayList<>();
if (Consts.NONE.equals(valSub)) {
Map aggCount = ((NomialColumn) table.getColumns().getColumn(group)).aggCount();
aggCount.forEach((k, v) -> pairs.add(new Pair<>(v, k)));
} else {
Map aggCount = aggSubCount(table, group, val, valSub);
aggCount.forEach((k, v) -> {
if(v != null){
pairs.add(new Pair<>(v, k));
}
});
}
return pairs.stream().sorted((p1, p2) -> p2.getValue0().compareTo(p1.getValue0())).collect(Collectors.toList());
}
private static Map aggSubCount(DataTable table, String group, String val, String valueSub) {
Map map = new HashMap<>();
table.getRows().aggList(group).forEach((k, v) -> {
List list = v.parallelStream().map(row -> row.getValue(val)).collect(Collectors.toList());
map.put((String) k, list.parallelStream().collect(Collectors.groupingBy(s -> s, Collectors.counting())).get(valueSub));
});
return map;
}
}