All downloads are free. The search and download features use the official Maven repository.

com.datastax.data.exploration.biz.stat.Pareto Maven / Gradle / Ivy

package com.datastax.data.exploration.biz.stat;

import com.alibaba.fastjson.JSONObject;
import com.datastax.data.exploration.biz.datatable.DataTable;
import com.datastax.data.exploration.biz.datatable.column.NomialColumn;
import com.datastax.data.exploration.common.File2DataTable;
import com.datastax.data.exploration.util.Consts;
import org.javatuples.Pair;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * 帕累托图
 */
public class Pareto {

    /**
     * 计算帕累托图,并返回展示的数据
     *
     * @param id     数据集id
     * @param group  分组属性
     * @param val    统计属性
     * @param valSub 分项
     * @return 饼图的展示数据
     */
    public static List operator(String id, String group, String val, String valSub) {
        DataTable table = File2DataTable.exactTable(id);
        List> pairs = sortGroup(table, group, val, valSub);
        long lineGrpCount = 0;
        long lineGrpSubCount = 0;
        List list = new ArrayList<>();
        for (Pair pair : pairs) {
            JSONObject json = new JSONObject();
            long count = pair.getValue0();
            if (Consts.NONE.equals(valSub)) {
                lineGrpCount += count;
                json.put("barVal", new long[]{-1, count});
                json.put("lineVal", new long[]{-1, lineGrpCount});
            } else {
                long barGrpCount = ((NomialColumn) table.getColumns().getColumn(group)).aggCount().get(pair.getValue1());
                ;
                lineGrpSubCount += count;
                lineGrpCount += barGrpCount;
                json.put("barVal", new long[]{count, barGrpCount});
                json.put("lineVal", new long[]{lineGrpSubCount, lineGrpCount});
            }
            json.put("group", pair.getValue1());
            list.add(json);
        }
        return list;
    }

    private static List> sortGroup(DataTable table, String group, String val, String valSub) {
        List> pairs = new ArrayList<>();
        if (Consts.NONE.equals(valSub)) {
            Map aggCount = ((NomialColumn) table.getColumns().getColumn(group)).aggCount();
            aggCount.forEach((k, v) -> pairs.add(new Pair<>(v, k)));
        } else {
            Map aggCount = aggSubCount(table, group, val, valSub);
            aggCount.forEach((k, v) -> {
                if(v != null){
                    pairs.add(new Pair<>(v, k));
                }
            });
        }
        return pairs.stream().sorted((p1, p2) -> p2.getValue0().compareTo(p1.getValue0())).collect(Collectors.toList());
    }

    private static Map aggSubCount(DataTable table, String group, String val, String valueSub) {
        Map map = new HashMap<>();
        table.getRows().aggList(group).forEach((k, v) -> {
            List list = v.parallelStream().map(row -> row.getValue(val)).collect(Collectors.toList());
            map.put((String) k, list.parallelStream().collect(Collectors.groupingBy(s -> s, Collectors.counting())).get(valueSub));
        });
        return map;
    }
}