All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.ac.shef.dcs.sti.TODO.evaluation.DataStats Maven / Gradle / Ivy

The newest version!
package uk.ac.shef.dcs.sti.TODO.evaluation;

import uk.ac.shef.dcs.sti.util.FileUtils;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Created with IntelliJ IDEA.
 * User: zqz
 * Date: 23/02/14
 * Time: 21:45
 * To change this template use File | Settings | File Templates.
 */
public class DataStats {

    public static void main(String[] args) throws IOException {
        /*calculate_average_query_time("D:\\Work\\lodie/lodie.LOG");
        System.exit(0);*/

        /////////////////////////////////////////////// BING SEARCH CONVERGENCE ////////////////////////////////////////////////////////////
        /*calculate_ws_converge("E:\\Data\\table_annotation\\efficiency_analysis_data\\SUBCOL_FINAL_limaye200_convergence.txt",
                "D:\\Work\\lodie\\tmp_result/convergence_ws_limaye200.csv");
        calculate_ws_converge("E:\\Data\\table_annotation\\efficiency_analysis_data\\SUBCOL_FINAL_limayeall_convergence.txt",
                "D:\\Work\\lodie\\tmp_result/convergence_ws_limayeall.csv");
        calculate_ws_converge("E:\\Data\\table_annotation\\efficiency_analysis_data\\SUBCOL_FINAL_musicbrainz_convergence.txt",
                "D:\\Work\\lodie\\tmp_result/convergence_ws_musicbrainz.csv");

        System.exit(0);*/

        ///////////////////////////////////////////////---DONE--- i-inf CONVERGENCE in column interpretation////////////////////////////////////////////////////////////
        /*calculate_converge_column_interpretation(
                "E:\\Data\\table_annotation\\limaye200\\200_tables_regen\\raw",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/random_nostop.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/random_nostop.csv");
        calculate_converge_column_interpretation(
                "E:\\Data\\table_annotation\\limaye200\\200_tables_regen\\raw",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/nonempty.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/nonempty.csv");
        calculate_converge_column_interpretation(
                "E:\\Data\\table_annotation\\limaye200\\200_tables_regen\\raw",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/tokens.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/tokens.csv");
        calculate_converge_column_interpretation(
                "E:\\Data\\table_annotation\\limaye200\\200_tables_regen\\raw",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/namelength.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/namelength.csv");
        calculate_converge_column_interpretation(
                "E:\\Data\\table_annotation\\limaye200\\200_tables_regen\\raw",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/random.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/random.csv");
        calculate_converge_column_interpretation(
                "E:\\Data\\table_annotation\\limaye200\\200_tables_regen\\raw",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/ospd_random.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200/ospd_random.csv");
        System.exit(0);


        calculate_converge_column_interpretation(
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/random.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/random.csv");
        calculate_converge_column_interpretation(
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/nonempty.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/nonempty.csv");
        calculate_converge_column_interpretation(
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/tokens.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/tokens.csv");
        calculate_converge_column_interpretation(
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/namelength.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/namelength.csv");
        calculate_converge_column_interpretation(
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/combined.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/combined.csv");
        calculate_converge_column_interpretation(
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/ospd_random.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_baseline\\limaye200/ospd_random.csv");
        System.exit(0);



        calculate_converge_column_interpretation(
                "E:\\Data\\table_annotation\\limaye_sample\\200_tables_regen",
                "E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limaye200_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/learning_phase_converengece-limaye200-tm.csv");
        calculate_converge_column_interpretation(
                "limaye/all",
                "E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limayeall_tm_sysout.LOG",
                "D:\\Work\\lodie\\tmp_result/learning_phase_converengece-limayeall-tm.csv");
        calculate_converge_column_interpretation(
                "musicbrainz_raw/",
                "E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_mb_tm_sysout.LOG",
                "D:\\Work\\lodie\\tmp_result/learning_phase_converengece-musicbrainz-tm.csv");
        calculate_converge_column_interpretation(
                "imdb_raw/",
                "E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_imdb_tm_sysout.LOG",
                "D:\\Work\\lodie\\tmp_result/learning_phase_converengece-imdb-tm.csv");
        System.exit(0);*/


        /////////////////////////////////////////////// ---done--- Candidate counting (overall, runPreliminaryColumnClassifier-consolidate only, new entity at update, all entity at update) ////////////////////////////////////////////////////////////
        calculate_entity_candidate_savings(
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\random_nostop.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\candidate_savings_random_nostop.csv");
        calculate_entity_candidate_savings(
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\random.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\candidate_savings_random.csv");
        calculate_entity_candidate_savings(
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\ospd_random.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\candidate_savings_ospd_random.csv");
        calculate_entity_candidate_savings(
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\nonempty.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\candidate_savings_nonempty.csv");
        calculate_entity_candidate_savings(
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\tokens.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\candidate_savings_tokens.csv");
        calculate_entity_candidate_savings(
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\namelength.LOG",
                "D:\\Work\\lodie\\tmp_result\\ospd_tm_iswc\\limaye200\\candidate_savings_namelength.csv");



        System.exit(0);


        calculate_entity_candidate_savings(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_limaye200_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result\\entity_reduction_overall-limaye200-tm.csv");
        calculate_entity_candidate_savings_update_only(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_limaye200_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result\\entity_reduction_learning_consol-limaye200-tm.csv");
        calculate_new_entity_candidate_at_update(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_limaye200_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/entity_new_in_update-limaye200-tm.csv"
        );
        calculate_old_entity_candidate_at_update(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_limaye200_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/entity_old_in_update-limaye200-tm.csv"
        );

        calculate_entity_candidate_savings(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_limayeall_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result\\entity_reduction_overall-limayeall-tm.csv");
        calculate_entity_candidate_savings_update_only(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_limayeall_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result\\entity_reduction_learning_consol-limayeall-tm.csv");
        calculate_new_entity_candidate_at_update(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_limayeall_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/entity_new_in_update-limayeall-tm.csv"
        );
        calculate_old_entity_candidate_at_update(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_limayeall_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/entity_old_in_update-limayeall-tm.csv"
        );

        calculate_entity_candidate_savings(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_imdb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result\\entity_reduction_overall-imdb-tm.csv");
        calculate_entity_candidate_savings_update_only(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_imdb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result\\entity_reduction_learning_consol-imdb-tm.csv");
        calculate_new_entity_candidate_at_update(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_imdb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/entity_new_in_update-imdb-tm.csv"
        );
        calculate_old_entity_candidate_at_update(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_imdb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/entity_old_in_update-imdb-tm.csv"
        );

        calculate_entity_candidate_savings(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_mb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result\\entity_reduction_overall-musicbrainz-tm.csv");
        calculate_entity_candidate_savings_update_only(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_mb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result\\entity_reduction_learning_consol-musicbrainz-tm.csv");
        calculate_new_entity_candidate_at_update(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_mb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/entity_new_in_update-musicbrainz-tm.csv"
        );
        calculate_old_entity_candidate_at_update(
                "E:\\Data\\table_annotation\\efficiency_analysis_data\\FINAL_mb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/entity_old_in_update-musicbrainz-tm.csv"
        );
        System.exit(0);


        /////////////////////////////////////////// -----------done ------------- candidate counting baseline //////////////////////////////////
        /*calculate_entity_candidate_savings_baseline("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limaye200_bs_sl_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/candidate_entity_limaye200_bs_sl.csv");
        calculate_entity_candidate_savings_baseline("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limayeall_bs_sl_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/candidate_entity_limayeall_bs_sl.csv");
        calculate_entity_candidate_savings_baseline("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_imdb_bs_sl_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/candidate_entity_imdb_bs_sl.csv");
        calculate_entity_candidate_savings_baseline("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_mb_bs_sl_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/candidate_entity_mb_bs_sl.csv");
        System.exit(0);*/

        /////////////////////////////////////////// UNIQUE ENTITIES processed
        /*calculate_entity_candidate_unique_for_calculating_running_time
                ("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limaye200_bs_sl_sysout.txt");
        calculate_entity_candidate_unique_for_calculating_running_time
                ("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limaye200_tm_sysout.txt");*/
        calculate_entity_candidate_unique_for_calculating_running_time
                ("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limayeall_bs_sl_sysout.txt");
        /*calculate_entity_candidate_unique_for_calculating_running_time
                ("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limayeall_tm_sysout.txt");
        calculate_entity_candidate_unique_for_calculating_running_time
                ("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_mb_tm_sysout.txt");
        calculate_entity_candidate_unique_for_calculating_running_time
                ("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_mb_base_sl_sysout.txt");
        calculate_entity_candidate_unique_for_calculating_running_time
                ("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_imdb_tm_sysout.txt");
        calculate_entity_candidate_unique_for_calculating_running_time
                ("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_imdb_base_sl_sysout.txt");*/
        System.exit(0);


        /////////////////////////////////////////// ----- done ----- iterations at update
        calculate_iterations_in_update("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limaye200_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/iterations_in_update-limaye200-tm.csv");
        calculate_iterations_in_update("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_limayeall_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/iterations_in_update-limayeall-tm.csv");
        calculate_iterations_in_update("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_imdb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/iterations_in_update-imdb-tm.csv");
        calculate_iterations_in_update("E:\\Data\\table_annotation\\efficiency_analysis_data/FINAL_mb_tm_sysout.txt",
                "D:\\Work\\lodie\\tmp_result/iterations_in_update-musicbrainz-tm.csv");

        System.exit(0);


    }

    private static void count_NE_cells(String in_system_out_file) throws IOException {
        List lines = FileUtils.readList(in_system_out_file, false);
        boolean stopCounting = false;

        int count = 0;

        int currentCol = -1;
        int totalCol = 0;
        int maxRow = 0;
        for (String l : lines) {

            if (l.contains("_E:\\")) {
                //System.out.println(l);
                System.out.println(totalCol + "," + maxRow + "," + totalCol * maxRow);

                currentCol = -1;
                totalCol = 0;
                maxRow = 0;
                //consolidate numbers
            }

            if (l.contains(">> Column=")) {
                String col = l.split("=")[1].trim();
                int c = Integer.valueOf(col) + 1;
                if (c != currentCol)
                    totalCol++;
            }
            if (l.contains(" position at ")) {
                int start_pos = l.indexOf("(");
                String nl = l.substring(start_pos + 1);
                int row = Integer.valueOf(nl.split(",")[0].trim());
                if (row > maxRow)
                    maxRow = row;
            }

            if (l.contains("UPDATE ITERATION "))
                stopCounting = true;
            if (l.contains("UPDATE STABLIZED AFTER"))
                stopCounting = false;

            if (!stopCounting) {
                if (l.contains(" position at ("))
                    count++;
            }
        }
        System.out.println(totalCol + "," + maxRow + "," + totalCol * maxRow);
        System.out.println(count);
    }

    private static void calculate_new_entity_candidate_at_update(String tableminer_convergence_log,
                                                                 String outfile) throws IOException {
        PrintWriter p = new PrintWriter(outfile);
        p.println("Initial, Reduced to");

        List lines = FileUtils.readList(tableminer_convergence_log, false);

        boolean count = false;

        int initial = 0, reduced = 0;
        for (String l : lines) {
            if (l.contains("BACKWARD UPDATE...")) {
                count = true;
                continue;
            }
            if (l.contains("FORWARD LEARNING")) {
                count = false;
                continue;
            }

            if (count && l.contains("(QUERY_KB:")) {
                String[] parts = l.trim().split(":");
                String value = parts[1].trim();
                String[] vs = value.split("\\s+");
                initial = Integer.valueOf(vs[0].trim());
                reduced = Integer.valueOf(vs[2].trim());
                if (reduced == 0) {
                    initial = 0;
                }

                continue;
            }

            if (count && l.contains("ALREADY BUILT FOR")) {
                String[] parts = l.trim().split("=");
                int already = Integer.valueOf(parts[1].trim());
                int todo = reduced - already;
                if (todo != 0) {
                    p.println(todo);
                }
                initial = 0;
                reduced = 0;
            }
        }


        p.close();
    }

    private static void calculate_old_entity_candidate_at_update(String tableminer_convergence_log,
                                                                 String outfile) throws IOException {
        PrintWriter p = new PrintWriter(outfile);
        p.println("Initial, Reduced to");

        List lines = FileUtils.readList(tableminer_convergence_log, false);

        boolean count = false;

        int initial = 0, reduced = 0;
        for (String l : lines) {
            if (l.contains("BACKWARD UPDATE...")) {
                count = true;
                continue;
            }
            if (l.contains("FORWARD LEARNING")) {
                count = false;
                continue;
            }

            if (count && l.contains("(QUERY_KB:")) {
                String[] parts = l.trim().split(":");
                String value = parts[1].trim();
                String[] vs = value.split("\\s+");
                initial = Integer.valueOf(vs[0].trim());
                reduced = Integer.valueOf(vs[2].trim());
                if (reduced == 0) {
                    continue;
                }
                p.println(reduced);
                continue;
            }
        }


        p.close();
    }

    private static void calculate_entity_candidate_savings(String tableminer_convergence_log,
                                                           String outfile) throws IOException {
        PrintWriter p = new PrintWriter(outfile);
        p.println("Initial, Reduced to");

        List lines = FileUtils.readList(tableminer_convergence_log, false);

        boolean count = false;
        int countline=-1;
        for (String l : lines) {
            countline++;
            if (l.contains("BACKWARD UPDATE...")) {
                count = false;
                continue;
            }
            if (l.contains("FORWARD LEARNING")) {
                count = true;
                continue;
            }

            if (count && l.contains("(QUERY_KB:")) {
                String[] parts = l.trim().split(":");
                String value = parts[1].trim();
                String[] vs = value.split("\\s+");
                String initial = vs[0].trim();
                String reduced = vs[2].trim();
                if(reduced.contains("|"))
                    reduced=reduced.split("\\|",2)[0].trim();
                if (reduced.equals("0"))
                    continue;

                String nextline = lines.get(countline+1);
                int repeat=1;
                if(nextline.contains(">> Disambiguation")&&nextline.contains("position at")){
                    int start= nextline.indexOf("([")+2;
                    int end = nextline.indexOf("],");
                    nextline=nextline.substring(start,end).trim();
                    repeat=nextline.split(",").length;
                }

               // for(int i=0; i lines = FileUtils.readList(tableminer_convergence_log, false);


        for (String l : lines) {


            if (l.contains("(QUERY_KB:")) {
                String[] parts = l.trim().split(":");
                String value = parts[1].trim();
                String[] vs = value.split("\\s+");
                String initial = vs[0].trim();
                String reduced = vs[2].trim();
                if (reduced.equals("0"))
                    continue;

                p.println(initial + "," + reduced);
            }
        }


        p.close();
    }


    private static void calculate_entity_candidate_unique_for_calculating_running_time(String tableminer_convergence_log) throws IOException {

        List lines = FileUtils.readList(tableminer_convergence_log, false);

        Set all = new HashSet();
        for (String l : lines) {


            if (l.contains("(QUERY_KB:")) {
                String[] parts = l.trim().split(":");
                String value = parts[1].trim();
                String[] vs = value.split("\\s+");
                String initial = vs[0].trim();
                String reduced = vs[2].trim();
                if (reduced.equals("0"))
                    continue;

                if (reduced.contains("|")) {
                    try {
                        String ids = reduced.split("\\|")[1].trim();
                        for (String id : ids.split(",")) {
                            if (id.trim().length() > 0)
                                all.add(id.trim());
                        }
                    } catch (Exception e) {
                        continue;
                    }
                }
                //p.println(initial + "," + reduced);
            }
        }
        System.out.println(all.size());
    }

    private static void calculate_entity_candidate_savings_update_only(String tableminer_convergence_log,
                                                                       String outfile) throws IOException {
        PrintWriter p = new PrintWriter(outfile);
        p.println("Initial, Reduced to");

        List lines = FileUtils.readList(tableminer_convergence_log, false);

        boolean count = false;
        for (String l : lines) {
            if (l.contains("BACKWARD UPDATE...")) {
                count = false;
                continue;
            }
            if (l.contains("LEARN (Consolidate) begins")) {
                count = true;
                continue;
            }

            if (count && l.contains("(QUERY_KB:")) {
                String[] parts = l.trim().split(":");
                String value = parts[1].trim();
                String[] vs = value.split("\\s+");
                String initial = vs[0].trim();
                String reduced = vs[2].trim();
                if (reduced.equals("0"))
                    continue;

                p.println(initial + "," + reduced);
            }
        }


        p.close();
    }

    private static void calculate_average_query_time(String logfile) throws IOException {
        List lines = FileUtils.readList(logfile, false);
        long totalTime = 0;
        int countQueries = 0;
        for(String l: lines){
            if(l.contains("QueryFreebase")){
                int start =l.lastIndexOf(":")+1;
                long time = Long.valueOf(l.substring(start).trim());
                totalTime=totalTime+time;
                countQueries++;
            }
        }
        System.out.println(totalTime+","+countQueries);
    }

    private static void calculate_iterations_in_update(String tableminer_convergence_log,
                                                       String outfile) throws IOException {
        PrintWriter p = new PrintWriter(outfile);

        List lines = FileUtils.readList(tableminer_convergence_log, false);

        boolean count = false;
        for (String l : lines) {
            if (l.contains("UPDATE STABLIZED AFTER")) {
                String[] tokens = l.trim().split("\\s+");
                int index = 0;
                for (int i = 0; i < tokens.length; i++) {
                    if (tokens[i].equals("AFTER"))
                        index = i + 1;
                }
                String itr = tokens[index].trim();
                p.println(Integer.valueOf(itr));
            }
        }
        p.close();

    }

    private static void calculate_converge_column_interpretation
            (String dataset,
             String tableminer_convergence_log,
             String outfile) throws IOException {
        PrintWriter p = new PrintWriter(outfile);
        p.println("convergence, max, savings");

        List lines = FileUtils.readList(tableminer_convergence_log, false);
        boolean started = false;
        for (String l : lines) {
            if (l.contains(dataset)) {
                //new file
                started = false;
                continue;
            }
            if (l.contains("FORWARD LEARNING...")) {
                started = true;
                continue;
            }

            if (started) {
                if (l.contains("Convergence iteration")) {
                    String[] parts = l.trim().split(",");
                    String append = "";
                    for (String value : parts) {
                        value = value.trim();
                        if (value.length() == 0 || value.indexOf("=") == -1) continue;
                        String split = value.split("=")[1];
                        append += split + ",";
                    }
                    p.println(append);
                }
            }
        }


        p.close();
    }

    public static int calculate_total_queries_issued_baseline(String sysoutFile) throws IOException {
        int sum = 0;
        List lines = FileUtils.readList(sysoutFile, false);
        for (String l : lines) {
            if (l.contains("(QUERY_KB:")) {
                sum++;
                int index = l.indexOf("=>");
                l = l.substring(index + 2).trim();
                int candidates = Integer.valueOf(l);
                sum = sum + candidates;
            }
        }
        System.out.println(sum);
        return sum;
    }

    public static int calculate_total_queries_issued_tableminer(String sysoutFile) throws IOException {
        int sum = 0;
        List lines = FileUtils.readList(sysoutFile, false);

        boolean count = false;
        for (String l : lines) {
            if (l.contains("FORWARD LEARNING")) {
                count = true;
                continue;
            }

            if (count && l.contains("(QUERY_KB:")) {
                sum++;
                int index = l.indexOf("=>");
                l = l.substring(index + 2).trim();
                int candidates = Integer.valueOf(l);
                sum = sum + candidates;
            }
        }
        System.out.println(sum);
        return sum;
    }


    //convergence time
    public static void calculate_ws_converge(String logFile, String outFile) throws IOException {
        PrintWriter p = new PrintWriter(outFile);
        p.println("convergence, max, savings");
        List lines = FileUtils.readList(logFile, false);
        for (String l : lines) {
            if (l.contains("Convergence iteration")) {
                String[] parts = l.trim().split(",");
                String append = "";
                for (String value : parts) {
                    value = value.trim();
                    if (value.length() == 0 || value.indexOf("=") == -1) continue;
                    String split = value.split("=")[1];
                    append += split + ",";
                }
                p.println(append);
            }
        }
        p.close();
    }

    /*  public static void calcuate_update_stats_until_convergence(String logFile, String outputFile) throws IOException {
        PrintWriter p = new PrintWriter(outputFile);
        List lines = FileUtils.readList(logFile, false);

        String text_to_search = "UPDATE STABLIZED AFTER ";
        for (String l : lines) {
            if (l.contains(text_to_search)) {
                int start = l.indexOf(text_to_search) + text_to_search.length();
                l = l.substring(start).trim();
                String iterations = l.split("\\s+")[0].trim();
                p.println(iterations);
            }
        }
        p.close();


    }*/


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy