All Downloads are FREE. Search and download functionalities are using the official Maven repository.

fr.inria.prophet4j.dataset.DataManager Maven / Gradle / Ivy

package fr.inria.prophet4j.dataset;

import fr.inria.prophet4j.utility.CodeDiffer;
import fr.inria.prophet4j.utility.Structure.FeatureMatrix;
import fr.inria.prophet4j.utility.Structure.Sample;
import fr.inria.prophet4j.utility.Option;
import fr.inria.prophet4j.utility.Option.DataOption;
import fr.inria.prophet4j.utility.Option.PatchOption;
import fr.inria.prophet4j.utility.Support;
import fr.inria.prophet4j.utility.Support.DirType;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DataManager {
    private Option option;

    public DataManager(Option option) {
        this.option = option;
    }

    // avoid handling some cases with diff issues
    // return List of VectorFilePath
    private List getBlacklist() {
        List blacklist = new ArrayList<>();
        switch (option.patchOption) {
            case CARDUMEN:
                break;
            case SPR:
                switch (option.dataOption) {
                    case CARDUMEN:
                        break;
                    case SANER:
                        blacklist.add("ignatov_intellij-erlang10/ErlangFormattingModelBuilder.bin");
                        blacklist.add("ignatov_intellij-erlang14/ErlangFormattingModelBuilder.bin");
                        blacklist.add("JetBrains_kotlin28/JetFormattingModelBuilder.bin");
                        blacklist.add("JetBrains_kotlin14/JetFormattingModelBuilder.bin");
                        break;
                }
                break;
        }
        return blacklist;
    }

    private Map>> loadDataWithPatches(String dataPath) {
        switch (option.dataOption) {
            case CARDUMEN:
                return DataLoader.loadCardumenWithPatches(dataPath, Support.PROPHET4J_DIR + "cardumen_dissection/");
            case SANER:
                break;
            case BEARS:
            case BUG_DOT_JAR_MINUS_MATH:
            case QUIX_BUGS:
                return DataLoader.loadODSWithPatches(dataPath);
        }
        return new HashMap<>();
    }

    private Map> loadDataWithoutPatches(String dataPath) {
        switch (option.dataOption) {
            case CARDUMEN:
                return DataLoader.loadCardumenWithoutPatches(dataPath, Support.PROPHET4J_DIR + "cardumen_dissection/");
            case SANER:
                return DataLoader.loadSANERWithoutPatches(dataPath);
            case BEARS:
            case BUG_DOT_JAR_MINUS_MATH:
            case QUIX_BUGS:
                return DataLoader.loadODSWithoutPatches(dataPath);
            case CLOSURE:
                return DataLoader.loadCLOSUREWithoutPatches(dataPath);
        }
        return new HashMap<>();
    }

    public List run() {
        if (option.dataOption == DataOption.CARDUMEN && option.patchOption == PatchOption.CARDUMEN ||
                option.dataOption == DataOption.BEARS && option.patchOption == PatchOption.BEARS ||
                option.dataOption == DataOption.BUG_DOT_JAR_MINUS_MATH && option.patchOption == PatchOption.BUG_DOT_JAR_MINUS_MATH ||
                option.dataOption == DataOption.QUIX_BUGS && option.patchOption == PatchOption.QUIX_BUGS
        ) {
            return handleByPatches();
        } else {
            return handleByGenerator();
        }
    }

    // buggy files & human patches & generated patches are given
    private List handleByPatches() {
        String dataPath = Support.getFilePath(DirType.DATA_DIR, option);
        String featurePath = Support.getFilePath(DirType.FEATURE_DIR, option);
        List blackList = getBlacklist();

        List filePaths = new ArrayList<>();
        String binFilePath = featurePath + "catalog.bin";
        if (new File(binFilePath).exists()) {
            filePaths = Support.deserialize(binFilePath);
        } else {
            CodeDiffer codeDiffer = new CodeDiffer(false, option);
            Map>> catalogs = loadDataWithPatches(dataPath);
            int progressAll = catalogs.size(), progressNow = 0;
            for (String pathName : catalogs.keySet()) {
                codeDiffer.setPathName(pathName);
                Map> catalog = catalogs.get(pathName);
                for (File oldFile : catalog.keySet()) {
                    try {
                        String tmpFileName = oldFile.getName().replace(".java", ".bin");
                        String vectorPath = pathName + "/" + tmpFileName;
                        System.out.println(vectorPath);
                        if (blackList.contains(vectorPath)) {
                            progressNow += 1;
                            System.out.println("blacklist");
                            continue;
                        }
                        vectorPath = featurePath + vectorPath;
                        File vectorFile = new File(vectorPath);
                        if (!vectorFile.exists()) {
                            List featureMatrices = codeDiffer.runByPatches(oldFile, catalog.get(oldFile));
                            if (featureMatrices.size() == 0) {
                                // diff.commonAncestor() returns null value
                                continue;
                            }
                            new Sample(vectorFile.getPath()).saveFeatureMatrices(featureMatrices);
                        }
                        if (!filePaths.contains(vectorPath)) {
                            filePaths.add(vectorPath);
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
                progressNow += 1;
                System.out.println(pathName + " : " + progressNow + " / " + progressAll);
            }
            Support.serialize(binFilePath, filePaths);
        }
        return filePaths;
    }

    // buggy files & human patches are given
    private List handleByGenerator() {
        String dataPath = Support.getFilePath(DirType.DATA_DIR, option);
        String featurePath = Support.getFilePath(DirType.FEATURE_DIR, option);
        List blackList = getBlacklist();

        List filePaths = new ArrayList<>();
        String binFilePath = featurePath + "catalog.bin";
        if (new File(binFilePath).exists()) {
            filePaths = Support.deserialize(binFilePath);
        } else {
            CodeDiffer codeDiffer = new CodeDiffer(true, option);
            Map> catalogs = loadDataWithoutPatches(dataPath);
            int progressAll = catalogs.size(), progressNow = 0;
            for (String pathName : catalogs.keySet()) {
                Map catalog = catalogs.get(pathName);
                for (File oldFile : catalog.keySet()) {
                    try {
                        String tmpFileName = oldFile.getName().replace(".java", ".bin");
                        String vectorPath = pathName + "/" + tmpFileName;
                        System.out.println(vectorPath);
                        if (blackList.contains(vectorPath)) {
                            progressNow += 1;
                            System.out.println("blacklist");
                            continue;
                        }
                        vectorPath = featurePath + vectorPath;
                        File vectorFile = new File(vectorPath);
                        if (!vectorFile.exists()) {
                            List featureMatrices = codeDiffer.runByGenerator(oldFile, catalog.get(oldFile));
                            // we should have more than one FeatureMatrix when CodeDiffer's "byGenerator" is true
                            if (featureMatrices.size() == 0) {
                                continue;
                            }
                            if (featureMatrices.get(0).getFeatureVectors().size() == 0) {
                                // diff.commonAncestor() returns null value
                                continue;
                            }
                            new Sample(vectorFile.getPath()).saveFeatureMatrices(featureMatrices);
                        }
                        if (!filePaths.contains(vectorPath)) {
                            filePaths.add(vectorPath);
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
                progressNow += 1;
                System.out.println(pathName + " : " + progressNow + " / " + progressAll);
            }
            Support.serialize(binFilePath, filePaths);
        }
        return filePaths;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy