All downloads are free. Search and download functionality uses the official Maven repository.

fr.inria.prophet4j.utility.CodeDiffer Maven / Gradle / Ivy

The newest version!
package fr.inria.prophet4j.utility;

import com.google.gson.JsonElement;
import com.google.gson.JsonPrimitive;
import fr.inria.coming.codefeatures.Cntx;
import fr.inria.coming.codefeatures.CodeFeatureDetector;
import fr.inria.coming.codefeatures.FeatureAnalyzer;
import fr.inria.prophet4j.feature.Feature;
import fr.inria.prophet4j.feature.FeatureCross;
import fr.inria.prophet4j.feature.FeatureExtractor;
import fr.inria.prophet4j.feature.RepairGenerator;
import fr.inria.prophet4j.feature.S4R.S4RFeature;
import fr.inria.prophet4j.feature.S4R.S4RFeatureCross;
import fr.inria.prophet4j.feature.S4RO.S4ROFeature;
import fr.inria.prophet4j.feature.S4RO.S4ROFeatureCross;
import fr.inria.prophet4j.feature.S4RO.S4ROFeatureExtractor;
import fr.inria.prophet4j.feature.S4RO.S4RORepairGenerator;
import fr.inria.prophet4j.utility.Structure.DiffType;
import fr.inria.prophet4j.utility.Structure.FeatureMatrix;
import fr.inria.prophet4j.utility.Structure.FeatureVector;
import fr.inria.prophet4j.utility.Structure.DiffEntry;
import fr.inria.prophet4j.utility.Structure.Repair;
import fr.inria.prophet4j.feature.enhanced.EnhancedFeatureExtractor;
import fr.inria.prophet4j.feature.enhanced.EnhancedRepairGenerator;
import fr.inria.prophet4j.feature.extended.ExtendedFeatureExtractor;
import fr.inria.prophet4j.feature.extended.ExtendedRepairGenerator;
import fr.inria.prophet4j.feature.original.OriginalFeatureExtractor;
import fr.inria.prophet4j.feature.original.OriginalRepairGenerator;
import fr.inria.prophet4j.utility.Option.FeatureOption;
import gumtree.spoon.AstComparator;
import gumtree.spoon.diff.Diff;
import gumtree.spoon.diff.operations.*;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import spoon.reflect.declaration.CtClass;
import spoon.reflect.declaration.CtElement;
import spoon.reflect.declaration.CtMethod;

import java.io.File;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// based on pdiffer.cpp, ASTDiffer.cpp
public class CodeDiffer {

    // When true, genFeatureMatrices additionally builds one unmarked FeatureMatrix per
    // candidate atom of every repair returned by RepairGenerator.obtainRepairCandidates().
    private final boolean byGenerator;
    // Supplies featureOption, which selects the feature extractor / repair generator family.
    private final Option option;
    // Path prefix located inside a new file's path when deriving its fileKey; "" by default.
    private String pathName;
    private static final Logger logger = LogManager.getLogger(CodeDiffer.class.getName());

    /**
     * Creates a differ with an empty path prefix.
     *
     * @param byGenerator whether repair candidates should also be generated when building feature matrices
     * @param option      configuration object whose {@code featureOption} selects the feature family
     */
    public CodeDiffer(boolean byGenerator, Option option) {
        this.pathName = "";
        this.option = option;
        this.byGenerator = byGenerator;
    }

    /**
     * Sets the path prefix that {@code runByGenerator(File, File)} looks for inside the new
     * file's path when deriving the file key.
     *
     * @param pathName the prefix; {@code null} is treated as the empty string so that later
     *                 string operations on the prefix cannot fail with a NullPointerException
     */
    public void setPathName(String pathName) {
        this.pathName = pathName == null ? "" : pathName;
    }

    /**
     * Instantiates the feature extractor that corresponds to {@code option.featureOption}.
     *
     * @return a fresh extractor for ENHANCED, EXTENDED, ORIGINAL or S4RO; {@code null} for S4R
     *         (after logging a warning) and for any option not listed in the switch
     */
    private FeatureExtractor newFeatureExtractor() {
        switch (option.featureOption) {
            case ENHANCED:
                return new EnhancedFeatureExtractor();
            case EXTENDED:
                return new ExtendedFeatureExtractor();
            case ORIGINAL:
                return new OriginalFeatureExtractor();
            case S4RO:
                return new S4ROFeatureExtractor();
            case S4R:
                // S4R features are not produced through a FeatureExtractor
                logger.warn("S4R should not call newFeatureExtractor");
                return null;
            default:
                return null;
        }
    }

    /**
     * Instantiates the repair generator that corresponds to {@code option.featureOption}.
     *
     * @param diffEntry the diff entry handed to the generator's constructor
     * @return a fresh generator for ENHANCED, EXTENDED, ORIGINAL or S4RO; {@code null} for S4R
     *         (after logging a warning) and for any option not listed in the switch
     */
    private RepairGenerator newRepairGenerator(DiffEntry diffEntry) {
        switch (option.featureOption) {
            case ENHANCED:
                return new EnhancedRepairGenerator(diffEntry);
            case EXTENDED:
                return new ExtendedRepairGenerator(diffEntry);
            case ORIGINAL:
                return new OriginalRepairGenerator(diffEntry);
            case S4RO:
                return new S4RORepairGenerator(diffEntry);
            case S4R:
                // S4R features are not produced through a RepairGenerator
                logger.warn("S4R should not call newRepairGenerator");
                return null;
            default:
                return null;
        }
    }

    // tmp wrapper for gumtree-spoon-ast-diff
    // may be affected by future versions of gumtree-spoon-ast-diff
    // Matches the ":<digits>" fragments of Operation.toString(); the digits are used as line numbers.
    private static final Pattern LINE_NUMBER_PATTERN = Pattern.compile(":(\\d+)");

    /**
     * Converts all root operations of a diff into diff entries, pairing the delete-side and
     * insert-side operations that are reported for the same line number.
     *
     * @param diff the AST diff whose root operations are inspected
     * @return the diff entries, excluding those whose source or destination node is a class or method
     * @throws IndexOutOfBoundsException declared for callers that handle it; nothing in this
     *         method throws it explicitly
     */
    private List<DiffEntry> genDiffEntries(Diff diff) throws IndexOutOfBoundsException {
        Map<Integer, Operation> deleteOperations = new HashMap<>();
        Map<Integer, Operation> insertOperations = new HashMap<>();
        List<Operation> operations = diff.getRootOperations();
        for (Operation operation : operations) {
            indexOperationByLine(operation, deleteOperations, insertOperations);
        }
        return buildDiffEntries(deleteOperations, insertOperations);
    }

    /**
     * Single-operation variant of {@link #genDiffEntries(Diff)}, used by the S4RO path.
     * The operation is classified exactly as in the multi-operation variant, so a
     * MoveOperation may yield a delete-side and an insert-side entry.
     *
     * @param operation the operation to convert
     * @return the diff entries derived from this one operation (possibly empty)
     * @throws IndexOutOfBoundsException declared for callers that handle it; nothing in this
     *         method throws it explicitly
     */
    private List<DiffEntry> genDiffEntry(Operation operation) throws IndexOutOfBoundsException {
        Map<Integer, Operation> deleteOperations = new HashMap<>();
        Map<Integer, Operation> insertOperations = new HashMap<>();
        indexOperationByLine(operation, deleteOperations, insertOperations);
        return buildDiffEntries(deleteOperations, insertOperations);
    }

    // Files one operation under the line number(s) parsed from its toString() output.
    private static void indexOperationByLine(Operation operation,
                                             Map<Integer, Operation> deleteOperations,
                                             Map<Integer, Operation> insertOperations) {
        Matcher matcher = LINE_NUMBER_PATTERN.matcher(operation.toString());
        if (operation instanceof DeleteOperation) {
            if (matcher.find()) {
                deleteOperations.put(Integer.valueOf(matcher.group(1)), operation);
            }
        } else if (operation instanceof InsertOperation) {
            if (matcher.find()) {
                insertOperations.put(Integer.valueOf(matcher.group(1)), operation);
            }
        } else if (operation instanceof MoveOperation) {
            // first match is filed as the delete side, second match as the insert side
            if (matcher.find()) {
                deleteOperations.put(Integer.valueOf(matcher.group(1)), operation);
            }
            if (matcher.find()) {
                insertOperations.put(Integer.valueOf(matcher.group(1)), operation);
            }
        } else if (operation instanceof UpdateOperation) {
            // an update counts as both sides on the same line
            if (matcher.find()) {
                Integer lineNum = Integer.valueOf(matcher.group(1));
                deleteOperations.put(lineNum, operation);
                insertOperations.put(lineNum, operation);
            }
        }
    }

    // Builds one DiffEntry per line number present in either map, skipping class/method-level nodes.
    private static List<DiffEntry> buildDiffEntries(Map<Integer, Operation> deleteOperations,
                                                    Map<Integer, Operation> insertOperations) {
        List<DiffEntry> diffEntries = new ArrayList<>();
        // HashSet kept on purpose: the resulting entry order is the same as before this refactoring
        Set<Integer> lineNums = new HashSet<>();
        lineNums.addAll(deleteOperations.keySet());
        lineNums.addAll(insertOperations.keySet());
        for (Integer lineNum : lineNums) {
            Operation deleteOperation = deleteOperations.get(lineNum);
            Operation insertOperation = insertOperations.get(lineNum);

            DiffType type = null;
            CtElement srcNode = null;
            CtElement dstNode = null;
            if (deleteOperation != null && insertOperation != null) {
                type = DiffType.UpdateType;
                srcNode = deleteOperation.getSrcNode();
                dstNode = insertOperation.getDstNode();
                if (insertOperation instanceof InsertOperation) {
                    // for a plain insert the inserted element is read from getSrcNode()
                    dstNode = insertOperation.getSrcNode();
                }
            } else if (deleteOperation != null) {
                type = DiffType.DeleteType;
                srcNode = deleteOperation.getSrcNode();
                dstNode = deleteOperation.getDstNode();
                // fall back to the other side so that neither node is null when one is available
                if (srcNode == null) {
                    srcNode = dstNode;
                }
                if (dstNode == null) {
                    dstNode = srcNode;
                }
            } else if (insertOperation != null) {
                type = DiffType.InsertType;
                srcNode = insertOperation.getSrcNode();
                dstNode = insertOperation.getDstNode();
                if (srcNode == null) {
                    srcNode = dstNode;
                }
                if (dstNode == null) {
                    dstNode = srcNode;
                }
            }
            // distinguish functionality changes from revision changes
            if (srcNode instanceof CtClass || srcNode instanceof CtMethod ||
                    dstNode instanceof CtClass || dstNode instanceof CtMethod) {
                continue;
            }
            diffEntries.add(new DiffEntry(type, srcNode, dstNode));
        }
        return diffEntries;
    }

    /**
     * Builds the feature matrices for one diff.
     * <p>
     * The first matrix is always the marked one built from the diff itself. Only when
     * {@code byGenerator} is true and the feature option is neither S4R nor S4RO are
     * additional unmarked matrices appended (one per candidate atom of every generated repair),
     * so in all other cases the returned list has at most one element.
     *
     * @param diff    the AST diff between the old and the new version
     * @param fileKey key stored in every produced {@code FeatureMatrix}
     * @return the feature matrices; empty when an {@link IndexOutOfBoundsException} interrupted the work
     */
    private List<FeatureMatrix> genFeatureMatrices(Diff diff, String fileKey) {
        List<FeatureMatrix> featureMatrices = new ArrayList<>();
        try {
            if (option.featureOption == FeatureOption.S4R) {
                featureMatrices.add(new FeatureMatrix(true, fileKey, genS4RFeatureVectors(diff)));
            } else if (option.featureOption == FeatureOption.S4RO) {
                featureMatrices.add(new FeatureMatrix(true, fileKey, genS4ROFeatureVectors(diff)));
            } else {
                // RepairGenerator receive diffEntry as parameter, so we do not need ErrorLocalizer
                {
                    FeatureExtractor featureExtractor = newFeatureExtractor();
                    List<FeatureVector> featureVectors = new ArrayList<>();
                    for (DiffEntry diffEntry : genDiffEntries(diff)) {
                        RepairGenerator generator = newRepairGenerator(diffEntry);
                        Repair repair = generator.obtainHumanRepair();
                        FeatureVector featureVector = new FeatureVector();
                        for (CtElement atom : repair.getCandidateAtoms()) {
                            featureVector.merge(featureExtractor.extractFeature(repair, atom));
                        }
                        featureVectors.add(featureVector);
                    }
                    featureMatrices.add(new FeatureMatrix(true, fileKey, featureVectors));
                }
                if (byGenerator) {
                    // only in this case, featureMatrices.size() > 1
                    // we only consider this case where each SPR repair owns one diffEntry
                    // as we learn by comparing feature-vectors and evaluate by feature-matrix scores
                    // also we do not need to consider the potential issue of combinatorial explosion
                    FeatureExtractor featureExtractor = newFeatureExtractor();
                    for (DiffEntry diffEntry : genDiffEntries(diff)) {
                        RepairGenerator generator = newRepairGenerator(diffEntry);
                        for (Repair repair : generator.obtainRepairCandidates()) {
                            for (CtElement atom : repair.getCandidateAtoms()) {
                                List<FeatureVector> featureVectors = new ArrayList<>();
                                featureVectors.add(featureExtractor.extractFeature(repair, atom));
                                featureMatrices.add(new FeatureMatrix(false, fileKey, featureVectors));
                            }
                        }
                    }
                }
            }
        } catch (IndexOutOfBoundsException e) {
            logger.log(Level.WARN, "diff.commonAncestor() returns null value", e);
        }
        return featureMatrices;
    }

    // S4R path: one feature vector per root operation whose left element could be analyzed.
    // based on L152-186 at FeatureAnalyzer.java and L61-79 at FeaturesOnD4jTest.java
    private List<FeatureVector> genS4RFeatureVectors(Diff diff) {
        FeatureAnalyzer featureAnalyzer = new FeatureAnalyzer();
        CodeFeatureDetector cresolver = new CodeFeatureDetector();
        JsonArray contexts = new JsonArray();
        try {
            List<Operation> ops = diff.getRootOperations();
            for (Operation operation : ops) {
                try {
                    CtElement affectedCtElement = featureAnalyzer.getLeftElement(operation);
                    if (affectedCtElement != null) {
                        Cntx iContext = cresolver.analyzeFeatures(affectedCtElement);
                        contexts.add(iContext.toJSON());
                    }
                } catch (Exception e) {
                    // best effort: an operation that cannot be analyzed is skipped
                    logger.debug("Skipping operation whose S4R features could not be analyzed", e);
                }
            }
        } catch (Throwable e) {
            // best effort: keep whatever was collected before the failure
            logger.error("S4R feature analysis aborted", e);
        }

        List<FeatureVector> featureVectors = new ArrayList<>();
        for (JsonElement featuresOfOperation : contexts) {
            FeatureVector featureVector = new FeatureVector();
            JsonObject jso = featuresOfOperation.getAsJsonObject();
            for (S4RFeature.CodeFeature codeFeature : S4RFeature.CodeFeature.values()) {
                Double degree = parseFeatureDegree(jso.get(codeFeature.toString()));
                if (degree != null) {
                    List<Feature> features = new ArrayList<>();
                    features.add(codeFeature);
                    featureVector.addFeatureCross(
                            new S4RFeatureCross(S4RFeature.CrossType.CF_CT, features, degree));
                }
            }
            featureVectors.add(featureVector);
        }
        return featureVectors;
    }

    // S4RO path: per analyzable root operation, the merged P4J feature vector of its diff entries
    // enriched with the S4R-style context features of the same operation.
    private List<FeatureVector> genS4ROFeatureVectors(Diff diff) {
        FeatureAnalyzer featureAnalyzer = new FeatureAnalyzer();
        CodeFeatureDetector cresolver = new CodeFeatureDetector();
        FeatureExtractor featureExtractor = newFeatureExtractor();
        List<FeatureVector> featureVectors = new ArrayList<>();
        JsonArray contexts = new JsonArray();
        try {
            List<Operation> ops = diff.getRootOperations();
            for (Operation operation : ops) {
                try {
                    CtElement affectedCtElement = featureAnalyzer.getLeftElement(operation);
                    if (affectedCtElement == null) {
                        continue;
                    }
                    Cntx iContext = cresolver.analyzeFeatures(affectedCtElement);

                    // here we merge two feature-vectors of one MoveOperation
                    FeatureVector featureVector = new FeatureVector();
                    for (DiffEntry diffEntry : genDiffEntry(operation)) {
                        // generate P4J featureVectors beforehand
                        RepairGenerator generator = newRepairGenerator(diffEntry);
                        Repair repair = generator.obtainHumanRepair();
                        for (CtElement atom : repair.getCandidateAtoms()) {
                            featureVector.merge(featureExtractor.extractFeature(repair, atom));
                        }
                    }
                    // Append context and vector together, only after both were built, so that
                    // contexts.get(i) always belongs to featureVectors.get(i) even when the
                    // extraction above fails for some operation.
                    contexts.add(iContext.toJSON());
                    featureVectors.add(featureVector);
                } catch (Exception e) {
                    // best effort: an operation that cannot be analyzed is skipped
                    logger.debug("Skipping operation whose S4RO features could not be extracted", e);
                }
            }
        } catch (Throwable e) {
            // best effort: keep whatever was collected before the failure
            logger.error("S4RO feature analysis aborted", e);
        }

        for (int index = 0; index < contexts.size(); index++) {
            FeatureVector featureVector = featureVectors.get(index);
            JsonObject jso = contexts.get(index).getAsJsonObject();
            for (S4ROFeature.CodeFeature codeFeature : S4ROFeature.CodeFeature.values()) {
                Double degree = parseFeatureDegree(jso.get(codeFeature.toString()));
                if (degree != null) {
                    List<Feature> features = new ArrayList<>();
                    features.add(codeFeature);
                    featureVector.addFeatureCross(
                            new S4ROFeatureCross(S4ROFeature.CrossType.CF_CT, features, degree));
                }
            }
        }
        return featureVectors;
    }

    // Maps a JSON feature value to its degree: "true" -> 1.0, "false" -> 0.0 (case-insensitive),
    // a numeric string -> its double value. Returns null when the property is absent, is not a
    // JSON primitive, or is not parseable as a number; such features are skipped by the callers.
    private static Double parseFeatureDegree(JsonElement property) {
        if (property == null || !property.isJsonPrimitive()) {
            return null;
        }
        String str = property.getAsJsonPrimitive().getAsString();
        if (str.equalsIgnoreCase("true")) {
            return 1.0;
        }
        if (str.equalsIgnoreCase("false")) {
            return 0.0;
        }
        try {
            return Double.parseDouble(str);
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /**
     * Diffs one old file against each of the given new files and collects the feature matrices.
     * <p>
     * for DataLoader, we do not need to obtainRepairCandidates as they are given.
     * The original note states that {@code byGenerator} is false whenever this method is
     * called; that is an expectation on the caller and is not enforced here.
     *
     * @param oldFile  the original source file
     * @param newFiles the patched versions; the first one is treated as the reference patch
     * @return all matrices in the order of {@code newFiles}; {@code correctMarked()} has been
     *         invoked on every matrix except the first
     */
    public List<FeatureMatrix> runByPatches(File oldFile, List<File> newFiles) {
        List<FeatureMatrix> featureMatrices = new ArrayList<>();
        for (File newFile : newFiles) {
            featureMatrices.addAll(runByGenerator(oldFile, newFile));
        }
        // correct the prop "marked" for all featureMatrices except for the first one
        for (int idx = 1; idx < featureMatrices.size(); idx++) {
            featureMatrices.get(idx).correctMarked();
        }
        return featureMatrices;
    }

    /**
     * Diffs two source files and builds the feature matrices for that diff.
     * <p>
     * The file key is the part of the new file's path between the configured path prefix
     * (plus one separator character) and the last path separator, with the remaining
     * separators replaced by {@code '-'}. It is empty when no such part exists, e.g. when the
     * file lies directly under the prefix.
     *
     * @param oldFile the original source file
     * @param newFile the patched source file
     * @return the feature matrices; empty when the comparison or feature generation failed
     */
    public List<FeatureMatrix> runByGenerator(File oldFile, File newFile) {
        List<FeatureMatrix> featureMatrices = new ArrayList<>();
        try {
            AstComparator comparator = new AstComparator();
            Diff diff = comparator.compare(oldFile, newFile);
            // File.getPath() uses the platform separator; normalise so the '/'-based
            // slicing below also works where the separator is '\'
            String filePath = newFile.getPath().replace(File.separatorChar, '/');
            String prefix = pathName.replace(File.separatorChar, '/');
            int leftIndex = filePath.indexOf(prefix) + prefix.length();
            int rightIndex = filePath.lastIndexOf('/');
            int keyStart = leftIndex + 1; // skip the separator that follows the prefix
            // no directory part between prefix and file name -> empty key instead of a
            // StringIndexOutOfBoundsException that would discard the whole diff
            String fileKey = keyStart < rightIndex ? filePath.substring(keyStart, rightIndex) : "";
            fileKey = fileKey.replace("/", "-");
            featureMatrices.addAll(genFeatureMatrices(diff, fileKey));
        } catch (Exception e) {
            logger.error("Failed to build feature matrices for " + newFile, e);
        }
        return featureMatrices;
    }

    /**
     * Diffs two source strings and builds the feature matrices with an empty file key.
     * <p>
     * for FeatureExtractorTest.java. The assertion (active only with {@code -ea}) expects
     * exactly one matrix, which holds for the configurations in which no repair candidates
     * are generated.
     *
     * @param oldStr the original source code
     * @param newStr the patched source code
     * @return the feature matrices of the diff
     */
    public List<FeatureMatrix> runByGenerator(String oldStr, String newStr) {
        AstComparator comparator = new AstComparator();
        Diff diff = comparator.compare(oldStr, newStr);
        List<FeatureMatrix> featureMatrices = genFeatureMatrices(diff, "");
        assert featureMatrices.size() == 1;
        return featureMatrices;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy