// eu.solven.cleanthat.engine.java.eclipse.generator.CodeDiffHelper (Maven / Gradle / Ivy)
/*
* Copyright 2023 Benoit Lacelle - SOLVEN
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.solven.cleanthat.engine.java.eclipse.generator;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.ExecutionException;
import org.apache.commons.text.similarity.LevenshteinDistance;
import com.github.difflib.DiffUtils;
import com.github.difflib.patch.DeltaType;
import com.github.difflib.patch.Patch;
import com.github.difflib.patch.PatchFailedException;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import eu.solven.cleanthat.formatter.ILintFixer;
/**
* Helps computing a difference score between code source
*
* @author blacelle
*
*/
public class CodeDiffHelper {
// Compute the diff can be expensive. However, we expect to encounter many times
// files formatted exactly the same
// way
protected final Cache, Long> cache = CacheBuilder.newBuilder().build();
protected long computeDiffScore(ILintFixer formatter, Collection contents) {
return contents.parallelStream().mapToLong(content -> {
try {
return computeDiffScore(formatter, content);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}).sum();
}
/**
*
* @param lintFixer
* @param content
* @return a score indicating how much this formatter impacts given content. If 0, the formatter has no impacts. A
* higher score means a bigger difference
* @throws IOException
*/
protected long computeDiffScore(ILintFixer lintFixer, String content) throws IOException {
var formatted = lintFixer.doFormat(content);
if (formatted == null) {
// It means something failed while formatting
return Long.MAX_VALUE;
}
long deltaDiff;
try {
deltaDiff = cache.get(Arrays.asList(content, formatted), () -> deltaDiff(content, formatted));
} catch (ExecutionException e) {
throw new RuntimeException(e);
}
return deltaDiff;
}
public long deltaDiff(String pathAsString, String formatted) {
List originalRows = Arrays.asList(pathAsString.split("[\r\n]+"));
List formattedRows = Arrays.asList(formatted.split("[\r\n]+"));
Patch diff = DiffUtils.diff(originalRows, formattedRows);
List patchApplied;
try {
patchApplied = diff.applyTo(originalRows);
} catch (PatchFailedException e) {
throw new RuntimeException(e);
}
if (!formattedRows.equals(patchApplied)) {
throw new IllegalArgumentException("Issue computing the diff?");
}
var deltaDiff = diff.getDeltas().stream().mapToLong(d -> {
if (d.getType() == DeltaType.EQUAL) {
return 0L;
}
// We count the number of impacted characters
List sourceLines = d.getSource().getLines();
List targetLines = d.getTarget().getLines();
if (sourceLines.size() == 1 && targetLines.size() == 1) {
var sourceLine = sourceLines.get(0);
var targetLine = targetLines.get(0);
// int common = new LongestCommonSubsequence().apply(sourceLine, targetLine);
// The diff is the longest difference between the 2 lines
// return Math.max(sourceLine.length(), targetLine.length()) - common;
return LevenshteinDistance.getDefaultInstance().apply(sourceLine, targetLine);
} else {
var sourceSize = sourceLines.stream().mapToLong(String::length).sum();
var targetSize = targetLines.stream().mapToLong(String::length).sum();
// Given a diff, we consider the biggest square between the source and the
// target
return Math.max(sourceSize, targetSize);
}
}).sum();
return deltaDiff;
}
}