All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.solven.cleanthat.engine.java.eclipse.generator.CodeDiffHelper Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2023 Benoit Lacelle - SOLVEN
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package eu.solven.cleanthat.engine.java.eclipse.generator;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.ExecutionException;

import org.apache.commons.text.similarity.LevenshteinDistance;

import com.github.difflib.DiffUtils;
import com.github.difflib.patch.DeltaType;
import com.github.difflib.patch.Patch;
import com.github.difflib.patch.PatchFailedException;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

import eu.solven.cleanthat.formatter.ILintFixer;

/**
 * Helps compute a difference score between a source code and its formatted version.
 * 
 * @author blacelle
 *
 */
public class CodeDiffHelper {

	// Compute the diff can be expensive. However, we expect to encounter many times
	// files formatted exactly the same
	// way
	protected final Cache, Long> cache = CacheBuilder.newBuilder().build();

	protected long computeDiffScore(ILintFixer formatter, Collection contents) {
		return contents.parallelStream().mapToLong(content -> {
			try {
				return computeDiffScore(formatter, content);
			} catch (IOException e) {
				throw new UncheckedIOException(e);
			}
		}).sum();
	}

	/**
	 * 
	 * @param lintFixer
	 * @param content
	 * @return a score indicating how much this formatter impacts given content. If 0, the formatter has no impacts. A
	 *         higher score means a bigger difference
	 * @throws IOException
	 */
	protected long computeDiffScore(ILintFixer lintFixer, String content) throws IOException {
		var formatted = lintFixer.doFormat(content);

		if (formatted == null) {
			// It means something failed while formatting
			return Long.MAX_VALUE;
		}

		long deltaDiff;
		try {
			deltaDiff = cache.get(Arrays.asList(content, formatted), () -> deltaDiff(content, formatted));
		} catch (ExecutionException e) {
			throw new RuntimeException(e);
		}
		return deltaDiff;
	}

	public long deltaDiff(String pathAsString, String formatted) {
		List originalRows = Arrays.asList(pathAsString.split("[\r\n]+"));
		List formattedRows = Arrays.asList(formatted.split("[\r\n]+"));
		Patch diff = DiffUtils.diff(originalRows, formattedRows);
		List patchApplied;
		try {
			patchApplied = diff.applyTo(originalRows);
		} catch (PatchFailedException e) {
			throw new RuntimeException(e);
		}
		if (!formattedRows.equals(patchApplied)) {
			throw new IllegalArgumentException("Issue computing the diff?");
		}
		var deltaDiff = diff.getDeltas().stream().mapToLong(d -> {
			if (d.getType() == DeltaType.EQUAL) {
				return 0L;
			}
			// We count the number of impacted characters
			List sourceLines = d.getSource().getLines();
			List targetLines = d.getTarget().getLines();

			if (sourceLines.size() == 1 && targetLines.size() == 1) {
				var sourceLine = sourceLines.get(0);
				var targetLine = targetLines.get(0);
				// int common = new LongestCommonSubsequence().apply(sourceLine, targetLine);
				// The diff is the longest difference between the 2 lines
				// return Math.max(sourceLine.length(), targetLine.length()) - common;

				return LevenshteinDistance.getDefaultInstance().apply(sourceLine, targetLine);
			} else {
				var sourceSize = sourceLines.stream().mapToLong(String::length).sum();
				var targetSize = targetLines.stream().mapToLong(String::length).sum();
				// Given a diff, we consider the biggest square between the source and the
				// target
				return Math.max(sourceSize, targetSize);
			}
		}).sum();
		return deltaDiff;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy