All downloads are FREE. Search and download functionality uses the official Maven repository.

io.codemodder.plugins.llm.LLMDiffs Maven / Gradle / Ivy

There is a newer version: 0.95.0
Show newest version
package io.codemodder.plugins.llm;

import com.github.difflib.UnifiedDiffUtils;
import com.github.difflib.patch.Patch;
import com.github.difflib.patch.PatchFailedException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Utilities for working with diff patches returned by an LLM. */
public final class LLMDiffs {

  private static final Pattern HUNK_RANGE_PATTERN =
      Pattern.compile("^@@\\s+-(\\d+)(?:,\\d+)?\\s+\\+\\d+(?:,\\d+)?\\s+@@");

  private static final int MAX_FUZZ = 3;

  private LLMDiffs() {} // Prevent instantiation.

  /**
   * Applies a diff in unified format to {@code target}.
   *
   * 

If the LLM was handed code with unusual indentation, it can struggle to create a diff patch * with that indentation preserved. This will attempt to fix the indentation of the original lines * in the diff patch before passing it to {@link UnifiedDiffUtils} for parsing. * * @param target The target. * @param diff The diff. * @return The patched target. * @throws IllegalArgumentException If the diff cannot be applied to {@code target}. */ public static List applyDiff(final List target, final String diff) { try { List fixedDiff = fixDiffWhitespace(target, List.of(diff.strip().split("\n"))); Patch patch = UnifiedDiffUtils.parseUnifiedDiff(fixedDiff); return patch.applyFuzzy(target, MAX_FUZZ); } catch (PatchFailedException e) { throw new IllegalArgumentException(e); } } /** * Fixes the whitespace in a diff so that it matches the target. * * @param target The target. * @param diff The diff. * @return The diff with its whitespace fixed. */ private static List fixDiffWhitespace( final List target, final List diff) { List fixedDiff = new ArrayList<>(); boolean inHeader = true; List hunk = new ArrayList<>(); int start = 0; for (String line : diff) { if (inHeader) { // Append the header. fixedDiff.add(line); if (line.startsWith("+++")) { inHeader = false; } continue; } Matcher m = HUNK_RANGE_PATTERN.matcher(line); if (m.find()) { // If we've found a new hunk, fix and append the previous hunk. if (!hunk.isEmpty()) { fixedDiff.addAll(fixHunkWhitespace(target, hunk, start)); hunk.clear(); } // Append the hunk range (minus the section heading, which UnifiedDiffUtils chokes on). fixedDiff.add(m.group(0)); // Save the starting line number for the new hunk. start = Integer.parseInt(m.group(1)); } else { hunk.add(line.length() > 0 ? line : " "); } } // Fix and append the final hunk. fixedDiff.addAll(fixHunkWhitespace(target, hunk, start)); return List.copyOf(fixedDiff); } /** * Fixes the whitespace in a change hunk so that it matches the target. * *

For example, given these inconsistently-indented lines from the target (3 spaces, 9 spaces, * and 2 spaces respectively): * *

   *    else {
   *          response.sendRedirect("LoginPage?error=Unauthorized Access");
   *   }
   * 
* *

And this consistently-indented hunk from the LLM: * *

   *      else {
   * +        System.out.println("Failed login attempt for user: " + user);
   *          response.sendRedirect("LoginPage?error=Unauthorized Access");
   *      }
   * 
* *

This will restore the inconsistent indentation of the target so that the hunk can be * applied: * *

   *     else {
   * +        System.out.println("Failed login attempt for user: " + user);
   *           response.sendRedirect("LoginPage?error=Unauthorized Access");
   *    }
   * 
* * @param target The target. * @param hunk The change hunk. * @param start The starting line number (1-based). * @return The hunk with its whitespace fixed. */ private static List fixHunkWhitespace( final List target, final List hunk, final int start) { int index = -1; // The starting line number from the LLM can be slightly off -- this is usually observed when // a file starts with blank lines -- so we need to find the actual starting line number. for (int i = start > MAX_FUZZ ? start - MAX_FUZZ - 1 : 0; i < start + MAX_FUZZ && i < target.size(); i++) { if (isStart(target, hunk, i)) { index = i; break; } } // We couldn't find the matching lines. Let the patch fail. :( if (index == -1) { return hunk; } List fixedHunk = new ArrayList<>(hunk.size()); for (String line : hunk) { if (line.startsWith("+")) { fixedHunk.add(line); } else { fixedHunk.add(line.charAt(0) + target.get(index++)); } } return List.copyOf(fixedHunk); } /** * Determines whether {@code index} is the starting index of a change hunk. * * @param target The target. * @param hunk The change hunk. * @param index The index to check (0-based). * @return {@code true} if {@code index} is the starting index; otherwise, {@code false}. */ private static boolean isStart( final List target, final List hunk, final int index) { int adds = 0; for (int i = 0; i < hunk.size(); i++) { // Skip adds. if (hunk.get(i).startsWith("+")) { adds++; continue; } int targetIndex = index + i - adds; if (targetIndex >= target.size()) { return false; } // If the lines don't match with whitespace ignored, it wasn't the starting index. if (!hunk.get(i).substring(1).strip().equals(target.get(targetIndex).strip())) { return false; } } return true; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy