All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pageseeder.diffx.algorithm.MyersGreedyAlgorithm2 Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2010-2021 Allette Systems (Australia)
 *    http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.diffx.algorithm;

import org.jetbrains.annotations.NotNull;
import org.pageseeder.diffx.api.DiffAlgorithm;
import org.pageseeder.diffx.api.DiffHandler;
import org.pageseeder.diffx.api.Operator;

import java.util.ArrayList;
import java.util.List;

/**
 * An implementation of the greedy algorithm as outlined in Eugene Myers' paper
 * "An O(ND) Difference Algorithm and its Variations".
 *
 * @param <T> The type of token being compared
 *
 * @author Christophe Lauret
 * @version 0.9.0
 * @implNote This alternative version does not compute the snakes: it computes backwards first
 * and reports to the handler during backtrace.
 * @see "An O(ND) Difference Algorithm and its Variations"
 * @see "Myers' Diff Algorithm: The basic greedy algorithm"
 */
public final class MyersGreedyAlgorithm2<T> implements DiffAlgorithm<T> {

  /**
   * Compares the two lists and reports every match, deletion and insertion to the handler.
   *
   * @param from    The list of tokens to compare from
   * @param to      The list of tokens to compare to
   * @param handler Receives the {@code MATCH}, {@code DEL} and {@code INS} operations
   *
   * @throws IllegalStateException If no solution could be found
   */
  @Override
  public void diff(@NotNull List<? extends T> from, @NotNull List<? extends T> to, @NotNull DiffHandler<T> handler) {
    Instance<T> instance = new Instance<>(from, to);
    instance.diff(handler);
  }

  /**
   * An instance of this algorithm for the sequences being compared.
   *
   * @param <T> The type of token
   */
  private static class Instance<T> {

    private final List<? extends T> a;
    private final List<? extends T> b;
    private final int sizeA;
    private final int sizeB;

    /**
     * Captures the two sequences and caches their sizes.
     *
     * @param a The first sequence (its tokens are reported as {@code MATCH} or {@code DEL})
     * @param b The second sequence (its tokens are reported as {@code INS})
     */
    Instance(List<? extends T> a, List<? extends T> b) {
      this.a = a;
      this.b = b;
      this.sizeA = a.size();
      this.sizeB = b.size();
    }

    /**
     * Compute the path to generate the shortest edit sequence (SES) between the two lists.
     *
     * <p>Runs {@link #reverse(Vector, int)} for increasing values of {@code d}, keeping a snapshot
     * of the vector after each pass, then hands the snapshots to {@link #solve(List, DiffHandler)}
     * which reports the operations to the handler.
     *
     * @param handler receives notifications of edits
     *
     * @throws IllegalStateException If no solution was found.
     */
    private void diff(DiffHandler<T> handler) {
      // Maximum length for the path (N + M)
      final int max = this.sizeA + this.sizeB;
      final int delta = this.sizeA - this.sizeB;

      Vector vector = Vector.create(this.sizeA, this.sizeB, false, max);
      // One snapshot per pass: the snapshot for pass d is stored at index d
      List<Vector> vectors = new ArrayList<>();

      // Find the endpoint of the furthest reaching D-path in diagonal k
      int distance = -1;
      for (int d = 0; d <= max; d++) {
        distance = reverse(vector, d);
        vectors.add(vector.snapshot(d, false, delta));
        if (distance >= 0) {
          break;
        }
      }

      if (distance < 0) {
        throw new IllegalStateException("Unable to find a solution!");
      }

      // Compute the snakes from the vectors
      solve(vectors, handler);
    }

    /**
     * Performs one backward pass for the given {@code d}, updating the vector in place.
     *
     * <p>Visits the diagonals {@code k} from {@code -d + delta} to {@code d + delta} in steps of 2,
     * where {@code delta = sizeA - sizeB}. On each diagonal it starts from a neighbouring
     * diagonal's stored {@code x}, walks backwards over equal tokens and stores the resulting
     * {@code x} for {@code k}.
     *
     * @param vector The vector of {@code x} values indexed by diagonal; modified by this method
     * @param d      The pass number
     *
     * @return {@code d} if the origin was reached during this pass, {@code -1} otherwise
     */
    private int reverse(Vector vector, int d) {
      int delta = this.sizeA - this.sizeB;
      for (int k = -d + delta; k <= d + delta; k += 2) {

        // UP (insertion) or LEFT (deletion)
        boolean up = (k == d + delta || (k != -d + delta && vector.getX(k - 1) < vector.getX(k + 1)));
        int x = up ? vector.getX(k - 1) : vector.getX(k + 1) - 1;
        int y = x - k;

        // Follow diagonals (backwards, while the preceding tokens are equal)
        while (x > 0 && y > 0 && a.get(x - 1).equals(b.get(y - 1))) {
          x--;
          y--;
        }

        // Save end points
        vector.setX(k, x);

        // Check if we've reached the end
        if (x <= 0 && y <= 0) {
          return d;
        }
      }

      return -1;
    }

    /**
     * Follows the recorded snapshots forward from {@code (0, 0)} and reports the operations
     * to the handler in sequence order.
     *
     * @param vectors The snapshots recorded after each pass; index {@code d} holds pass {@code d}
     * @param handler Receives the {@code MATCH}, {@code DEL} and {@code INS} operations
     *
     * @throws IllegalStateException If no solution could be found
     */
    private void solve(@NotNull List<Vector> vectors, DiffHandler<T> handler) {
      Point target = new Point(0, 0);
      final int delta = this.sizeA - this.sizeB;
      int x = 0;
      int y = 0;

      // We are following the vectors to get the snakes
      // NOTE(review): target starts at y = 0 and is afterwards built with Math.min(endY, sizeB),
      // so `target.y() > sizeB` does not appear able to become true; the loop effectively runs
      // while target.x() < sizeA. Confirm whether this operand is intentional.
      for (int d = vectors.size() - 1; target.x() < sizeA || target.y() > sizeB; d--) {
        Vector vector = vectors.get(d);
        int k = target.x() - target.y();
        int startX = vector.getX(k);
        int startY = startX - k;

        // The x stored for this diagonal must be the point we are currently at
        if (target.isNotSame(startX, startY)) {
          throw new IllegalStateException("No solution for d:" + d + " k:" + k + " p:" + target + " V:( " + startX + ", " + startY + " )");
        }

        // Same neighbour selection as in reverse(), read from the snapshot of pass d
        boolean up = (k == d + delta || (k != -d + delta && vector.getX(k - 1) < vector.getX(k + 1)));
        int endX = up ? vector.getX(k - 1) : vector.getX(k + 1);
        int endY = endX - (up ? k - 1 : k + 1);

        int matching = Math.min(endX - startX, endY - startY);
        // Reverse: matching first
        for (int i = 0; i < matching; i++) {
          handler.handle(Operator.MATCH, a.get(x));
          x++;
          y++;
        }
        // Insertions and deletions
        while (x < endX && x < sizeA) {
          handler.handle(Operator.DEL, a.get(x));
          x++;
        }
        while (y < endY && y < sizeB) {
          handler.handle(Operator.INS, b.get(y));
          y++;
        }
        // Next point to look up; y is clamped so it never exceeds the size of b
        target = new Point(endX, Math.min(endY, sizeB));
      }
      // Insertions and deletions remaining at the end
      while (x < sizeA) {
        handler.handle(Operator.DEL, a.get(x));
        x++;
      }
      while (y < sizeB) {
        handler.handle(Operator.INS, b.get(y));
        y++;
      }
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy