All downloads are free. Search and download functionality uses the official Maven repository.

org.pageseeder.diffx.algorithm.HirschbergAlgorithm Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2010-2021 Allette Systems (Australia)
 *    http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.diffx.algorithm;

import org.pageseeder.diffx.api.DiffAlgorithm;
import org.pageseeder.diffx.api.DiffHandler;
import org.pageseeder.diffx.api.Operator;

import java.util.List;

/**
 * An implementation of the Hirschberg algorithm to find the longest common subsequence (LCS).
 *
 * 

Hirschberg proposed a linear space algorithm for the LCS using a divide and conquer approach. * This algorithm (Algorithm C) finds the intersecting point of the LCS sequence with the m/2 th row and solve * the problem recursively. It solves LCS problem in O(mn) time and in O(m+n) space. * *

* See "A linear space algorithm for computing maximal common subsequences" * *

The algorithm has been altered slightly to be able to compute the Shortest Edit Script (SES). * * @author Christophe Lauret * @version 0.9.0 * @link Algorithm for Computing Maximal Common Subsequences D.S. Hirschberg */ public final class HirschbergAlgorithm implements DiffAlgorithm { /** * Set to true to show debug info. */ private static final boolean DEBUG = false; @Override public void diff(List from, List to, DiffHandler handler) { // It is more efficient to supply the sizes than retrieve from lists algorithmC(from.size(), to.size(), from, to, handler); } /** * Algorithm B as described by Hirschberg * * @return the last line of the Needleman-Wunsch score matrix */ private static int[] algorithmB(int m, int n, List a, List b) { int[][] k = new int[2][n + 1]; for (int i = 1; i <= m; i++) { if (n + 1 >= 0) System.arraycopy(k[1], 0, k[0], 0, n + 1); for (int j = 1; j <= n; j++) { if (a.get(i - 1).equals(b.get(j - 1))) { k[1][j] = k[0][j - 1] + 1; } else { k[1][j] = Math.max(k[1][j - 1], k[0][j]); } } } return k[1]; } /** * Algorithm B as described by Hirschberg (in reverse) * *

Implementation note: we traverse the list in reverse, it is more efficient than reversing the lists. */ private static int[] algorithmBRev(int m, int n, List a, List b) { int[][] k = new int[2][n + 1]; for (int i = m - 1; i >= 0; i--) { if (n + 1 >= 0) System.arraycopy(k[1], 0, k[0], 0, n + 1); for (int j = n - 1; j >= 0; j--) { if (a.get(i).equals(b.get(j))) { k[1][n - j] = k[0][n - j - 1] + 1; } else { k[1][n - j] = Math.max(k[1][n - j - 1], k[0][n - j]); } } } return k[1]; } /** * Find the index of the maximum sum of L1 and L2, as described by Hirschberg */ private static int findK(int[] l1, int[] l2, int n) { int m = 0; int k = 0; for (int j = 0; j <= n; j++) { int s = l1[j] + l2[n - j]; if (m < s) { m = s; k = j; } } return k; } /** * Algorithm C as described by Hirschberg */ private static void algorithmC(int m, int n, List a, List b, DiffHandler handler) { if (DEBUG) System.out.print("[m=" + m + ",n=" + n + "," + a + "," + b + "] ->"); if (n == 0) { if (DEBUG) System.out.println(" Step1 N=0"); for (T token : a) { handler.handle(Operator.DEL, token); } } else if (m == 0) { if (DEBUG) System.out.println(" Step1 M=0"); for (T token : b) { handler.handle(Operator.INS, token); } } else if (m == 1) { if (DEBUG) System.out.println(" Step1 M=1"); boolean match = false; T a0 = a.get(0); for (int j = 0; j < n; j++) { if (a0.equals(b.get(j)) && !match) { handler.handle(Operator.MATCH, a0); match = true; } else { handler.handle(Operator.INS, b.get(j)); } } if (!match) handler.handle(Operator.DEL, a0); } else { if (DEBUG) System.out.println(" Step2"); int h = (int) Math.floor(((double) m) / 2); int[] l1 = algorithmB(h, n, a.subList(0, h), b); int[] l2 = algorithmBRev(m - h, n, a.subList(h, a.size()), b); int k = findK(l1, l2, n); // Recursive call algorithmC(h, k, a.subList(0, h), b.subList(0, k), handler); algorithmC(m - h, n - k, a.subList(h, a.size()), b.subList(k, b.size()), handler); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy