// com.topologi.diffx.algorithm.DiffXFitsy (Maven / Gradle / Ivy)
// Show all versions of docx4j-diffx / Show documentation
/*
* This file is part of the DiffX library.
*
* For licensing information please see the file license.txt included in the release.
* A copy of this licence can also be found at
* http://www.opensource.org/licenses/artistic-license-2.0.php
*/
package com.topologi.diffx.algorithm;
import java.io.IOException;
import com.topologi.diffx.event.AttributeEvent;
import com.topologi.diffx.event.DiffXEvent;
import com.topologi.diffx.format.DiffXFormatter;
import com.topologi.diffx.format.ShortStringFormatter;
import com.topologi.diffx.sequence.EventSequence;
/**
* Performs the diff comparison using the LCS algorithm.
*
* Implementation note: this algorithm effectively detects the correct changes in the
* sequences, but will not necessarily return events that can be serialised as well-formed
* XML as they stand.
*
*
* <p>Known problem in this implementation: elements that contain themselves tend to
* generate events that are harder to serialise as XML.
*
*
* <p>This class is said 'fit' because it will adapt the matrix to the sequences that it
* is being given in order to improve performance.
*
*
* <p>Note: The name of this class comes from a contracted version of the features of
* this algorithm, as explained below:
* <ul>
* <li>Weighted, each token has a given weight;</li>
* <li>Symmetrical, when possible, the algorithm will try to choose a path
* that is symmetrical in regards to the arrangement of the tokens;</li>
* <li>Matrix, this class uses a matrix for its internal representation;</li>
* </ul>
*
*
* This class is not synchronised.
*
* @author Christophe Lauret
* @version 8 April 2005
*/
public final class DiffXFitsy extends DiffXAlgorithmBase {
/**
* Set to <code>true</code> to show debug info.
*
* When enabled, {@link #length()} prints both sequences and the matrix to
* <code>System.err</code>, and the matrix walk prints a short trace of each step.
*/
private static final boolean DEBUG = false;
/**
* Set to <code>true</code> to show profiling info.
*
* When enabled, {@link #length()} reports the matrix dimensions and the time taken
* to set the matrix up on <code>System.err</code>.
*/
private static final boolean PROFILE = false;
// state variables ----------------------------------------------------------------------------
/**
* Matrix storing the paths.
*
* It is assigned in the constructor and sized and filled by {@link #length()}.
*/
private transient Matrix matrix;
/**
* The state of the elements.
*
* The matrix walk consults it (<code>okFormat</code>, <code>okInsert</code>,
* <code>okDelete</code>) before each event is sent to the formatter, and updates it
* afterwards.
*/
private transient ElementState estate = new ElementState();
// constructor --------------------------------------------------------------------------------
/**
 * Creates a new DiffXFitsy instance for the two specified sequences.
 *
 * <p>Both sequences are handed to the superclass constructor; the matrix used by this
 * instance is the one returned by <code>setupMatrix</code> for the same two sequences.
 *
 * @param seq0 The first sequence to compare.
 * @param seq1 The second sequence to compare.
 */
public DiffXFitsy(EventSequence seq0, EventSequence seq1) {
  super(seq0, seq1);
  // the matrix is only obtained here; it is sized and filled later by length()
  matrix = setupMatrix(seq0, seq1);
}
// methods ------------------------------------------------------------------------------------
/**
 * Returns the length of the longest common sequence.
 *
 * <p>When one of the sequences is empty the length is 0. Otherwise, while the stored
 * length is still negative, the matrix is set up and filled from the end of both
 * sequences towards their start, and the value found at position (0, 0) is stored as
 * the length. Calls made once the stored length is no longer negative skip that work.
 *
 * <p>Timing and progress messages are written to <code>System.err</code> only when
 * <code>PROFILE</code> is enabled; the sequences and the matrix are dumped only when
 * <code>DEBUG</code> is enabled.
 *
 * @return the length of the longest common sequence.
 */
public int length() {
  // case when one of the sequences is empty
  if (this.length1 == 0 || this.length2 == 0) {
    this.length = 0;
  }
  // normal case
  if (this.length < 0) {
    long t0 = System.currentTimeMillis();
    if (PROFILE) {
      System.err.println("Creating Matrix ["+(this.length1+1)+", "+(this.length2+1)+")");
    }
    this.matrix.setup(this.length1+1, this.length2+1);
    long t1 = System.currentTimeMillis();
    if (PROFILE) {
      System.err.println(t1 - t0+" ms to setup matrix");
    }
    // Report progress roughly every 2% of the rows. The step must be at least 1:
    // for sequences shorter than 50 events 'length1 / 50' is 0 and taking a
    // remainder by 0 throws an ArithmeticException.
    final int progressStep = Math.max(1, this.length1 / 50);
    // fill the matrix, starting from the end of both sequences
    for (int i = super.length1; i >= 0; i--) {
      for (int j = super.length2; j >= 0; j--) {
        // we reach the end of the sequence (fill with 0)
        if (i >= super.length1 || j >= super.length2) {
          this.matrix.set(i, j, 0);
        } else {
          DiffXEvent e1 = this.sequence1.getEvent(i);
          DiffXEvent e2 = this.sequence2.getEvent(j);
          if (e1.equals(e2)) {
            // the events are the same
            this.matrix.incrementPathBy(i, j, maxWeight(e1, e2));
          } else {
            // different events
            this.matrix.incrementByMaxPath(i, j);
          }
        }
      }
      // the percentage counts down from 100 to 0 because the rows are visited backwards
      if (PROFILE && i % progressStep == 0) {
        System.err.println((long) i * 100 / this.length1+"% at "+(System.currentTimeMillis() - t1)+"ms");
      }
    }
    this.length = this.matrix.get(0, 0);
    if (PROFILE) {
      System.err.println(System.currentTimeMillis() - t1+" ms to populate");
    }
  }
  if (DEBUG) {
    System.err.println();
    for (int i = 0; i < this.sequence1.size(); i++) {
      System.err.print(ShortStringFormatter.toShortString(this.sequence1.getEvent(i))+"\t");
    }
    System.err.println();
    for (int i = 0; i < this.sequence2.size(); i++) {
      System.err.print(ShortStringFormatter.toShortString(this.sequence2.getEvent(i))+"\n");
    }
    System.err.println();
    System.err.println(this.matrix);
  }
  return this.length;
}
/**
* Writes the diff sequence using the specified formatter.
*
* @param formatter The formatter that will handle the output.
*
* @throws IOException If thrown by the formatter.
*/
public void process(DiffXFormatter formatter) throws IOException {
// handle the case when one of the two sequences is empty
processEmpty(formatter);
System.err.println("Start processing");
if (this.length1 == 0 || this.length2 == 0) return;
// calculate the LCS length to fill the matrix
length();
int i = 0;
int j = 0;
DiffXEvent e1 = this.sequence1.getEvent(i);
DiffXEvent e2 = this.sequence2.getEvent(j);
// start walking the matrix
while (i < super.length1 && j < super.length2) {
e1 = this.sequence1.getEvent(i);
e2 = this.sequence2.getEvent(j);
// both elements are considered equal
if (e1.equals(e2)) {
// if we can format checking at the stack, let's do it
if (this.estate.okFormat(e1)) {
if (DEBUG) {
System.err.print(" ef "+ShortStringFormatter.toShortString(e1));
}
formatter.format(e1);
this.estate.format(e1);
i++; j++;
// otherwise maybe we should insert.
} else if (this.estate.okInsert(e1)) {
if (DEBUG) {
System.err.print(" ei +"+ShortStringFormatter.toShortString(e1));
}
formatter.insert(e1);
this.estate.insert(e1);
i++;
// or delete.
} else if (this.estate.okDelete(e2)) {
if (DEBUG) {
System.err.print(" ed -"+ShortStringFormatter.toShortString(e2));
}
formatter.delete(e2);
this.estate.delete(e2);
j++;
} else {
if (DEBUG) {
System.err.println("\n(i) case equal");
}
if (DEBUG) {
printLost(i, j);
}
break;
}
// we can only insert or delete, priority to insert
} else
if (this.matrix.isGreaterX(i, j)) {
// follow the natural path and insert
if (this.estate.okInsert(e1)) {
if (DEBUG) {
System.err.print(" >i +"+ShortStringFormatter.toShortString(e1));
}
formatter.insert(e1);
this.estate.insert(e1);
i++;
// go counter current and delete
} else if (this.estate.okDelete(e2)) {
if (DEBUG) {
System.err.print(" >d -"+ShortStringFormatter.toShortString(e2));
}
formatter.delete(e2);
this.estate.delete(e2);
j++;
} else {
if (DEBUG) {
System.err.print("\n(i) case greater X");
}
if (DEBUG) {
printLost(i, j);
}
break;
}
// we can only insert or delete, priority to delete
} else if (this.matrix.isGreaterY(i, j)) {
// follow the natural and delete
if (this.estate.okDelete(e2)) {
if (DEBUG) {
System.err.print(" -->