All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.topologi.diffx.algorithm.GuanoAlgorithm Maven / Gradle / Ivy

Go to download

docx4j is a library which helps you to work with the Office Open XML file format as used in docx documents, pptx presentations, and xlsx spreadsheets.

The newest version!
/*
 * This file is part of the DiffX library.
 *
 * For licensing information please see the file license.txt included in the release.
 * A copy of this licence can also be found at
 *   http://www.opensource.org/licenses/artistic-license-2.0.php
 */
package com.topologi.diffx.algorithm;

import java.io.IOException;

import com.topologi.diffx.event.AttributeEvent;
import com.topologi.diffx.event.DiffXEvent;
import com.topologi.diffx.format.DiffXFormatter;
import com.topologi.diffx.format.ShortStringFormatter;
import com.topologi.diffx.sequence.EventSequence;

/**
 * A matrix-based algorithm using weighted events which produces correct results, but may require
 * minor adjustments during formatting.
 * 
 * 

Implementation note: this algorithm effectively detects the correct changes in the * sequences, but will not necessarily return events that can be serialised as well-formed * XML as they stand. * *

Known problem in this implementation: elements that contain themselves tend to * generate events that are harder to serialise as XML. * *

This class is said 'fit' because it will adapt the matrix to the sequences that it * is being given in order to improve performance. * *

Note: The name of this class comes from a contracted version of the features of * this algorithm, as explained below: *

    *
  • Weighted, each token is has a given weight;
  • *
  • Symmetrical, when possible, the algorithm will try to choose a path * that is symmetrical in regards to the arrangement of the tokens;
  • *
  • Matrix, this class uses a matrix for its internal representation;
  • * *
* * @author Christophe Lauret * @version 11 May 2010 */ public final class GuanoAlgorithm implements DiffXAlgorithm { /** * Set to true to show debug info. */ private static final boolean DEBUG = false; // class attributes --------------------------------------------------------------------------- /** * The first sequence of events to test. */ private final EventSequence sequence1; /** * The second sequence of events to test. */ private final EventSequence sequence2; /** * Length of the first sequence to compare. */ private final int length1; /** * Length of the second sequence to compare. */ private final int length2; // state variables ---------------------------------------------------------------------------- /** * Matrix storing the paths. */ private transient Matrix matrix; /** * The state of the elements. */ private transient ElementState estate = new ElementState(); /** * The length of the LCS. */ private transient int length = -1; // constructor -------------------------------------------------------------------------------- /** * Creates a new DiffXAlgorithmBase. * * @param seq0 The first sequence to compare. * @param seq1 The second sequence to compare. */ public GuanoAlgorithm(EventSequence seq0, EventSequence seq1) { this.sequence1 = seq0; this.sequence2 = seq1; this.length1 = seq0.size(); this.length2 = seq1.size(); this.matrix = setupMatrix(seq0, seq1); } // methods ------------------------------------------------------------------------------------ /** * Returns the length of the longest common sequence. * * @return the length of the longest common sequence. */ public int length() { // case when one of the sequences is empty if (this.length1 == 0 || this.length2 == 0) { this.length = 0; } // normal case if (this.length < 0) { this.matrix.setup(this.length1+1, this.length2+1); // allocate storage for array L; for (int i = this.length1; i >= 0; i--) { for (int j = this.length2; j >= 0; j--) { // we reach the end of the sequence (fill with 0) if (i >= this.length1 || j >= this.length2) { this.matrix.set(i, j, 0); } else { // the events are the same if (this.sequence1.getEvent(i).equals(this.sequence2.getEvent(j))) { this.matrix.incrementPathBy(i, j, 1); // different events } else { this.matrix.incrementByMaxPath(i, j); } } } } this.length = this.matrix.get(0, 0); } if (DEBUG) { System.err.println(); for (int i = 0; i < this.sequence1.size(); i++) { System.err.print(ShortStringFormatter.toShortString(this.sequence1.getEvent(i))+"\t"); } System.err.println(); for (int i = 0; i < this.sequence2.size(); i++) { System.err.print(ShortStringFormatter.toShortString(this.sequence2.getEvent(i))+"\n"); } System.err.println(); System.err.println(this.matrix); } return this.length; } /** * Writes the diff sequence using the specified formatter. * * @param formatter The formatter that will handle the output. * * @throws IOException If thrown by the formatter. */ public void process(DiffXFormatter formatter) throws IOException { // handle the case when one of the two sequences is empty processEmpty(formatter); if (this.length1 == 0 || this.length2 == 0) return; // calculate the LCS length to fill the matrix length(); int i = 0; int j = 0; DiffXEvent e1 = this.sequence1.getEvent(i); DiffXEvent e2 = this.sequence2.getEvent(j); // start walking the matrix while (i < this.length1 && j < this.length2) { e1 = this.sequence1.getEvent(i); e2 = this.sequence2.getEvent(j); // we can only insert or delete, priority to insert if (this.matrix.isGreaterX(i, j)) { // follow the natural path and insert if (this.estate.okInsert(e1) && !this.estate.hasPriorityOver(e2, e1)) { if (DEBUG) { System.err.print("["+i+","+j+"]->["+(i+1)+","+j+"] >i +"+ShortStringFormatter.toShortString(e1)); } formatter.insert(e1); this.estate.insert(e1); i++; // if we can format checking at the stack, let's do it } else if (e1.equals(e2) && this.estate.okFormat(e1)) { if (DEBUG) { System.err.print("["+i+","+j+"]->["+(i+1)+","+(j+1)+"] >f "+ShortStringFormatter.toShortString(e1)); } formatter.format(e1); this.estate.format(e1); i++; j++; // go counter current and delete } else if (this.estate.okDelete(e2)) { if (DEBUG) { System.err.print("["+i+","+j+"]->["+i+","+(j+1)+"] >d -"+ShortStringFormatter.toShortString(e2)); } formatter.delete(e2); this.estate.delete(e2); j++; } else { if (DEBUG) { System.err.print("\n(i) case greater X"); } if (DEBUG) { printLost(i, j); } break; } // we can only insert or delete, priority to delete } else if (this.matrix.isGreaterY(i, j)) { // follow the natural and delete if (this.estate.okDelete(e2) && !this.estate.hasPriorityOver(e1, e2)) { if (DEBUG) { System.err.print("["+i+","+j+"]->["+i+","+(j+1)+"] ["+(i+1)+","+(j+1)+"] ["+(i+1)+","+j+"] ["+(i+1)+","+(j+1)+"] =f "+ShortStringFormatter.toShortString(e1)); } formatter.format(e1); this.estate.format(e1); i++; j++; // we can insert the closing tag } else if (this.estate.okInsert(e1) && !(e2 instanceof AttributeEvent && !(e1 instanceof AttributeEvent))) { if (DEBUG) { System.err.print("["+i+","+j+"]->["+(i+1)+","+j+"] =i +"+ShortStringFormatter.toShortString(e1)); } this.estate.insert(e1); formatter.insert(e1); i++; // we can delete the closing tag } else if (this.estate.okDelete(e2) && !(e1 instanceof AttributeEvent && !(e2 instanceof AttributeEvent))) { if (DEBUG) { System.err.print("["+i+","+j+"]->["+i+","+(j+1)+"] =d -"+ShortStringFormatter.toShortString(e2)); } formatter.delete(e2); this.estate.delete(e2); j++; } else { if (DEBUG) { System.err.println("\n(i) case same"); } if (DEBUG) { printLost(i, j); } break; } } else { if (DEBUG) { System.err.println("\n(i) case ???"); } if (DEBUG) { printLost(i, j); } break; } if (DEBUG) { System.err.println(" stack:"+this.estate.currentChange()+ShortStringFormatter.toShortString(this.estate.current())); } } // finish off the events from the first sequence while (i < this.length1) { if (DEBUG) { System.err.println("["+i+","+j+"]->["+(i+1)+","+j+"] _i -"+ShortStringFormatter.toShortString(this.sequence1.getEvent(i))); } this.estate.insert(this.sequence1.getEvent(i)); formatter.insert(this.sequence1.getEvent(i)); i++; } // finish off the events from the second sequence while (j < this.length2) { if (DEBUG) { System.err.println("["+i+","+j+"]->["+i+","+(j+1)+"] _d -"+ShortStringFormatter.toShortString(this.sequence2.getEvent(j))); } this.estate.delete(this.sequence2.getEvent(j)); formatter.delete(this.sequence2.getEvent(j)); j++; } // free some resources // matrix.release(); } // getters and setters ------------------------------------------------------------------------- /** * @see com.topologi.diffx.algorithm.DiffXAlgorithm#getFirstSequence() */ public final EventSequence getFirstSequence() { return this.sequence1; } /** * @see com.topologi.diffx.algorithm.DiffXAlgorithm#getSecondSequence() */ public final EventSequence getSecondSequence() { return this.sequence2; } // private helpers (probably inlined by the compiler) ----------------------------------- /** * Writes the diff sequence using the specified formatter when one of * the sequences is empty. * *

The result becomes either only insertions (when the second sequence is * empty) or deletions (when the first sequence is empty). * * @param formatter The formatter that will handle the output. * * @throws IOException If thrown by the formatter. */ private void processEmpty(DiffXFormatter formatter) throws IOException { // the first sequence is empty, events from the second sequence have been deleted if (this.length1 == 0) { for (int i = 0; i < this.length2; i++) { formatter.delete(this.sequence2.getEvent(i)); } } // the second sequence is empty, events from the first sequence have been inserted if (this.length2 == 0) { for (int i = 0; i < this.length1; i++) { formatter.insert(this.sequence1.getEvent(i)); } } } /** * Determines the most appropriate matrix to use. * *

Calculates the maximum length of the shortest weighted path if both sequences * are totally different, which corresponds to the sum of all the events. * * @param s1 The first sequence. * @param s2 The second sequence. * * @return The most appropriate matrix. */ private static Matrix setupMatrix(EventSequence s1, EventSequence s2) { int max = 0; for (int i = 0; i < s1.size(); i++) { max += s1.getEvent(i).getWeight(); } for (int i = 0; i < s2.size(); i++) { max += s2.getEvent(i).getWeight(); } if (max > Short.MAX_VALUE) return new MatrixInt(); else return new MatrixShort(); } /** * Print information when the algorithm gets lost in the matrix, * ie when it does not know which direction to follow. * * @param i The X position. * @param j The Y position. */ private void printLost(int i, int j) { DiffXEvent e1 = this.sequence1.getEvent(i); DiffXEvent e2 = this.sequence2.getEvent(j); System.err.println("(!) Ambiguous choice in ("+i+","+j+")"); System.err.println(" ? +"+ShortStringFormatter.toShortString(e1)); System.err.println(" ? -"+ShortStringFormatter.toShortString(e2)); System.err.println(" current="+ShortStringFormatter.toShortString(this.estate.current())); System.err.println(" value in X+1="+this.matrix.get(i+1, j)); System.err.println(" value in Y+1="+this.matrix.get(i, j+1)); System.err.println(" equals="+e1.equals(e2)); System.err.println(" greaterX="+this.matrix.isGreaterX(i, j)); System.err.println(" greaterY="+this.matrix.isGreaterY(i, j)); System.err.println(" sameXY="+this.matrix.isSameXY(i, j)); System.err.println(" okFormat1="+this.estate.okFormat(e1)); System.err.println(" okFormat2="+this.estate.okFormat(e2)); System.err.println(" okInsert="+this.estate.okInsert(e1)); System.err.println(" okDelete="+this.estate.okDelete(e2)); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy