All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.wings.util.EditTranscriptGenerator Maven / Gradle / Ivy

The newest version!
package org.wings.util;

import javax.swing.event.DocumentEvent;
import javax.swing.text.Document;
import javax.swing.text.Element;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;


/**
 * Takes two strings and generates a list of change operations that transform the
 * source string into the target string.
 * <p>
 * For more information about the algorithm used refer to:
 * <ul>
 * <li>http://www.merriampark.com/ld.htm</li>
 * <li>http://www.msci.memphis.edu/~giri/compbio/f99/ningxu/NOTE10.html</li>
 * <li>http://www.itu.dk/courses/AVA/E2005/StringEditDistance.pdf</li>
 * </ul>
 * <p>
 * Original source extracted from Glazed Lists (http://publicobject.com/glazedlists/).
 * <p>
 * Implementation of Eugene W. Myers' paper, "An O(ND) Difference Algorithm and Its
 * Variations", the same algorithm found in GNU diff.
 * <p>
 * Note that this is a cleanroom implementation of this popular algorithm that is
 * particularly suited for the Java programmer. The variable names are descriptive and the
 * approach is more object-oriented than Myers' sample algorithm.
 * <p>
 * NOTE: the exact (shortest edit script) calculation is currently switched off;
 * {@link #generateEvents(String, String)} always uses the "remove everything, insert
 * everything" approximation.
 *
 * @author Jesse Wilson
 */
public final class EditTranscriptGenerator {

    /**
     * The exact calculation of document events is very memory and CPU intensive.
     * For texts longer than this limit a dumb approximation is used.
     * (OL) Turned off until we actually can do something useful with it. The constant is
     * only referenced by the disabled dispatch in {@link #generateEvents(String, String)}.
     */
    private static final int MAX_LENGTH_FOR_TRANSCRIPT_GENERATION = 0;

    /**
     * Generates an edit transcript that transforms the source string into the target string.
     * The needed changes are noted down as {@link DocumentEvent}s.
     * <p>
     * Currently this always delegates to the approximation: at most one
     * {@link javax.swing.event.DocumentEvent.EventType#REMOVE} event covering the whole source
     * followed by at most one {@link javax.swing.event.DocumentEvent.EventType#INSERT} event
     * covering the whole target. A {@code null} argument produces no event for that side.
     *
     * @param source the original text, may be {@code null}
     * @param target the desired text, may be {@code null}
     * @return A list of {@link DocumentEvent}s either of type
     *         {@link javax.swing.event.DocumentEvent.EventType#INSERT} or
     *         {@link javax.swing.event.DocumentEvent.EventType#REMOVE} with offsets and lengths.
     */
    public static List<DocumentEvent> generateEvents(String source, String target) {
        /* turning off complex handling until we need it */
        // if (((source == null) || source.length() < MAX_LENGTH_FOR_TRANSCRIPT_GENERATION) &&
        //         ((target == null) || target.length() < MAX_LENGTH_FOR_TRANSCRIPT_GENERATION))
        //     return calculateEventsByStringDistance(source, target);
        // else
        return calculateEventsByDumbApproximation(source, target);
    }

    /**
     * Generates the document events from the shortest edit script between both strings.
     * A {@code null} string is treated like the empty string.
     * <p>
     * The returned events are meant to be applied one after another: every offset refers to
     * the text as it looks after all previous events of the list have been applied.
     *
     * @param source the original text, may be {@code null}
     * @param target the desired text, may be {@code null}
     * @return A list of {@link DocumentEvent}s either of type
     *         {@link javax.swing.event.DocumentEvent.EventType#INSERT} or
     *         {@link javax.swing.event.DocumentEvent.EventType#REMOVE} with offsets and lengths.
     */
    private static List<DocumentEvent> calculateEventsByStringDistance(String source, String target) {
        final String from = (source == null) ? "" : source;
        final String to = (target == null) ? "" : target;

        final List<Point> editScript = shortestEditScript(new StringDiffMatcher(from, to));
        final List<SimpleDocumentEvent> actions = new ArrayList<SimpleDocumentEvent>();

        // The matcher is built as (source, target): x walks the source, y walks the target.
        // 'position' is the offset inside the text that is being edited step by step.
        int position = 0;

        // walk through the points, applying changes as they arrive
        Point previousPoint = null;
        for (Point currentPoint : editScript) {
            // the first point is only the starting position
            if (previousPoint != null) {
                final int deltaX = currentPoint.getX() - previousPoint.getX();
                final int deltaY = currentPoint.getY() - previousPoint.getY();

                if (deltaX == deltaY) {
                    // a run of matching characters: just skip over it
                    position += deltaX;
                } else if (deltaX == 1 && deltaY == 0) {
                    // a source character without counterpart: remove it, the position stays
                    addOrUpdateChangeEvent(position, actions, DocumentEvent.EventType.REMOVE);
                } else if (deltaX == 0 && deltaY == 1) {
                    // a target character without counterpart: insert it and step behind it
                    addOrUpdateChangeEvent(position, actions, DocumentEvent.EventType.INSERT);
                    position++;
                } else {
                    // should never be reached
                    throw new IllegalStateException();
                }
            }
            // the next previous point is this current point
            previousPoint = currentPoint;
        }

        return new ArrayList<DocumentEvent>(actions);
    }

    /**
     * Inserts the next atomic change (1 character) into the event queue. Either consolidates it
     * as continuation of the last event in the queue or creates a new event.
     *
     * @param sourceIndex character index the change happens at
     * @param actions     current queue of document events, modified in place
     * @param eventType   what happens at the index?
     */
    private static void addOrUpdateChangeEvent(int sourceIndex, List<SimpleDocumentEvent> actions,
                                               DocumentEvent.EventType eventType) {
        if (!actions.isEmpty()) {
            final SimpleDocumentEvent lastEvent = actions.get(actions.size() - 1);

            // Only an event of the same type can be continued.
            if (lastEvent.getType().equals(eventType)) {
                // An insert continues directly behind the inserted text,
                // a remove continues at the very same offset.
                final boolean insertInterrupted = eventType.equals(DocumentEvent.EventType.INSERT)
                        && sourceIndex != lastEvent.getOffset() + lastEvent.getLength();
                final boolean removeInterrupted = eventType.equals(DocumentEvent.EventType.REMOVE)
                        && sourceIndex != lastEvent.getOffset();

                if (!insertInterrupted && !removeInterrupted) {
                    lastEvent.increaseLength();
                    return;
                }
            }
        }

        // First change, a different event type or a gap: start a new event.
        // The document is a dummy - we do not have a reference.
        actions.add(new SimpleDocumentEvent(sourceIndex, 1, null, eventType));
    }

    /**
     * Our simple implementation of {@link DocumentEvent}.
     */
    private static class SimpleDocumentEvent implements DocumentEvent {
        private final int offset;
        private int length;
        private final Document document;
        private final EventType type;

        public SimpleDocumentEvent(int offset, int length, Document document, EventType type) {
            this.offset = offset;
            this.length = length;
            this.document = document;
            this.type = type;
        }

        /** Element changes are not tracked; always returns {@code null}. */
        @Override
        public ElementChange getChange(Element elem) {
            return null;
        }

        @Override
        public int getOffset() {
            return offset;
        }

        @Override
        public int getLength() {
            return length;
        }

        @Override
        public Document getDocument() {
            return document;
        }

        @Override
        public EventType getType() {
            return type;
        }

        /** Extends this event by one character. */
        void increaseLength() {
            length += 1;
        }

        @Override
        public String toString() {
            return "SimpleDocumentEvent{" +
                    "offset=" + offset +
                    ", length=" + length +
                    ", document=" + document +
                    ", type=" + type +
                    '}';
        }
    }

    /**
     * Calculates the shortest edit script for the specified input.
     *
     * @param input the pair of strings to compare
     * @return the trail of points from (0, 0) to the end point. Between two consecutive
     *         points either x and y grow by the same amount (a run of matching characters),
     *         or only x grows by one, or only y grows by one.
     * @throws IllegalStateException if no solution was found
     */
    private static List<Point> shortestEditScript(StringDiffMatcher input) {
        // calculate limits based on the size of the input matcher
        final int alphaLength = input.getAlphaLength();
        final int betaLength = input.getBetaLength();
        final Point maxPoint = new Point(alphaLength, betaLength);
        final int maxSteps = alphaLength + betaLength;

        // use previous round furthest reaching D-path to determine the
        // new furthest reaching (D+1)-path
        final Map<Integer, Point> furthestReachingPoints = new HashMap<Integer, Point>();

        // walk through in stages, each stage adding one non-diagonal.
        // d == count of non-diagonals in current stage
        for (int d = 0; d <= maxSteps; d++) {

            // exploit diagonals in order to save storing both X and Y
            // diagonal k means every point on k, (k = x - y)
            for (int k = -d; k <= d; k += 2) {
                // the furthest reaching D-path on the left and right diagonals
                // either of these may be null. The terms 'below left' and 'above right'
                // refer to the diagonals that the points are on and may not be
                // representative of the point positions
                final Point belowLeft = furthestReachingPoints.get(k - 1);
                final Point aboveRight = furthestReachingPoints.get(k + 1);

                // the new furthest reaching point to create
                Point point;

                if (furthestReachingPoints.isEmpty()) {
                    // first round: we have matched zero in word X
                    point = new Point(0, 0);
                } else if (k == -d || (k != d && belowLeft.getX() < aboveRight.getX())) {
                    // if this is the leftmost diagonal, or the left edge is further
                    // than the right edge, our new X is that value and our y is one greater
                    // (shift vertically by one)
                    point = aboveRight.createDeltaPoint(0, 1);
                } else {
                    // if the right edge is further than the left edge, use that x
                    // and keep y the same (shift horizontally by one)
                    point = belowLeft.createDeltaPoint(1, 0);
                }

                // match as much diagonal as possible from the previous endpoint
                while (point.isLessThan(maxPoint) && input.matchPair(point.getX(), point.getY())) {
                    point = point.incrementDiagonally();
                }

                // save this furthest reaching path
                furthestReachingPoints.put(k, point);

                // if we're past the end, we have a solution!
                if (point.isEqualToOrGreaterThan(maxPoint)) {
                    return point.trail();
                }
            }
        }

        // no solution was found
        throw new IllegalStateException();
    }

    /**
     * Generates a very simple list of document events to avoid the cost intensive distance
     * calculation: remove all existing characters, add all new characters.
     *
     * @param source the original text; {@code null} yields no remove event
     * @param target the desired text; {@code null} yields no insert event
     * @return A list of {@link DocumentEvent}s either of type
     *         {@link javax.swing.event.DocumentEvent.EventType#INSERT} or
     *         {@link javax.swing.event.DocumentEvent.EventType#REMOVE} with offsets and lengths.
     */
    private static List<DocumentEvent> calculateEventsByDumbApproximation(String source, String target) {
        final List<DocumentEvent> events = new ArrayList<DocumentEvent>(2);
        if (source != null) {
            events.add(new SimpleDocumentEvent(0, source.length(), null, DocumentEvent.EventType.REMOVE));
        }
        if (target != null) {
            events.add(new SimpleDocumentEvent(0, target.length(), null, DocumentEvent.EventType.INSERT));
        }
        return events;
    }

    /**
     * Models an X and Y point in a path. The top-left corner of the axis is the point (0,
     * 0). This is the lowest point in both the x and y dimensions. The algorithm only ever
     * shifts points by non-negative deltas starting at (0, 0).
     */
    private static final class Point {
        private final int x;
        private final int y;
        private Point predecessor = null;

        /**
         * Create a new point with the specified coordinates and no predecessor.
         */
        public Point(int x, int y) {
            this.x = x;
            this.y = y;
        }

        /**
         * Creates a new point from this point by shifting its values as specified. The
         * new point keeps a reference to its source in order to create a path later.
         */
        public Point createDeltaPoint(int deltaX, int deltaY) {
            final Point result = new Point(x + deltaX, y + deltaY);
            result.predecessor = this;
            return result;
        }

        /**
         * Shifts x and y values down and to the right by one.
         */
        public Point incrementDiagonally() {
            final Point result = createDeltaPoint(1, 1);

            // shortcut to the predecessor (to save memory!): if this point was itself
            // reached diagonally, the new point links straight to this point's predecessor
            if (predecessor != null) {
                final int deltaX = result.x - predecessor.x;
                final int deltaY = result.y - predecessor.y;

                if (deltaX == deltaY) {
                    result.predecessor = this.predecessor;
                }
            }

            return result;
        }

        public int getX() {
            return x;
        }

        public int getY() {
            return y;
        }

        /** @return {@code true} if both coordinates are smaller than those of {@code other} */
        public boolean isLessThan(Point other) {
            return x < other.x && y < other.y;
        }

        /** @return {@code true} if both coordinates are at least those of {@code other} */
        public boolean isEqualToOrGreaterThan(Point other) {
            return x >= other.x && y >= other.y;
        }

        @Override
        public String toString() {
            return "(" + x + ',' + y + ')';
        }

        /**
         * Get a trail from the original point to this point. This is a list of all points
         * linked via their predecessor references, oldest point first.
         */
        public List<Point> trail() {
            final List<Point> reverse = new ArrayList<Point>();
            Point current = this;
            while (current != null) {
                reverse.add(current);
                current = current.predecessor;
            }
            Collections.reverse(reverse);
            return reverse;
        }
    }

    /**
     * Determines if the characters at the specified positions of two strings match or not.
     * <p>
     * {@link #alphaAt(int)} and {@link #betaAt(int)} expose the single characters; they are
     * not used by the algorithm itself.
     */
    private static final class StringDiffMatcher {
        private final String alpha;
        private final String beta;

        public StringDiffMatcher(String alpha, String beta) {
            this.alpha = alpha;
            this.beta = beta;
        }

        public int getAlphaLength() {
            return alpha.length();
        }

        public char alphaAt(int index) {
            return alpha.charAt(index);
        }

        public char betaAt(int index) {
            return beta.charAt(index);
        }

        public int getBetaLength() {
            return beta.length();
        }

        /** @return {@code true} if alpha's character at alphaIndex equals beta's at betaIndex */
        public boolean matchPair(int alphaIndex, int betaIndex) {
            return alpha.charAt(alphaIndex) == beta.charAt(betaIndex);
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy