
// com.sun.xml.treediff.TreeDiff Maven / Gradle / Ivy
// The newest version!
/*
*
* Copyright (c) 1998 Sun Microsystems, Inc. All Rights Reserved.
*
* This software is the confidential and proprietary information of Sun
* Microsystems, Inc. ("Confidential Information"). You shall not
* disclose such Confidential Information and shall use it only in
* accordance with the terms of the license agreement you entered into
* with Sun.
*
* SUN MAKES NO REPRESENTATION OR WARRANTIES ABOUT THE SUITABILITY OF
* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
* TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
*
*/
/**
* @author Ram Jeyaraman
* @version 1.0 November 1998
*/
package com.sun.xml.treediff;
import java.util.*;
import org.w3c.dom.*;
import com.sun.xml.tree.*;
/**
* This class implements the tree diff algorithm.
*/
/* Some of the optimizations, which were done to optimize memory:
*
* 1) The levelContainer data structure is not built for tree1.
* 2) setNodeForId is never done for tree2, as it will not be required.
* 3) setIdForNode is not done for tree1's internal nodes, as it will
* not be required.
* 4) setNodeForId is not done for tree1's freshly created nodes,
* because they will never be queried for, and its parent's id is
* assigned based on the id of the peer node in tree2.
* 5) setNodeForId is not done for tree1's root node in level zero scan,
* since there will be no more tree traversals.
*/
public class TreeDiff extends Object {
// Instance variables
/** Document tree of the first document; the diff phases modify this tree. */
DocumentTree tree1;
/** Document tree of the second document; it serves as the reference tree. */
DocumentTree tree2;
/** Delta operations, kept as strings in the order they were generated. */
Vector deltaOps = new Vector();
// Constructor
/**
 * Builds the document trees for the two documents to be compared.
 *
 * @param doc1 first xml document to be compared.
 * @param doc2 second xml document to be compared.
 */
public TreeDiff(String doc1, String doc2) {
    this.tree1 = new DocumentTree(doc1);
    // NOTE(review): the extra 'true' presumably asks DocumentTree to build
    // the level container, which is only read from tree2 -- confirm
    // against DocumentTree.
    this.tree2 = new DocumentTree(doc2, true);
}
// Static Methods.
/**
 * Appends every element of one vector to the end of another, keeping
 * the source order. Stand-in for Vector.addAll(), which only exists
 * from JDK 1.2 onwards.
 *
 * @param dst the destination vector; it is modified in place.
 * @param src the source vector; its contents are not changed (unless it
 *            is the same object as dst).
 *
 * @return the destination vector, i.e. the same object as dst.
 */
static Vector appendAllToVector(Vector dst, Vector src) {
    // Take a snapshot first, so the number of appended elements is fixed
    // before dst starts growing (matters when dst and src are the same).
    Object[] snapshot = new Object[src.size()];
    src.copyInto(snapshot);
    for (int k = 0; k < snapshot.length; k++) {
        dst.addElement(snapshot[k]);
    }
    return dst;
}
/**
 * Fetches a node stored under the given id and removes it from the
 * table. A table value is either a single Node or a Vector of nodes
 * that share the id; for a Vector the nodes are handed out in the order
 * they were stored, one per call.
 *
 * Once the last node of a Vector has been handed out, the id is removed
 * from the table, so a further fetch reports failure and returns null
 * instead of throwing ArrayIndexOutOfBoundsException on an empty Vector.
 *
 * @param table hashtable mapping an id to a Node or a Vector of Nodes.
 * @param id identifier for the node.
 * @return the node corresponding to the identifier, or null (after
 *         printing a message to stderr) if nothing is stored for it.
 */
static Node fetchNodeForId(Hashtable table, String id) {
    Object obj = table.get(id);
    if (obj instanceof Node) {
        table.remove(id);
        return (Node) obj;
    }
    if (!(obj instanceof Vector)) {
        System.err.println("TreeDiff.fetchNodeForId failed");
        return null;
    }
    Vector vlist = (Vector) obj;
    if (vlist.isEmpty()) {
        // Stale, already exhausted bucket: drop it and report the miss.
        table.remove(id);
        System.err.println("TreeDiff.fetchNodeForId failed");
        return null;
    }
    Node node = (Node) vlist.elementAt(0);
    vlist.removeElementAt(0);
    if (vlist.isEmpty()) {
        // Nothing left under this id; do not leave an empty bucket behind.
        table.remove(id);
    }
    return node;
}
/**
 * Records a node under its id. The first node for an id is stored
 * directly; when a second node arrives for the same id the entry is
 * turned into a Vector holding both, and later nodes are appended to
 * that Vector. If the existing entry is neither a Node nor a Vector, a
 * message is printed to stderr and the table is left untouched.
 *
 * @param table hashtable mapping an id to a Node or a Vector of Nodes.
 * @param id identifier for the node.
 * @param node the node for which the id is being set.
 */
static void storeNodeForId(Hashtable table, String id, Node node) {
    Object existing = table.get(id);
    if (existing == null) {
        // first node seen for this id.
        table.put(id, node);
    } else if (existing instanceof Node) {
        // second node for this id: switch to a list, oldest first.
        Vector bucket = new Vector();
        bucket.addElement(existing);
        bucket.addElement(node);
        table.put(id, bucket);
    } else if (existing instanceof Vector) {
        ((Vector) existing).addElement(node);
    } else {
        System.err.println("TreeDiff.storeNodeForId failed");
    }
}
// Instance Methods.
/**
 * Writes one line of text to standard output.
 *
 * @param output the text to print; a line terminator is added.
 */
private void print(String output) {
    final java.io.PrintStream stdout = System.out;
    stdout.println(output);
}
/**
 * Dumps the raw delta operations to standard output, one operation per
 * line (for debug purposes).
 */
public void printDeltaOps() {
    StringBuffer text = new StringBuffer(1000);
    Enumeration ops = deltaOps.elements();
    while (ops.hasMoreElements()) {
        text.append((String) ops.nextElement());
        text.append('\n');
    }
    System.out.println(text);
}
/**
 * Writes the diffs to stdout, one xmlGen() call per recorded delta
 * operation, framed by a leading and a trailing line.
 *
 * Sample operation strings that were used to exercise xmlGen() by hand:
 *   MOV(a0.b0, a0.b1)    SWP(a0.b0, a0.b1)    CLP(a0.b0, a0.b1)
 *   SPT(a0.b0, 4)        UPT(a0.bo, CENTAUR)
 *   STA(ao.bo, name, ram)    RMA(ao.bo, name)
 *
 * NOTE(review): the leading and trailing lines printed here are empty
 * resp. a single blank; presumably they once carried the enclosing
 * markup -- confirm against the original sources.
 */
public void xmlize() {
    print("");
    Enumeration ops = deltaOps.elements();
    while (ops.hasMoreElements()) {
        xmlGen((String) ops.nextElement());
    }
    print(" ");
}
/**
 * Prints one delta operation in its XML-like form. The first three
 * characters of the operation select the layout; the arguments are cut
 * out of the text between the parentheses.
 *
 * Operation shapes handled (as produced elsewhere in this class):
 *   INS(path)  and  INS(path, (type, data))
 *   DEL(path)
 *   MOV(from, to), SWP(a, b), CLP(into, from)
 *   SPT(path, count), UPT(path, tag), RMA(path, name)
 *   STA(path, (name, value))  -- STA(path, name, value) is accepted too
 *
 * NOTE(review): the argument text is cut at the first ')' found, so a
 * value that itself contains ')' is presumably truncated -- confirm
 * whether values need escaping upstream.
 *
 * @param operation the delta operation string; must be at least three
 *        characters long.
 */
private void xmlGen(String operation) {
    String op = operation.substring(0, 3);
    print(" <" + op + ">");
    if (op.equals("INS")) {
        int index = operation.indexOf(',', 4);
        if (index == -1) {
            // INS(path): only a path is present.
            print("\t");
            print("\t " +
                  operation.substring(4, operation.indexOf(')', 4)));
            print("\t ");
        } else {
            // INS(path, (type, data))
            print("\t");
            print("\t " + operation.substring(4, index));
            print("\t ");
            index = index + 3; // skip over ", ("
            print("\t");
            print("\t ");
            int nextIndex = operation.indexOf(',', index);
            print("\t\t" + operation.substring(index, nextIndex));
            print("\t ");
            print("\t ");
            print("\t\t" +
                  operation.substring(++nextIndex,
                                      operation.indexOf(')', nextIndex)));
            print("\t ");
            print("\t");
        }
    } else if (op.equals("DEL")) {
        print("\t");
        print("\t " +
              operation.substring(4, operation.indexOf(')', 4)));
        print("\t ");
    } else if (op.equals("MOV") || op.equals("SWP") || op.equals("CLP") ||
               op.equals("SPT") || op.equals("UPT") || op.equals("RMA")) {
        // All two-argument operations share one layout: OP(first, second).
        int index = operation.indexOf(',', 4);
        print("\t");
        print("\t " + operation.substring(4, index));
        print("\t ");
        print("\t");
        print("\t " +
              operation.substring(index + 2,
                                  operation.indexOf(')', index + 2)));
        print("\t ");
    } else if (op.equals("STA")) {
        int index = operation.indexOf(',', 4);
        print("\t");
        print("\t " + operation.substring(4, index));
        print("\t ");
        // The attribute pair is emitted as "(name, value)"; step over the
        // opening parenthesis so it does not end up in the name.
        int nameStart = index + 2;
        if (operation.startsWith("(", nameStart)) {
            nameStart++;
        }
        int nextIndex = operation.indexOf(',', nameStart);
        print("\t");
        print("\t " + operation.substring(nameStart, nextIndex));
        print("\t ");
        print("\t");
        print("\t " +
              operation.substring(nextIndex + 2,
                                  operation.indexOf(')', nextIndex + 2)));
        print("\t ");
    }
    // close the element opened at the top.
    print(" </" + op + ">");
}
/**
* Generates the differences in the attribute nodes.
* Note: Attribute names and values are case-insensitive.
*
* @param refNode the reference node.
* @param node the node being compared.
* @param updatedNode the node that was previously updated (setTag()).
*/
private void generateAttrDiffs(ElementEx refNode, ElementEx node,
ElementEx updatedNode) {
/* I think that the updatedNode is not really necessary, in the
* ideal case. i.e., if the DOM api were to allow a setTag() to
* be done on an ElementNode, then we do not need this extra
* parameter at all. Since UPT operation is not using setTag(),
* but instead creating a new node.
*/
NamedNodeMap refAttrs = refNode.getAttributes();
NamedNodeMap nodeAttrs = null;
if (updatedNode == null)
nodeAttrs = node.getAttributes();
else
nodeAttrs = updatedNode.getAttributes();
int refAttrSize = refAttrs.getLength();
int nodeAttrSize = nodeAttrs.getLength();
// if ref node has no attributes.
if (refAttrSize == 0) {
// delete attrs if any, from the node being compared.
if (nodeAttrSize != 0) {
for (int i = 0; i < nodeAttrSize; i++) {
Attr attr = (Attr) nodeAttrs.item(i);
String name = attr.getName();
//nodeAttrs.removeNamedItem(name);
deltaOps.addElement("RMA(" +
DocumentTree.getNodePath(node) + ", " + name + ")");
}
}
}
// if ref node has attributes.
if (refAttrSize != 0) {
// if node being compared has no attrs.
if (nodeAttrSize == 0) {
for (int i = 0; i < refAttrSize; i++) {
Attr attr = (Attr) refAttrs.item(i);
deltaOps.addElement("STA(" +
DocumentTree.getNodePath(node) + ", (" +
attr.getName() + ", " + attr.getValue() + "))");
}
return;
}
// well now, we gotto find the what matches and not.
// build a hashtable of the node attributes.
Hashtable attrsTable = new Hashtable();
for (int i = 0; i < nodeAttrSize; i++) {
Attr attr = (Attr) nodeAttrs.item(i);
String name = attr.getName();
attrsTable.put(name.toLowerCase(), attr);
}
// find what matches.
for (int i = 0; i < refAttrSize; i++) {
Attr refAttr = (Attr) refAttrs.item(i);
String name = refAttr.getName().toLowerCase();
Attr attr = (Attr) attrsTable.get(name);
if (attr != null) {
attrsTable.remove(name);
if (refAttr.getValue().equalsIgnoreCase(attr.getValue()))
continue;
}
// no match found (so add the attribute), or
// match found but not same value (so set the value).
deltaOps.addElement("STA(" +
DocumentTree.getNodePath(node) + ", (" +
refAttr.getName() + ", " + refAttr.getValue() + "))");
}
// deal with the attributes in the node being compared,
// which do not have a match.
Enumeration enum = attrsTable.elements();
while (enum.hasMoreElements()) {
Attr attr = (Attr) enum.nextElement();
deltaOps.addElement("RMA(" +
DocumentTree.getNodePath(node) + ", " +
attr.getName() + ")");
}
}
return;
}
/**
 * Matches the leaf nodes of the two DOM trees based on their content,
 * one leaf type at a time (text, comment, CDATA section, processing
 * instruction, entity reference, element -- in that order).
 *
 * @return a vector containing all the elements to be deleted in tree1.
 */
public Vector matchingPhase() {
    // both trees need their leaf information before matching.
    tree1.buildLeafInfo();
    tree2.buildLeafInfo();
    final int[] leafTypes = {
        Node.TEXT_NODE,
        Node.COMMENT_NODE,
        Node.CDATA_SECTION_NODE,
        Node.PROCESSING_INSTRUCTION_NODE,
        Node.ENTITY_REFERENCE_NODE,
        Node.ELEMENT_NODE
    };
    Vector toDelete = new Vector();
    for (int k = 0; k < leafTypes.length; k++) {
        Vector unmatched = matchLeafType(leafTypes[k]);
        TreeDiff.appendAllToVector(toDelete, unmatched);
    }
    // the leaf info is not needed beyond this point; drop the references.
    tree1.setLeafNodeInfo(null);
    tree2.setLeafNodeInfo(null);
    return toDelete;
}
/**
* matches the leaf nodes of a specific leafType based on their content.
*
* @return a vector of node of specific leafType to be deleted in tree1.
*/
Vector matchLeafType(int nodeType) {
// get the right hashtable based on the leaf node type.
Hashtable table1 = (Hashtable) tree1.getLeafNodeInfo().
get(new Integer(nodeType));
Hashtable table2 = (Hashtable) tree2.getLeafNodeInfo().
get(new Integer(nodeType));
// nodes to be deleted in tree1.
Vector deleteList = new Vector();
// the prime objective here is to isolate the elements
// in table2 which qualify as insertions, and identify
// elements in table1 which qualify for deletions. So,
// when a match is found, we just ignore it as well as
// remove it from the tables.
Enumeration enum = table2.keys();
while (enum.hasMoreElements()) {
Integer hashKey = (Integer) enum.nextElement();
Vector list1 = (Vector) table1.get(hashKey);
Vector list2 = (Vector) table2.get(hashKey);
// if there is no matching hashKey in table1,
// move all the elements in list1 to insertList.
if (list1 == null) {
continue;
}
// since there is a potential match in table1, find a
// match in list1 for each element enlisted in list2.
// If a match exists, remove the matching element from
// list1 and the corresponding element from list2.
// Else, if a match does not exist, move the element
// into the insertList.
for (int i = 0; i < list2.size(); i++) {
Node leaf2 = (Node) list2.elementAt(i);
String s2 = DocumentTree.getDataString(leaf2);
Node chosenLeaf = null;
String posPath2 = DocumentTree.getPositionPath(leaf2);
for (int j = 0, maxCount = 0, len2 = posPath2.length();
j < list1.size(); j++) {
Node leaf1 = (Node) list1.elementAt(j);
String s1 = DocumentTree.getDataString(leaf1);
// if there are duplicate nodes, then find the
// closest node possible!
if (s2.equals(s1)) {
int matchCount = DocumentTree.charactersMatched(
DocumentTree.getPositionPath(leaf1),
posPath2, true);
if (matchCount > maxCount) {
chosenLeaf = leaf1;
if (matchCount == len2)
break; // exact match.
maxCount = matchCount;
}
}
}
if (chosenLeaf != null) { // match has been found. Remove it!
String id = tree2.getIdForNode(leaf2);
tree1.setIdForNode(chosenLeaf, id);
tree1.setNodeForId(id, chosenLeaf);
list1.removeElement(chosenLeaf);
}
}
}
// now table2 should be empty. The remaining entries in
// table1 need to be added to the deleteList.
enum = table1.keys();
while (enum.hasMoreElements()) {
Integer hashKey = (Integer) enum.nextElement();
Vector list1 = (Vector) table1.get(hashKey);
//deleteList.addAll(list1);
TreeDiff.appendAllToVector(deleteList, list1);
}
return deleteList;
}
/**
 * Generates the deletion operations. If all children of a parent
 * node are to be deleted, the operation is represented by a single
 * delete operation on the parent, instead of individual ones on the
 * children. The list is worked off iteratively: each pass handles the
 * first node together with its siblings on the list, and either emits
 * their deletions or replaces them by their common parent. The loop
 * ends when the list is empty.
 *
 * @param deleteList list of nodes in tree1 to be deleted; it is
 *        consumed by this method. May be null or empty.
 */
public void pruningPhase(Vector deleteList) {
    if (deleteList == null || deleteList.size() == 0)
        return;
    while (deleteList.size() != 0) {
        // get the parent.
        Node node1 = (Node) deleteList.elementAt(0);
        Node parent1 = node1.getParentNode();
        if (parent1 == null) {
            System.err.println("TreeDiff: DOM tree corrupted");
            return;
        }
        // if the node to be deleted is the root, deleting it covers
        // everything else that might still be on the list.
        if (!(parent1 instanceof ElementEx) ||
            node1 == tree1.getRootNode()) {
            // add delta operation to the list.
            String path = DocumentTree.getNodePath(node1);
            deltaOps.addElement("DEL(" + path + ")");
            // delete node from tree1.
            tree1.delete(node1, false);
            // empty the deleteList.
            deleteList.removeAllElements();
            return;
        }
        // find possible siblings (includes the same node as well).
        Vector siblings = new Vector();
        for (int i = 0, size = deleteList.size(); i < size; i++) {
            Node node2 = (Node) deleteList.elementAt(i);
            // compare from parent1, which is known to be non-null; a node
            // without a parent is skipped here and reported once it
            // reaches the head of the list.
            if (parent1.equals(node2.getParentNode())) {
                siblings.addElement(node2);
            }
        }
        // if the parent's childCount is same as the sibling list,
        // put the parent node in the deleteList and remove the
        // nodes in the sibling list from the deleteList.
        boolean commonParentFound = false;
        if (((NodeList) parent1).getLength() == siblings.size()) {
            commonParentFound = true;
            deleteList.addElement(parent1);
        }
        // generate the deltaOperation.
        for (int i = 0, size = siblings.size(); i < size; i++) {
            Node node = (Node) siblings.elementAt(i);
            if (!commonParentFound) {
                // add delta operation to the list.
                String path = DocumentTree.getNodePath(node);
                deltaOps.addElement("DEL(" + path + ")");
                // delete node from tree1.
                tree1.delete(node, false);
            }
            deleteList.removeElement(node);
        }
    }
}
/**
 * Generates the tree modification operations.
 *
 * The levels of tree2 are processed from the deepest one up to, but not
 * including, the first entry of the level container (modifyLevel()).
 * Afterwards the root is handled here: depending on whether tree1 holds
 * a node matching the id of tree2's root, the root of tree1 is replaced
 * by a fresh node (DEL + INS), replaced by the matching node (MOV), or
 * kept. Attribute differences of the root are generated at the end.
 *
 * All diagnostics go to stderr; stdout is reserved for the diff output.
 */
public void modificationPhase() {
    // This hashtable is again a cluge for not being able to setTag
    // values in element nodes: it maps a replacement node to the node
    // it replaced.
    Hashtable updatedNodeList = new Hashtable();
    Vector levelContainer = tree2.getLevelContainer();
    int level = levelContainer.size();
    for (; level > 1; level--) {
        modifyLevel((Vector) levelContainer.elementAt(level - 1),
                    updatedNodeList);
    }
    /* dealing with the root level. */
    Node root1 = tree1.getRootNode();
    Node root2 = tree2.getRootNode();
    // tree2's rootNode is empty (no document tree).
    if (level <= 0 || root2 == null) {
        System.err.println("TreeDiff: DOM tree is null");
        return;
    }
    // level zero scan.
    String id2 = tree2.getIdForNode(root2);
    if (id2 == null) {
        System.err.println("TreeDiff: possibly no PCDATA in document");
        return;
    }
    Node node1 = tree1.getNodeForId(id2, root2);
    // no node matching id in tree1. (i.e) tree1's root can be null
    // or it might have a different node. So, discard tree1's root
    // node and create a new one similar to the one in tree2.
    if (node1 == null) {
        // tree2's root is usable if it is a leaf OR an element; it is
        // invalid only when it is neither of the two.
        boolean root2IsLeaf = DocumentTree.isLeafInstance(root2);
        if (!root2IsLeaf && !(root2 instanceof ElementEx)) {
            System.err.println("TreeDiff: invalid root node in DOM tree");
            return;
        }
        // generate the delta operation.
        String oprtn = null;
        if (root2IsLeaf) {
            String[] strArr = DocumentTree.getCompleteData(root2);
            oprtn = "INS(" + DocumentTree.getNodePath(root2) + ", (" +
                strArr[0] + ", " + strArr[1] + "))";
        } else {
            oprtn = "INS(" + DocumentTree.getNodePath(root2) + ")";
        }
        if (root1 != null)
            deltaOps.addElement("DEL(" + DocumentTree.getNodePath(root1) +
                                ")");
        deltaOps.addElement(oprtn);
        // Well, if the algorithm went thru fine, at this
        // point tree2's root node should not have any children.
        // so, deep cloning is a farce.
        Node newRoot1 = null;
        if (root2IsLeaf) {
            newRoot1 = tree1.createLeafNode(root2);
        } else {
            newRoot1 = tree1.createElementNode(root2);
        }
        // replace the existing root node in tree1.
        tree1.setRootNode(newRoot1);
        // generate attr diffs, if any.
        if (newRoot1 instanceof ElementEx) {
            generateAttrDiffs((ElementEx) root2,
                              (ElementEx) newRoot1, null);
        }
        return;
    }
    // matching node is present in tree1, but is not root node.
    // make the matching node, tree1's root node.
    if (node1 != root1) {
        // tree1's rootnode is null ?? boy, something's really wrong..
        if (root1 == null) {
            System.err.println("TreeDiff: DOM tree corrupted");
            return;
        }
        deltaOps.addElement("MOV(" + DocumentTree.getNodePath(node1) +
                            ", " + DocumentTree.getNodePath(root2) +
                            ")");
        // remove the matching node from tree1.
        Node parent1 = node1.getParentNode();
        if (parent1 == null) {
            System.err.println("TreeDiff: DOM tree corrupted");
            return;
        }
        parent1.removeChild(node1);
        // replace the existing root node in tree1.
        tree1.setRootNode(node1);
    }
    // matching node is present in tree1, and is (now) the root node.
    // generate attr diffs, if any.
    if (node1 instanceof ElementEx) {
        // get the node that was updated to extract attributes.
        Node updatedNode = (Node) updatedNodeList.get(node1);
        generateAttrDiffs((ElementEx) root2,
                          (ElementEx) node1, (ElementEx) updatedNode);
    }
}
/**
 * This is the core piece of the differencing algorithm. It processes
 * the nodes of one level of tree2, one sibling group (i.e. one parent
 * in tree2) per loop iteration, and generates the delta operations
 * that make tree1 look like tree2 at that level.
 *
 * For each parent in tree2 ("parent2") the children are looked up in
 * tree1 by their id. Then one of two cases is handled:
 * 1) no child has a match in tree1: a new parent with copies of all
 *    children is created in tree1 and INS operations are generated;
 * 2) at least one child has a match: a parent in tree1 is chosen, the
 *    other matched children are brought under it (CLP/DEL or MOV),
 *    unmatched children are inserted (INS), the children are re-ordered
 *    (SWP), surplus children are split off (SPT) and the tag is updated
 *    (UPT) if it differs.
 * In both cases the id assigned to parent2 is registered for the
 * corresponding parent in tree1.
 *
 * The method returns early, abandoning the rest of the level, whenever
 * one of its consistency checks fails (a message goes to stderr).
 *
 * @param toBeCompletedList list of node at a particular level; the
 *        processed nodes are removed from it.
 * @param updatedNodeList list of nodes whose tags were updated; maps the
 *        replacement node to the node it replaced.
 */
void modifyLevel(Vector toBeCompletedList, Hashtable updatedNodeList) {
while (toBeCompletedList.size() != 0) {
// get the sibling list (note: the parent node has it all).
Node _node = (Node) toBeCompletedList.elementAt(0);
ElementNode parent2 = (ElementNode) _node.getParentNode();
// assign id to parent2.
String newId = tree2.assignIdToParent(parent2);
// find the matching nodes in tree1, for nodes in the sibling list.
// place the matching nodes in matchingList, and
// the unmatched ones in the insertList.
Vector matchingList = new Vector();
Vector insertList = new Vector();
// posForNode1: matched tree1 node -> index of its peer under parent2.
Hashtable posForNode1 = new Hashtable();
// nodeForId1: id -> tree1 node(s), consumed later by the swap step.
Hashtable nodeForId1 = new Hashtable();
for (int i = 0, size = parent2.getLength(); i < size; i++) {
Node node2 = parent2.item(i);
String id2 = tree2.getIdForNode(node2);
Node node1 = tree1.getNodeForId(id2, node2);
if (node1 != null) {
// used in move operation.
posForNode1.put(node1, new Integer(i));
matchingList.addElement(node1);
// remember the nodeToId info for the swap oprn.
TreeDiff.storeNodeForId(nodeForId1, id2, node1);
// make sure to remove the matched node in tree1.
tree1.removeNodeForId(id2, node1);
// check the attribute node diffs.
if (node2 instanceof ElementEx) {
// get the node that was updated to extract attributes.
Node updatedNode = (Node) updatedNodeList.get(node1);
// node1 has tobe an ElementNode!
generateAttrDiffs((ElementEx) node2,
(ElementEx) node1, (ElementEx) updatedNode);
}
} else {
insertList.addElement(node2);
}
// remove the node from toBeCompletedList.
toBeCompletedList.removeElement(node2);
}
// CASE 1:
// if none of the nodes in the siblingList match, then
// create a new parent node and populate it with children
// (clones of the nodes in the siblingList) and
// attach the new parent sub-tree to tree1.
if (matchingList.size() == 0 &&
insertList.size() == parent2.getLength()) {
// create a new sub-tree.
ElementNode parent1 = (ElementNode) tree1.
createElementNode(parent2);
Node child1 = null, child2 = null;
for (int i = 0, size = parent2.getLength(); i < size; i++) {
child2 = parent2.item(i);
if (child2 instanceof ElementEx) {
child1 = tree1.createElementNode(child2);
} else {
child1 = tree1.createLeafNode(child2);
}
parent1.appendChild(child1);
}
// assign id to the parent1.
tree1.setNodeForId(newId, parent1);
// attach the new parent subtree to tree1.
// for now, we use the root node as a surrogate(?) parent.
if (parent2 == tree2.getRootNode()) { // this will not be true.
tree1.setRootNode(parent1);
} else {
// NOTE(review): oldRoot1 is assigned but not read anywhere in this
// method.
Node oldRoot1 = tree1.getRootNode();
if (tree1.getRootNode() == null) {
// create a new root node.
// we did not clone since we do not want the
// attribute set to be carried over as well.
Node newRoot = tree1.createElementNode("root1");
tree1.setRootNode(newRoot);
deltaOps.addElement("INS(" +
DocumentTree.getNodePath(newRoot) + ")");
}
if (DocumentTree.isLeafInstance(tree1.getRootNode())) {
Node newRoot = tree1.createElementNode("root1");
// this makes the oldRoot the child of the new one.
tree1.insertRootNode(newRoot);
deltaOps.addElement("INS(" +
DocumentTree.getNodePath(newRoot) + ")");
}
// find a node in tree1 to place the sub-tree.
// if a node at level (l-2) is not found, use
// the root node instead.
// indices[k] is filled from the bottom up with the result of
// getIndex() for parent2 and then for each of its ancestors in turn
// (presumably the child index within its parent -- confirm against
// DocumentTree.getIndex).
int level = DocumentTree.getLevel(parent2);
int[] indices = new int[level + 1];
Node tmpNode2 = parent2;
Node tmpParent2 = parent2.getParentNode();
while (level >= 0) {
indices[level--] = DocumentTree.
getIndex(tmpParent2, tmpNode2);
tmpNode2 = tmpParent2;
tmpParent2 = tmpNode2.getParentNode();
}
// find an appropriate grand parent in tree1: follow the recorded
// indices down from the root for as long as elements are found.
ElementNode grandParent1 = (ElementNode)
tree1.getRootNode();
for (int i = 1; i < (indices.length - 1); i++) {
Node possibleParent1 = grandParent1.item(indices[i]);
if (!(possibleParent1 instanceof ElementEx))
break;
grandParent1 = (ElementNode) possibleParent1;
}
// insert at the position parent2 has in tree2 if a node exists
// there, otherwise append.
Node refNode = grandParent1.
item(indices[indices.length - 1]);
if (refNode != null)
grandParent1.insertBefore(parent1, refNode);
else
grandParent1.appendChild(parent1);
//tree1.getRootNode().appendChild(parent1);
}
// generate the deltaOps.
deltaOps.addElement("INS(" +
DocumentTree.getNodePath(parent1) + ")");
for (int i = 0, size = parent1.getLength(); i < size; i++) {
Node refNode = parent2.item(i);
Node insNode = parent1.item(i);
if (insNode instanceof ElementEx) { // will this be true ??
// generate attr diffs, if any.
// (the inner check repeats the condition of the enclosing if.)
if (insNode instanceof ElementEx) {
// child1 has tobe an ElementNode!
generateAttrDiffs((ElementEx) refNode,
(ElementEx) insNode, null);
}
deltaOps.addElement("INS(" +
DocumentTree.getNodePath(insNode) + ")");
} else {
String[] strArr = DocumentTree.getCompleteData(insNode);
String oprtn = "INS(" +
DocumentTree.getNodePath(insNode) +
", (" + strArr[0] + ", " + strArr[1] + "))";
deltaOps.addElement(oprtn);
}
}
}
// CASE 2:
// if some of the nodes in the siblingList have a match
// in tree1, then find a parent in tree1 which has the most
// children in the matchedList. Choose that parent, and move
// the rest of the matched nodes to the parent (this might
// involve collapse and move operations. Insert the nodes in
// the unmatchedList into the parent. Re-order the children
// of the parent node, and then split it if necessary to bring
// it in sync with its peer in tree2.
if (matchingList.size() != 0 &&
insertList.size() != parent2.getLength()) {
// holds the possible parent nodes and their matched children.
Hashtable siblingInfo = new Hashtable();
// holds the possible parents to be chosen.
Vector chosenParents = new Vector();
// find a list of parents in tree1 which has most
// siblings in the siblingList (this can be more than one).
for (int i = 0, maxChildren = 0,
size = matchingList.size(); i < size; i++) {
Node node = (Node) matchingList.elementAt(i);
Node parent = node.getParentNode();
// place the matched nodes against their parents.
Vector clist = (Vector) siblingInfo.get(parent);
if (clist == null) {
clist = new Vector();
}
clist.addElement(node);
// keep track of the parent node with the max children.
int matchedChildren = clist.size();
if (maxChildren < matchedChildren) {
maxChildren = matchedChildren;
chosenParents.removeAllElements();
chosenParents.addElement(parent);
} else if (maxChildren == matchedChildren) {
chosenParents.addElement(parent);
}
siblingInfo.put(parent, clist);
}
// choose an appropriate parent from the list of chosen parents.
// This is based on the closeness of a particular parents'
// nodepath to its peer in tree2.
ElementNode chosenParent = null;
if (chosenParents.size() == 1)
chosenParent = (ElementNode) chosenParents.elementAt(0);
else
chosenParent = (ElementNode) DocumentTree.
findClosestMatch(parent2, chosenParents);
// move the other matched nodes to the chosenParent.
siblingInfo.remove(chosenParent);
Enumeration otherParents = siblingInfo.keys();
while (otherParents.hasMoreElements()) {
Node parent = (Node) otherParents.nextElement();
Vector clist = (Vector) siblingInfo.get(parent);
if (clist == null) { // cannot be null
System.err.println("TreeDiff: corrupted structures");
return;
}
int childCount = ((NodeList) parent).getLength();
int matchCount = clist.size();
if (childCount < matchCount) {
System.err.println("TreeDiff: corrupted structures");
return;
} else if (childCount == matchCount) {
// collapse operation: every child of this parent is wanted, so the
// whole parent is merged into the chosenParent.
deltaOps.addElement("CLP(" +
DocumentTree.getNodePath(chosenParent) + ", " +
DocumentTree.getNodePath(parent) + ")");
tree1.collapse(chosenParent, parent);
// delete the parent branch: walk up while the ancestor has exactly
// one child, then delete the topmost node of that chain.
Node child = null;
Node rootNode1 = tree1.getRootNode();
do {
child = parent;
parent = child.getParentNode();
if (parent == null) {
System.err.
println("TreeDiff(CLP): branch not rooted");
return;
}
if (parent == rootNode1 &&
((NodeList) parent).getLength() ==1) {
System.err.
println("TreeDiff(CLP): corrupted DOM Tree");
return;
}
} while (((NodeList) parent).getLength() == 1);
deltaOps.addElement("DEL(" +
DocumentTree.getNodePath(child) + ")");
tree1.delete(child, false);
} else {
// move operation: only some children of this parent are wanted.
// The source path is taken before the move, the target path after.
for (int i = 0, size = clist.size(); i < size; i++) {
Node node = (Node) clist.elementAt(i);
String oprtn = "MOV(" +
DocumentTree.getNodePath(node)+ ", ";
int pos = ((Integer) posForNode1.get(node)).
intValue();
tree1.move(node, chosenParent, pos);
deltaOps.addElement(oprtn +
DocumentTree.getNodePath(node) + ")");
}
}
}
// insert the unmatched nodes to the chosenParent.
Node node1 = null, node2 = null;
for (int i = 0, size = insertList.size(); i < size; i++) {
node2 = (Node) insertList.elementAt(i);
if (node2 instanceof ElementEx) {
node1 = tree1.createElementNode(node2);
} else {
node1 = tree1.createLeafNode(node2);
}
chosenParent.appendChild(node1);
// generate attr diffs, if any.
if (node1 instanceof ElementEx) {
// child1 has tobe an ElementNode!
generateAttrDiffs((ElementEx) node2,
(ElementEx) node1, null);
}
// assign an id to the inserted node.
TreeDiff.storeNodeForId(nodeForId1,
tree2.getIdForNode(node2), node1);
if (node2 instanceof ElementEx) {
// this block will never be entered!, but still..
} else {
// NOTE(review): the path in this INS is taken from node2 (the tree2
// node), whereas the INS operations of case 1 use the path of the
// tree1 node -- confirm this is intended.
String[] strArr = DocumentTree.getCompleteData(node2);
String oprtn = "INS(" + DocumentTree.getNodePath(node2)
+ ", (" + strArr[0] + ", " + strArr[1] + "))";
deltaOps.addElement(oprtn);
}
}
// diagnostics.
if (chosenParent.getLength() < parent2.getLength()) {
System.err.println("Error: chosenParent childcount wrong.");
return;
}
// re-order the children of chosenParent through swap
// operation, and split the node if needed.
// nodePosition: child of chosenParent -> its current index.
Hashtable nodePosition = new Hashtable();
for (int i = 0, len = chosenParent.getLength(); i < len; i++) {
nodePosition.put(chosenParent.item(i), new Integer(i));
}
// swap operation: for each position i of parent2, bring the tree1
// peer of that child to position i of the chosenParent.
for (int i = 0, len = parent2.getLength(); i < len; i++) {
Node peer2 = parent2.item(i);
String id = tree2.getIdForNode(peer2);
//Node peer1 = (Node) tree1.getNodeForId(id, peer2);
// fetchNodeForId consumes the entry, so children sharing an id are
// paired with successive tree1 nodes.
Node peer1 = (Node) TreeDiff.fetchNodeForId(nodeForId1, id);
// diagnostics.
Integer tmpInt = (Integer) nodePosition.get(peer1);
if (tmpInt == null) {
System.err.println("Error: no match found");
return;
}
int pos = ((Integer) nodePosition.get(peer1)).intValue();
// swap the node if out of place.
if (pos != i) {
deltaOps.addElement("SWP(" +
DocumentTree.getNodePath(peer1) +
", " +
DocumentTree.getNodePath(chosenParent.item(i)) +
")");
// keep the position table in step with the swap.
nodePosition.put(peer1, new Integer(i));
nodePosition.put(chosenParent.item(i),
new Integer(pos));
tree1.swap(chosenParent, pos, i);
}
}
// split operation: chosenParent has more children than parent2.
int len1 = chosenParent.getLength();
int len2 = parent2.getLength();
if (len1 != len2) {
// Erroneous: if len1 is < len2, the algo is wrong!
if (len1 < len2) {
System.err.println("TreeDiff: diff operation failed");
return;
}
// if the chosenParent happens to be the root node,
// then create a new root and make the chosenParent
// its child.
if (chosenParent == tree1.getRootNode()) {
Node newRoot = tree1.createElementNode("root1");
// this makes the oldRoot the child of the new one.
tree1.insertRootNode(newRoot);
deltaOps.addElement("INS(" +
DocumentTree.getNodePath(newRoot) + ")");
}
deltaOps.addElement("SPT(" +
DocumentTree.getNodePath(chosenParent) + ", " +
len2 + ")");
tree1.split(chosenParent, len2);
}
// assign id to the chosenParent.
tree1.setNodeForId(newId, chosenParent);
// make sure tags are the same.
if (chosenParent.getTagName().
equalsIgnoreCase(parent2.getTagName()) == false) {
// currently setTag is package private, so can't set tag.
deltaOps.addElement("UPT(" +
DocumentTree.getNodePath(chosenParent) + ", " +
parent2.getTagName() + ")");
// this is a cluge, since the DOM tree yet does not support
// setTag operation.
Node newChosenParent = tree1.createElementNode(parent2);
tree1.blockMove(chosenParent, newChosenParent, 0,
chosenParent.getLength() - 1);
// set the id for the newChosenParent.
tree1.removeNodeForId(newId, chosenParent);
tree1.setNodeForId(newId, newChosenParent);
// make sure while updating root nodes.
if (chosenParent == tree1.getRootNode()) {
tree1.setRootNode(newChosenParent);
} else {
Node chosenGrandParent = chosenParent.getParentNode();
chosenGrandParent.replaceChild(newChosenParent,
chosenParent);
}
// put the node on the updatedTagList: the replacement node is the
// key, the replaced node (whose attributes are compared later) the
// value.
updatedNodeList.put(newChosenParent, chosenParent);
}
}
}
}
/**
 * Generates the tree diffs: runs the matching, pruning and modification
 * phases in that order, reports the time spent in each phase and in
 * total (in seconds) on stderr, and finally writes the diffs to stdout
 * via xmlize().
 */
public void generateDiffs() {
    final long overallStart = System.currentTimeMillis();
    Vector deleteList = matchingPhase();
    double seconds = (System.currentTimeMillis() - overallStart) / 1000.0;
    System.err.println("matchingPhase: " + seconds);

    long phaseStart = System.currentTimeMillis();
    pruningPhase(deleteList);
    seconds = (System.currentTimeMillis() - phaseStart) / 1000.0;
    System.err.println("pruningPhase: " + seconds);

    phaseStart = System.currentTimeMillis();
    modificationPhase();
    seconds = (System.currentTimeMillis() - phaseStart) / 1000.0;
    System.err.println("modificationPhase: " + seconds);

    seconds = (System.currentTimeMillis() - overallStart) / 1000.0;
    System.err.println("Total Time: " + seconds);
    xmlize();
}
/**
 * Main function. Compares the two documents named on the command line
 * and writes the diffs to stdout.
 *
 * @param args args[0] and args[1] name the first and the second xml
 *        document; any further arguments are ignored. With fewer than
 *        two arguments a usage message is printed to stderr and nothing
 *        else is done.
 */
public static void main(String[] args) {
    if (args == null || args.length < 2) {
        System.err.println("Usage: java com.sun.xml.treediff.TreeDiff " +
                           "doc1.xml doc2.xml");
        return;
    }
    TreeDiff treeDiff = new TreeDiff(args[0], args[1]);
    treeDiff.generateDiffs();
}
}
/*
* TODO items:
*
* 1. A sequence of DEL and INS on leaf nodes can instead be done with a
* single UPD operation.
*
* 2. The algo needs to be tinkered: if all the non-leaf nodes are identical
* and if all the leaf nodes are different, then the current algo will
* regenerate the whole tree.
*
* 3. Swap optimize.
*
* 4. Use the protected ElementNode.setTag() method.
*
* 5. Optimization during matching: getNodeForId() - can use attributes
* to find a match if two probable nodes have the same id. But this comes
* at the added cost of time.
*
* 6. Update whitePaper for attr node diff mechanism.
*/
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy