All downloads are free. The search and download features use the official Maven repository.

com.sun.xml.treediff.TreeDiff Maven / Gradle / Ivy

The newest version!
/*
 *
 * Copyright (c) 1998 Sun Microsystems, Inc. All Rights Reserved.
 *
 * This software is the confidential and proprietary information of Sun
 * Microsystems, Inc.  ("Confidential Information").  You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Sun.
 *
 * SUN MAKES NO REPRESENTATION OR WARRANTIES ABOUT THE SUITABILITY OF
 * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
 * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT.  SUN SHALL NOT BE LIABLE FOR
 * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
 * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
 *
 */

/**
 * @author Ram Jeyaraman
 * @version 1.0 November 1998
 */

package com.sun.xml.treediff;

import java.util.*;
import org.w3c.dom.*;
import com.sun.xml.tree.*;

/**
 * This class implements the tree diff algorithm.
 */

 /* Optimizations made to reduce memory usage:
  *
  * 1) The levelContainer data structure is not built for tree1.
  * 2) setNodeForId is never done for tree2, as it will not be required.
  * 3) setIdForNode is not done for tree1's internal nodes, as it will
  *    not be required.
  * 4) setNodeForId is not done for tree1's freshly created nodes,
  *    because they will never be queried for, and their parent's id is
  *    assigned based on the id of the peer node in tree2.
  * 5) setNodeForId is not done for tree1's root node in the level zero
  *    scan, since there will be no more tree traversals.
  */
public class TreeDiff extends Object {

    // Instance variables

    // Document tree built from the first document (doc1) by the
    // constructor. The diff phases edit this tree (delete nodes, replace
    // its root) while they record the delta operations.
    DocumentTree tree1 = null;
    // Document tree built from the second document (doc2) by the
    // constructor; it serves as the reference the first tree is compared
    // against.
    DocumentTree tree2 = null;
    // Delta operations generated so far, in generation order. Each entry
    // is a String such as "DEL(path)" or "STA(path, (name, value))".
    Vector deltaOps = new Vector();

    // Constructor

    /**
     * Creates a differ for two XML documents by building one document
     * tree per document.
     *
     * @param doc1 first xml document to be compared.
     * @param doc2 second xml document to be compared.
     */
    public TreeDiff(String doc1, String doc2) {
        // The second tree goes through the two-argument DocumentTree
        // constructor with its flag set to true; the first tree uses the
        // single-argument form.
        this.tree1 = new DocumentTree(doc1);
        this.tree2 = new DocumentTree(doc2, true);
    }

    // Static Methods.

    /**
     * Copies every element of one vector onto the end of another,
     * preserving order. This is a stand-in for Vector.addAll(), which is
     * only available from JDK 1.2 (the Java 2 Platform) onwards.
     *
     * @param dst the destination vector; it is modified in place.
     * @param src the source vector whose elements are appended to dst.
     *
     * @return dst, after the elements have been appended.
     */
    static Vector appendAllToVector(Vector dst, Vector src) {
        // Read the element count once, before anything is added, so that
        // only the elements present at call time are copied (this also
        // keeps the loop finite when dst and src are the same vector).
        final int count = src.size();
        int pos = 0;
        while (pos < count) {
            dst.addElement(src.elementAt(pos));
            pos++;
        }
        return dst;
    }

    /**
     * Fetches the node stored under the given id and removes it from the
     * table. An id maps either to a single Node or, when several nodes
     * share the id, to a Vector of nodes that is consumed front to back.
     * Once the last node of such a Vector has been fetched, the id itself
     * is removed from the table, so a later fetch for the same id reports
     * a failure instead of throwing on an empty Vector.
     *
     * @param table hashtable holding the nodeToId info.
     * @param id identifier for the node.
     * @return the node corresponding to the identifier, or null (after
     *         logging to stderr) when nothing usable is stored under it.
     */
    static Node fetchNodeForId(Hashtable table, String id) {
        Object obj = table.get(id);

        // single node stored directly against the id.
        if (obj instanceof Node) {
            table.remove(id);
            return (Node) obj;
        }

        // neither a node nor a list of nodes (this includes "no entry").
        if (!(obj instanceof Vector)) {
            System.err.println("TreeDiff.fetchNodeForId failed");
            return null;
        }

        Vector vlist = (Vector) obj;

        // an already drained list holds nothing to hand out; drop the
        // stale entry rather than indexing into an empty vector.
        if (vlist.isEmpty()) {
            table.remove(id);
            System.err.println("TreeDiff.fetchNodeForId failed");
            return null;
        }

        Node node = (Node) vlist.elementAt(0);
        vlist.removeElementAt(0);

        // the last sharer is gone, so the id no longer maps to anything.
        if (vlist.isEmpty()) {
            table.remove(id);
        }

        return node;
    }

    /**
     * Records a node under its id. The first node for an id is stored
     * directly; when a second node arrives for the same id the entry is
     * turned into a Vector holding both, and further nodes are appended
     * to that Vector.
     *
     * @param table hashtable holding the nodeToId info.
     * @param id identifier for the node.
     * @param node the node for which the id is being set.
     */
    static void storeNodeForId(Hashtable table, String id, Node node) {
        Object existing = table.get(id);

        if (existing == null) {
            // first node seen for this id.
            table.put(id, node);
        } else if (existing instanceof Node) {
            // second node for this id: switch the entry to a list.
            Vector sharers = new Vector();
            sharers.addElement(existing);
            sharers.addElement(node);
            table.put(id, sharers);
        } else if (existing instanceof Vector) {
            // the id is already shared: extend the list.
            ((Vector) existing).addElement(node);
        } else {
            // something other than a node or a list is stored here.
            System.err.println("TreeDiff.storeNodeForId failed");
        }
    }

    // Instance Methods.

    /**
     * Writes one line of text to standard output.
     */
    private void print(String output) {
        final java.io.PrintStream out = System.out;
        out.println(output);
    }

    /**
     * Dumps the recorded delta operations to standard output, one per
     * line, in the order they were generated (for debug purposes).
     */
    public void printDeltaOps() {
        StringBuffer text = new StringBuffer(1000);

        int idx = 0;
        while (idx < deltaOps.size()) {
            // every recorded operation is expected to be a String.
            String op = (String) deltaOps.elementAt(idx++);
            text.append(op).append('\n');
        }

        System.out.println(text);
    }

    /**
     * Writes the diffs in the form of XML to stdout: an empty line, then
     * the rendering of each recorded delta operation in order, then
     * another empty line.
     */
    public void xmlize() {
        print("");

        int idx = 0;
        while (idx < deltaOps.size()) {
            String op = (String) deltaOps.elementAt(idx);
            xmlGen(op);
            idx++;
        }

        // Hand-written operations that were once used here to try out
        // the rendering of each operation kind:
        //   MOV(a0.b0, a0.b1)    SWP(a0.b0, a0.b1)    CLP(a0.b0, a0.b1)
        //   SPT(a0.b0, 4)        UPT(a0.bo, CENTAUR)
        //   STA(ao.bo, name, ram)                     RMA(ao.bo, name)

        print("");
    }

    /**
     * Prints one delta operation to stdout, field by field. The first
     * three characters of the operation select its kind, and the fields
     * are taken from the text between "XXX(" and the closing ")":
     *
     *   INS(path)  or  INS(path, (type, data))
     *   DEL(path)
     *   MOV / SWP / CLP / SPT / UPT / RMA (first, second)
     *   STA(path, (name, value))  or  STA(path, name, value)
     *
     * Fields are separated by a comma that may be followed by one space.
     * An operation of any other kind only gets its opening and closing
     * lines printed.
     *
     * NOTE(review): several literals printed here consist of whitespace
     * only; element markup may have been lost from them before this file
     * reached its current form -- confirm against the original source.
     * NOTE(review): a field ends at the first ')' found, so field text
     * that itself contains ')' is cut short.
     *
     * @param operation the delta operation string to print.
     */
    private void xmlGen(String operation) {
        String op = operation.substring(0, 3);
        print("      <" + op + ">");

        if (op.equals("INS")) {
            int index = operation.indexOf(',', 4);
            if (index == -1) {
                // INS(path): nothing but the path.
                printXmlField(
                    operation.substring(4, operation.indexOf(')', 4)));
            } else {
                // INS(path, (type, data)).
                printXmlField(operation.substring(4, index));

                index = xmlFieldStart(operation, index, true);
                print("\t");
                print("\t      ");
                int nextIndex = operation.indexOf(',', index);
                print("\t\t" + operation.substring(index, nextIndex));
                print("\t      ");
                print("\t      ");
                // start after the ", " separator so that the data is not
                // printed with the separator's space in front of it.
                int dataStart = xmlFieldStart(operation, nextIndex, false);
                print("\t\t" +
                    operation.substring(dataStart,
                        operation.indexOf(')', dataStart)));
                print("\t      ");
                print("\t");
            }
        } else if (op.equals("DEL")) {
            printXmlField(operation.substring(4, operation.indexOf(')', 4)));
        } else if (op.equals("MOV") || op.equals("SWP") || op.equals("CLP")
                || op.equals("SPT") || op.equals("UPT")
                || op.equals("RMA")) {
            // all of these carry exactly two fields.
            int index = operation.indexOf(',', 4);
            printXmlField(operation.substring(4, index));

            int start = xmlFieldStart(operation, index, false);
            printXmlField(
                operation.substring(start, operation.indexOf(')', start)));
        } else if (op.equals("STA")) {
            int index = operation.indexOf(',', 4);
            printXmlField(operation.substring(4, index));

            // the attribute diff code writes the name/value pair inside
            // parentheses; skip the opening one so that it does not end
            // up as part of the attribute name.
            int nameStart = xmlFieldStart(operation, index, true);
            int nextIndex = operation.indexOf(',', nameStart);
            printXmlField(operation.substring(nameStart, nextIndex));

            int valueStart = xmlFieldStart(operation, nextIndex, false);
            printXmlField(
                operation.substring(valueStart,
                    operation.indexOf(')', valueStart)));
        }

        print("      ");
    }

    /**
     * Prints a single field value as three lines: a tab-only line, the
     * indented value, and another tab-only line.
     *
     * @param value the field text to print.
     */
    private void printXmlField(String value) {
        print("\t");
        print("\t      " + value);
        print("\t");
    }

    /**
     * Locates the first character of the field that follows a separator.
     *
     * @param operation the delta operation string.
     * @param commaIndex index of the ',' that ends the previous field.
     * @param skipParen whether a '(' opening a parenthesised group should
     *        be skipped as well.
     * @return the index just past the comma, past one following space if
     *         there is one, and past one '(' if requested and present.
     */
    private static int xmlFieldStart(String operation, int commaIndex,
        boolean skipParen) {

        int start = commaIndex + 1;
        if (start < operation.length() && operation.charAt(start) == ' ') {
            start++;
        }
        if (skipParen && start < operation.length()
                && operation.charAt(start) == '(') {
            start++;
        }
        return start;
    }

    /**
     * Generates the differences in the attribute nodes.
     * Note: Attribute names and values are case-insensitive.
     *
     * @param refNode the reference node.
     * @param node the node being compared.
     * @param updatedNode the node that was previously updated (setTag()).
     */
    private void generateAttrDiffs(ElementEx refNode, ElementEx node,
    	ElementEx updatedNode) {

        /* I think that the updatedNode is not really necessary, in the
         * ideal case. i.e., if the DOM api were to allow a setTag() to
         * be done on an ElementNode, then we do not need this extra
         * parameter at all. Since UPT operation is not using setTag(),
         * but instead creating a new node.
         */
    	NamedNodeMap refAttrs = refNode.getAttributes();
        NamedNodeMap nodeAttrs = null;
        if (updatedNode == null)
        	nodeAttrs = node.getAttributes();
        else
        	nodeAttrs = updatedNode.getAttributes();

        int refAttrSize = refAttrs.getLength();
        int nodeAttrSize = nodeAttrs.getLength();

        // if ref node has no attributes.
        if (refAttrSize == 0) {
        	// delete attrs if any, from the node being compared.
        	if (nodeAttrSize != 0) {
            	for (int i = 0; i < nodeAttrSize; i++) {
                	Attr attr = (Attr) nodeAttrs.item(i);
                    String name = attr.getName();
                 	//nodeAttrs.removeNamedItem(name);
                    deltaOps.addElement("RMA(" +
                    	DocumentTree.getNodePath(node) + ", " + name + ")");
                }
            }
        }

        // if ref node has attributes.
        if (refAttrSize != 0) {
        	// if node being compared has no attrs.
        	if (nodeAttrSize == 0) {
                for (int i = 0; i < refAttrSize; i++) {
                	Attr attr = (Attr) refAttrs.item(i);
                	deltaOps.addElement("STA(" +
                    	DocumentTree.getNodePath(node) + ", (" +
                        attr.getName() + ", " + attr.getValue() + "))");
                }

                return;
            }

            // well now, we gotto find the what matches and not.

            // build a hashtable of the node attributes.
            Hashtable attrsTable = new Hashtable();
            for (int i = 0; i < nodeAttrSize; i++) {
            	Attr attr = (Attr) nodeAttrs.item(i);
                String name = attr.getName();
            	attrsTable.put(name.toLowerCase(), attr);
            }

            // find what matches.
            for (int i = 0; i < refAttrSize; i++) {
             	Attr refAttr = (Attr) refAttrs.item(i);
                String name = refAttr.getName().toLowerCase();
                Attr attr = (Attr) attrsTable.get(name);
                if (attr != null) {
                	attrsTable.remove(name);
                    if (refAttr.getValue().equalsIgnoreCase(attr.getValue()))
                    	continue;
                }

                // no match found (so add the attribute), or
                // match found but not same value (so set the value).
                deltaOps.addElement("STA(" +
                	DocumentTree.getNodePath(node) + ", (" +
                	refAttr.getName() + ", " + refAttr.getValue() + "))");
            }

            // deal with the attributes in the node being compared,
            // which do not have a match.
            Enumeration enum = attrsTable.elements();
            while (enum.hasMoreElements()) {
            	Attr attr = (Attr) enum.nextElement();
                deltaOps.addElement("RMA(" +
                    	DocumentTree.getNodePath(node) + ", " +
                        attr.getName() + ")");
            }
        }

        return;
    }

    /**
     * Matches the leaf nodes of the two DOM trees based on their content,
     * one leaf type at a time, each type against its peers of the same
     * type in the other tree.
     *
     * @return a vector containing all the elements to be deleted in tree1.
     */
    public Vector matchingPhase() {

        // build the leafNodeIdentifiers and levelInfo.
        tree1.buildLeafInfo();
        tree2.buildLeafInfo();

        // the leaf node types, in the order in which they are matched.
        final int[] leafTypes = {
            Node.TEXT_NODE,
            Node.COMMENT_NODE,
            Node.CDATA_SECTION_NODE,
            Node.PROCESSING_INSTRUCTION_NODE,
            Node.ENTITY_REFERENCE_NODE,
            Node.ELEMENT_NODE
        };

        // collect the unmatched tree1 leaves of every type.
        Vector deleteList = new Vector();
        for (int t = 0; t < leafTypes.length; t++) {
            Vector unmatched = matchLeafType(leafTypes[t]);
            TreeDiff.appendAllToVector(deleteList, unmatched);
        }

        // the leafInfo data structure is no longer needed, so drop the
        // references to it.
        tree1.setLeafNodeInfo(null);
        tree2.setLeafNodeInfo(null);

        return deleteList;
    }

    /**
     * matches the leaf nodes of a specific leafType based on their content.
     *
     * @return a vector of node of specific leafType to be deleted in tree1.
     */
	Vector matchLeafType(int nodeType) {

    	// get the right hashtable based on the leaf node type.
        Hashtable table1 = (Hashtable) tree1.getLeafNodeInfo().
        							get(new Integer(nodeType));
        Hashtable table2 = (Hashtable) tree2.getLeafNodeInfo().
        							get(new Integer(nodeType));

        // nodes to be deleted in tree1.
        Vector deleteList = new Vector();

        // the prime objective here is to isolate the elements
        // in table2 which qualify as insertions, and identify
        // elements in table1 which qualify for deletions. So,
        // when a match is found, we just ignore it as well as
        // remove it from the tables.

        Enumeration enum = table2.keys();
        while (enum.hasMoreElements()) {

            Integer hashKey = (Integer) enum.nextElement();
            Vector list1 = (Vector) table1.get(hashKey);
            Vector list2 = (Vector) table2.get(hashKey);

            // if there is no matching hashKey in table1,
            // move all the elements in list1 to insertList.

            if (list1 == null) {
                continue;
            }

            // since there is a potential match in table1, find a
            // match in list1 for each element enlisted in list2.
            // If a match exists, remove the matching element from
            // list1 and the corresponding element from list2.
            // Else, if a match does not exist, move the element
            // into the insertList.

            for (int i = 0; i < list2.size(); i++) {
                Node leaf2 = (Node) list2.elementAt(i);
                String s2 = DocumentTree.getDataString(leaf2);

                Node chosenLeaf = null;
                String posPath2 = DocumentTree.getPositionPath(leaf2);
                for (int j = 0, maxCount = 0, len2 = posPath2.length();
                	j < list1.size(); j++) {
                    Node leaf1 = (Node) list1.elementAt(j);
                    String s1 = DocumentTree.getDataString(leaf1);

                    // if there are duplicate nodes, then find the
                    // closest node possible!
                    if (s2.equals(s1)) {
                        int matchCount = DocumentTree.charactersMatched(
                        	DocumentTree.getPositionPath(leaf1),
                            posPath2, true);

                        if (matchCount > maxCount) {
                        	chosenLeaf = leaf1;
                            if (matchCount == len2)
                            	break; // exact match.
                            maxCount = matchCount;
            			}
                    }
                }

                if (chosenLeaf != null) { // match has been found. Remove it!
                	String id = tree2.getIdForNode(leaf2);
                    tree1.setIdForNode(chosenLeaf, id);
                    tree1.setNodeForId(id, chosenLeaf);
                    list1.removeElement(chosenLeaf);
                }
            }
        }

        // now table2 should be empty. The remaining entries in
        // table1 need to be added to the deleteList.

        enum = table1.keys();

        while (enum.hasMoreElements()) {

        	Integer hashKey = (Integer) enum.nextElement();
         	Vector list1 = (Vector) table1.get(hashKey);

            //deleteList.addAll(list1);
            TreeDiff.appendAllToVector(deleteList, list1);
        }

        return deleteList;
    }

    /**
     * Generates the deletion operations. If all children of a parent
     * node are to be deleted, the operation is represented by a single
     * delete operation on the parent, instead of individual ones on the
     * children. The list is worked through iteratively until it is
     * empty; a parent that replaces its children is queued at the end of
     * the list and handled in a later pass.
     *
     * @param deleteList list of nodes in tree1 to be deleted; it is
     *        consumed by this method.
     */
    public void pruningPhase(Vector deleteList) {

        if (deleteList == null) {
            return;
        }

        while (deleteList.size() != 0) {
            // always work from the head of the list.
            Node head = (Node) deleteList.elementAt(0);
            Node headParent = head.getParentNode();

            if (headParent == null) {
                System.err.println("TreeDiff: DOM tree corrupted");
                return;
            }

            // the node to be deleted is the root (or hangs off something
            // that is not an element): one delete covers everything.
            boolean parentIsElement = headParent instanceof ElementEx;
            if (!parentIsElement || head == tree1.getRootNode()) {
                // add delta operation to the list.
                String rootPath = DocumentTree.getNodePath(head);
                deltaOps.addElement("DEL(" + rootPath + ")");

                // delete node from tree1.
                tree1.delete(head, false);

                // nothing else is left to do.
                deleteList.removeAllElements();
                return;
            }

            // gather every listed node that shares the head's parent
            // (this includes the head itself).
            Vector siblings = new Vector();
            Enumeration pending = deleteList.elements();
            while (pending.hasMoreElements()) {
                Node candidate = (Node) pending.nextElement();
                if (candidate.getParentNode().equals(headParent)) {
                    siblings.addElement(candidate);
                }
            }

            // when all children of the parent are listed, the parent is
            // queued for deletion in their place.
            boolean wholeFamily =
                ((NodeList) headParent).getLength() == siblings.size();
            if (wholeFamily) {
                deleteList.addElement(headParent);
            }

            // take the siblings off the list; unless the parent stands
            // in for them, each one gets its own delete operation.
            int siblingCount = siblings.size();
            for (int k = 0; k < siblingCount; k++) {
                Node sibling = (Node) siblings.elementAt(k);

                if (!wholeFamily) {
                    // add delta operation to the list.
                    String path = DocumentTree.getNodePath(sibling);
                    deltaOps.addElement("DEL(" + path + ")");

                    // delete node from tree1.
                    tree1.delete(sibling, false);
                }
                deleteList.removeElement(sibling);
            }
        }
    }

    /**
     * Generates the tree modification operations. Every level of tree2
     * below the root is processed bottom-up through modifyLevel(); the
     * root level is then handled here: tree1 either receives a freshly
     * created root (INS, preceded by DEL of the old root if there was
     * one), has the matching node moved into the root position (MOV), or
     * already has the matching node as its root. Attribute differences
     * for an element root are generated at the end.
     */
    public void modificationPhase() {

        // This hashtable is again a kludge for not being able to setTag
        // values in element nodes.
        Hashtable updatedNodeList = new Hashtable();

        Vector levelContainer = tree2.getLevelContainer();

        // walk the levels from the deepest one up to (but excluding) the
        // root level, which is held at index 0.
        int level = levelContainer.size();
        for (; level > 1; level--) {
            modifyLevel((Vector) levelContainer.elementAt(level - 1),
                updatedNodeList);
        }

        /* dealing with the root level. */

        Node root1 = tree1.getRootNode();
        Node root2 = tree2.getRootNode();

        // tree2's rootNode is empty (no document tree). Reported on
        // stderr like every other diagnostic of this class, so that it
        // does not end up in the diff written to stdout.
        if (level <= 0 || root2 == null) {
            System.err.println("TreeDiff: DOM tree is null");
            return;
        }

        // level zero scan.
        String id2 = tree2.getIdForNode(root2);
        if (id2 == null) {
            System.err.println("TreeDiff: possibly no PCDATA in document");
            return;
        }

        Node node1 = tree1.getNodeForId(id2, root2);

        // no node matching id in tree1. (i.e) tree1's root can be null
        // or it might have a different node. So, discard tree1's root
        // node and create a new one similar to the one in tree2.
        if (node1 == null) {

            // tree2's root node is a valid tree node when it is a leaf
            // or an element; reject it only when it is neither. (Both
            // kinds are handled separately below, so the root must not
            // be required to be a leaf and an element at the same time.)
            boolean leafRoot = DocumentTree.isLeafInstance(root2);
            if (!leafRoot && !(root2 instanceof ElementEx)) {
                System.err.println("TreeDiff: invalid root node in DOM tree");
                return;
            }

            // generate the delta operation.
            String oprtn = null;
            if (leafRoot) {
                String[] strArr = DocumentTree.getCompleteData(root2);
                oprtn = "INS(" + DocumentTree.getNodePath(root2) + ", (" +
                        strArr[0] + ", " + strArr[1] + "))";
            } else {
                oprtn = "INS(" + DocumentTree.getNodePath(root2) + ")";
            }

            if (root1 != null) {
                deltaOps.addElement("DEL(" +
                    DocumentTree.getNodePath(root1) + ")");
            }
            deltaOps.addElement(oprtn);

            // Well, if the algorithm went through fine, at this
            // point tree2's root node should not have any children,
            // so deep cloning is a farce.
            Node newRoot1 = null;
            if (leafRoot) {
                newRoot1 = tree1.createLeafNode(root2);
            } else {
                newRoot1 = tree1.createElementNode(root2);
            }

            // replace the existing root node in tree1.
            tree1.setRootNode(newRoot1);

            // generate attr diffs, if any.
            if (newRoot1 instanceof ElementEx) {
                generateAttrDiffs((ElementEx) root2,
                    (ElementEx) newRoot1, null);
            }

            return;
        }

        // matching node is present in tree1, but is not root node.
        // make the matching node, tree1's root node.
        if (node1 != root1) {

            // a match exists in tree1 although tree1 has no root at all:
            // something is really wrong.
            if (root1 == null) {
                System.err.println("TreeDiff: DOM tree corrupted");
                return;
            }

            deltaOps.addElement("MOV(" + DocumentTree.getNodePath(node1) +
                                ", " + DocumentTree.getNodePath(root2) +
                                ")");

            // remove the matching node from tree1.
            Node parent1 = node1.getParentNode();
            if (parent1 == null) {
                System.err.println("TreeDiff: DOM tree corrupted");
                return;
            }
            parent1.removeChild(node1);

            // replace the existing root node in tree1.
            tree1.setRootNode(node1);
        }

        // at this point the matching node is tree1's root node.

        // generate attr diffs, if any.
        if (node1 instanceof ElementEx) {
            // get the node that was updated to extract attributes.
            Node updatedNode = (Node) updatedNodeList.get(node1);
            generateAttrDiffs((ElementEx) root2,
                (ElementEx) node1, (ElementEx) updatedNode);
        }
    }

    /**
     * This is the core piece of the differencing algorithm. This walks
     * up the tree in a bottom-up breadth-first fashion (what a way :)),
     * and generates the delta operations in one single pass!!.
     *
     * @param toBeCompletedList list of node at a particular level.
     * @param updatedNodeList list of nodes whose tags were updated.
     */
    void modifyLevel(Vector toBeCompletedList, Hashtable updatedNodeList) {

        while (toBeCompletedList.size() != 0) {

        	// get the sibling list (note: the parent node has it all).

         	Node _node = (Node) toBeCompletedList.elementAt(0);
            ElementNode parent2 = (ElementNode) _node.getParentNode();

            // assign id to parent2.
            String newId = tree2.assignIdToParent(parent2);

            // find the matching nodes in tree1, for nodes in the sibling list.
            // place the matching nodes in matchingList, and
            // the unmatched ones in the insertList.

            Vector matchingList = new Vector();
        	Vector insertList = new Vector();
            Hashtable posForNode1 = new Hashtable();
            Hashtable nodeForId1 = new Hashtable();
            for (int i = 0, size = parent2.getLength(); i < size; i++) {
            	Node node2 = parent2.item(i);
            	String id2 = tree2.getIdForNode(node2);

				Node node1 = tree1.getNodeForId(id2, node2);

                if (node1 != null) {
                	// used in move operation.
	                posForNode1.put(node1, new Integer(i));

                	matchingList.addElement(node1);

                    // remember the nodeToId info for the swap oprn.
                	TreeDiff.storeNodeForId(nodeForId1, id2, node1);

                    // make sure to remove the matched node in tree1.
                    tree1.removeNodeForId(id2, node1);

                    // check the attribute node diffs.
                    if (node2 instanceof ElementEx) {
                    	// get the node that was updated to extract attributes.
                    	Node updatedNode  = (Node) updatedNodeList.get(node1);
                        // node1 has tobe an ElementNode!
                        generateAttrDiffs((ElementEx) node2,
                        	(ElementEx) node1, (ElementEx) updatedNode);
                    }
                } else {
                	insertList.addElement(node2);
                }

                // remove the node from toBeCompletedList.
                toBeCompletedList.removeElement(node2);
            }

			// if none of the nodes in the siblingList match, then
            // create a new parent node and populate it with children
            // (clones of the nodes in the siblingList) and
            // attach the new parent sub-tree to tree1.

            if (matchingList.size() == 0 &&
            	insertList.size() == parent2.getLength()) {

                // create a new sub-tree.

                ElementNode parent1 = (ElementNode) tree1.
                							createElementNode(parent2);
                Node child1 = null, child2 = null;
                for (int i = 0, size = parent2.getLength(); i < size; i++) {
                 	child2 = parent2.item(i);
                    if (child2 instanceof ElementEx) {
                    	child1 = tree1.createElementNode(child2);
                    } else {
                        child1 = tree1.createLeafNode(child2);
                    }
                    parent1.appendChild(child1);
                }

                // assign id to the parent1.
                tree1.setNodeForId(newId, parent1);

                // attach the new parent subtree to tree1.
                // for now, we use the root node as a surrogate(?) parent.

                if (parent2 == tree2.getRootNode()) { // this will not be true.
                    	tree1.setRootNode(parent1);
                } else {
                	Node oldRoot1 = tree1.getRootNode();
                   	if (tree1.getRootNode() == null) {
                    	// create a new root node.
                    	// we did not clone since we do not want the
	                    // attribute set to be carried over as well.
        				Node newRoot = tree1.createElementNode("root1");
                		tree1.setRootNode(newRoot);
                        deltaOps.addElement("INS(" +
                        	DocumentTree.getNodePath(newRoot) + ")");
                    }

                	if (DocumentTree.isLeafInstance(tree1.getRootNode())) {
    	    			Node newRoot = tree1.createElementNode("root1");
                        // this makes the oldRoot the child of the new one.
                        tree1.insertRootNode(newRoot);
                        deltaOps.addElement("INS(" +
                        	DocumentTree.getNodePath(newRoot) + ")");
                	}

					// find a node in tree1 to place the sub-tree.
                    // if a node at level (l-2) is not found, use
                    // the root node instead.

                    int level = DocumentTree.getLevel(parent2);
					int[] indices = new int[level + 1];
                    Node tmpNode2 = parent2;
                    Node tmpParent2 = parent2.getParentNode();
                    while (level >= 0) {
                    	indices[level--] = DocumentTree.
                        	getIndex(tmpParent2, tmpNode2);
                        tmpNode2 = tmpParent2;
                        tmpParent2 = tmpNode2.getParentNode();
                    }

                	// find an appropriate grand parent in tree1.
                    ElementNode grandParent1 = (ElementNode)
                    	tree1.getRootNode();
                	for (int i = 1; i < (indices.length - 1); i++) {
                    	Node possibleParent1 = grandParent1.item(indices[i]);
                    	if (!(possibleParent1 instanceof ElementEx))
                        	break;
                        grandParent1 = (ElementNode) possibleParent1;
                    }

                    Node refNode = grandParent1.
                    	item(indices[indices.length - 1]);
                    if (refNode != null)
                    	grandParent1.insertBefore(parent1, refNode);
                    else
                    	grandParent1.appendChild(parent1);

                    //tree1.getRootNode().appendChild(parent1);
                }

				// generate the deltaOps.

                deltaOps.addElement("INS(" +
                					DocumentTree.getNodePath(parent1) + ")");
                for (int i = 0, size = parent1.getLength(); i < size; i++) {
                	Node refNode = parent2.item(i);
                	Node insNode = parent1.item(i);
                    if (insNode instanceof ElementEx) { // will this be true ??
                    	// generate attr diffs, if any.
        				if (insNode instanceof ElementEx) {
            				// child1 has tobe an ElementNode!
            				generateAttrDiffs((ElementEx) refNode,
            					(ElementEx) insNode, null);
        				}
                        deltaOps.addElement("INS(" +
                        	DocumentTree.getNodePath(insNode) + ")");
                    } else {
		            	String[] strArr = DocumentTree.getCompleteData(insNode);
    	        		String oprtn = "INS(" +
                        			DocumentTree.getNodePath(insNode) +
                        			", (" + strArr[0] + ", " + strArr[1] + "))";
                        deltaOps.addElement(oprtn);
                    }
                }
    		}

            // if some of the nodes in the siblingList have a match
            // in tree1, then find a parent in tree1 which has the most
            // children in the matchedList. Choose that parent, and move
            // the rest of the matched nodes to the parent (this might
            // involve collapse and move operations. Insert the nodes in
            // the unmatchedList into the parent. Re-order the children
            // of the parent node, and then split it if necessary to bring
            // it in sync with its peer in tree2.

            if (matchingList.size() != 0 &&
            	insertList.size() != parent2.getLength()) {

                // holds the possible parent nodes and their matched children.
                Hashtable siblingInfo = new Hashtable();

                // holds the possible parents to be chosen.
                Vector chosenParents = new Vector();

                // find a list of parents in tree1 which has most
                // siblings in the siblingList (this can be more than one).
                for (int i = 0, maxChildren = 0,
                	size = matchingList.size(); i < size; i++) {
                	Node node = (Node) matchingList.elementAt(i);
                	Node parent = node.getParentNode();

                    // place the matched nodes against their parents.
                    Vector clist = (Vector) siblingInfo.get(parent);
                    if (clist == null) {
                    	clist = new Vector();
                    }
                    clist.addElement(node);

                    // keep track of the parent node with the max children.
                    int matchedChildren = clist.size();
                    if (maxChildren < matchedChildren) {
                    	maxChildren = matchedChildren;
                        chosenParents.removeAllElements();
                        chosenParents.addElement(parent);
                    } else if (maxChildren == matchedChildren) {
                        chosenParents.addElement(parent);
                    }

                    siblingInfo.put(parent, clist);
                }

                // choose an appropriate parent from the list of chosen parents.
                // This is based on the closeness of a particular parents'
                // nodepath to its peer in tree2.
                ElementNode chosenParent = null;
                if (chosenParents.size() == 1)
                	chosenParent = (ElementNode) chosenParents.elementAt(0);
                else
                	chosenParent = (ElementNode) DocumentTree.
                    	findClosestMatch(parent2, chosenParents);

                // move the other matched nodes to the chosenParent.
                siblingInfo.remove(chosenParent);
                Enumeration otherParents = siblingInfo.keys();
                while (otherParents.hasMoreElements()) {
                	Node parent = (Node) otherParents.nextElement();
                    Vector clist = (Vector) siblingInfo.get(parent);

                    if (clist == null) { // cannot be null
                    	System.err.println("TreeDiff: corrupted structures");
                        return;
                    }

                    int childCount = ((NodeList) parent).getLength();
                    int matchCount = clist.size();
                    if (childCount < matchCount) {
                    	System.err.println("TreeDiff: corrupted structures");
                        return;
                    } else if (childCount == matchCount) {
                    	// collapse operation.
                        deltaOps.addElement("CLP(" +
                        	DocumentTree.getNodePath(chosenParent) + ", " +
                            DocumentTree.getNodePath(parent) + ")");
                        tree1.collapse(chosenParent, parent);

                        // delete the parent branch.
                        Node child = null;
                        Node rootNode1 = tree1.getRootNode();
                        do {
            				child = parent;
            				parent = child.getParentNode();

            				if (parent == null) {
            					System.err.
                                    println("TreeDiff(CLP): branch not rooted");
            					return;
				            }

                            if (parent == rootNode1 &&
                            	((NodeList) parent).getLength() ==1) {
								System.err.
                                   println("TreeDiff(CLP): corrupted DOM Tree");
            					return;
                            }
				        } while (((NodeList) parent).getLength() == 1);

                        deltaOps.addElement("DEL(" +
                        				DocumentTree.getNodePath(child) + ")");
                        tree1.delete(child, false);
                    } else {
                    	// move operation.
                    	for (int i = 0, size = clist.size(); i < size; i++) {
                         	Node node = (Node) clist.elementAt(i);
                            String oprtn = "MOV(" +
                            		DocumentTree.getNodePath(node)+ ", ";

                            int pos = ((Integer) posForNode1.get(node)).
                            										intValue();
                            tree1.move(node, chosenParent, pos);
                            deltaOps.addElement(oprtn +
                            		DocumentTree.getNodePath(node) + ")");
                        }
                    }
                }

                // insert the unmatched nodes to the chosenParent.
                Node node1 = null, node2 = null;
                for (int i = 0, size = insertList.size(); i < size; i++) {
                 	node2 = (Node) insertList.elementAt(i);

                    if (node2 instanceof ElementEx) {
                    	node1 = tree1.createElementNode(node2);
                    } else {
                        node1 = tree1.createLeafNode(node2);
                    }
                    chosenParent.appendChild(node1);

                    // generate attr diffs, if any.
                    if (node1 instanceof ElementEx) {
                    	// child1 has tobe an ElementNode!
                        generateAttrDiffs((ElementEx) node2,
                        	(ElementEx) node1, null);
                    }

                    // assign an id to the inserted node.
                    TreeDiff.storeNodeForId(nodeForId1,
                    	tree2.getIdForNode(node2), node1);

                    if (node2 instanceof ElementEx) {
                    	// this block will never be entered!, but still..
                    } else {
                		String[] strArr = DocumentTree.getCompleteData(node2);
    	        		String oprtn = "INS(" + DocumentTree.getNodePath(node2)
                        	+ ", (" + strArr[0] + ", " + strArr[1] + "))";
                        deltaOps.addElement(oprtn);
                    }
                }

                // diagnostics.
                if (chosenParent.getLength() < parent2.getLength()) {
                	System.err.println("Error: chosenParent childcount wrong.");
                    return;
                }

                // re-order the children of chosenParent through swap
                // operation, and split the node if needed.

                Hashtable nodePosition = new Hashtable();
                for (int i = 0, len = chosenParent.getLength(); i < len; i++) {
                    nodePosition.put(chosenParent.item(i), new Integer(i));
                }

                // swap operation.
                for (int i = 0, len = parent2.getLength(); i < len; i++) {
                	Node peer2 = parent2.item(i);
                    String id = tree2.getIdForNode(peer2);
                    //Node peer1 = (Node) tree1.getNodeForId(id, peer2);
                    Node peer1 = (Node) TreeDiff.fetchNodeForId(nodeForId1, id);

                    // diagnostics.
                    Integer tmpInt = (Integer) nodePosition.get(peer1);
                    if (tmpInt == null) {
                    	System.err.println("Error: no match found");
                        return;
                    }

                    int pos = ((Integer) nodePosition.get(peer1)).intValue();

                    // swap the node if out of place.
                    if (pos != i) {
                        deltaOps.addElement("SWP(" +
                        	DocumentTree.getNodePath(peer1) +
                            ", " +
                            DocumentTree.getNodePath(chosenParent.item(i)) +
                            ")");

                        nodePosition.put(peer1, new Integer(i));
                        nodePosition.put(chosenParent.item(i),
	                        				new Integer(pos));
                        tree1.swap(chosenParent, pos, i);
                    }
                }

                // split operation.
                int len1 = chosenParent.getLength();
                int len2 = parent2.getLength();
                if (len1 != len2) {

                	// Erroneous: if len1 is < len2, the algo is wrong!
					if (len1 < len2) {
                    	System.err.println("TreeDiff: diff operation failed");
                        return;
                    }

                    // if the chosenParent happens to be the root node,
                    // then create a new root and make the chosenParent
                    // its child.
                    if (chosenParent == tree1.getRootNode()) {
                    	Node newRoot = tree1.createElementNode("root1");
                        // this makes the oldRoot the child of the new one.
                        tree1.insertRootNode(newRoot);
                        deltaOps.addElement("INS(" +
                        DocumentTree.getNodePath(newRoot) + ")");
                    }

                    deltaOps.addElement("SPT(" +
                    	DocumentTree.getNodePath(chosenParent) + ", " +
                        len2 + ")");

                    tree1.split(chosenParent, len2);
                }

                // assign id to the chosenParent.
                tree1.setNodeForId(newId, chosenParent);

                // make sure tags are the same.
                if (chosenParent.getTagName().
                	equalsIgnoreCase(parent2.getTagName()) == false) {
                	// currently setTag is package private, so can't set tag.
                    deltaOps.addElement("UPT(" +
                    	DocumentTree.getNodePath(chosenParent) + ", " +
                        parent2.getTagName() + ")");

                    // this is a cluge, since the DOM tree yet does not support
                    // setTag operation.
                    Node newChosenParent = tree1.createElementNode(parent2);
                    tree1.blockMove(chosenParent, newChosenParent, 0,
                    				chosenParent.getLength() - 1);
                    // set the id for the newChosenParent.
                    tree1.removeNodeForId(newId, chosenParent);
                    tree1.setNodeForId(newId, newChosenParent);

                    // make sure while updating root nodes.
                    if (chosenParent == tree1.getRootNode()) {
                    	tree1.setRootNode(newChosenParent);
                     } else {
                     	Node chosenGrandParent = chosenParent.getParentNode();
                    	chosenGrandParent.replaceChild(newChosenParent,
                    											chosenParent);
					}

                    // put the node on the updatedTagList.
                    updatedNodeList.put(newChosenParent, chosenParent);
                }
            }
        }
    }

    /**
     * Runs the three diff phases in order -- matching, pruning and
     * modification -- and then calls {@code xmlize()}.
     *
     * <p>The wall-clock duration of each phase, and of the three phases
     * combined, is written to {@code System.err} in seconds. The list
     * returned by the matching phase is handed to the pruning phase.
     */
    public void generateDiffs() {
        final long runStart = System.currentTimeMillis();

        // Phase 1: matching. Its result feeds the pruning phase.
        final Vector deleteList = matchingPhase();
        final long matchingMillis = System.currentTimeMillis() - runStart;
        System.err.println("matchingPhase: " + (matchingMillis / 1000.0));

        // Phase 2: pruning, timed from a fresh timestamp so that the
        // reporting above is not counted against it.
        final long pruningStart = System.currentTimeMillis();
        pruningPhase(deleteList);
        final long pruningMillis = System.currentTimeMillis() - pruningStart;
        System.err.println("pruningPhase: " + (pruningMillis / 1000.0));

        // Phase 3: modification.
        final long modificationStart = System.currentTimeMillis();
        modificationPhase();
        final long modificationMillis =
            System.currentTimeMillis() - modificationStart;
        System.err.println("modificationPhase: "
            + (modificationMillis / 1000.0));

        // Overall time is taken before the result is emitted.
        final long totalMillis = System.currentTimeMillis() - runStart;
        System.err.println("Total Time: " + (totalMillis / 1000.0));

        // Other outputs, printDeltaOps() and tree1.write(), are left disabled.
        xmlize();
    }

    /**
     * Command-line entry point: builds a {@code TreeDiff} from the first two
     * arguments (the two XML documents to compare) and calls
     * {@link #generateDiffs()}.
     *
     * <p>If fewer than two arguments are supplied, a usage message is written
     * to {@code System.err} and the method returns without doing any work,
     * instead of failing with an {@code ArrayIndexOutOfBoundsException}.
     * Arguments after the second are ignored; no validate/novalidate flag is
     * consumed here.
     *
     * @param args command-line arguments; {@code args[0]} and {@code args[1]}
     *             are passed to the {@code TreeDiff} constructor.
     */
    public static void main(String[] args) {
        // Guard the array accesses below; the file reports errors via
        // System.err followed by a plain return, so do the same here.
        if (args == null || args.length < 2) {
            System.err.println("Usage: java com.sun.xml.treediff.TreeDiff " +
                "doc1.xml doc2.xml");
            return;
        }

        TreeDiff treeDiff = new TreeDiff(args[0], args[1]);
        treeDiff.generateDiffs();
    }
}

/*
 * TODO items:
 *
 * 1. A sequence of DEL and INS on leaf nodes can instead be done with
 * a single UPD operation.
 *
 * 2. The algorithm needs tuning: if all the non-leaf nodes are identical
 * and all the leaf nodes are different, then the current algorithm will
 * regenerate the whole tree.
 *
 * 3. Swap optimize.
 *
 * 4. Use the protected ElementNode.setTag() method.
 *
 * 5. Optimization during matching: getNodeForId() - can use attributes
 * to find a match if two probable nodes have the same id. But this comes
 * at the added cost of time.
 * 
 * 6. Update whitePaper for attr node diff mechanism.
 */




© 2015 - 2025 Weber Informatics LLC | Privacy Policy