// com.sun.xml.treediff.DocumentTree Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of greenpepper-remote-agent Show documentation
// The newest version!
/*
 * 
 * Copyright (c) 1998 Sun Microsystems, Inc. All Rights Reserved.
 *             
 * This software is the confidential and proprietary information of Sun
 * Microsystems, Inc.  ("Confidential Information").  You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Sun.
 *
 * SUN MAKES NO REPRESENTATION OR WARRANTIES ABOUT THE SUITABILITY OF
 * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
 * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT.  SUN SHALL NOT BE LIABLE FOR
 * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
 * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
 *
 */

/**
 * @author Ram Jeyaraman
 * @version 1.0 November 1998
 */

package com.sun.xml.treediff;

import java.io.*;
import java.util.*;
import org.w3c.dom.*;
import org.xml.sax.InputSource;
import com.sun.xml.tree.*;
import com.sun.xml.parser.*;

/**
 * Builds the document tree and supports insert, delete, swap, move,
 * split and collapse operations on it. It also has helper methods
 * to find a node based on its node path, perform block moves, and
 * build the relevant data structures.
 */
public class DocumentTree extends Object {

    // Instance variables

    // flag passed to the constructor: whether to build the special
    // data structures.
    private boolean buildInfo = false;
    // name of the xml file handed to the constructor.
    private String filename = null;
    // the owning document; used as the factory for new nodes and as the
    // parent of the root node.
    private XmlDocument _document = null;
    // the current root node of the tree (may be null).
    private Node rootNode = null;

    // A Vector of Vectors: each inner Vector holds all the nodes found
    // at one particular level of the DOM tree.
    private Vector levelContainer = new Vector();

    // Keyed by node type (an Integer, see the constructor); each value is
    // a Hashtable. Per the original notes, each of those inner tables maps
    // a hash id (key) to the leaf nodes that hash to the same bucket.
    private Hashtable leafNodeInfo = new Hashtable();

    // Maps a node (key) to the value id assigned to it (value).
    // Note: The association is one to one.
    private Hashtable node2valueId = new Hashtable();

    // Maps a value id (key) to the node(s) carrying it (value).
    // Note: The association is one to many: the value is a single Node,
    // or a Vector of Nodes once several nodes share the same id
    // (see setNodeForId).
    private Hashtable valueId2node = new Hashtable();

    // Constructor

	/**
     * @param filename the xml file to be parsed.
     */
    public DocumentTree(String filename) {
        // delegate to the two-argument constructor with buildInfo disabled.
    	this(filename, false);
    }

    /**
     * @param filename the xml file to be parsed.
     * @param buildInfo flag to build special data structures.
     */
     public DocumentTree(String filename, boolean buildInfo) {
        super();
        this.buildInfo = buildInfo;
        this.filename = filename;
        read();

        // construct the leafNodeInfo datastructure.
		this.leafNodeInfo.put(new Integer(Node.TEXT_NODE), new Hashtable());
        this.leafNodeInfo.put(new Integer(Node.COMMENT_NODE), new Hashtable());
		this.leafNodeInfo.put(new Integer(Node.CDATA_SECTION_NODE),
        					  								new Hashtable());
		this.leafNodeInfo.put(new Integer(Node.ENTITY_REFERENCE_NODE),
        													new Hashtable());
		this.leafNodeInfo.put(new Integer(Node.PROCESSING_INSTRUCTION_NODE),
        													new Hashtable());
     	this.leafNodeInfo.put(new Integer(Node.ELEMENT_NODE), new Hashtable());
    }

    // Static Methods
    
    /**
     * Measures how closely a string matches a reference string. Both
     * strings are compared character by character from the left, and the
     * comparison stops at the first position where they differ (or where
     * the shorter string ends).
     *
     * @param refString reference string (may be null).
     * @param string string to be matched for closeness (may be null).
     * @param caseSensitive if true characters must be identical; if false
     *        they are compared after <code>Character.toLowerCase</code>.
     * @return the length of the common prefix; 0 if either argument is
     *         null or empty.
     */
    static int charactersMatched(String refString,
        String string, boolean caseSensitive) {

        if (refString == null || string == null) {
            return 0;
        }

        // only the overlapping part of the two strings can match.
        int limit = Math.min(refString.length(), string.length());

        int matched = 0;
        while (matched < limit) {
            char expected = refString.charAt(matched);
            char actual = string.charAt(matched);

            boolean same;
            if (caseSensitive) {
                same = (expected == actual);
            } else {
                same = (Character.toLowerCase(expected) ==
                        Character.toLowerCase(actual));
            }

            if (!same) {
                break;
            }
            matched++;
        }

        return matched;
    }

    /**
     * Choose the node whose position in the DOM tree most closely matches
     * that of the reference node. Candidates are ranked by the number of
     * leading characters their position path shares with the reference
     * node's position path. A candidate with an identical position path is
     * returned immediately.
     * <p>
     * Note: this is a heuristic. It favours the node sharing the longest
     * position-path prefix, which is not always the closest node.
     *
     * @param refNode the reference node.
     * @param nodes vector containing the nodes to be compared.
     * @return the node with the exact or closest match; null if the
     *         reference node has no position path, if nodes is null, or if
     *         no candidate shares any prefix with the reference path.
     */
    static Node findClosestMatch(Node refNode, Vector nodes) {

        String refString = DocumentTree.getPositionPath(refNode);
        if (refString == null || nodes == null) {
            // nothing to compare against.
            return null;
        }

        int refLength = refString.length();
        Node chosenNode = null;
        int maxCount = 0;

        for (int i = 0, size = nodes.size(); i < size; i++) {
            Node node = (Node) nodes.elementAt(i);
            String path = DocumentTree.getPositionPath(node);
            int matchCount =
                DocumentTree.charactersMatched(refString, path, true);

            // An exact match needs equal lengths as well: "0.10" merely
            // starts with "0.1", it is not the same position. A null path
            // yields matchCount 0, so path is non-null when dereferenced.
            if (matchCount == refLength && path.length() == refLength) {
                return node;
            }

            if (matchCount > maxCount) {
                maxCount = matchCount;
                chosenNode = node;
            }
        }

        return chosenNode;
    }

    /**
     * Get the index of a child node within its parent.
     * <p>
     * When the parent object is itself a NodeList it is used directly as
     * the child list (as before); otherwise the standard
     * <code>Node.getChildNodes()</code> list is consulted, so the method
     * also works with DOM implementations whose nodes are not NodeLists.
     *
     * @param parent the parent node.
     * @param child the child node whose index is to be found.
     *
     * @return the index of the child node, or -1 if it is not a child of
     *         the parent (or either argument is null).
     */
    public static int getIndex(Node parent, Node child) {
        if (parent == null || child == null) {
            return -1;
        }

        NodeList children = (parent instanceof NodeList)
            ? (NodeList) parent
            : parent.getChildNodes();
        if (children == null) {
            return -1;
        }

        for (int index = 0, len = children.getLength(); index < len; index++) {
            Node node = children.item(index);
            if (node != null && node.equals(child)) {
                return index;
            }
        }

        return -1;
    }

    /**
     * Check if the node is of a leaf node type. The leaf types are:
     * attribute, CDATA section, comment, document type, entity reference,
     * entity, processing instruction and text.
     *
     * @param node the node to be checked for leaf-ness.
     * @return true if the node type is one of the leaf types.
     */
    public static boolean isLeafInstance(Node node) {
        final short type = node.getNodeType();

        return type == Node.ATTRIBUTE_NODE
            || type == Node.CDATA_SECTION_NODE
            || type == Node.COMMENT_NODE
            || type == Node.DOCUMENT_TYPE_NODE
            || type == Node.ENTITY_REFERENCE_NODE
            || type == Node.ENTITY_NODE
            || type == Node.PROCESSING_INSTRUCTION_NODE
            || type == Node.TEXT_NODE;
    }

    /**
     * Helper to check whether a node currently sits at the bottom of the
     * tree. Leaf-type nodes always qualify; any other node qualifies only
     * when it has lost all its children, in which case the hanging branch
     * should be pruned.
     *
     * @param node tree node.
     * @return true for a leaf-type node, or when the child count is zero.
     */
    public static boolean isLeaf(Node node) {
        // the child count is only consulted for non-leaf node types.
        return DocumentTree.isLeafInstance(node)
            || ((NodeList) node).getLength() == 0;
    }

    /**
     * Extracts the substring enclosed by the bounding characters: the text
     * between the first occurrence of bound_x and the first occurrence of
     * bound_y that follows it.
     *
     * @param token the parent string (may be null).
     * @param bound_x the bound character (start).
     * @param bound_y the bound character (end).
     *
     * @return the substring enclosed by the bounding characters (possibly
     *         empty), or null if token is null or either bound is missing.
     */
    static String extractSubstring(String token, char bound_x, char bound_y) {
        if (token == null) {
            return null;
        }

        int start = token.indexOf(bound_x);
        if (start == -1) {
            return null;
        }

        // Search for the closing bound only after the opening one, so that
        // an earlier bound_y (or bound_x == bound_y) cannot confuse the
        // extraction.
        int end = token.indexOf(bound_y, start + 1);
        if (end == -1) {
            return null;
        }

        return token.substring(start + 1, end);
    }

    /**
     * Counts the ElementNode ancestors of a node. The walk stops at the
     * first ancestor that is not an ElementNode (or when there is none).
     *
     * @param node tree node.
     * @return the level of the node in the tree (0 when its parent is not
     *         an ElementNode).
     */
    public static int getLevel(Node node) {
        int depth = 0;

        for (Node ancestor = node.getParentNode();
             ancestor instanceof ElementNode;
             ancestor = ancestor.getParentNode()) {
            depth++;
        }

        return depth;
    }

 	/**
     * This returns the position path. A position path of a node contains
     * the position of all the nodes in the path from root node to the
     * specified node.
     *
     * @return the position path.
     */
    static String getPositionPath(Node node) {
        Node current = node;
        Node parent = current.getParentNode();

        // Only element nodes and leaf-type nodes have a position path.
        if (!(current instanceof ElementNode
                || DocumentTree.isLeafInstance(current))) {
            return null;
        }

        // Walk upwards, prepending one dot-separated index per level. A
        // node whose parent is not an ElementNode is treated as the root
        // and always contributes "0", which ends the walk.
        String path = null;
        for (;;) {
            boolean hasElementParent = parent instanceof ElementNode;

            String step = hasElementParent
                ? String.valueOf(DocumentTree.getIndex(parent, current))
                : "0";
            path = (path == null) ? step : step + "." + path;

            if (!hasElementParent) {
                return path;
            }

            // the parent is an ElementNode here, so every node visited
            // from now on is an element.
            current = parent;
            parent = current.getParentNode();
        }
    }

    /**
     * This returns the node path: one dot-separated segment per node on
     * the way from the root node down to the given node.
     * e.g (Document[0].Section[1].Paragraph[2]),
     * which indicates that the node is reached from the root node, via its
     * second child (Section[1]), and then that child's third child
     * (Paragraph[2]). An element segment is "name[index]"; a leaf node may
     * only be the last segment and is written "Leaf{nodeType}[index]". The
     * root segment always carries index 0.
     * <p>
     * NOTE(review): findNode() in this class parses segments of the form
     * "tag&lt;pos&gt;" with the leaf node type in "[...]", which does not
     * match the format produced here - confirm which one is intended.
     *
     * @param node the node whose path is wanted.
     * @return the node path, or null if the node is neither an ElementNode
     *         nor a leaf-type node.
     */
    public static String getNodePath(Node node) {
        Node current = node;
        Node parent = current.getParentNode();

        // label of the innermost segment; only this one may be a leaf.
        String label;
        if (current instanceof ElementNode) {
            label = current.getNodeName();
        } else if (DocumentTree.isLeafInstance(current)) {
            label = "Leaf{" + current.getNodeType() + "}";
        } else {
            return null;
        }

        String path = null;
        for (;;) {
            boolean hasElementParent = parent instanceof ElementNode;

            // a node without an ElementNode parent is the root: index 0.
            String position = hasElementParent
                ? String.valueOf(DocumentTree.getIndex(parent, current))
                : "0";
            String segment = label + "[" + position + "]";
            path = (path == null) ? segment : segment + "." + path;

            if (!hasElementParent) {
                return path;
            }

            // step up; every ancestor reached this way is an ElementNode.
            current = parent;
            parent = current.getParentNode();
            label = current.getNodeName();
        }
    }

    /**
     * Get the string value carried by a node: the name of an entity
     * reference, the data of a processing instruction, CDATA section,
     * comment or text node, or the tag name of an element.
     *
     * @param node the node to read.
     * @return the string form of the node data; null for attribute, entity
     *         and document type nodes (not yet implemented) and for any
     *         other node type.
     */
    public static String getDataString(Node node) {
        String data = null;

        switch (node.getNodeType()) {
            case Node.ENTITY_REFERENCE_NODE:
                data = ((EntityReference) node).getNodeName();
                break;

            case Node.PROCESSING_INSTRUCTION_NODE:
                data = ((ProcessingInstruction) node).getData();
                break;

            case Node.CDATA_SECTION_NODE:
            case Node.COMMENT_NODE:
            case Node.TEXT_NODE:
                data = ((CharacterData) node).getData();
                break;

            case Node.ELEMENT_NODE:
                data = ((ElementEx) node).getTagName();
                break;

            default:
                // attribute, entity, document type: NYI - maybe later.
                // every other node type has no data either.
                break;
        }

        return data;
    }

    /**
     * Get the complete data of the leaf node (name, value).
     * Note: name and value can each be null, depending on the node type:
     * an entity reference only has a name, a processing instruction has
     * both (target and data), and CDATA, comment and text nodes only have
     * a value. Attribute, entity and document type nodes are not yet
     * supported and yield two nulls.
     *
     * @param node the leaf node to read.
     * @return a two-element string array holding the name and the value of
     *         the leaf data, or null if the node is not a leaf-type node.
     */
    public static String[] getCompleteData(Node node) {
        if (!DocumentTree.isLeafInstance(node))
            return null;

        // set up the return data (name, value); both start out null.
        String[] strArr = new String[2];

        switch (node.getNodeType()) {
            case Node.ATTRIBUTE_NODE:
            case Node.ENTITY_NODE:
            case Node.DOCUMENT_TYPE_NODE:
                break;   // NYI - maybe later.

            case Node.ENTITY_REFERENCE_NODE:
                strArr[0] = ((EntityReference) node).getNodeName();
                // must not fall through: an entity reference is not a
                // ProcessingInstruction and the casts below would fail.
                break;

            case Node.PROCESSING_INSTRUCTION_NODE:
                strArr[0] = ((ProcessingInstruction) node).getTarget();
                strArr[1] = ((ProcessingInstruction) node).getData();
                break;

            case Node.CDATA_SECTION_NODE:
            case Node.COMMENT_NODE:
            case Node.TEXT_NODE:
                strArr[1] = ((CharacterData) node).getData();
                break;
        }

        return strArr;
    }

    /**
     * Set the value of the leaf data. Only processing instructions, CDATA
     * sections, comments and text nodes are updated. Non-leaf nodes,
     * entity references (which only have a name) and the not yet
     * supported attribute, entity and document type nodes are left
     * untouched.
     *
     * @param node the leaf node to update.
     * @param data the data to be set.
     */
    public static void setDataString(Node node, String data) {
        if (!DocumentTree.isLeafInstance(node)) {
            return;
        }

        final short type = node.getNodeType();

        if (type == Node.PROCESSING_INSTRUCTION_NODE) {
            ((ProcessingInstruction) node).setData(data);
        } else if (type == Node.CDATA_SECTION_NODE
                   || type == Node.COMMENT_NODE
                   || type == Node.TEXT_NODE) {
            ((CharacterData) node).setData(data);
        }
        // every other leaf type: nothing to set.
    }

    // Instance Methods

    /**
     * Record the value identifier of a node in the node-to-id table,
     * replacing any identifier stored for that node before.
     *
     * @param node the node being labelled.
     * @param id value identifier string to be set.
     */
    void setIdForNode(Node node, String id) {
        node2valueId.put(node, id);
    }

    /**
     * Look up a node in the node-to-id table.
     *
     * @param node the node to look up.
     * @return the value identifier associated with the node, or null if
     *         none has been recorded.
     */
    String getIdForNode(Node node) {
        Object id = node2valueId.get(node);
        return (String) id;
    }

    /**
     * Register a node under a value identifier. There can be more than
     * one node with the same id: the first node is stored directly, and as
     * soon as a second one arrives the entry becomes a Vector that
     * collects all of them.
     *
     * @param id identifier string to be set.
     * @param node node corresponding to the id.
     */
    void setNodeForId(String id, Node node) {

        Object existing = getNodeForId(id);

        if (existing == null) {
            // first node for this id: store it directly.
            valueId2node.put(id, node);
        } else if (existing instanceof Node) {
            // second node: upgrade the entry to a list of both.
            Vector sharing = new Vector();
            sharing.addElement(existing);
            sharing.addElement(node);
            valueId2node.put(id, sharing);
        } else if (existing instanceof Vector) {
            // third or later node: extend the list already in place.
            ((Vector) existing).addElement(node);
        } else {
            // entries are only ever a Node or a Vector.
            System.err.println("setNodeForId: illegal entry in hashtable");
        }
    }

    /**
     * Remove a node from the node(s) registered under an id. A directly
     * stored node is removed only if it is the very same object; for a
     * list entry the node is removed from the list, and the (possibly
     * shorter or empty) list stays registered.
     *
     * @param id the identifier the node is registered under.
     * @param node node to be removed.
     */
    void removeNodeForId(String id, Node node) {
        Object existing = getNodeForId(id);

        if (existing != null && node != null) {
            if (existing instanceof Node) {
                // single entry: drop it only when it is this very node.
                if (existing == node) {
                    valueId2node.remove(id);
                }
            } else if (existing instanceof Vector) {
                ((Vector) existing).removeElement(node);
            } else {
                // entries are only ever a Node or a Vector.
                System.err.println(
                    "removeNodeForId: illegal entry in Hashtable");
            }
        }
    }

    /**
     * Raw lookup in the id-to-node table.
     *
     * @param id the value identifier.
     * @return the entry stored for the id: a Node, a Vector of Nodes, or
     *         null when nothing is registered.
     */
    private Object getNodeForId(String id) {
        Object entry = valueId2node.get(id);
        return entry;
    }

    /**
     * There can be more than one node that has the same id.
     * This returns the most appropriate node for the given id. When only
     * one node is registered it is returned as is; otherwise the choice is
     * delegated to findClosestMatch(), which compares the position path
     * of each candidate with that of the reference node.
     *
     * @param id the value identifier to look up.
     * @param refNode node whose position path is used for choosing a node,
     *        if multiple nodes have the same id.
     *
     * @return an appropriate node corresponding to the id, or null if the
     *         id is unknown or no candidate could be chosen.
     */
    Node getNodeForId(String id, Node refNode) {

        Object obj = getNodeForId(id);

        if (obj == null)
            return null;

        // Well, just one node to choose from..
        if (obj instanceof Node) {
            return (Node) obj;
        }

        // has to be an instance of Vector.
        if (!(obj instanceof Vector)) {
            // report under this method's own name (the message used to
            // claim it came from setNodeForId).
            System.err.println("getNodeForId: illegal entry in Hashtable");
            return null;
        }

        // return a node with the closest match based on position path.
        return DocumentTree.findClosestMatch(refNode, (Vector) obj);
    }

    /**
     * Assigns a value identifier to a node, derived from the ids of its
     * children: each child's id is appended in order, prefixed by ':'.
     * Note: It is assumed that all the children have ids preassigned; a
     * child without an id contributes the text "null".
     *
     * @param parent the parent node; must be an ElementEx.
     * @return the newly assigned id, or null if the parent has an illegal
     *         node type.
     */
    String assignIdToParent(Node parent) {

        if (!(parent instanceof ElementEx)) {
            System.err.println("assignIdToParent: illegal parent node type");
            return null;
        }

        NodeList children = (NodeList) parent;

        // Build the id in a buffer; repeated String += copies the whole
        // id once per child.
        StringBuffer idBuffer = new StringBuffer();
        for (int i = 0, size = children.getLength(); i < size; i++) {
            idBuffer.append(':').append(getIdForNode(children.item(i)));
        }

        String newId = idBuffer.toString();
        setIdForNode(parent, newId);
        return newId;
    }

    /**
     * This inserts a new root node and makes the old root node
     * (subtree, if one exists) the child of the newly created root node.
     * A leaf node is accepted as the new root. If the root cannot be
     * replaced (setRootNode reports the reason on System.err) the tree is
     * left as it was.
     *
     * @param newRoot the new root node to be inserted.
     */
    public void insertRootNode(Node newRoot) {

        Node oldRoot = this.rootNode;

        if (oldRoot != null && oldRoot != _document.getDocumentElement()) {
            System.err.println("insertRootNode: corrupted rootNode reference");
            return;
        }

        // replace the existing root node in the DOM tree.
        this.setRootNode(newRoot);

        // setRootNode leaves this.rootNode untouched when it rejects the
        // change; in that case the old root must stay where it is.
        if (this.rootNode != newRoot) {
            return;
        }

        // make the previous root (if there was one) a child of the new
        // root. Note: the previous root is no more the root node.
        if (oldRoot != null && oldRoot != newRoot) {
            newRoot.appendChild(oldRoot);
        }
    }

    /**
     * Create a leaf node in this document using refNode as a template,
     * which effectively clones the node. This is needed in cases where
     * refNode lives in a different DOM tree; within the same DOM tree
     * cloneNode() could be used instead.
     *
     * @param refNode the reference node.
     * @return the newly created node, or null if the node type is not
     *         supported (attribute, entity and document type are not yet
     *         implemented) or the reference node lacks the required name
     *         or data.
     */
    public Node createLeafNode(Node refNode) {
        final short type = refNode.getNodeType();

        if (type == Node.ENTITY_REFERENCE_NODE) {
            String name = refNode.getNodeName();
            if (name == null) {
                return null;
            }
            return _document.createEntityReference(name);
        }

        if (type == Node.PROCESSING_INSTRUCTION_NODE) {
            ProcessingInstruction pi = (ProcessingInstruction) refNode;
            String target = pi.getTarget();
            String data = pi.getData();
            if (target == null || data == null) {
                return null;
            }
            return _document.createProcessingInstruction(target, data);
        }

        if (type == Node.CDATA_SECTION_NODE
            || type == Node.COMMENT_NODE
            || type == Node.TEXT_NODE) {
            // all three are character data; only the factory call differs.
            String text = ((CharacterData) refNode).getData();
            if (text == null) {
                return null;
            }
            if (type == Node.CDATA_SECTION_NODE) {
                return _document.createCDATASection(text);
            }
            if (type == Node.COMMENT_NODE) {
                return _document.createComment(text);
            }
            return _document.createTextNode(text);
        }

        // attribute, entity, document type (NYI - maybe later) and every
        // non-leaf type.
        return null;
    }

    /**
     * Create a leaf node. This extracts the nodeType info inscribed in
     * the tag (the number between '[' and ']'), creates the leaf node of
     * the appropriate type and sets the data (name and value). Certain
     * types of leaf nodes do not have a name for their data, in which case
     * the name argument is ignored.
     *
     * @param tag the leaf node tag with the inscribed nodeType information.
     * @param name the name part of the data (may be empty).
     * @param value the value portion of the data.
     *
     * @return a newly created leaf node of the appropriate nodeType, or
     *         null if the tag carries no valid node type, the node type is
     *         not supported, or the required name/value is null.
     */
    public Node createLeafNode(String tag, String name, String value) {

        if (tag == null)
            return null;

        // get the node type

        String nodeTypeString = DocumentTree.extractSubstring(tag, '[', ']');
        if (nodeTypeString == null || nodeTypeString.equals(""))
            return null;

        int nodeType;
        try {
            nodeType = Integer.parseInt(nodeTypeString);
        } catch (NumberFormatException e) {
            // a non-numeric type is just another invalid tag.
            return null;
        }

        // create an appropriate leaf node.

        switch (nodeType) {
            case Node.ATTRIBUTE_NODE:
            case Node.ENTITY_NODE:
            case Node.DOCUMENT_TYPE_NODE:
                return null;   // NYI - maybe later.

            case Node.ENTITY_REFERENCE_NODE:
                if (name == null) return null;
                return _document.createEntityReference(name);

            case Node.PROCESSING_INSTRUCTION_NODE:
                if (name == null || value == null) return null;
                return _document.createProcessingInstruction(name, value);

            case Node.CDATA_SECTION_NODE:
                if (value == null) return null;
                return _document.createCDATASection(value);

            case Node.COMMENT_NODE:
                if (value == null) return null;
                return _document.createComment(value);

            case Node.TEXT_NODE:
                if (value == null) return null;
                return _document.createTextNode(value);

            default:
                return null;
        }
    }

     /**
      * Creates a non-leaf (element) node with the given tag, using this
      * tree's document as the factory.
      *
      * @param tag the tag name of the new element.
      * @return a new non-leaf node.
      */
     public Node createElementNode(String tag) {
         Node element = _document.createElement(tag);
         return element;
     }

     /**
      * Creates a non-leaf (element) node carrying the same tag as the
      * reference node, using this tree's document as the factory.
      *
      * @param refNode the element whose tag name is copied; must be an
      *        ElementEx.
      * @return a new non-leaf node, or null if refNode has an illegal
      *         node type.
      */
     public Node createElementNode(Node refNode) {
         if (refNode instanceof ElementEx) {
             ElementEx template = (ElementEx) refNode;
             return _document.createElement(template.getTagName());
         }

         System.err.println("createElementNode: illegal node type");
         return null;
     }

    /**
     * The levelContainer object is a vector of vectors. Each inner vector
     * lists the nodes at one particular level in the tree. Per the
     * original notes, this data structure is populated by invoking the
     * buildLevelInfo() method (not visible here - confirm).
     *
     * @return the levelContainer object (the live instance, not a copy).
     */
    Vector getLevelContainer() {
        return levelContainer;
    }

    /**
     * The leafNodeInfo object is a hashtable keyed by node type, each
     * value being a hashtable of its own. Per the original notes, leaf
     * nodes are hashed there against a hash value derived from the String
     * class's hashCode() method, and the structure is populated by one of
     * the build methods (not visible here - confirm which).
     *
     * @return the leafNodeInfo object (the live instance, not a copy);
     *         may be null if setLeafNodeInfo was given null.
     */
    Hashtable getLeafNodeInfo() {
        return leafNodeInfo;
    }

    /**
     * Replace the leafNodeInfo data structure with the given table.
     *
     * @param table the hashtable to be used for leafNodeInfo (may be null).
     */
    void setLeafNodeInfo(Hashtable table) {
        leafNodeInfo = table;
    }

    /**
     * @return the root node of the document tree (null if none is set).
     */
    public Node getRootNode() {
        return rootNode;
    }

    /**
     * Set the root node. This makes sure the document super tree also
     * knows about the change: with no current root the node is inserted
     * before the document's first child, otherwise it replaces the current
     * root under the document. Any problem is reported on System.err and
     * leaves both the document and the root reference unchanged.
     *
     * @param node the new root node (must not be null).
     */
    public void setRootNode(Node node) {

        if (node == null) {
            System.err.println("setRootNode: cannot set root node to null");
            return;
        }

        // make sure the document super tree exists.
        if (_document == null) {
            System.err.println("setRootNode: document tree does not exist");
            return;
        }

        // our root reference must agree with the document's own view.
        Node current = this.rootNode;
        if (current != _document.getDocumentElement()) {
            System.err.println("setRootNode: illegal root node in DOM tree");
            return;
        }

        // update the document super tree.
        if (current != null) {
            // the current root has to hang directly off the document
            // (a missing parent fails this check as well).
            Node holder = current.getParentNode();
            if (holder != _document) {
                System.err.println("setRootNode: DOM tree hierarchy corrupted");
                return;
            }

            // replace the existing root node.
            holder.replaceChild(node, current);
        } else {
            _document.insertBefore(node, _document.item(0));
        }

        // make sure this.rootNode points to the new root node.
        this.rootNode = node;
    }

    /**
     * Resolve a node path string to a node. The path is a list of
     * segments separated by '.', each of the form "tag&lt;pos&gt;". The
     * first usable segment always resolves to the root node (its position
     * is parsed but not used); each later segment selects child number pos
     * of the node found so far. For a non-leaf node the tag must equal the
     * node name; for a leaf-type node the tag must carry the node type
     * between '[' and ']'. Segments lacking '&lt;' or '&gt;' are skipped.
     *
     * @param s the node path string.
     * @return the node corresponding to the node path string, if it
     *         exists; null otherwise.
     */
    public Node findNode(String s) {

        if (s == null) {
            System.err.println("findNode: null node path string");
            return null;
        }

        final char[] chars = s.toCharArray();
        final int len = chars.length;
        Node node = null;

        int begin = 0;
        while (begin < len) {
            // the current segment spans [begin, end).
            int end = charIndex(chars, begin, len, '.');
            if (end == -1) {
                end = len;
            }

            int open = charIndex(chars, begin, end, '<');
            int close = (open == -1)
                ? -1
                : charIndex(chars, open + 1, end, '>');

            if (close != -1) {
                String tag = new String(chars, begin, open - begin);
                int pos = Integer.parseInt(
                    new String(chars, open + 1, close - (open + 1)));

                // first segment: start at the root; afterwards descend.
                if (node == null) {
                    node = this.rootNode;
                } else {
                    node = ((NodeList) node).item(pos);
                }

                // node does not exist
                if (node == null) {
                    return null;
                }

                boolean matches;
                if (DocumentTree.isLeafInstance(node)) {
                    // leaf tags carry the node type in brackets.
                    String nodeTypeString =
                        DocumentTree.extractSubstring(tag, '[', ']');
                    matches = (node.getNodeType() ==
                               Integer.parseInt(nodeTypeString));
                } else {
                    matches = node.getNodeName().equals(tag);
                }

                if (!matches) {
                    System.err.println("Invalid nodepath: " + s);
                    return null;
                }
            }
            // segments without a "<pos>" part are skipped.

            begin = end + 1;
        }

        return node;
    }

    /*
     * a helper method that scans chars[begin..end) for a character and
     * returns the index of its first occurrence, or -1 if it is absent.
     */
    private int charIndex(char[] chars, int begin, int end, char ch) {
        int i = begin;
        while (i < end && chars[i] != ch) {
            i++;
        }

        return (i < end) ? i : -1;
    }

    /*
     * a helper method to extract the parent node path substring
     * from the node path: everything before the last '.' separator,
     * ignoring any trailing '.' characters. Returns null when the path is
     * null, empty, consists only of '.' characters, or has no separator
     * (i.e. there is no parent part).
     */
    String getParentNodePath(String s) {
        if (s == null) {
            System.err.println("getParentNodePath: null nodePath string");
            return null;
        }

        // skip the trailing '.' (bounded, so "" and "..." cannot run off
        // the front of the string).
        int last = s.length() - 1;
        while (last >= 0 && s.charAt(last) == '.') {
            last--;
        }
        if (last < 0) {
            return null;
        }

        int dot = s.lastIndexOf('.', last);
        if (dot == -1) {
            return null;
        }

        return s.substring(0, dot);
    }

    /*
     * a helper method to extract the child node path substring from the
     * node path (i.e child node path = nodepath -  parent node path):
     * everything after the last '.' separator that precedes the final
     * segment. Trailing '.' characters are not treated as separators but
     * are kept in the returned text. Returns null when the path is null,
     * empty, consists only of '.' characters, or has no separator.
     */
    String getChildNodePath(String s) {
        if (s == null) {
            System.err.println("getChildNodePath: null nodePath string");
            return null;
        }

        // skip the trailing '.' (bounded, so "" and "..." cannot run off
        // the front of the string).
        int last = s.length() - 1;
        while (last >= 0 && s.charAt(last) == '.') {
            last--;
        }
        if (last < 0) {
            return null;
        }

        int dot = s.lastIndexOf('.', last);
        if (dot == -1) {
            return null;
        }

        return s.substring(dot + 1);
    }

    /**
     * insert a new node in the DOM tree. The nodePath parameter
     * gives the location in the tree where a node is to be inserted:
     * its parent portion identifies the parent node, and its last
     * component must have the form tag&lt;position&gt;, where position is
     * the child index in front of which the new node is placed (the node
     * is appended when the parent has no child at that index).
     * If the data (name + value) is null, then a non-leaf node
     * is created, else a leaf node is created.
     * Note: This does not work for root nodes!!
     *
     * Every failure (null or malformed path, missing or non-element
     * parent, failed leaf creation) is reported on System.err and leaves
     * the tree unchanged.
     *
     * @param nodePath the node path of the node to be inserted.
     * @param name the name part of the data (maybe empty).
     * @param value the value portion of the data (maybe empty).
     */
    public void insert( String nodePath, String name, String value) {
        if (nodePath == null) {
            System.err.println("INS: null node path string");
            return;
        }

        Node parent = findNode(getParentNodePath(nodePath));

        // sanity check
        if (parent == null || !(parent instanceof ElementNode)) {
            System.err.println("INS: INS("+ nodePath + ") failed");
            return;
        }

        // find child node tag and position

        String s = getChildNodePath(nodePath);
        if (s == null) {
            System.err.println("INS: child nodepath is null");
            return; // nothing to parse; continuing would dereference null
        }

        int index = s.indexOf('<');
        if (index == -1) {
            System.err.println("INS: illegal nodepath");
            return;
        }

        String tag = s.substring(0, index);

        int cindex = s.indexOf('>', index + 1);
        if (cindex == -1) {
            System.err.println("INS: illegal nodepath");
            return;
        }

        int childPos;
        try {
            childPos = Integer.parseInt(s.substring(index + 1, cindex));
        } catch (NumberFormatException e) {
            // the text between '<' and '>' is not a number
            System.err.println("INS: illegal nodepath");
            return;
        }

        // create a leaf or non-leaf node

        Node child;
        if (name != null || value != null) {
            child = createLeafNode(tag, name, value);
            if (child == null) {
                System.err.println("INS: leaf node creation failed");
                return; // never hand a null child to the DOM
            }
        } else {
            child = createElementNode(tag);
        }

        // the insert operation

        Node refChild = ((NodeList) parent).item(childPos);
        if (refChild == null) {
            parent.appendChild(child);
        } else {
            parent.insertBefore(child, refChild);
        }
    }

    /**
     * delete the node addressed by a node path. The path is resolved with
     * findNode and the actual removal is delegated to
     * delete(Node, boolean). If pruneBranch is true and the deletion
     * leaves the parent without children, the parent is deleted as well,
     * recursively.
     *
     * A null or unresolvable path is reported on System.err and nothing
     * is deleted.
     *
     * @param s node path of the node to be deleted.
     * @param pruneBranch true if branch needs to be pruned.
     */
    public void delete(String s, boolean pruneBranch) {
        if (s != null) {
            Node target = findNode(s);
            if (target != null) {
                delete(target, pruneBranch);
            } else {
                System.err.println("DEL: invalid node path");
            }
        } else {
            System.err.println("DEL: null node path string");
        }
    }

    /**
     * delete the given node from its parent. If pruneBranch is set to
     * true, then if the deletion of a node
     * leaves its parent with no children, then the parent is deleted as
     * well recursively.
     *
     * When the node being removed is the root node (or its parent is not
     * an element), the rootNode reference of this tree is cleared.
     * A null node, a node without a parent, or a branch that turns out
     * not to be rooted while pruning is reported on System.err and
     * nothing is removed.
     *
     * @param child the node to be deleted.
     * @param pruneBranch true if branch needs to be pruned.
     */
    public void delete(Node child, boolean pruneBranch) {

        if (child == null) {
            System.err.println("DEL: null node");
            return;
        }

        Node parent = child.getParentNode();
        if (parent == null) {
            System.err.println("DEL: no parent, tree node cannot be deleted");
            return;
        }

        // root node deletion.
        if (!(parent instanceof ElementEx) || child == this.rootNode) {
            parent.removeChild(child);
            this.rootNode = null;
            return;
        }

        // climb while the node to remove is an only child, so that the
        // highest ancestor left empty by the deletion is removed instead.
        while (pruneBranch && (((NodeList) parent).getLength() == 1)) {
            child = parent;
            parent = child.getParentNode();

            // must be tested before the instanceof check below: a null
            // parent is never an ElementNode and would otherwise break out
            // of the loop and be dereferenced by removeChild.
            if (parent == null) {
                System.err.println("DEL: branch being pruned is not rooted");
                return; // branch the node belongs to is not rooted
            }

            if (!(parent instanceof ElementNode) || child == this.rootNode) {
                this.rootNode = null;
                break; // the document root node has been reached
            }
        }

        parent.removeChild(child);
    }

    /**
     * replace the data of a leaf node. The node is looked up by its node
     * path; a path that resolves to nothing, or to a node that is not a
     * leaf, is reported on System.err and no data is changed.
     *
     * @param nodePath node path of the node whose data is to be updated.
     * @param data the new data.
     */
    public void update(String nodePath, String data) {
        Node target = findNode(nodePath);

        if (target == null) {
            System.err.println("UPD: non-existent node");
        } else if (!DocumentTree.isLeafInstance(target)) {
            System.err.println("UPD: only leaf nodes shall be updated");
        } else {
            DocumentTree.setDataString(target, data);
        }
    }

    /**
     * swap two nodes addressed by their node paths. Both paths must
     * resolve to existing nodes that share one and the same element
     * parent; otherwise the problem is reported on System.err and the
     * tree is left unchanged.
     *
     * @param s1 node path of the first node to be swapped.
     * @param s2 node path of the second node to be swapped.
     */
    public void swap(String s1, String s2) {
        Node first = findNode(s1);
        Node second = findNode(s2);

        if (first == null || second == null) {
            System.err.println("SWP: non-existent nodes");
            return;
        }

        // both nodes need a parent, and it has to be the same element

        Node firstParent = first.getParentNode();
        Node secondParent = second.getParentNode();

        if (firstParent == null || secondParent == null) {
            System.err.println("SWP: cannot swap nodes without a parent");
            return;
        }

        boolean elementParents = (firstParent instanceof ElementNode)
                                 && (secondParent instanceof ElementNode);
        if (!elementParents) {
            System.err.println("SWP: illegal parent node");
            return;
        }

        if (firstParent != secondParent) {
            System.err.println("SWP: nodes do not share a common parent");
            return;
        }

        swap(firstParent, first, second);
    }

    /**
     * swap two children of a parent. The child indices are looked up with
     * DocumentTree.getIndex and the exchange itself is delegated to
     * swap(Node, int, int).
     *
     * @param parent the parent node.
     * @param child1 the first child node to be swapped.
     * @param child2 the second child node to be swapped.
     */
    public void swap(Node parent, Node child1, Node child2) {
        int firstIndex = DocumentTree.getIndex(parent, child1);
        int secondIndex = DocumentTree.getIndex(parent, child2);

        swap(parent, firstIndex, secondIndex);
    }

    /**
     * swap the two children located at the given indices. The indices may
     * be given in either order. Swapping an index with itself leaves the
     * tree unchanged. A null parent, or an index at which the parent has
     * no child, is reported on System.err and nothing is changed.
     *
     * @param parent the parent node.
     * @param i the index of the first child node to be swapped.
     * @param j the index of the second child node to be swapped.
     */
    public void swap(Node parent, int i, int j) {

        if (parent == null) {
            System.err.println("swap: null parent");
            return;
        }

        if (i > j) { // order the indices so that i <= j
            int tmp = i;
            i = j;
            j = tmp;
        }

        Node child1 = ((NodeList) parent).item(i);
        Node child2 = ((NodeList) parent).item(j);

        if (child1 == null || child2 == null) {
            System.err.println("swap: non-existent children");
            return;
        }

        // a node swapped with itself stays where it is; without this
        // guard it would be removed and then used as its own reference
        // child for insertBefore.
        if (i == j) {
            return;
        }

        // swap the nodes

        Node refChild;
        if ((j - i) > 1) {
            // remember the node following child1: child2 goes in front of
            // it once child1 has been moved in front of child2.
            refChild = ((NodeList) parent).item(i + 1);
            parent.removeChild(child1);
            parent.insertBefore(child1, child2);
        } else {
            // adjacent children: child2 simply moves in front of child1.
            refChild = child1;
        }

        parent.removeChild(child2);
        parent.insertBefore(child2, refChild);
    }

    /**
     * move the node (subtree) to a different position. The node is first
     * detached through delete(node, false) and then placed in front of
     * the child currently found at childPos in the new parent; when the
     * new parent has no child at that position the node is appended.
     * Null arguments and a destination that is not an element are
     * reported on System.err and nothing is moved.
     *
     * @param node the node to be moved.
     * @param dst  the new parent.
     * @param childPos the position at which the node will be inserted.
     */
    public void move(Node node, Node dst, int childPos) {

        if (node == null || dst == null) {
            System.err.println("MOV: null value for node");
            return;
        }

        if (!(dst instanceof ElementNode)) {
            System.err.println("MOV: illegal destination node type");
            return;
        }

        // detach from the current parent (without pruning) ...
        delete(node, false);

        // ... and attach under the destination
        Node successor = ((NodeList) dst).item(childPos);
        if (successor != null) {
            dst.insertBefore(node, successor);
        } else {
            dst.appendChild(node);
        }
    }

    /**
     * move the node (subtree) from one position to another.
     * The parent portion of s2 identifies the new parent, and the last
     * component of s2 must have the form tag&lt;position&gt;, giving the
     * child index at which the node is inserted. When the node being
     * moved is an element, its tag name has to equal that tag.
     * Note: this does not work if a node is moved
     * to the root position.
     *
     * Unresolvable or malformed paths and differing tags are reported on
     * System.err and nothing is moved.
     *
     * @param s1 node path to the current position of the node.
     * @param s2 node path to the new position.
     */
    public void move(String s1, String s2) {
        Node src = findNode(s1);
        Node dst = findNode(getParentNodePath(s2));

        // sanity check
        if (src == null || dst == null) {
            System.err.println("move: MOV (" + s1 + ", " + s2 + ")" +
                               " failed");
            return;
        }

        // find destination child node index

        String s = getChildNodePath(s2);
        if (s == null) {
            System.err.println("move: child nodepath is null");
            return; // nothing to parse; continuing would dereference null
        }

        int index = s.indexOf('<');
        if (index == -1) {
            System.err.println("MOV: illegal nodepath");
            return;
        }

        String tag = s.substring(0, index);
        if (src instanceof ElementNode &&
            !tag.equals(((ElementNode) src).getTagName())) {
            System.err.println("MOV: source and destination" +
                               " node tags differ");
            return;
        }

        int cindex = s.indexOf('>', index + 1);
        if (cindex == -1) {
            System.err.println("MOV: illegal nodepath");
            return;
        }

        int childPos;
        try {
            childPos = Integer.parseInt(s.substring(index + 1, cindex));
        } catch (NumberFormatException e) {
            // the text between '<' and '>' is not a number
            System.err.println("MOV: illegal nodepath");
            return;
        }

        // the move operation
        move(src, dst, childPos);
    }

    /**
     * split the node addressed by a node path into two at the given
     * index. The path is resolved with findNode and the work is delegated
     * to split(Node, int): the newly created node has the same tag type
     * and receives all children of the original node starting at the
     * index.
     * Note: This does not work in the case of root node.
     *
     * @param nodePath node path of the node to be split.
     * @param index index of the child node where the split has to happen.
     */
    public void split(String nodePath, int index) {
        Node target = findNode(nodePath);

        split(target, index);
    }

    /**
     * split the node into two at the given index. This effectively creates
     * two nodes, with identical tag types. The new node is inserted as the
     * next sibling of the original node and receives all the children of
     * the original node starting at the index.
     * Note: This does not work in the case of root node.
     *
     * A null or non-element node, a node without an element parent, the
     * root node, or an index outside 0 .. childCount-1 is reported on
     * System.err and the tree is left unchanged.
     *
     * @param node the node to be split.
     * @param index index of the child node where the split has to happen.
     */
    public void split(Node node, int index) {

        if (node == null) {
            System.err.println("SPT: non-existent node");
            return;
        }

        if (!(node instanceof ElementEx)) {
            System.err.println("SPT: illegal node type");
            return;
        }

        Node parent = node.getParentNode();
        if (parent == null) {
            System.err.println("SPT: node to-be-split does not have a parent");
            return;
        }

        if (!(parent instanceof ElementNode) || node == this.rootNode) {
            System.err.println("SPT: root node cannot be split");
            return;
        }

        // reject negative indices as well: they would otherwise leave an
        // empty sibling behind, because the sibling is created and
        // inserted before the children are block moved.
        int childCount = ((NodeList) node).getLength();
        if (index < 0 || index >= childCount) {
            System.err.println("SPT: index out of range");
            return;
        }

        // create a node with the same tag type.
        Node sibling = createElementNode(node);

        // place it directly after the node being split.
        Node refNode = node.getNextSibling();
        if (refNode == null) {
            parent.appendChild(sibling);
        } else {
            parent.insertBefore(sibling, refNode);
        }

        // move a subset of children to the sibling node
        blockMove(node, sibling, index, childCount - 1);
    }

    /**
     * block move the children of src located at the indices begin .. end
     * (inclusive, in either order) to dst. The children are taken one
     * after the other from the lower index of src and inserted into dst
     * at positions 0, 1, 2, ... in their original order.
     * Null nodes, non-element nodes, or an upper index beyond the last
     * child of src are reported on System.err and nothing is moved.
     *
     * @param src the node whose children are moved.
     * @param dst the node receiving the children.
     * @param begin beginning node index.
     * @param end end node index.
     */
    public void blockMove(Node src, Node dst,
                          int begin, int end) {

        if (src == null || dst == null) {
            System.err.println("blockMove: nodes are null");
            return;
        }

        boolean bothElements = (src instanceof ElementEx)
                               && (dst instanceof ElementEx);
        if (!bothElements) {
            System.err.println("blockMove: nodes types are illegal");
            return;
        }

        // order the bounds so that first <= last
        int first = begin;
        int last = end;
        if (first > last) {
            int tmp = first;
            first = last;
            last = tmp;
        }

        if (last >= ((NodeList) src).getLength()) {
            System.err.println("blockMove: index out of range");
            return;
        }

        // every move removes the child at 'first' from src, so the next
        // child to move slides down into that very same index.
        int insertAt = 0;
        for (int i = first; i <= last; i++, insertAt++) {
            Node child = ((NodeList) src).item(first);

            if (child != null) {
                move(child, dst, insertAt);
            }
        }
    }

    /**
     * collapse two nodes, addressed by their node paths, together. Both
     * paths are resolved with findNode and the work is delegated to
     * collapse(Node, Node), which block moves all the children of the
     * source node to the destination node.
     *
     * @param s1 destination node to be collapsed.
     * @param s2 source node to be collapsed.
     */
    public void collapse(String s1, String s2) {
        // arguments are evaluated left to right: destination, then source
        collapse(findNode(s1), findNode(s2));
    }

    /**
     * collapse two nodes together. All the children from the source node
     * are block moved to be children of the destination node. A source
     * node without children leaves the tree unchanged.
     * Null or non-element nodes are reported on System.err and nothing is
     * moved.
     *
     * @param dst destination node to be collapsed.
     * @param src source node to be collapsed.
     */
    public void collapse(Node dst, Node src) {
        if (dst == null || src == null) {
            System.err.println("collapse: non-existent nodes");
            return;
        }

        if (!(src instanceof ElementEx) || !(dst instanceof ElementEx)) {
            System.err.println("CLP: illegal node types");
            return;
        }

        // nothing to move for a childless source; calling blockMove with
        // the range 0 .. -1 would only produce a misleading
        // "index out of range" error.
        int childCount = ((NodeList) src).getLength();
        if (childCount == 0) {
            return;
        }

        // move all the children to the dst node
        blockMove(src, dst, 0, childCount - 1);
    }

    /**
     * Walks the tree below rootNode in depth-first pre-order, without
     * recursion, and gathers per-node bookkeeping on the way:
     *
     * When buildInfo is set, builds a Vector of vectors (levelContainer),
     * with each vector corresponding to a level in the document tree and
     * holding references to all nodes visited at that level.
     *
     * Fills the per-node-type hashtables of leafNodeInfo. Each key (the
     * hash code of the leaf data) has a vector of leaf nodes as its
     * value, whose data hash to the same key.
     *
     * And assigns a valueIdentifier to each leaf node with non-empty
     * data, in the order the leaves are traversed. Leaves of the same
     * node type with identical data share one identifier.
     *
     * The walk ends when the traversal climbs back to rootNode, or when
     * an unknown node type is met (which is reported on System.err).
     */
    void buildLeafInfo() {

        int level = 0;
        Node current = this.rootNode, startPoint = this.rootNode;
        Node next = null;

        while (current != null) {

            // record the node under its tree level, growing the list of
            // levels on demand.
        	if (this.buildInfo) {
                int size = levelContainer.size();
                if (size <= level) {
                    for (int i = size; i <= level; i++)
                        levelContainer.addElement(new Vector());
                }
                Vector levelList = (Vector) levelContainer.elementAt(level);
                levelList.addElement(current);
            }

            // NOTE: the case groups below deliberately fall through into
            // one another; their order is significant.
            switch (current.getNodeType()) {
            case Node.DOCUMENT_FRAGMENT_NODE:
            case Node.DOCUMENT_NODE:
            case Node.ELEMENT_NODE:

                // For elements that can have children, visit those
                // children before any siblings (i.e. depth first)
                // and after visiting this node (i.e. preorder)
                next = current.getFirstChild();
                if (next != null) {
                    current = next;
                    level++;
                    break;
                }

                // elements with no children.
                // fall through.

            case Node.CDATA_SECTION_NODE:
            case Node.COMMENT_NODE:
            case Node.ENTITY_REFERENCE_NODE:
            case Node.PROCESSING_INSTRUCTION_NODE:
            case Node.TEXT_NODE:

                // collect the leaf nodes in the hashtable.

                Node leaf = current;
                String leafData = DocumentTree.getDataString(leaf);

                // if leaf node data is null or empty, skip.
                if (leafData != null && !leafData.equals("")) {

                  // get the right hashtable based on the leaf node type.
                  // NOTE(review): table is null (and the lookup below
                  // throws) unless leafNodeInfo holds an entry for every
                  // node type that can reach this point with non-empty
                  // data, childless container nodes included - confirm
                  // where leafNodeInfo is populated.
                  Integer leafType = new Integer(leaf.getNodeType());
                  Hashtable table = (Hashtable) this.leafNodeInfo.get(leafType);

                  // see if a hashtable entry exists.
                  Integer hashCode = new Integer(leafData.hashCode());
                  Vector v = (Vector) table.get(hashCode);
                  if (v == null)
                      v = new Vector();

                  // assign a valueIdentifier to the leaf node. If another
                  // leaf node in the list has identical content, then assign
                  // the same value identifier, else a different one.
                  // A new identifier has the form [nodeType]hash(n), where
                  // n is the number of leaves already stored under this
                  // hash code.

                  int esize = v.size();
                  if (esize > 0) {
                  	  boolean matchFound = false;
                      for (int i = 0; i < esize; i++) {
                          Node listLeaf = (Node) v.elementAt(i);
                          if (leafData.equals(
                              DocumentTree.getDataString(listLeaf))) {
                              String id = this.getIdForNode(listLeaf);
                              this.setIdForNode(leaf, id);
                              matchFound = true; break;
                          }
                      }

                      // same hash code but different data: a collision,
                      // so the leaf gets an identifier of its own.
                      if (!matchFound) {
                      	  String id = "[" + leaf.getNodeType() +  "]" +
                          				hashCode + "(" + esize + ")";
                          this.setIdForNode(leaf, id);
                      }
                  } else {
                          // first leaf stored under this hash code.
                  		  String id = "[" + leaf.getNodeType() +  "]" +
                          				hashCode + "(0)";
                          this.setIdForNode(leaf, id);
                  }

                  // place the vector back in the hashtable.
                  v.addElement(leaf);
                  table.put(hashCode, v);
                }

                // fall through

            case Node.ATTRIBUTE_NODE:
            case Node.ENTITY_NODE:
            case Node.DOCUMENT_TYPE_NODE:
				 /* TBI later */

                // For childless nodes, only look at siblings.  If no
                // siblings, climb the tree till we get to a spot there
                // are siblings, or till we terminate our walk.
                // Every step up to a parent lowers the level by one.

                Node here = current;
                for (; here != null && here != startPoint;
                     here = here.getParentNode(), level--) {
                    next = here.getNextSibling();
                    if (next != null)
                        break;
                }

                // climbed back to the start (or off the tree): done.
                if (here == null || here == startPoint)
                    return;

                current = next;
                break;

            default:
                System.err.println("buildLevelInfo: unknown node type: " +
                                   current.getNodeType());
                return;
            }
        }
    }

    /**
     * parse the xml document named by filename, build the DOM tree, and
     * record its (normalized) document element as the root node. Does
     * nothing when no filename is set.
     *
     * Any failure while loading is reported on System.err together with
     * a stack trace; the input stream is closed in every case.
     */
    private void read() {

        if (filename == null) {
            return;
        }

        InputStream inStream = null;
        try {
            // create the document.
            // NOTE(review): the second argument presumably switches
            // validation off - confirm against the XmlDocument API.
            // (An earlier variant that parsed through Parser and
            // XmlDocumentBuilder with whitespace ignoring was disabled
            // in favour of this call.)
            inStream = new FileInputStream(filename);
            _document = XmlDocument.createXmlDocument(inStream, false);

            // get the document root node from DOM tree.
            this.rootNode = _document.getDocumentElement();
            ((ElementNode) this.rootNode).normalize();
        } catch (Throwable e) {
            // best effort: report and carry on without a tree
            System.err.println("Can't load XML file " +
                               filename + " - " + e);
            e.printStackTrace();
        } finally {
            // release the file handle whether or not parsing succeeded
            if (inStream != null) {
                try {
                    inStream.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing fails
                }
            }
        }
    }

    /**
     * write the flattened tree on stdout. The root node is serialized
     * into an in-memory buffer first; an exception raised while doing so
     * is reported on System.err, and whatever was buffered up to that
     * point is still printed.
     */
    public void write() {
        Writer buffer = new StringWriter();

        try {
            ElementNode root = (ElementNode) this.rootNode;
            root.writeXml(new XmlWriteContext(buffer, 16));
        } catch (Exception e) {
            System.err.println("Exception while writing: " + e);
        }

        System.out.println(buffer);
    }

    // Helper methods

    /* print the valueIds and the corresponding node. */
    void printIds() {
    	Enumeration enum = node2valueId.keys();

        while (enum.hasMoreElements()) {
        	Node node = (Node) enum.nextElement();
            String id = (String) node2valueId.get(node);
            if (node instanceof ElementNode)
            	System.err.println("id : " + id + "node: " + ((ElementNode) node).getTagName());
			else
                System.err.println("id : " + id + "node: " + getDataString(node));
        }
    }
}