All downloads are free. The search and download functionality uses the official Maven repository.

org.outerj.daisy.diff.html.dom.TagNode Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2007 Guy Van den Broeck
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.outerj.daisy.diff.html.dom;

import java.util.*;

import org.outerj.daisy.diff.html.ancestor.TextOnlyComparator;
import org.outerj.daisy.diff.html.dom.helper.AttributesMap;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.AttributesImpl;

/**
 * Node that can contain other nodes. Represents an HTML tag.
 * @version 18 Jun 2011
 */
public class TagNode extends Node implements Iterable {

    private List children = new ArrayList();

    private String qName;

    private final Attributes attributes;

    /**
     * Attributes objects are unmodifiable and {@link #attributes} is final, so we can usefully cache the
     * equality of ours to others.
     */
    private IdentityHashMap attributesEqualityTests = new IdentityHashMap();

    public TagNode(TagNode parent, String qName, Attributes attributesarg) {
        super(parent);
        this.qName = qName;
        attributes = new AttributesImpl(attributesarg);
    }

    /**
     * appends the provided node to the collection of children if 
     * this node is set as the parameter's parent.
     * This method is used in the Node's constructor
     * @param node - child to add must have 
     * this node set as its parent.
     * @throws java.lang.IllegalStateException - if the parameter
     * has different parent than this node.
     */
    public void addChild(Node node) {
        if (node.getParent() != this) {
			throw new IllegalStateException(
                    "The new child must have this node as a parent.");
		}
        children.add(node);
    }

    @Override
    protected void setRoot(TagNode root)
    {
        super.setRoot(root);
        for (Node child : children)
        {
            child.setRoot(root);
        }
    }

    /**
     * If the provided parameter is in the same tree with
     * this object then this method fetches 
     * index of the parameter object in the children collection. 
     * If the parameter is from a different tree, then this method
     * attempts to return the index of first semantically equivalent 
     * node to the parameter.
     * @param child - the template of a tag we need an index for.
     * @return the index of first semantically equivalent child 
     * or -1 if couldn't find one
     */
    public int getIndexOf(Node child) {
        return children.indexOf(child);
    }
    
    /**
     * Inserts provided node in the collection of children at the specified index 
     * if this node is set as a parent for the parameter.
     * @param index - desired position among the children
     * @param node - the node to insert as a child. 
     * @throws java.lang.IllegalStateException - if the provided node has
     * different parent from this node.
     */
    public void addChild(int index, Node node) {
        if (node.getParent() != this) {
			throw new IllegalStateException(
                    "The new child must have this node as a parent.");
		}
        children.add(index, node);
    }

    public Node getChild(int i) {
        return children.get(i);
    }

    /**
     * @return Iterator<Node> over children collection
     * @throws java.lang.NullPointerException - if children collection is null
     */
    public Iterator iterator() {
        return children.iterator();
    }

    /**
     * @return number of elements in the children collection or 0 if 
     * the collection is null
     */
    public int getNbChildren() {
    	if (children == null){
    		return 0;
    	} else {
    		return children.size();
    	}
    }

    public String getQName() {
        return qName;
    }

    public Attributes getAttributes() {
        return attributes;
    }

    /**
     * checks tags for being semantically equivalent if it's from 
     * a different tree and for being the same object if it's 
     * from the same tree as this tag.
     * @param other - tag to compare to
     */
    public boolean isSameTag(TagNode other) {
        if (other == null) {
			return false;
		}
        return equals(other);
    }

    /**
     * Considers tags from different trees equal 
     * if they have same name and equivalent attributes.
     * No attention paid to the content (children) of the tag.
     * Considers tags from the same tree equal if it is 
     * the same object.
     * @param obj - object to compare to.
     */
    @Override
    public boolean equals(Object obj) {
    	if (!(obj instanceof TagNode)) {
			return false;
		}

        return equals((TagNode) obj);
    }

    private boolean equals(TagNode tagNode) {
        if (tagNode == this) {
			return true;
		}

        if (this.getRoot() == tagNode.getRoot())
		 {
			return false; // Not the same and in the same tree so not equal
		}

        //still a chance for being equal
        //if we are in the different tree
        //we should use semantic equivalence instead
        if (isSimilarTag(tagNode)) {
//            if (getParent() != null && tagNode.getParent() != null) {
//                int indexInParent = getParent().getIndexOf(this);
//                int otherIndexInParent = tagNode.getParent().getIndexOf(tagNode);
//                if (indexInParent != otherIndexInParent) {
//                    // the nodes have a different position. If there is another similar
//                    // node at the *same* position, we declare to be not equal.
//                    if (tagNode.getParent().getNbChildren() > indexInParent) {
//                        Node tempNodeAtSamePosition = tagNode.getParent().getChild(indexInParent);
//                        if (this.isSimilarTag(tempNodeAtSamePosition)) {
//                            return false;
//                        }
//                    }
//                }
//            }
            return true; // similar enough to be equal
        }
	return false;
    }

    private boolean hasSameAttributes(final Attributes otherAttributes)
    {
        if (otherAttributes == null) {
			return false;
		}
        if (attributesEqualityTests.get(otherAttributes) != null) {
			return attributesEqualityTests.get(otherAttributes);
		}
        boolean result = getAttributesMap().hasSameAttributes(otherAttributes);
        attributesEqualityTests.put(otherAttributes, result);
        return result;
    }

    private AttributesMap getAttributesMap()
    {
        return new AttributesMap(getAttributes());
    }

    /**
     * Returns true if this tag is similar to the given other tag.
     * The tags may be from different trees. If the tag name and attributes
     * are the same, the result will be true.
     * @param another the tag to compare with
     * @return wether this tag is similar to the other node
     */
    protected boolean isSimilarTag(Node another) {
    	boolean result = false;
    	if (another instanceof TagNode) {
    		TagNode otherNode = (TagNode) another;
    		if (this.getQName().equalsIgnoreCase(otherNode.getQName())) {
    			result = hasSameAttributes(otherNode.getAttributes());
    		}
    	}
		return result;
    }

    /**
     * Since we only consider so much information of the TagNode in
     * equals method, we need to re-write 
     * hashCode method to correspond. Otherwise 
     * HashTables and HashMaps might
     * behave unexpectedly.
     */
    @Override
    public int hashCode(){
    	final int simple = 29;
    	int result = this.getQName().hashCode();
        AttributesMap attrs = getAttributesMap();
        result = result*simple + attrs.hashCode();
    	return result;
    }
    
    /**
     * Produces String for the opening HTML tag for this node.
     * Includes the attributes. This probably doesn't work for image tag.
     * @return the String representation of the corresponding
     * opening HTML tag.
     */
    public String getOpeningTag() {
        String s = "<" + getQName();
        Attributes localAttributes = getAttributes();
        for (int i = 0; i < localAttributes.getLength(); i++) {
            s += " " + localAttributes.getQName(i) + "=\""
                    + localAttributes.getValue(i) + "\"";
        }
        return s += ">";
    }

    /**
     * @return String representation of the closing HTML tag that
     * corresponds to the current node. Probably doesn't work for image tag.
     */
    public String getEndTag() {
        return "";
    }

    /**
     * 

This recursive method considers a descendant deleted if all its * children had TextNodes that now are marked as removed * with the provided id. If all children of a descendant is considered * deleted, only that descendant is kept in the collection of the * deleted nodes, and its children are removed from the collection * of the deleted nodes.
* The HTML tag nodes that never had any text content are never considered * removed

*

It actually might have nothing to do with being really deleted, because * the element might be kept after its text content was deleted.
* Example:
* table cells can be kept after its text content was deleted
* horizontal rule has never had text content, but can be deleted

*/ @Override public List getMinimalDeletedSet(long id) { List nodes = new ArrayList(); //no-content tags are never included in the set if (children.size() == 0) { return nodes; } //by default we think that all kids are in the deleted set //until we prove otherwise boolean hasNotDeletedDescendant = false; for (Node child : this) {//check if kids are in the deleted set List childrenChildren = child.getMinimalDeletedSet(id); nodes.addAll(childrenChildren); if (!hasNotDeletedDescendant && !(childrenChildren.size() == 1 && childrenChildren .contains(child))) { // This child is not entirely deleted hasNotDeletedDescendant = true; } } //if all kids are in the deleted set - remove them and put this instead if (!hasNotDeletedDescendant) { nodes.clear(); nodes.add(this); } return nodes; } @Override public String toString() { return getOpeningTag(); } /** * Attempts to create 2 TagNodes with * the same name and attributes as the original this node. * All children preceding split parameter are placed into the left part, * all children following the split parameter are placed into * the right part. Placement of the split node is determined by * includeLeft flag parameter. The newly created nodes are only added to * the parent of this node if they have some children. * The original this node is removed afterwards. The process * proceeds recursively hiking up the tree until the "parent" node is * reached. "Parent" node will not be touched. * This method is used when the parent tags of a deleted * TextNode can no longer be found in the new doc. (means * they either has been deleted or changed arguments). The "parent" * parameter in that case is the deepest common parent between the * deleted node and its surrounding remaining siblings. 
* @param parent - the node that should not participate in split operation * (where the split operation stops) * @param split - the node-divider to divide children among splitted parts * @param includeLeft - if true the "split" node will be * included in the left part. * @return true if single this node * was substituted with 2 new similar nodes with original children * divided among them. */ public boolean splitUntill(TagNode parent, Node split, boolean includeLeft) { boolean splitOccured = false; if (parent != this) { TagNode part1 = new TagNode(null, getQName(), getAttributes()); TagNode part2 = new TagNode(null, getQName(), getAttributes()); part1.setParent(getParent()); part2.setParent(getParent()); int i = 0; while (i < children.size() && children.get(i) != split) { children.get(i).setParent(part1); part1.addChild(children.get(i)); i++; } if (i < children.size()) {//means we've found "split" node if (includeLeft) { children.get(i).setParent(part1); part1.addChild(children.get(i)); } else { children.get(i).setParent(part2); part2.addChild(children.get(i)); } i++; } while (i < children.size()) { children.get(i).setParent(part2); part2.addChild(children.get(i)); i++; } if (part1.getNbChildren() > 0) { getParent().addChild(getParent().getIndexOf(this), part1); } if (part2.getNbChildren() > 0) { getParent().addChild(getParent().getIndexOf(this), part2); } if (part1.getNbChildren() > 0 && part2.getNbChildren() > 0) { splitOccured = true; } //since split isn't meant for no-children tags, //we won't have a case where we removed this and did not //substitute it with anything getParent().removeChild(this); if (includeLeft) { getParent().splitUntill(parent, part1, includeLeft); } else { getParent().splitUntill(parent, part2, includeLeft); } } return splitOccured; } private void removeChild(Node node) { children.remove(node); } //block tags private static Set blocks = new HashSet(); static { blocks.add("html"); blocks.add("body"); blocks.add("p"); 
blocks.add("blockquote"); blocks.add("h1"); blocks.add("h2"); blocks.add("h3"); blocks.add("h4"); blocks.add("h5"); blocks.add("pre"); blocks.add("div"); blocks.add("ul"); blocks.add("ol"); blocks.add("li"); blocks.add("table"); blocks.add("tbody"); blocks.add("tr"); blocks.add("td"); blocks.add("th"); blocks.add("br"); blocks.add("thead"); blocks.add("tfoot"); } public static boolean isBlockLevel(String qName) { return blocks.contains(qName.toLowerCase()); } public static boolean isBlockLevel(Node node) { try { TagNode tagnode = (TagNode) node; return isBlockLevel(tagnode.getQName()); } catch (ClassCastException e) { return false; } } public boolean isBlockLevel() { return isBlockLevel(this); } public static boolean isInline(String qName) { return !isBlockLevel(qName); } public static boolean isInline(Node node) { return !isBlockLevel(node); } public boolean isInline() { return isInline(this); } @Override public Node copyTree() { TagNode newThis = new TagNode(null, getQName(), new AttributesImpl( getAttributes())); newThis.setWhiteBefore(isWhiteBefore()); newThis.setWhiteAfter(isWhiteAfter()); for (Node child : this) { Node newChild = child.copyTree(); newChild.setParent(newThis); newThis.addChild(newChild); } return newThis; } public double getMatchRatio(TagNode other) { TextOnlyComparator txtComp = new TextOnlyComparator(other); return txtComp.getMatchRatio(new TextOnlyComparator(this)); } public void expandWhiteSpace() { int shift = 0; boolean spaceAdded = false; int nbOriginalChildren = getNbChildren(); for (int i = 0; i < nbOriginalChildren; i++) { Node child = getChild(i + shift); try { TagNode tagChild = (TagNode) child; if (!tagChild.isPre()) { tagChild.expandWhiteSpace(); } } catch (ClassCastException e) { } if (!spaceAdded && child.isWhiteBefore()) { WhiteSpaceNode ws = new WhiteSpaceNode(null, " ", child .getLeftMostChild()); ws.setParent(this); addChild(i + (shift++), ws); } if (child.isWhiteAfter()) { WhiteSpaceNode ws = new WhiteSpaceNode(null, " ", 
child .getRightMostChild()); ws.setParent(this); addChild(i + 1 + (shift++), ws); spaceAdded = true; } else { spaceAdded = false; } } } @Override public Node getLeftMostChild() { if (getNbChildren() < 1) { return this; } Node child = getChild(0); return child.getLeftMostChild(); } @Override public Node getRightMostChild() { if (getNbChildren() < 1) { return this; } Node child = getChild(getNbChildren() - 1); return child.getRightMostChild(); } public boolean isPre() { return getQName().equalsIgnoreCase("pre"); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy