All downloads are free. The search and download features use the official Maven repository.

com.itextpdf.styledxmlparser.jsoup.nodes.Node Maven / Gradle / Ivy

There is a newer version: 9.0.0
Show newest version
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2020 iText Group NV
    Authors: iText Software.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License version 3
    as published by the Free Software Foundation with the addition of the
    following permission added to Section 15 as permitted in Section 7(a):
    FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
    ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
    OF THIRD PARTY RIGHTS

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE.
    See the GNU Affero General Public License for more details.
    You should have received a copy of the GNU Affero General Public License
    along with this program; if not, see http://www.gnu.org/licenses or write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA, 02110-1301 USA, or download the license from the following URL:
    http://itextpdf.com/terms-of-use/

    The interactive user interfaces in modified source and object code versions
    of this program must display Appropriate Legal Notices, as required under
    Section 5 of the GNU Affero General Public License.

    In accordance with Section 7(b) of the GNU Affero General Public License,
    a covered work must retain the producer line in every PDF that is created
    or manipulated using iText.

    You can be released from the requirements of the license by purchasing
    a commercial license. Buying such a license is mandatory as soon as you
    develop commercial activities involving the iText software without
    disclosing the source code of your own applications.
    These activities include: offering paid services to customers as an ASP,
    serving PDFs on the fly in a web application, shipping iText with a closed
    source product.

    For more information, please contact iText Software Corp. at this
    address: [email protected]
 */
package com.itextpdf.styledxmlparser.jsoup.nodes;

import com.itextpdf.styledxmlparser.jsoup.select.NodeTraversor;
import com.itextpdf.styledxmlparser.jsoup.select.NodeVisitor;
import com.itextpdf.styledxmlparser.jsoup.SerializationException;
import com.itextpdf.styledxmlparser.jsoup.helper.StringUtil;
import com.itextpdf.styledxmlparser.jsoup.helper.Validate;
import com.itextpdf.styledxmlparser.jsoup.parser.Parser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

/**
 The base, abstract Node model. Elements, Documents, Comments etc are all Node instances.

 @author Jonathan Hedley, [email protected] */
public abstract class Node implements Cloneable {
    private static final List EMPTY_NODES = Collections.emptyList();
    Node parentNode;
    List childNodes;
    Attributes attributes;
    String baseUri;
    int siblingIndex;

    /**
     Create a new Node.
     @param baseUri base URI
     @param attributes attributes (not null, but may be empty)
     */
    protected Node(String baseUri, Attributes attributes) {
        Validate.notNull(baseUri);
        Validate.notNull(attributes);
        
        childNodes = EMPTY_NODES;
        this.baseUri = baseUri.trim();
        this.attributes = attributes;
    }

    protected Node(String baseUri) {
        this(baseUri, new Attributes());
    }

    /**
     * Default constructor. Doesn't setup base uri, children, or attributes; use with caution.
     */
    protected Node() {
        childNodes = EMPTY_NODES;
        attributes = null;
    }

    /**
     Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
     @return node name
     */
    public abstract String nodeName();

    /**
     * Get an attribute's value by its key.
     * 

* To get an absolute URL from an attribute that may be a relative URL, prefix the key with abs, * which is a shortcut to the {@link #absUrl} method. *

* E.g.: *

String url = a.attr("abs:href");
* * @param attributeKey The attribute key. * @return The attribute, or empty string if not present (to avoid nulls). * @see #attributes() * @see #hasAttr(String) * @see #absUrl(String) */ public String attr(String attributeKey) { Validate.notNull(attributeKey); if (attributes.hasKey(attributeKey)) return attributes.get(attributeKey); else if (attributeKey.toLowerCase().startsWith("abs:")) return absUrl(attributeKey.substring("abs:".length())); else return ""; } /** * Get all of the element's attributes. * @return attributes (which implements iterable, in same order as presented in original HTML). */ public Attributes attributes() { return attributes; } /** * Set an attribute (key=value). If the attribute already exists, it is replaced. * @param attributeKey The attribute key. * @param attributeValue The attribute value. * @return this (for chaining) */ public Node attr(String attributeKey, String attributeValue) { attributes.put(attributeKey, attributeValue); return this; } /** * Test if this element has an attribute. * @param attributeKey The attribute key to check. * @return true if the attribute exists, false if not. */ public boolean hasAttr(String attributeKey) { Validate.notNull(attributeKey); if (attributeKey.startsWith("abs:")) { String key = attributeKey.substring("abs:".length()); if (attributes.hasKey(key) && !absUrl(key).equals("")) return true; } return attributes.hasKey(attributeKey); } /** * Remove an attribute from this element. * @param attributeKey The attribute to remove. * @return this (for chaining) */ public Node removeAttr(String attributeKey) { Validate.notNull(attributeKey); attributes.remove(attributeKey); return this; } /** Get the base URI of this node. @return base URI */ public String baseUri() { return baseUri; } /** Update the base URI of this node and all of its descendants. 
@param baseUri base URI to set */ public void setBaseUri(final String baseUri) { Validate.notNull(baseUri); traverse(new NodeVisitor() { public void head(Node node, int depth) { node.baseUri = baseUri; } public void tail(Node node, int depth) { } }); } /** * Get an absolute URL from a URL attribute that may be relative (i.e. an <a href> or * <img src>). *

* E.g.: String absUrl = linkEl.absUrl("href"); *

* If the attribute value is already absolute (i.e. it starts with a protocol, like * http:// or https:// etc), and it successfully parses as a URL, the attribute is * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made * absolute using that. *

* As an alternate, you can use the {@link #attr} method with the abs: prefix, e.g.: * String absUrl = linkEl.attr("abs:href"); * * @param attributeKey The attribute key * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or * could not be made successfully into a URL. * @see #attr * @see java.net.URL#URL(java.net.URL, String) */ public String absUrl(String attributeKey) { Validate.notEmpty(attributeKey); if (!hasAttr(attributeKey)) { return ""; // nothing to make absolute with } else { return StringUtil.resolve(baseUri, attr(attributeKey)); } } /** Get a child node by its 0-based index. @param index index of child node @return the child node at this index. Throws a {@code IndexOutOfBoundsException} if the index is out of bounds. */ public Node childNode(int index) { return childNodes.get(index); } /** Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes themselves can be manipulated. @return list of children. If no children, returns an empty list. */ public List childNodes() { return Collections.unmodifiableList(childNodes); } /** * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original * nodes * @return a deep copy of this node's children */ public List childNodesCopy() { List children = new ArrayList(childNodes.size()); for (Node node : childNodes) { children.add((Node)node.clone()); } return children; } /** * Get the number of child nodes that this node holds. * @return the number of child nodes that this node holds. */ public final int childNodeSize() { return childNodes.size(); } protected Node[] childNodesAsArray() { return childNodes.toArray(new Node[childNodeSize()]); } /** Gets this node's parent node. @return parent node; or null if no parent. */ public Node parent() { return parentNode; } /** Gets this node's parent node. 
Node overridable by extending classes, so useful if you really just need the Node type. @return parent node; or null if no parent. */ public final Node parentNode() { return parentNode; } /** * Gets the Document associated with this Node. * @return the Document associated with this Node, or null if there is no such Document. */ public Document ownerDocument() { if (this instanceof Document) return (Document) this; else if (parentNode == null) return null; else return parentNode.ownerDocument(); } /** * Remove (delete) this node from the DOM tree. If this node has children, they are also removed. */ public void remove() { Validate.notNull(parentNode); parentNode.removeChild(this); } /** * Insert the specified HTML into the DOM before this node (i.e. as a preceding sibling). * @param html HTML to add before this node * @return this node, for chaining * @see #after(String) */ public Node before(String html) { addSiblingHtml(siblingIndex, html); return this; } /** * Insert the specified node into the DOM before this node (i.e. as a preceding sibling). * @param node to add before this node * @return this node, for chaining * @see #after(Node) */ public Node before(Node node) { Validate.notNull(node); Validate.notNull(parentNode); parentNode.addChildren(siblingIndex, node); return this; } /** * Insert the specified HTML into the DOM after this node (i.e. as a following sibling). * @param html HTML to add after this node * @return this node, for chaining * @see #before(String) */ public Node after(String html) { addSiblingHtml(siblingIndex + 1, html); return this; } /** * Insert the specified node into the DOM after this node (i.e. as a following sibling). 
* @param node to add after this node * @return this node, for chaining * @see #before(Node) */ public Node after(Node node) { Validate.notNull(node); Validate.notNull(parentNode); parentNode.addChildren(siblingIndex + 1, node); return this; } private void addSiblingHtml(int index, String html) { Validate.notNull(html); Validate.notNull(parentNode); Element context = parent() instanceof Element ? (Element) parent() : null; List nodes = Parser.parseFragment(html, context, baseUri()); parentNode.addChildren(index, nodes.toArray(new Node[nodes.size()])); } /** Wrap the supplied HTML around this node. @param html HTML to wrap around this element, e.g. {@code

}. Can be arbitrarily deep. @return this node, for chaining. */ public Node wrap(String html) { Validate.notEmpty(html); Element context = parent() instanceof Element ? (Element) parent() : null; List wrapChildren = Parser.parseFragment(html, context, baseUri()); Node wrapNode = wrapChildren.get(0); if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop return null; Element wrap = (Element) wrapNode; Element deepest = getDeepChild(wrap); parentNode.replaceChild(this, wrap); deepest.addChildren(this); // remainder (unbalanced wrap, like

-- The

is remainder if (wrapChildren.size() > 0) { for (int i = 0; i < wrapChildren.size(); i++) { Node remainder = wrapChildren.get(i); remainder.parentNode.removeChild(remainder); wrap.appendChild(remainder); } } return this; } /** * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping * the node but keeping its children. *

* For example, with the input html: *

* {@code

One Two Three
} *

* Calling {@code element.unwrap()} on the {@code span} element will result in the html: *

* {@code

One Two Three
} *

* and the {@code "Two "} {@link TextNode} being returned. * * @return the first child of this node, after the node has been unwrapped. Null if the node had no children. * @see #remove() * @see #wrap(String) */ public Node unwrap() { Validate.notNull(parentNode); Node firstChild = childNodes.size() > 0 ? childNodes.get(0) : null; parentNode.addChildren(siblingIndex, this.childNodesAsArray()); this.remove(); return firstChild; } private Element getDeepChild(Element el) { List children = el.children(); if (children.size() > 0) return getDeepChild(children.get(0)); else return el; } /** * Replace this node in the DOM with the supplied node. * @param in the node that will will replace the existing node. */ public void replaceWith(Node in) { Validate.notNull(in); Validate.notNull(parentNode); parentNode.replaceChild(this, in); } protected void setParentNode(Node parentNode) { if (this.parentNode != null) this.parentNode.removeChild(this); this.parentNode = parentNode; } protected void replaceChild(Node out, Node in) { Validate.isTrue(out.parentNode == this); Validate.notNull(in); if (in.parentNode != null) in.parentNode.removeChild(in); final int index = out.siblingIndex; childNodes.set(index, in); in.parentNode = this; in.setSiblingIndex(index); out.parentNode = null; } protected void removeChild(Node out) { Validate.isTrue(out.parentNode == this); final int index = out.siblingIndex; childNodes.remove(index); reindexChildren(index); out.parentNode = null; } protected void addChildren(Node... children) { //most used. short circuit addChildren(int), which hits reindex children and array copy for (Node child: children) { reparentChild(child); ensureChildNodes(); childNodes.add(child); child.setSiblingIndex(childNodes.size()-1); } } protected void addChildren(int index, Node... 
children) { Validate.noNullElements(children); ensureChildNodes(); for (int i = children.length - 1; i >= 0; i--) { Node in = children[i]; reparentChild(in); childNodes.add(index, in); reindexChildren(index); } } protected void ensureChildNodes() { if (childNodes == EMPTY_NODES) { childNodes = new ArrayList(4); } } protected void reparentChild(Node child) { if (child.parentNode != null) child.parentNode.removeChild(child); child.setParentNode(this); } private void reindexChildren(int start) { for (int i = start; i < childNodes.size(); i++) { childNodes.get(i).setSiblingIndex(i); } } /** Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not include this node (a node is not a sibling of itself). @return node siblings. If the node has no parent, returns an empty list. */ public List siblingNodes() { if (parentNode == null) return Collections.emptyList(); List nodes = parentNode.childNodes; List siblings = new ArrayList(nodes.size() - 1); for (Node node: nodes) if (node != this) siblings.add(node); return siblings; } /** Get this node's next sibling. @return next sibling, or null if this is the last sibling */ public Node nextSibling() { if (parentNode == null) return null; // root final List siblings = parentNode.childNodes; final int index = siblingIndex+1; if (siblings.size() > index) return siblings.get(index); else return null; } /** Get this node's previous sibling. @return the previous sibling, or null if this is the first sibling */ public Node previousSibling() { if (parentNode == null) return null; // root if (siblingIndex > 0) return parentNode.childNodes.get(siblingIndex-1); else return null; } /** * Get the list index of this node in its node sibling list. I.e. if this is the first node * sibling, returns 0. 
* @return position in node sibling list * @see Element#elementSiblingIndex() */ public int siblingIndex() { return siblingIndex; } protected void setSiblingIndex(int siblingIndex) { this.siblingIndex = siblingIndex; } /** * Perform a depth-first traversal through this node and its descendants. * @param nodeVisitor the visitor callbacks to perform on each node * @return this node, for chaining */ public Node traverse(NodeVisitor nodeVisitor) { Validate.notNull(nodeVisitor); NodeTraversor traversor = new NodeTraversor(nodeVisitor); traversor.traverse(this); return this; } /** Get the outer HTML of this node. @return HTML */ public String outerHtml() { StringBuilder accum = new StringBuilder(128); outerHtml(accum); return accum.toString(); } protected void outerHtml(Appendable accum) { new NodeTraversor(new OuterHtmlVisitor(accum, getOutputSettings())).traverse(this); } // if this node has no document (or parent), retrieve the default output settings Document.OutputSettings getOutputSettings() { return ownerDocument() != null ? ownerDocument().outputSettings() : (new Document("")).outputSettings(); } /** Get the outer HTML of this node. @param accum accumulator to place HTML into @throws IOException if appending to the given accumulator fails. */ abstract void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException; abstract void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException; /** * Write this node and its children to the given {@link Appendable}. * * @param appendable the {@link Appendable} to write to. * @return the supplied {@link Appendable}, for chaining. 
*/ public Appendable html(Appendable appendable) { outerHtml(appendable); return appendable; } public String toString() { return outerHtml(); } protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException { accum.append("\n").append(StringUtil.padding(depth * out.indentAmount())); } /** * Check if this node is the same instance of another (object identity test). * @param o other object to compare to * @return true if the content of this node is the same as the other * @see Node#hasSameValue(Object) to compare nodes by their value */ @Override public boolean equals(Object o) { // implemented just so that javadoc is clear this is an identity test return this == o; } /** * Check if this node is has the same content as another node. A node is considered the same if its name, attributes and content match the * other node; particularly its position in the tree does not influence its similarity. * @param o other object to compare to * @return true if the content of this node is the same as the other */ public boolean hasSameValue(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; return this.outerHtml().equals(((Node) o).outerHtml()); } /** * Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or * parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the * original node. *

* The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}. * @return stand-alone cloned node */ @Override public Object clone() { Node thisClone = doClone(null); // splits for orphan // Queue up nodes that need their children cloned (BFS). LinkedList nodesToProcess = new LinkedList(); nodesToProcess.add(thisClone); while (!nodesToProcess.isEmpty()) { Node currParent = nodesToProcess.remove(); for (int i = 0; i < currParent.childNodes.size(); i++) { Node childClone = currParent.childNodes.get(i).doClone(currParent); currParent.childNodes.set(i, childClone); nodesToProcess.add(childClone); } } return thisClone; } /* * Return a clone of the node using the given parent (which can be null). * Not a deep copy of children. */ protected Node doClone(Node parent) { Node clone; clone = (Node) partialClone(); clone.parentNode = parent; // can be null, to create an orphan split clone.siblingIndex = parent == null ? 0 : siblingIndex; clone.attributes = attributes != null ? (Attributes) attributes.clone() : null; clone.baseUri = baseUri; clone.childNodes = new ArrayList(childNodes.size()); for (Node child: childNodes) clone.childNodes.add(child); return clone; } private Object partialClone() { try { return super.clone(); } catch (CloneNotSupportedException e) { throw new RuntimeException(e); } } private static class OuterHtmlVisitor implements NodeVisitor { private Appendable accum; private Document.OutputSettings out; OuterHtmlVisitor(Appendable accum, Document.OutputSettings out) { this.accum = accum; this.out = out; } public void head(Node node, int depth) { try { node.outerHtmlHead(accum, depth, out); } catch (IOException exception) { throw new SerializationException(exception); } } public void tail(Node node, int depth) { if (!node.nodeName().equals("#text")) { // saves a void hit. try { node.outerHtmlTail(accum, depth, out); } catch (IOException exception) { throw new SerializationException(exception); } } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy