leap.lang.jsoup.nodes.Node Maven / Gradle / Ivy
package leap.lang.jsoup.nodes;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import leap.lang.jsoup.helper.StringUtil;
import leap.lang.jsoup.helper.Validate;
import leap.lang.jsoup.nodes.Attributes;
import leap.lang.jsoup.nodes.Document;
import leap.lang.jsoup.nodes.Element;
import leap.lang.jsoup.nodes.Node;
import leap.lang.jsoup.nodes.TextNode;
import leap.lang.jsoup.parser.Parser;
import leap.lang.jsoup.select.NodeTraversor;
import leap.lang.jsoup.select.NodeVisitor;
/**
The base, abstract Node model. Elements, Documents, Comments etc are all Node instances.
@author Jonathan Hedley, [email protected] */
public abstract class Node implements Cloneable {
Node parentNode;
List childNodes;
Attributes attributes;
String baseUri;
int siblingIndex;
/**
Create a new Node.
@param baseUri base URI
@param attributes attributes (not null, but may be empty)
*/
protected Node(String baseUri, Attributes attributes) {
Validate.notNull(baseUri);
Validate.notNull(attributes);
childNodes = new ArrayList(4);
this.baseUri = baseUri.trim();
this.attributes = attributes;
}
protected Node(String baseUri) {
this(baseUri, new Attributes());
}
/**
* Default constructor. Doesn't setup base uri, children, or attributes; use with caution.
*/
protected Node() {
childNodes = Collections.emptyList();
attributes = null;
}
/**
Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
@return node name
*/
public abstract String nodeName();
/**
* Get an attribute's value by its key.
*
* To get an absolute URL from an attribute that may be a relative URL, prefix the key with abs
,
* which is a shortcut to the {@link #absUrl} method.
* E.g.: String url = a.attr("abs:href");
* @param attributeKey The attribute key.
* @return The attribute, or empty string if not present (to avoid nulls).
* @see #attributes()
* @see #hasAttr(String)
* @see #absUrl(String)
*/
public String attr(String attributeKey) {
Validate.notNull(attributeKey);
if (attributes.hasKey(attributeKey))
return attributes.get(attributeKey);
else if (attributeKey.toLowerCase().startsWith("abs:"))
return absUrl(attributeKey.substring("abs:".length()));
else return "";
}
/**
* Get all of the element's attributes.
* @return attributes (which implements iterable, in same order as presented in original HTML).
*/
public Attributes attributes() {
return attributes;
}
/**
* Set an attribute (key=value). If the attribute already exists, it is replaced.
* @param attributeKey The attribute key.
* @param attributeValue The attribute value.
* @return this (for chaining)
*/
public Node attr(String attributeKey, String attributeValue) {
attributes.put(attributeKey, attributeValue);
return this;
}
/**
* Test if this element has an attribute.
* @param attributeKey The attribute key to check.
* @return true if the attribute exists, false if not.
*/
public boolean hasAttr(String attributeKey) {
Validate.notNull(attributeKey);
if (attributeKey.startsWith("abs:")) {
String key = attributeKey.substring("abs:".length());
if (attributes.hasKey(key) && !absUrl(key).equals(""))
return true;
}
return attributes.hasKey(attributeKey);
}
/**
* Remove an attribute from this element.
* @param attributeKey The attribute to remove.
* @return this (for chaining)
*/
public Node removeAttr(String attributeKey) {
Validate.notNull(attributeKey);
attributes.remove(attributeKey);
return this;
}
/**
Get the base URI of this node.
@return base URI
*/
public String baseUri() {
return baseUri;
}
/**
Update the base URI of this node and all of its descendants.
@param baseUri base URI to set
*/
public void setBaseUri(final String baseUri) {
Validate.notNull(baseUri);
traverse(new NodeVisitor() {
public void head(Node node, int depth) {
node.baseUri = baseUri;
}
public void tail(Node node, int depth) {
}
});
}
/**
* Get an absolute URL from a URL attribute that may be relative (i.e. an <a href>
or
* <img src>
).
*
* E.g.: String absUrl = linkEl.absUrl("href");
*
* If the attribute value is already absolute (i.e. it starts with a protocol, like
* http://
or https://
etc), and it successfully parses as a URL, the attribute is
* returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
* absolute using that.
*
* As an alternate, you can use the {@link #attr} method with the abs:
prefix, e.g.:
* String absUrl = linkEl.attr("abs:href");
*
* @param attributeKey The attribute key
* @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
* could not be made successfully into a URL.
* @see #attr
* @see java.net.URL#URL(java.net.URL, String)
*/
public String absUrl(String attributeKey) {
Validate.notEmpty(attributeKey);
String relUrl = attr(attributeKey);
if (!hasAttr(attributeKey)) {
return ""; // nothing to make absolute with
} else {
URL base;
try {
try {
base = new URL(baseUri);
} catch (MalformedURLException e) {
// the base is unsuitable, but the attribute may be abs on its own, so try that
URL abs = new URL(relUrl);
return abs.toExternalForm();
}
// workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
if (relUrl.startsWith("?"))
relUrl = base.getPath() + relUrl;
URL abs = new URL(base, relUrl);
return abs.toExternalForm();
} catch (MalformedURLException e) {
return "";
}
}
}
/**
Get a child node by its 0-based index.
@param index index of child node
@return the child node at this index. Throws a {@code IndexOutOfBoundsException} if the index is out of bounds.
*/
public Node childNode(int index) {
return childNodes.get(index);
}
/**
Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
themselves can be manipulated.
@return list of children. If no children, returns an empty list.
*/
public List childNodes() {
return Collections.unmodifiableList(childNodes);
}
/**
* Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original
* nodes
* @return a deep copy of this node's children
*/
public List childNodesCopy() {
List children = new ArrayList(childNodes.size());
for (Node node : childNodes) {
children.add(node.clone());
}
return children;
}
/**
* Get the number of child nodes that this node holds.
* @return the number of child nodes that this node holds.
*/
public final int childNodeSize() {
return childNodes.size();
}
protected Node[] childNodesAsArray() {
return childNodes.toArray(new Node[childNodeSize()]);
}
/**
Gets this node's parent node.
@return parent node; or null if no parent.
*/
public Node parent() {
return parentNode;
}
/**
Gets this node's parent node. Node overridable by extending classes, so useful if you really just need the Node type.
@return parent node; or null if no parent.
*/
public final Node parentNode() {
return parentNode;
}
/**
* Gets the Document associated with this Node.
* @return the Document associated with this Node, or null if there is no such Document.
*/
public Document ownerDocument() {
if (this instanceof Document)
return (Document) this;
else if (parentNode == null)
return null;
else
return parentNode.ownerDocument();
}
/**
* Remove (delete) this node from the DOM tree. If this node has children, they are also removed.
*/
public void remove() {
Validate.notNull(parentNode);
parentNode.removeChild(this);
}
/**
* Insert the specified HTML into the DOM before this node (i.e. as a preceding sibling).
* @param html HTML to add before this node
* @return this node, for chaining
* @see #after(String)
*/
public Node before(String html) {
addSiblingHtml(siblingIndex(), html);
return this;
}
/**
* Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
* @param node to add before this node
* @return this node, for chaining
* @see #after(Node)
*/
public Node before(Node node) {
Validate.notNull(node);
Validate.notNull(parentNode);
parentNode.addChildren(siblingIndex(), node);
return this;
}
/**
* Insert the specified HTML into the DOM after this node (i.e. as a following sibling).
* @param html HTML to add after this node
* @return this node, for chaining
* @see #before(String)
*/
public Node after(String html) {
addSiblingHtml(siblingIndex()+1, html);
return this;
}
/**
* Insert the specified node into the DOM after this node (i.e. as a following sibling).
* @param node to add after this node
* @return this node, for chaining
* @see #before(Node)
*/
public Node after(Node node) {
Validate.notNull(node);
Validate.notNull(parentNode);
parentNode.addChildren(siblingIndex()+1, node);
return this;
}
private void addSiblingHtml(int index, String html) {
Validate.notNull(html);
Validate.notNull(parentNode);
Element context = parent() instanceof Element ? (Element) parent() : null;
List nodes = Parser.parseFragment(html, context, baseUri());
parentNode.addChildren(index, nodes.toArray(new Node[nodes.size()]));
}
/**
Wrap the supplied HTML around this node.
@param html HTML to wrap around this element, e.g. {@code }. Can be arbitrarily deep.
@return this node, for chaining.
*/
public Node wrap(String html) {
Validate.notEmpty(html);
Element context = parent() instanceof Element ? (Element) parent() : null;
List wrapChildren = Parser.parseFragment(html, context, baseUri());
Node wrapNode = wrapChildren.get(0);
if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop
return null;
Element wrap = (Element) wrapNode;
Element deepest = getDeepChild(wrap);
parentNode.replaceChild(this, wrap);
deepest.addChildren(this);
// remainder (unbalanced wrap, like -- The is remainder
if (wrapChildren.size() > 0) {
for (int i = 0; i < wrapChildren.size(); i++) {
Node remainder = wrapChildren.get(i);
remainder.parentNode.removeChild(remainder);
wrap.appendChild(remainder);
}
}
return this;
}
/**
* Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
* the node but keeping its children.
*
* For example, with the input html:
* {@code One Two Three}
* Calling {@code element.unwrap()} on the {@code span} element will result in the html:
* {@code One Two Three}
* and the {@code "Two "} {@link TextNode} being returned.
* @return the first child of this node, after the node has been unwrapped. Null if the node had no children.
* @see #remove()
* @see #wrap(String)
*/
public Node unwrap() {
Validate.notNull(parentNode);
int index = siblingIndex;
Node firstChild = childNodes.size() > 0 ? childNodes.get(0) : null;
parentNode.addChildren(index, this.childNodesAsArray());
this.remove();
return firstChild;
}
private Element getDeepChild(Element el) {
List children = el.children();
if (children.size() > 0)
return getDeepChild(children.get(0));
else
return el;
}
/**
* Replace this node in the DOM with the supplied node.
* @param in the node that will will replace the existing node.
*/
public void replaceWith(Node in) {
Validate.notNull(in);
Validate.notNull(parentNode);
parentNode.replaceChild(this, in);
}
protected void setParentNode(Node parentNode) {
if (this.parentNode != null)
this.parentNode.removeChild(this);
this.parentNode = parentNode;
}
protected void replaceChild(Node out, Node in) {
Validate.isTrue(out.parentNode == this);
Validate.notNull(in);
if (in.parentNode != null)
in.parentNode.removeChild(in);
Integer index = out.siblingIndex();
childNodes.set(index, in);
in.parentNode = this;
in.setSiblingIndex(index);
out.parentNode = null;
}
protected void removeChild(Node out) {
Validate.isTrue(out.parentNode == this);
int index = out.siblingIndex();
childNodes.remove(index);
reindexChildren();
out.parentNode = null;
}
protected void addChildren(Node... children) {
//most used. short circuit addChildren(int), which hits reindex children and array copy
for (Node child: children) {
reparentChild(child);
childNodes.add(child);
child.setSiblingIndex(childNodes.size()-1);
}
}
protected void addChildren(int index, Node... children) {
Validate.noNullElements(children);
for (int i = children.length - 1; i >= 0; i--) {
Node in = children[i];
reparentChild(in);
childNodes.add(index, in);
}
reindexChildren();
}
private void reparentChild(Node child) {
if (child.parentNode != null)
child.parentNode.removeChild(child);
child.setParentNode(this);
}
private void reindexChildren() {
for (int i = 0; i < childNodes.size(); i++) {
childNodes.get(i).setSiblingIndex(i);
}
}
/**
Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
include this node (a node is not a sibling of itself).
@return node siblings. If the node has no parent, returns an empty list.
*/
public List siblingNodes() {
if (parentNode == null)
return Collections.emptyList();
List nodes = parentNode.childNodes;
List siblings = new ArrayList(nodes.size() - 1);
for (Node node: nodes)
if (node != this)
siblings.add(node);
return siblings;
}
/**
Get this node's next sibling.
@return next sibling, or null if this is the last sibling
*/
public Node nextSibling() {
if (parentNode == null)
return null; // root
List siblings = parentNode.childNodes;
Integer index = siblingIndex();
Validate.notNull(index);
if (siblings.size() > index+1)
return siblings.get(index+1);
else
return null;
}
/**
Get this node's previous sibling.
@return the previous sibling, or null if this is the first sibling
*/
public Node previousSibling() {
if (parentNode == null)
return null; // root
List siblings = parentNode.childNodes;
Integer index = siblingIndex();
Validate.notNull(index);
if (index > 0)
return siblings.get(index-1);
else
return null;
}
/**
* Get the list index of this node in its node sibling list. I.e. if this is the first node
* sibling, returns 0.
* @return position in node sibling list
* @see leap.lang.jsoup.nodes.Element#elementSiblingIndex()
*/
public int siblingIndex() {
return siblingIndex;
}
protected void setSiblingIndex(int siblingIndex) {
this.siblingIndex = siblingIndex;
}
/**
* Perform a depth-first traversal through this node and its descendants.
* @param nodeVisitor the visitor callbacks to perform on each node
* @return this node, for chaining
*/
public Node traverse(NodeVisitor nodeVisitor) {
Validate.notNull(nodeVisitor);
NodeTraversor traversor = new NodeTraversor(nodeVisitor);
traversor.traverse(this);
return this;
}
/**
Get the outer HTML of this node.
@return HTML
*/
public String outerHtml() {
StringBuilder accum = new StringBuilder(128);
outerHtml(accum);
return accum.toString();
}
protected void outerHtml(StringBuilder accum) {
new NodeTraversor(new OuterHtmlVisitor(accum, getOutputSettings())).traverse(this);
}
// if this node has no document (or parent), retrieve the default output settings
private Document.OutputSettings getOutputSettings() {
return ownerDocument() != null ? ownerDocument().outputSettings() : (new Document("")).outputSettings();
}
/**
Get the outer HTML of this node.
@param accum accumulator to place HTML into
*/
abstract void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out);
abstract void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out);
public String toString() {
return outerHtml();
}
protected void indent(StringBuilder accum, int depth, Document.OutputSettings out) {
accum.append("\n").append(StringUtil.padding(depth * out.indentAmount()));
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
// todo: have nodes hold a child index, compare against that and parent (not children)
return false;
}
@Override
public int hashCode() {
int result = parentNode != null ? parentNode.hashCode() : 0;
// not children, or will block stack as they go back up to parent)
result = 31 * result + (attributes != null ? attributes.hashCode() : 0);
return result;
}
/**
* Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
* parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
* original node.
*
* The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
* @return stand-alone cloned node
*/
@Override
public Node clone() {
Node thisClone = doClone(null); // splits for orphan
// Queue up nodes that need their children cloned (BFS).
LinkedList nodesToProcess = new LinkedList();
nodesToProcess.add(thisClone);
while (!nodesToProcess.isEmpty()) {
Node currParent = nodesToProcess.remove();
for (int i = 0; i < currParent.childNodes.size(); i++) {
Node childClone = currParent.childNodes.get(i).doClone(currParent);
currParent.childNodes.set(i, childClone);
nodesToProcess.add(childClone);
}
}
return thisClone;
}
/*
* Return a clone of the node using the given parent (which can be null).
* Not a deep copy of children.
*/
protected Node doClone(Node parent) {
Node clone;
try {
clone = (Node) super.clone();
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
clone.parentNode = parent; // can be null, to create an orphan split
clone.siblingIndex = parent == null ? 0 : siblingIndex;
clone.attributes = attributes != null ? attributes.clone() : null;
clone.baseUri = baseUri;
clone.childNodes = new ArrayList(childNodes.size());
for (Node child: childNodes)
clone.childNodes.add(child);
return clone;
}
private static class OuterHtmlVisitor implements NodeVisitor {
private StringBuilder accum;
private Document.OutputSettings out;
OuterHtmlVisitor(StringBuilder accum, Document.OutputSettings out) {
this.accum = accum;
this.out = out;
}
public void head(Node node, int depth) {
node.outerHtmlHead(accum, depth, out);
}
public void tail(Node node, int depth) {
if (!node.nodeName().equals("#text")) // saves a void hit.
node.outerHtmlTail(accum, depth, out);
}
}
}