com.itextpdf.styledxmlparser.jsoup.nodes.Element Maven / Gradle / Ivy
Show all versions of styled-xml-parser Show documentation
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.styledxmlparser.jsoup.nodes;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.styledxmlparser.jsoup.helper.ChangeNotifyingArrayList;
import com.itextpdf.styledxmlparser.jsoup.helper.Validate;
import com.itextpdf.styledxmlparser.jsoup.internal.Normalizer;
import com.itextpdf.styledxmlparser.jsoup.internal.StringUtil;
import com.itextpdf.styledxmlparser.jsoup.parser.ParseSettings;
import com.itextpdf.styledxmlparser.jsoup.parser.Tag;
import com.itextpdf.styledxmlparser.jsoup.select.Collector;
import com.itextpdf.styledxmlparser.jsoup.select.Elements;
import com.itextpdf.styledxmlparser.jsoup.select.Evaluator;
import com.itextpdf.styledxmlparser.jsoup.select.NodeFilter;
import com.itextpdf.styledxmlparser.jsoup.select.NodeTraversor;
import com.itextpdf.styledxmlparser.jsoup.select.NodeVisitor;
import com.itextpdf.styledxmlparser.jsoup.select.QueryParser;
import com.itextpdf.styledxmlparser.jsoup.select.Selector;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
* A HTML element consists of a tag name, attributes, and child nodes (including text nodes and
* other elements).
*
* From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
*/
public class Element extends Node {
private static final List EmptyChildren = Collections.emptyList();
private static final Pattern ClassSplit = Pattern.compile("\\s+");
private static final String BaseUriKey = Attributes.internalKey("baseUri");
private Tag tag;
private WeakReference> shadowChildrenRef; // points to child elements shadowed from node children
List childNodes;
private Attributes attributes; // field is nullable but all methods for attributes are non null
/**
 * Create a new, standalone element. The tag is resolved from its name with
 * {@link Tag#valueOf(String)}; the base URI is set to the empty string and no attributes are supplied.
 *
 * @param tag tag name
 */
public Element(String tag) {
this(Tag.valueOf(tag), "", null);
}
/**
 * Builds a new standalone Element, i.e. one that has no parent yet.
 *
 * @param tag tag of this element; must not be null
 * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's)
 * @param attributes initial attributes (optional, may be null)
 * @see #appendChild(Node)
 * @see #appendElement(String)
 */
public Element(Tag tag, String baseUri, Attributes attributes) {
    Validate.notNull(tag);
    this.tag = tag;
    this.attributes = attributes;
    // start with the shared placeholder; a real list is only allocated by ensureChildNodes()
    childNodes = EmptyNodes;
    // the base URI is stored as an attribute, so it must be applied after attributes are assigned
    if (baseUri != null) {
        this.setBaseUri(baseUri);
    }
}
/**
 * Create a new Element from a Tag and a base URI, without any initial attributes.
 *
 * @param tag element tag
 * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any.
 * @see Tag#valueOf(String, ParseSettings)
 */
public Element(Tag tag, String baseUri) {
this(tag, baseUri, null);
}
/**
 * Internal test to check if a nodelist object has been created.
 *
 * @return {@code true} if {@code childNodes} is no longer the shared {@code EmptyNodes} placeholder
 */
protected boolean hasChildNodes() {
return childNodes != EmptyNodes;
}
/**
 * Returns the child node list, first replacing the shared {@code EmptyNodes} placeholder with a
 * freshly allocated {@code NodeList} if no list has been created yet.
 *
 * @return the child node list of this element
 */
protected List<Node> ensureChildNodes() {
    if (childNodes == EmptyNodes) {
        childNodes = new NodeList(this, 4);
    }
    return childNodes;
}
/**
 * Reports whether an {@link Attributes} object has been allocated for this element.
 *
 * @return {@code true} if the attributes field is non-null
 */
@Override
protected boolean hasAttributes() {
return attributes != null;
}
/**
 * Get this element's attributes, allocating an empty {@link Attributes} object on first access.
 *
 * @return the attributes of this element, never null
 */
@Override
public Attributes attributes() {
    // direct null check instead of hasAttributes(), which does not clear the nullability warning
    if (attributes == null) {
        attributes = new Attributes();
    }
    return attributes;
}
/**
 * Get the base URI that applies to this element. The lookup starts at this element and walks up
 * through its ancestors until a base URI attribute is found.
 *
 * @return the first base URI found, or an empty string if none is set
 */
@Override
public String baseUri() {
return searchUpForAttribute(this, BaseUriKey);
}
/**
 * Looks for an attribute on the given element and then on each of its ancestors in turn.
 *
 * @param start element at which the search begins
 * @param key attribute key to look for
 * @return the value from the nearest element that has the key, or an empty string if none does
 */
private static String searchUpForAttribute(final Element start, final String key) {
    for (Element current = start; current != null; current = (Element) current.parent()) {
        final Attributes candidate = current.attributes;
        if (candidate != null && candidate.hasKey(key)) {
            return candidate.get(key);
        }
    }
    return "";
}
/**
 * Stores the base URI as an internal attribute of this element, creating the attributes object if needed.
 *
 * @param baseUri base URI to store
 */
@Override
protected void doSetBaseUri(String baseUri) {
attributes().put(BaseUriKey, baseUri);
}
/**
 * Get the number of child nodes (of any node type) that this element holds.
 *
 * @return the size of the child node list
 */
@Override
public int childNodeSize() {
return childNodes.size();
}
/**
 * Get the node name of this element, which is the name of its tag.
 *
 * @return the tag name
 */
@Override
public String nodeName() {
return tag.getName();
}
/**
 * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase
 * case preserving parsing}, this will return the source's original case.
 *
 * @return the tag name
 * @see #normalName()
 */
public String tagName() {
return tag.getName();
}
/**
 * Get the normalized name of this Element's tag. This will always be the lowercased version of the tag, regardless
 * of the tag case preserving setting of the parser. For e.g., {@code <DIV>} and {@code <div>} both have a
 * normal name of {@code div}.
 *
 * @return normal name
 */
public String normalName() {
return tag.normalName();
}
/**
 * Rename the tag of this element. For example, turn a {@code <span>} into a {@code <div>} with
 * {@code el.tagName("div");}.
 *
 * @param tagName new tag name for this element; must not be empty
 * @return this element, for chaining
 * @see Elements#tagName(String)
 */
public Element tagName(String tagName) {
    Validate.notEmpty(tagName, "Tag name must not be empty.");
    // reuse the settings of the parser associated with this element, so the case option of the original parse is kept
    final ParseSettings settings = NodeUtils.parser(this).settings();
    tag = Tag.valueOf(tagName, settings);
    return this;
}
/**
 * Get the {@link Tag} object currently assigned to this element.
 *
 * @return the tag object
 */
public Tag tag() {
return tag;
}
/**
 * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
 * {@code <span> == false}). The answer is delegated to this element's tag.
 *
 * @return true if block, false if not (and thus inline)
 */
public boolean isBlock() {
return tag.isBlock();
}
/**
 * Get the {@code id} attribute of this element. The attribute key is looked up ignoring case.
 *
 * @return The id attribute, if present, or an empty string if not.
 */
public String id() {
    if (attributes == null) {
        // no attributes object has been allocated, so there cannot be an id
        return "";
    }
    return attributes.getIgnoreCase("id");
}
/**
 * Set the {@code id} attribute of this element.
 *
 * @param id the ID value to use; must not be null
 * @return this Element, for chaining
 */
public Element id(String id) {
Validate.notNull(id);
attr("id", id);
return this;
}
/**
 * Set an attribute value on this element. If this element already has an attribute with the
 * key, its value is updated; otherwise, a new attribute is added. The work is delegated to the
 * superclass implementation.
 *
 * @param attributeKey the attribute key
 * @param attributeValue the attribute value
 * @return this element
 */
public Node attr(String attributeKey, String attributeValue) {
super.attr(attributeKey, attributeValue);
return this;
}
/**
 * Set a boolean attribute value on this element. Setting to {@code true} sets the attribute value to "" and
 * marks the attribute as boolean so no value is written out. Setting to {@code false} removes the attribute
 * with the same key if it exists.
 *
 * @param attributeKey the attribute key
 * @param attributeValue the attribute value
 *
 * @return this element
 */
public Element attr(String attributeKey, boolean attributeValue) {
attributes().put(attributeKey, attributeValue);
return this;
}
/**
 * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
 * starting with "data-" is included the dataset.
 * <p>
 * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
 * {@code package=jsoup, language=java}.
 * <p>
 * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
 * in the other map.
 * <p>
 * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
 *
 * @return a map of {@code key=value} custom data attributes.
 */
public Map<String, String> dataset() {
    return attributes().dataset();
}
/**
 * Get the parent node of this element.
 *
 * @return the parent node, or {@code null} if this element has no parent
 */
@Override
public final Node parent() {
return parentNode;
}
/**
 * Collect this element's parent and further ancestors, up to the document root.
 *
 * @return this element's stack of parents, closest first.
 */
public Elements parents() {
    final Elements ancestors = new Elements();
    accumulateParents(this, ancestors);
    return ancestors;
}
/**
 * Appends the ancestors of an element to a list, nearest ancestor first. The walk ends when there is
 * no further parent or when a parent whose tag name is {@code #root} is reached (that parent is not added).
 *
 * @param el element whose ancestors are collected
 * @param parents list that receives the ancestors
 */
private static void accumulateParents(Element el, Elements parents) {
    Element ancestor = (Element) el.parent();
    while (ancestor != null && !ancestor.tagName().equals("#root")) {
        parents.add(ancestor);
        ancestor = (Element) ancestor.parent();
    }
}
/**
 * Get a child element of this element, by its 0-based index number.
 * <p>
 * Note that an element can have both mixed Nodes and Elements as children. This method inspects
 * a filtered list of children that are elements, and the index is based on that filtered list.
 *
 * @param index the index number of the element to retrieve
 * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
 * @see #childNode(int)
 */
public Element child(int index) {
return childElementsList().get(index);
}
/**
 * Get the number of child nodes of this element that are elements.
 * <p>
 * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link
 * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.)
 *
 * @return the number of child nodes that are elements
 * @see #children()
 * @see #child(int)
 */
public int childrenSize() {
return childElementsList().size();
}
/**
 * Get this element's child elements, wrapped in a new {@link Elements} object.
 * <p>
 * This is effectively a filter on {@link #childNodes()} to get Element nodes.
 *
 * @return child elements. If this element has no children, returns an empty list.
 * @see #childNodes()
 */
public Elements children() {
return new Elements(childElementsList());
}
/**
 * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated.
 * The copy is held through a {@link WeakReference}, so it is rebuilt whenever the reference has been
 * cleared or reset.
 *
 * @return a list of child elements; the shared empty list if this element has no child nodes
 */
List<Element> childElementsList() {
    if (childNodeSize() == 0) {
        return EmptyChildren; // short circuit creating empty
    }
    List<Element> children;
    // the explicit cast keeps this method compiling whether or not the field carries its type arguments
    if (shadowChildrenRef == null || (children = (List<Element>) shadowChildrenRef.get()) == null) {
        final int size = childNodes.size();
        children = new ArrayList<>(size);
        //noinspection ForLoopReplaceableByForEach (because it allocates an Iterator which is wasteful here)
        for (int i = 0; i < size; i++) {
            final Node node = childNodes.get(i);
            if (node instanceof Element) {
                children.add((Element) node);
            }
        }
        shadowChildrenRef = new WeakReference<>(children);
    }
    return children;
}
/**
 * Clears the cached shadow child elements, after letting the superclass handle the change, so that
 * the next call to {@code childElementsList()} rebuilds the list.
 */
@Override
void nodelistChanged() {
super.nodelistChanged();
shadowChildrenRef = null;
}
/**
 * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
 * <p>
 * This is effectively a filter on {@link #childNodes()} to get Text nodes.
 * <p>
 * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p}
 * element selected:
 * <ul>
 *     <li>{@code p.text()} = {@code "One Two Three Four"}</li>
 *     <li>{@code p.ownText()} = {@code "One Three Four"}</li>
 *     <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
 *     <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
 *     <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
 * </ul>
 *
 * @return child text nodes. If this element has no text nodes, returns an
 * empty list.
 */
public List<TextNode> textNodes() {
    List<TextNode> textNodes = new ArrayList<>();
    for (Node node : childNodes) {
        if (node instanceof TextNode) {
            textNodes.add((TextNode) node);
        }
    }
    return Collections.unmodifiableList(textNodes);
}
/**
 * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
 * <p>
 * This is effectively a filter on {@link #childNodes()} to get Data nodes.
 *
 * @return child data nodes. If this element has no data nodes, returns an
 * empty list.
 * @see #data()
 */
public List<DataNode> dataNodes() {
    List<DataNode> dataNodes = new ArrayList<>();
    for (Node node : childNodes) {
        if (node instanceof DataNode) {
            dataNodes.add((DataNode) node);
        }
    }
    return Collections.unmodifiableList(dataNodes);
}
/**
 * Find elements that match the {@link Selector} CSS query, with this element as the starting context.
 * Matched elements may include this element, or any of its children.
 * <p>
 * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
 * multiple filters can be combined, e.g.:
 * <ul>
 *     <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li>
 *     <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li>
 * </ul>
 * <p>
 * See the query syntax documentation in {@link com.itextpdf.styledxmlparser.jsoup.select.Selector}.
 * <p>
 * Also known as {@code querySelectorAll()} in the Web DOM.
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return an {@link Elements} list containing elements that match the query (empty if none match)
 * @see Selector selector query syntax
 * @see QueryParser#parse(String)
 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
 */
public Elements select(String cssQuery) {
return Selector.select(cssQuery, this);
}
/**
 * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but
 * may be useful if you are running the same query many times (on many documents) and want to save the overhead of
 * repeatedly parsing the CSS query.
 *
 * @param evaluator an element evaluator
 * @return an {@link Elements} list containing elements that match the query (empty if none match)
 */
public Elements select(Evaluator evaluator) {
return Selector.select(evaluator, this);
}
/**
 * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
 * <p>
 * This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
 * execution stops on the first hit.
 * <p>
 * Also known as {@code querySelector()} in the Web DOM.
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return the first matching element, or {@code null} if there is no match.
 */
public Element selectFirst(String cssQuery) {
return Selector.selectFirst(cssQuery, this);
}
/**
 * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or
 * {@code null} if none match.
 *
 * @param evaluator an element evaluator
 * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none
 * match.
 */
public Element selectFirst(Evaluator evaluator) {
return Collector.findFirst(evaluator, this);
}
/**
 * Checks if this element matches the given {@link Selector} CSS query. Also known as {@code matches()} in the Web
 * DOM. The query is parsed with {@link QueryParser#parse(String)} on every call.
 *
 * @param cssQuery a {@link Selector} CSS query
 * @return if this element matches the query
 */
public boolean is(String cssQuery) {
return is(QueryParser.parse(cssQuery));
}
/**
 * Check if this element matches the given evaluator. The root of this element's tree is passed to the
 * evaluator as the context.
 *
 * @param evaluator an element evaluator
 * @return if this element matches
 */
public boolean is(Evaluator evaluator) {
return evaluator.matches((Element) this.root(), this);
}
/**
 * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an
 * ancestor, or {@code null} if there is no such matching element.
 *
 * @param cssQuery a {@link Selector} CSS query
 * @return the closest ancestor element (possibly itself) that matches the provided query. {@code null} if not
 * found.
 */
public Element closest(String cssQuery) {
return closest(QueryParser.parse(cssQuery));
}
/**
 * Walk from this element up through its parents and return the first one that the evaluator accepts.
 * The element itself is tested first; the root of the tree is used as the evaluation context.
 *
 * @param evaluator a query evaluator; must not be null
 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not
 * found.
 */
public Element closest(Evaluator evaluator) {
    Validate.notNull(evaluator);
    final Element root = (Element) root();
    for (Element candidate = this; candidate != null; candidate = (Element) candidate.parent()) {
        if (evaluator.matches(root, candidate)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Add a node as the last child of this Element. The incoming node will be re-parented.
 *
 * @param child node to add; must not be null
 * @return this Element, for chaining
 * @see #prependChild(Node)
 * @see #insertChildren(int, Collection)
 */
public Element appendChild(Node child) {
    Validate.notNull(child);
    // done inline rather than through Node#addChildren(child), which short-circuits an array create and a loop
    reparentChild(child);
    ensureChildNodes();
    final int newIndex = childNodes.size(); // position the child will occupy once added
    childNodes.add(child);
    child.setSiblingIndex(newIndex);
    return this;
}
/**
* Insert the given nodes to the end of this Element's children.
*
* @param children nodes to add
* @return this Element, for chaining
* @see #insertChildren(int, Collection)
*/
public Element appendChildren(Collection extends Node> children) {
insertChildren(-1, children);
return this;
}
/**
 * Add this element to the supplied parent element, as its next child.
 *
 * @param parent element to which this element will be appended; must not be null
 * @return this element, so that you can continue modifying the element
 */
public Element appendTo(Element parent) {
Validate.notNull(parent);
parent.appendChild(this);
return this;
}
/**
 * Add a node to the start of this element's children.
 *
 * @param child node to add; must not be null
 * @return this element, so that you can add more child nodes or elements.
 */
public Element prependChild(Node child) {
Validate.notNull(child);
addChildren(0, child);
return this;
}
/**
* Insert the given nodes to the start of this Element's children.
*
* @param children nodes to add
* @return this Element, for chaining
* @see #insertChildren(int, Collection)
*/
public Element prependChildren(Collection extends Node> children) {
insertChildren(0, children);
return this;
}
/**
* Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the
* right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first.
*
* @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
* end
* @param children child nodes to insert
* @return this element, for chaining.
*/
public Element insertChildren(int index, Collection extends Node> children) {
Validate.notNull(children, "Children collection to be inserted must not be null.");
int currentSize = childNodeSize();
if (index < 0) index += currentSize +1; // roll around
Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds.");
ArrayList nodes = new ArrayList<>(children);
Node[] nodeArray = nodes.toArray(new Node[0]);
addChildren(index, nodeArray);
return this;
}
/**
 * Places the given child nodes into this element at the specified index, shifting the current nodes to the
 * right. The inserted nodes are moved from their current parent; copy them first to prevent that.
 *
 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the
 * end
 * @param children child nodes to insert; must not be null
 * @return this element, for chaining.
 */
public Element insertChildren(int index, Node... children) {
    Validate.notNull(children, "Children collection to be inserted must not be null.");
    final int currentSize = childNodeSize();
    // negative indexes count back from one past the last child, so -1 means "append"
    final int position = index < 0 ? index + currentSize + 1 : index;
    Validate.isTrue(position >= 0 && position <= currentSize, "Insert position out of bounds.");
    addChildren(position, children);
    return this;
}
/**
 * Inserts the given child node into this element at the specified index. Current nodes will be shifted to the
 * right. The inserted node will be moved from its current parent. To prevent moving, copy the node first.
 *
 * @param index 0-based index to insert the child at. Specify {@code 0} to insert at the start, {@code -1} at the
 * end
 * @param child child node to insert
 * @return this element, for chaining.
 */
public Element insertChild(int index, Node child) {
return insertChildren(index, child);
}
/**
 * Build a new element from a tag name and attach it as the last child of this element.
 *
 * @param tagName the name of the tag (e.g. {@code div}).
 * @return the new element, to allow you to add content to it, e.g.:
 * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");}
 */
public Element appendElement(String tagName) {
    // resolve the tag with the settings of the parser associated with this element
    final Tag childTag = Tag.valueOf(tagName, NodeUtils.parser(this).settings());
    final Element created = new Element(childTag, baseUri());
    appendChild(created);
    return created;
}
/**
 * Build a new element from a tag name and attach it as the first child of this element.
 *
 * @param tagName the name of the tag (e.g. {@code div}).
 * @return the new element, to allow you to add content to it, e.g.:
 * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");}
 */
public Element prependElement(String tagName) {
    // resolve the tag with the settings of the parser associated with this element
    final Tag childTag = Tag.valueOf(tagName, NodeUtils.parser(this).settings());
    final Element created = new Element(childTag, baseUri());
    prependChild(created);
    return created;
}
/**
 * Wrap the given text in a new TextNode and add it as the last child of this element.
 *
 * @param text the unencoded text to add; must not be null
 * @return this element
 */
public Element appendText(String text) {
    Validate.notNull(text);
    appendChild(new TextNode(text));
    return this;
}
/**
 * Wrap the given text in a new TextNode and add it as the first child of this element.
 *
 * @param text the unencoded text to add; must not be null
 * @return this element
 */
public Element prependText(String text) {
    Validate.notNull(text);
    prependChild(new TextNode(text));
    return this;
}
/**
 * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children.
 *
 * @param html HTML to add inside this element, after the existing HTML; must not be null
 * @return this element
 * @see #html(String)
 */
public Element append(String html) {
    Validate.notNull(html);
    // parse as a fragment in the context of this element, using this element's base URI
    List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
    addChildren(nodes.toArray(new Node[0]));
    return this;
}
/**
 * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the
 * element's children.
 *
 * @param html HTML to add inside this element, before the existing HTML; must not be null
 * @return this element
 * @see #html(String)
 */
public Element prepend(String html) {
    Validate.notNull(html);
    // parse as a fragment in the context of this element, using this element's base URI
    List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri());
    addChildren(0, nodes.toArray(new Node[0]));
    return this;
}
/**
 * Insert the specified HTML into the DOM before this element (as a preceding sibling).
 * Delegates to the superclass implementation and returns its result.
 *
 * @param html HTML to add before this element
 * @return the node returned by the superclass call (documented upstream as this element, for chaining)
 * @see #after(String)
 */
@Override
public Node before(String html) {
return super.before(html);
}
/**
 * Insert the specified node into the DOM before this node (as a preceding sibling).
 * Delegates to the superclass implementation and returns its result.
 *
 * @param node to add before this element
 * @return the node returned by the superclass call (documented upstream as this Element, for chaining)
 * @see #after(Node)
 */
@Override
public Node before(Node node) {
return super.before(node);
}
/**
 * Insert the specified HTML into the DOM after this element (as a following sibling).
 * Delegates to the superclass implementation and returns its result.
 *
 * @param html HTML to add after this element
 * @return the node returned by the superclass call (documented upstream as this element, for chaining)
 * @see #before(String)
 */
@Override
public Node after(String html) {
return super.after(html);
}
/**
 * Insert the specified node into the DOM after this node (as a following sibling).
 * Delegates to the superclass implementation and returns its result.
 *
 * @param node to add after this element
 * @return the node returned by the superclass call (documented upstream as this element, for chaining)
 * @see #before(Node)
 */
@Override
public Node after(Node node) {
return super.after(node);
}
/**
 * Remove all of the element's child nodes by clearing the child node list. Any attributes are left as-is.
 *
 * @return this element
 */
@Override
public Node empty() {
childNodes.clear();
return this;
}
/**
 * Wrap the supplied HTML around this element. Delegates to the superclass implementation and returns its result.
 *
 * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
 * @return the node returned by the superclass call (documented upstream as this element, for chaining)
 */
@Override
public Node wrap(String html) {
return super.wrap(html);
}
/**
 * Build a CSS selector that will uniquely select this element.
 * <p>
 * If the element has an ID that is unique in its owner document (or there is no owner document), returns
 * {@code #id}; otherwise returns the parent (if any) CSS selector, followed by {@literal '>'},
 * followed by a unique selector for the element (tag.class.class:nth-child(n)).
 *
 * @return the CSS Path that can be used to retrieve the element in a selector.
 */
public String cssSelector() {
    final String elementId = id();
    if (elementId.length() > 0) {
        final String idSelector = "#" + elementId;
        final Document owner = ownerDocument();
        if (owner == null) {
            return idSelector; // no owner document to check uniqueness against
        }
        // only trust the ID when it selects exactly this element; otherwise fall through to nth-child
        final Elements sameId = owner.select(idSelector);
        if (sameId.size() == 1 && sameId.get(0) == this) {
            return idSelector;
        }
    }

    // Translate HTML namespace ns:tag to CSS namespace syntax ns|tag
    final StringBuilder selector = new StringBuilder(tagName().replace(':', '|'));
    final String joinedClasses = StringUtil.join(classNames(), ".");
    if (joinedClasses.length() > 0) {
        selector.append('.').append(joinedClasses);
    }

    final Node container = parent();
    // don't add Document to selector, as will always have a html node
    if (container == null || container instanceof Document) {
        return selector.toString();
    }

    selector.insert(0, " > ");
    final Element parentElement = (Element) container;
    // several matching siblings: disambiguate by position among the parent's child elements
    if (parentElement.select(selector.toString()).size() > 1) {
        selector.append(MessageFormatUtil.format(":nth-child({0})", elementSiblingIndex() + 1));
    }
    return parentElement.cssSelector() + selector.toString();
}
/**
 * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling
 * of itself, so will not be included in the returned list.
 *
 * @return sibling elements
 */
public Elements siblingElements() {
    if (parentNode == null) {
        return new Elements(0);
    }
    Element parent = (Element) parent();
    List<Element> elements = parent.childElementsList();
    Elements siblings = new Elements(elements.size() - 1);
    for (Element el : elements) {
        if (el != this) {
            siblings.add(el);
        }
    }
    return siblings;
}
/**
 * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
 * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
 * <p>
 * This is similar to {@link #nextSibling()}, but specifically finds only Elements
 *
 * @return the next element, or null if there is no next element
 * @see #previousElementSibling()
 */
public Element nextElementSibling() {
    if (parentNode == null) {
        return null;
    }
    Element parent = (Element) parent();
    List<Element> siblings = parent.childElementsList();
    int index = indexInList(this, siblings);
    if (siblings.size() > index + 1) {
        return siblings.get(index + 1);
    }
    return null;
}
/**
 * Get each of the sibling elements that come after this element.
 *
 * @return each of the element siblings after this element, or an empty list if there are no next sibling elements
 * @see #previousElementSiblings()
 */
public Elements nextElementSiblings() {
return nextElementSiblings(true);
}
/**
 * Gets the previous element sibling of this element.
 *
 * @return the previous element, or null if there is no previous element
 * @see #nextElementSibling()
 */
public Element previousElementSibling() {
    if (parentNode == null) {
        return null;
    }
    Element parent = (Element) parent();
    List<Element> siblings = parent.childElementsList();
    int index = indexInList(this, siblings);
    if (index > 0) {
        return siblings.get(index - 1);
    }
    return null;
}
/**
 * Get each of the element siblings before this element.
 *
 * @return the previous element siblings, or an empty list if there are none.
 * @see #nextElementSiblings()
 */
public Elements previousElementSiblings() {
return nextElementSiblings(false);
}
/**
 * Shared implementation behind {@link #nextElementSiblings()} and {@link #previousElementSiblings()}.
 *
 * @param next {@code true} to collect the following siblings, {@code false} for the preceding ones
 * @return the requested siblings, or an empty list if this element has no parent
 */
private Elements nextElementSiblings(boolean next) {
    final Elements self = new Elements();
    if (parentNode != null) {
        // wrap this element so the list-level sibling traversal can be reused
        self.add(this);
        if (next) {
            return self.nextAll();
        }
        return self.prevAll();
    }
    return self;
}
/**
 * Gets the first Element sibling of this element. That may be this element.
 *
 * @return the first sibling that is an element (aka the parent's first element child)
 */
public Element firstElementSibling() {
    Element parent = (Element) parent();
    if (parent == null) {
        return this; // orphan is its own first sibling
    }
    List<Element> siblings = parent.childElementsList();
    // with a single child element, that element is this one
    return siblings.size() > 1 ? siblings.get(0) : this;
}
/**
 * Report where this element sits in its parent's list of child elements. I.e. if this is the first element
 * sibling, returns 0. An element without a parent also reports 0.
 *
 * @return position in element sibling list
 */
public int elementSiblingIndex() {
    final Element container = (Element) parent();
    return container == null ? 0 : indexInList(this, container.childElementsList());
}
/**
 * Gets the last element sibling of this element. That may be this element.
 *
 * @return the last sibling that is an element (aka the parent's last element child)
 */
public Element lastElementSibling() {
    Element parent = (Element) parent();
    if (parent == null) {
        return this; // orphan is its own last sibling
    }
    List<Element> siblings = parent.childElementsList();
    // with a single child element, that element is this one
    return siblings.size() > 1 ? siblings.get(siblings.size() - 1) : this;
}
/**
 * Finds the position of an element in a list, comparing by reference identity.
 *
 * @param search element to look for
 * @param elements list to scan
 * @return the index of the first entry that is the same object as {@code search}; {@code 0} if it is not present
 */
private static int indexInList(Element search, List<Element> elements) {
    final int size = elements.size();
    for (int i = 0; i < size; i++) {
        if (elements.get(i) == search) {
            return i;
        }
    }
    return 0;
}
// DOM type methods
/**
 * Collect the elements with the specified tag name, looking at this element and recursively at everything under it.
 *
 * @param tagName The tag name to search for (case insensitively); must not be empty.
 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match.
 */
public Elements getElementsByTag(String tagName) {
    Validate.notEmpty(tagName);
    // the name is normalized before matching
    final String normalizedName = Normalizer.normalize(tagName);
    return Collector.collect(new Evaluator.Tag(normalizedName), this);
}
/**
 * Look up an element by ID, including or under this element.
 * <p>
 * Note that this finds the first matching ID, starting with this element. If you search down from a different
 * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
 * use {@link Document#getElementById(String)}
 *
 * @param id The ID to search for; must not be empty.
 * @return The first matching element by ID, starting with this element, or null if none found.
 */
public Element getElementById(String id) {
    Validate.notEmpty(id);
    final Elements matches = Collector.collect(new Evaluator.Id(id), this);
    return matches.isEmpty() ? null : matches.get(0);
}
/**
 * Find elements that have this class, including or under this element. Case insensitive.
 * <p>
 * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method
 * checks each class, so you can find the above with {@code el.getElementsByClass("header");}.
 *
 * @param className the name of the class to search for; must not be empty.
 * @return elements with the supplied class name, empty if none
 * @see #hasClass(String)
 * @see #classNames()
 */
public Elements getElementsByClass(String className) {
Validate.notEmpty(className);
return Collector.collect(new Evaluator.Class(className), this);
}
/**
 * Collect the elements that have a named attribute set. Case insensitive.
 *
 * @param key name of the attribute, e.g. {@code href}; must not be empty. Surrounding whitespace is trimmed.
 * @return elements that have this attribute, empty if none
 */
public Elements getElementsByAttribute(String key) {
    Validate.notEmpty(key);
    return Collector.collect(new Evaluator.Attribute(key.trim()), this);
}
/**
 * Collect the elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find
 * elements that have HTML5 datasets.
 *
 * @param keyPrefix name prefix of the attribute e.g. {@code data-}; must not be empty. Surrounding whitespace
 * is trimmed.
 * @return elements that have attribute names that start with the prefix, empty if none.
 */
public Elements getElementsByAttributeStarting(String keyPrefix) {
    Validate.notEmpty(keyPrefix);
    return Collector.collect(new Evaluator.AttributeStarting(keyPrefix.trim()), this);
}
/**
* Find elements that have an attribute with the specific value. Case insensitive.
*
* @param key name of the attribute
* @param value value of the attribute
* @return elements that have this attribute with this value, empty if none
*/
public Elements getElementsByAttributeValue(String key, String value) {
    // Evaluator for elements whose attribute `key` carries exactly `value`.
    final Evaluator valueEquals = new Evaluator.AttributeWithValue(key, value);
    return Collector.collect(valueEquals, this);
}
/**
* Find elements that either do not have this attribute, or have it with a different value. Case insensitive.
*
* @param key name of the attribute
* @param value value of the attribute
* @return elements that do not have a matching attribute
*/
public Elements getElementsByAttributeValueNot(String key, String value) {
    // Evaluator for elements that lack the attribute or hold a different value for it.
    final Evaluator valueDiffers = new Evaluator.AttributeWithValueNot(key, value);
    return Collector.collect(valueDiffers, this);
}
/**
* Find elements that have attributes that start with the value prefix. Case insensitive.
*
* @param key name of the attribute
* @param valuePrefix start of attribute value
* @return elements that have attributes that start with the value prefix
*/
public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) {
    // Evaluator for elements whose attribute value begins with the given prefix.
    final Evaluator valueStartsWith = new Evaluator.AttributeWithValueStarting(key, valuePrefix);
    return Collector.collect(valueStartsWith, this);
}
/**
* Find elements that have attributes that end with the value suffix. Case insensitive.
*
* @param key name of the attribute
* @param valueSuffix end of the attribute value
* @return elements that have attributes that end with the value suffix
*/
public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) {
    // Evaluator for elements whose attribute value finishes with the given suffix.
    final Evaluator valueEndsWith = new Evaluator.AttributeWithValueEnding(key, valueSuffix);
    return Collector.collect(valueEndsWith, this);
}
/**
* Find elements that have attributes whose value contains the match string. Case insensitive.
*
* @param key name of the attribute
* @param match substring of value to search for
* @return elements that have attributes containing this text
*/
public Elements getElementsByAttributeValueContaining(String key, String match) {
    // Evaluator for elements whose attribute value includes the given substring.
    final Evaluator valueContains = new Evaluator.AttributeWithValueContaining(key, match);
    return Collector.collect(valueContains, this);
}
/**
* Find elements that have attributes whose values match the supplied regular expression.
* @param key name of the attribute
* @param pattern compiled regular expression to match against attribute values
* @return elements that have attributes matching this regular expression
*/
public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) {
    // Evaluator for elements whose attribute value is matched by the compiled pattern.
    final Evaluator valueMatches = new Evaluator.AttributeWithValueMatching(key, pattern);
    return Collector.collect(valueMatches, this);
}
/**
* Find elements that have attributes whose values match the supplied regular expression.
*
* @param key name of the attribute
* @param regex regular expression to match against attribute values.
* You can use embedded flags (such as {@code (?i)} and {@code (?m)}) to control regex options.
* @return elements that have attributes matching this regular expression
*/
public Elements getElementsByAttributeValueMatching(String key, String regex) {
    final Pattern compiled;
    try {
        compiled = Pattern.compile(regex);
    } catch (PatternSyntaxException syntaxError) {
        // Surface a malformed expression as an argument error, keeping the original cause attached.
        final String message = "Pattern syntax error: " + regex;
        throw new IllegalArgumentException(message, syntaxError);
    }
    // Hand over to the Pattern-based overload now that the expression is compiled.
    return getElementsByAttributeValueMatching(key, compiled);
}
/**
* Find elements whose sibling index is less than the supplied index.
* @param index 0-based index
* @return elements less than index
*/
public Elements getElementsByIndexLessThan(int index) {
    // Evaluator for elements positioned before the given 0-based sibling index.
    final Evaluator indexBelow = new Evaluator.IndexLessThan(index);
    return Collector.collect(indexBelow, this);
}
/**
* Find elements whose sibling index is greater than the supplied index.
* @param index 0-based index
* @return elements greater than index
*/
public Elements getElementsByIndexGreaterThan(int index) {
    // Evaluator for elements positioned after the given 0-based sibling index.
    final Evaluator indexAbove = new Evaluator.IndexGreaterThan(index);
    return Collector.collect(indexAbove, this);
}
/**
* Find elements whose sibling index is equal to the supplied index.
* @param index 0-based index
* @return elements equal to index
*/
public Elements getElementsByIndexEquals(int index) {
    // Evaluator for elements sitting exactly at the given 0-based sibling index.
    final Evaluator indexIs = new Evaluator.IndexEquals(index);
    return Collector.collect(indexIs, this);
}
/**
* Find elements that contain the specified string. The search is case insensitive. The text may appear directly
* in the element, or in any of its descendants.
* @param searchText to look for in the element's text
* @return elements that contain the string, case insensitive.
* @see Element#text()
*/
public Elements getElementsContainingText(String searchText) {
    // Evaluator for elements whose text (own or descendant) includes the search string.
    final Evaluator textContains = new Evaluator.ContainsText(searchText);
    return Collector.collect(textContains, this);
}
/**
* Find elements that directly contain the specified string. The search is case insensitive. The text must appear directly
* in the element, not in any of its descendants.
* @param searchText to look for in the element's own text
* @return elements that contain the string, case insensitive.
* @see Element#ownText()
*/
public Elements getElementsContainingOwnText(String searchText) {
    // Evaluator for elements whose own text (descendants excluded) includes the search string.
    final Evaluator ownTextContains = new Evaluator.ContainsOwnText(searchText);
    return Collector.collect(ownTextContains, this);
}
/**
* Find elements whose text matches the supplied regular expression.
* @param pattern regular expression to match text against
* @return elements matching the supplied regular expression.
* @see Element#text()
*/
public Elements getElementsMatchingText(Pattern pattern) {
    // Evaluator for elements whose text is matched by the compiled pattern.
    final Evaluator textMatches = new Evaluator.Matches(pattern);
    return Collector.collect(textMatches, this);
}
/**
* Find elements whose text matches the supplied regular expression.
*
* @param regex regular expression to match text against.
* You can use embedded flags (such as {@code (?i)} and {@code (?m)}) to control regex options.
* @return elements matching the supplied regular expression.
* @see Element#text()
*/
public Elements getElementsMatchingText(String regex) {
    final Pattern compiled;
    try {
        compiled = Pattern.compile(regex);
    } catch (PatternSyntaxException syntaxError) {
        // Surface a malformed expression as an argument error, keeping the original cause attached.
        final String message = "Pattern syntax error: " + regex;
        throw new IllegalArgumentException(message, syntaxError);
    }
    // Hand over to the Pattern-based overload now that the expression is compiled.
    return getElementsMatchingText(compiled);
}
/**
* Find elements whose own text matches the supplied regular expression.
* @param pattern regular expression to match text against
* @return elements matching the supplied regular expression.
* @see Element#ownText()
*/
public Elements getElementsMatchingOwnText(Pattern pattern) {
    // Evaluator for elements whose own text is matched by the compiled pattern.
    final Evaluator ownTextMatches = new Evaluator.MatchesOwn(pattern);
    return Collector.collect(ownTextMatches, this);
}
/**
* Find elements whose own text matches the supplied regular expression.
*
* @param regex regular expression to match text against.
* You can use embedded flags (such as {@code (?i)} and {@code (?m)}) to control regex options.
* @return elements matching the supplied regular expression.
* @see Element#ownText()
*/
public Elements getElementsMatchingOwnText(String regex) {
    final Pattern compiled;
    try {
        compiled = Pattern.compile(regex);
    } catch (PatternSyntaxException syntaxError) {
        // Surface a malformed expression as an argument error, keeping the original cause attached.
        final String message = "Pattern syntax error: " + regex;
        throw new IllegalArgumentException(message, syntaxError);
    }
    // Hand over to the Pattern-based overload now that the expression is compiled.
    return getElementsMatchingOwnText(compiled);
}
/**
* Find all elements under this element (including self, and children of children).
*
* @return all elements
*/
public Elements getAllElements() {
    // The AllElements evaluator applies no attribute, index or text criterion.
    final Evaluator everyElement = new Evaluator.AllElements();
    return Collector.collect(everyElement, this);
}
/**
* Gets the normalized, combined text of this element and all its children. Whitespace is normalized and
* trimmed.
* For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns
* {@code "Hello there now!"}.
* If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not
* children), use {@link #ownText()}
*
Note that this method returns the textual content that would be presented to a reader. The contents of data
* nodes (such as {@code