All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jsoup.nodes.Element Maven / Gradle / Ivy

Go to download

SDK for dev_appserver (local development) with some of the dependencies shaded (repackaged)

There is a newer version: 2.0.31
Show newest version
package org.jsoup.nodes;

import org.jsoup.helper.ChangeNotifyingArrayList;
import org.jsoup.helper.Validate;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jsoup.parser.Parser;
import org.jsoup.parser.Tag;
import org.jsoup.select.Collector;
import org.jsoup.select.Elements;
import org.jsoup.select.Evaluator;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.jsoup.select.QueryParser;
import org.jsoup.select.Selector;
import org.jspecify.annotations.Nullable;

import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.jsoup.internal.Normalizer.normalize;
import static org.jsoup.nodes.TextNode.lastCharIsWhitespace;
import static org.jsoup.parser.Parser.NamespaceHtml;
import static org.jsoup.parser.TokenQueue.escapeCssIdentifier;

/**
 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements).
 

From an Element, you can extract data, traverse the node graph, and manipulate the HTML. */ public class Element extends Node { private static final List EmptyChildren = Collections.emptyList(); private static final Pattern ClassSplit = Pattern.compile("\\s+"); private static final String BaseUriKey = Attributes.internalKey("baseUri"); private Tag tag; private @Nullable WeakReference> shadowChildrenRef; // points to child elements shadowed from node children List childNodes; @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null /** * Create a new, standalone element, in the specified namespace. * @param tag tag name * @param namespace namespace for this element */ public Element(String tag, String namespace) { this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null); } /** * Create a new, standalone element, in the HTML namespace. * @param tag tag name * @see #Element(String tag, String namespace) */ public Element(String tag) { this(Tag.valueOf(tag, Parser.NamespaceHtml, ParseSettings.preserveCase), "", null); } /** * Create a new, standalone Element. (Standalone in that it has no parent.) * * @param tag tag of this element * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's) * @param attributes initial attributes (optional, may be null) * @see #appendChild(Node) * @see #appendElement(String) */ public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) { Validate.notNull(tag); childNodes = EmptyNodes; this.attributes = attributes; this.tag = tag; if (baseUri != null) this.setBaseUri(baseUri); } /** * Create a new Element from a Tag and a base URI. * * @param tag element tag * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any. 
* @see Tag#valueOf(String, ParseSettings) */ public Element(Tag tag, @Nullable String baseUri) { this(tag, baseUri, null); } /** Internal test to check if a nodelist object has been created. */ protected boolean hasChildNodes() { return childNodes != EmptyNodes; } protected List ensureChildNodes() { if (childNodes == EmptyNodes) { childNodes = new NodeList(this, 4); } return childNodes; } @Override protected boolean hasAttributes() { return attributes != null; } @Override public Attributes attributes() { if (attributes == null) // not using hasAttributes, as doesn't clear warning attributes = new Attributes(); return attributes; } @Override public String baseUri() { return searchUpForAttribute(this, BaseUriKey); } private static String searchUpForAttribute(final Element start, final String key) { Element el = start; while (el != null) { if (el.attributes != null && el.attributes.hasKey(key)) return el.attributes.get(key); el = el.parent(); } return ""; } @Override protected void doSetBaseUri(String baseUri) { attributes().put(BaseUriKey, baseUri); } @Override public int childNodeSize() { return childNodes.size(); } @Override public String nodeName() { return tag.getName(); } /** * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase * case preserving parsing}, this will return the source's original case. * * @return the tag name */ public String tagName() { return tag.getName(); } /** * Get the normalized name of this Element's tag. This will always be the lower-cased version of the tag, regardless * of the tag case preserving setting of the parser. For e.g., {@code

} and {@code
<div>} both have a
     * normal name of {@code div}.
     * @return normal name
     */
    @Override
    public String normalName() {
        final String normalized = tag.normalName();
        return normalized;
    }

    /**
     Check whether this Element carries the given normalized name and lives in the given namespace.
     * @param normalName a normalized element name (e.g. {@code div}).
     * @param namespace the namespace
     * @return true if the element's normal name matches exactly, and is in the specified namespace
     * @since 1.17.2
     */
    public boolean elementIs(String normalName, String namespace) {
        if (!tag.normalName().equals(normalName))
            return false;
        return tag.namespace().equals(namespace);
    }

    /**
     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>
} with
     * {@code el.tagName("div");}.
     *
     * @param tagName new tag name for this element
     * @return this element, for chaining
     * @see Elements#tagName(String)
     */
    public Element tagName(String tagName) {
        // keep the element in whatever namespace its current tag uses
        final String currentNamespace = tag.namespace();
        return tagName(tagName, currentNamespace);
    }

    /**
     * Change (rename) the tag of this element. For example, convert a {@code <span>} to a {@code <div>
} with * {@code el.tagName("div");}. * * @param tagName new tag name for this element * @param namespace the new namespace for this element * @return this element, for chaining * @see Elements#tagName(String) */ public Element tagName(String tagName, String namespace) { Validate.notEmptyParam(tagName, "tagName"); Validate.notEmptyParam(namespace, "namespace"); tag = Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()); // maintains the case option of the original parse return this; } /** * Get the Tag for this element. * * @return the tag object */ public Tag tag() { return tag; } /** * Test if this element is a block-level element. (E.g. {@code
== true} or an inline element * {@code == false}). * * @return true if block, false if not (and thus inline) */ public boolean isBlock() { return tag.isBlock(); } /** * Get the {@code id} attribute of this element. * * @return The id attribute, if present, or an empty string if not. */ public String id() { return attributes != null ? attributes.getIgnoreCase("id") :""; } /** Set the {@code id} attribute of this element. @param id the ID value to use @return this Element, for chaining */ public Element id(String id) { Validate.notNull(id); attr("id", id); return this; } /** * Set an attribute value on this element. If this element already has an attribute with the * key, its value is updated; otherwise, a new attribute is added. * * @return this element */ public Element attr(String attributeKey, String attributeValue) { super.attr(attributeKey, attributeValue); return this; } /** * Set a boolean attribute value on this element. Setting to true sets the attribute value to "" and * marks the attribute as boolean so no value is written out. Setting to false removes the attribute * with the same key if it exists. * * @param attributeKey the attribute key * @param attributeValue the attribute value * * @return this element */ public Element attr(String attributeKey, boolean attributeValue) { attributes().put(attributeKey, attributeValue); return this; } /** Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc will cascade back to this Element. @param key the (case-sensitive) attribute key @return the Attribute for this key, or null if not present. @since 1.17.2 */ public Attribute attribute(String key) { return hasAttributes() ? attributes().attribute(key) : null; } /** * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key * starting with "data-" is included the dataset. *

* E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
* {@code package=jsoup, language=java}.
*

* This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected * in the other map. *

* You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
     * @return a map of {@code key=value} custom data attributes.
     */
    public Map<String, String> dataset() {
        return attributes().dataset();
    }

    /**
     * Get this element's parent element.
     * @return the parent, or null if this element has no parent
     */
    @Override @Nullable
    public final Element parent() {
        return (Element) parentNode;
    }

    /**
     * Get this element's parent and ancestors, up to the document root.
     * @return this element's stack of parents, starting with the closest first.
     */
    public Elements parents() {
        Elements parents = new Elements();
        Element parent = this.parent();
        // stop at the top of the tree, or at a node named "#root", which is not included in the result
        while (parent != null && !parent.nameIs("#root")) {
            parents.add(parent);
            parent = parent.parent();
        }
        return parents;
    }

    /**
     * Get a child element of this element, by its 0-based index number.
     *

* Note that an element can have both mixed Nodes and Elements as children. This method inspects * a filtered list of children that are elements, and the index is based on that filtered list. *

*
     * @param index the index number of the element to retrieve
     * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
     * @see #childNode(int)
     */
    public Element child(int index) {
        // the index addresses the element-only view of the children, not the full node list
        final List<Element> elementChildren = childElementsList();
        return elementChildren.get(index);
    }

    /**
     * Get the number of child nodes of this element that are elements.
     *

* This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.) *

*
     * @return the number of child nodes that are elements
     * @see #children()
     * @see #child(int)
     */
    public int childrenSize() {
        // counts entries in the element-only view of the children
        final int elementCount = childElementsList().size();
        return elementCount;
    }

    /**
     * Get this element's child elements.
     *

* This is effectively a filter on {@link #childNodes()} to get Element nodes. *

* @return child elements. If this element has no children, returns an empty list. * @see #childNodes() */ public Elements children() { return new Elements(childElementsList()); } /** * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated. * TODO - think about pulling this out as a helper as there are other shadow lists (like in Attributes) kept around. * @return a list of child elements */ List childElementsList() { if (childNodeSize() == 0) return EmptyChildren; // short circuit creating empty List children; if (shadowChildrenRef == null || (children = shadowChildrenRef.get()) == null) { final int size = childNodes.size(); children = new ArrayList<>(size); //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here) for (int i = 0; i < size; i++) { final Node node = childNodes.get(i); if (node instanceof Element) children.add((Element) node); } shadowChildrenRef = new WeakReference<>(children); } return children; } /** * Clears the cached shadow child elements. */ @Override void nodelistChanged() { super.nodelistChanged(); shadowChildrenRef = null; } /** Returns a Stream of this Element and all of its descendant Elements. The stream has document order. @return a stream of this element and its descendants. @see #nodeStream() @since 1.17.1 */ public Stream stream() { return NodeUtils.stream(this, Element.class); } private List filterNodes(Class clazz) { return childNodes.stream() .filter(clazz::isInstance) .map(clazz::cast) .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList)); } /** * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. *

* This is effectively a filter on {@link #childNodes()} to get Text nodes. * @return child text nodes. If this element has no text nodes, returns an * empty list. *

* For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
* <ul>
* <li>{@code p.text()} = {@code "One Two Three Four"}</li>
* <li>{@code p.ownText()} = {@code "One Three Four"}</li>
* <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
* <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
* <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
* </ul>
*/
    public List<TextNode> textNodes() {
        return filterNodes(TextNode.class);
    }

    /**
     * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
     *

* This is effectively a filter on {@link #childNodes()} to get Data nodes. *

* @return child data nodes. If this element has no data nodes, returns an
     * empty list.
     * @see #data()
     */
    public List<DataNode> dataNodes() {
        return filterNodes(DataNode.class);
    }

    /**
     * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
     * may include this element, or any of its children.
     *

This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because * multiple filters can be combined, e.g.:

*
    *
  • {@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes) *
  • {@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely) *
*

See the query syntax documentation in {@link org.jsoup.select.Selector}.

*

Also known as {@code querySelectorAll()} in the Web DOM.

* * @param cssQuery a {@link Selector} CSS-like query * @return an {@link Elements} list containing elements that match the query (empty if none match) * @see Selector selector query syntax * @see QueryParser#parse(String) * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. */ public Elements select(String cssQuery) { return Selector.select(cssQuery, this); } /** * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but * may be useful if you are running the same query many times (on many documents) and want to save the overhead of * repeatedly parsing the CSS query. * @param evaluator an element evaluator * @return an {@link Elements} list containing elements that match the query (empty if none match) */ public Elements select(Evaluator evaluator) { return Selector.select(evaluator, this); } /** * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context. *

This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query * execution stops on the first hit.

*

Also known as {@code querySelector()} in the Web DOM.

* @param cssQuery cssQuery a {@link Selector} CSS-like query * @return the first matching element, or {@code null} if there is no match. * @see #expectFirst(String) */ public @Nullable Element selectFirst(String cssQuery) { return Selector.selectFirst(cssQuery, this); } /** * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or * {@code null} if none match. * * @param evaluator an element evaluator * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none * match. */ public @Nullable Element selectFirst(Evaluator evaluator) { return Collector.findFirst(evaluator, this); } /** Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This is useful if you want to simply abort processing on a failed match. @param cssQuery a {@link Selector} CSS-like query @return the first matching element @throws IllegalArgumentException if no match is found @since 1.15.2 */ public Element expectFirst(String cssQuery) { return (Element) Validate.ensureNotNull( Selector.selectFirst(cssQuery, this), parent() != null ? "No elements matched the query '%s' on element '%s'.": "No elements matched the query '%s' in the document." , cssQuery, this.tagName() ); } /** * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web * DOM. * * @param cssQuery a {@link Selector} CSS query * @return if this element matches the query */ public boolean is(String cssQuery) { return is(QueryParser.parse(cssQuery)); } /** * Check if this element matches the given evaluator. * @param evaluator an element evaluator * @return if this element matches */ public boolean is(Evaluator evaluator) { return evaluator.matches(this.root(), this); } /** * Find the closest element up the tree of parents that matches the specified CSS query. 
Will return itself, an * ancestor, or {@code null} if there is no such matching element. * @param cssQuery a {@link Selector} CSS query * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not * found. */ public @Nullable Element closest(String cssQuery) { return closest(QueryParser.parse(cssQuery)); } /** * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an * ancestor, or {@code null} if there is no such matching element. * @param evaluator a query evaluator * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not * found. */ public @Nullable Element closest(Evaluator evaluator) { Validate.notNull(evaluator); Element el = this; final Element root = root(); do { if (evaluator.matches(root, el)) return el; el = el.parent(); } while (el != null); return null; } /** Find Elements that match the supplied {@index XPath} expression.

Note that for convenience of writing the XPath expression, namespaces are disabled, and queries can be expressed using the element's local name only.

By default, XPath 1.0 expressions are supported. If you would like to use XPath 2.0 or higher, you can provide an alternate XPathFactory implementation:

  1. Add the implementation to your classpath. E.g. to use Saxon-HE, add net.sf.saxon:Saxon-HE to your build.
  2. Set the system property javax.xml.xpath.XPathFactory:jsoup to the implementing classname. E.g.:
    System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");
@param xpath XPath expression
     @return matching elements, or an empty list if none match.
     @see #selectXpath(String, Class)
     @since 1.14.3
     */
    public Elements selectXpath(String xpath) {
        // restrict the XPath results to Element nodes, then wrap them as Elements
        final List<Element> matched = NodeUtils.selectXpath(xpath, this, Element.class);
        return new Elements(matched);
    }

    /**
     Find Nodes that match the supplied XPath expression.

For example, to select TextNodes under {@code p} elements:

List<TextNode> textNodes = doc.selectXpath("//body//p//text()", TextNode.class);

Note that in the jsoup DOM, Attribute objects are not Nodes. To directly select attribute values, do something like:

List<String> hrefs = doc.selectXpath("//a").eachAttr("href");
@param xpath XPath expression @param nodeType the jsoup node type to return @see #selectXpath(String) @return a list of matching nodes @since 1.14.3 */ public List selectXpath(String xpath, Class nodeType) { return NodeUtils.selectXpath(xpath, this, nodeType); } /** * Insert a node to the end of this Element's children. The incoming node will be re-parented. * * @param child node to add. * @return this Element, for chaining * @see #prependChild(Node) * @see #insertChildren(int, Collection) */ public Element appendChild(Node child) { Validate.notNull(child); // was - Node#addChildren(child). short-circuits an array create and a loop. reparentChild(child); ensureChildNodes(); childNodes.add(child); child.setSiblingIndex(childNodes.size() - 1); return this; } /** Insert the given nodes to the end of this Element's children. @param children nodes to add @return this Element, for chaining @see #insertChildren(int, Collection) */ public Element appendChildren(Collection children) { insertChildren(-1, children); return this; } /** * Add this element to the supplied parent element, as its next child. * * @param parent element to which this element will be appended * @return this element, so that you can continue modifying the element */ public Element appendTo(Element parent) { Validate.notNull(parent); parent.appendChild(this); return this; } /** * Add a node to the start of this element's children. * * @param child node to add. * @return this element, so that you can add more child nodes or elements. */ public Element prependChild(Node child) { Validate.notNull(child); addChildren(0, child); return this; } /** Insert the given nodes to the start of this Element's children. @param children nodes to add @return this Element, for chaining @see #insertChildren(int, Collection) */ public Element prependChildren(Collection children) { insertChildren(0, children); return this; } /** * Inserts the given child nodes into this element at the specified index. 
Current nodes will be shifted to the * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. * * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the * end * @param children child nodes to insert * @return this element, for chaining. */ public Element insertChildren(int index, Collection children) { Validate.notNull(children, "Children collection to be inserted must not be null."); int currentSize = childNodeSize(); if (index < 0) index += currentSize +1; // roll around Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); ArrayList nodes = new ArrayList<>(children); Node[] nodeArray = nodes.toArray(new Node[0]); addChildren(index, nodeArray); return this; } /** * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. * * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the * end * @param children child nodes to insert * @return this element, for chaining. */ public Element insertChildren(int index, Node... children) { Validate.notNull(children, "Children collection to be inserted must not be null."); int currentSize = childNodeSize(); if (index < 0) index += currentSize +1; // roll around Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); addChildren(index, children); return this; } /** * Create a new element by tag name, and add it as this Element's last child. * * @param tagName the name of the tag (e.g. {@code div}). 
* @return the new element, to allow you to add content to it, e.g.: * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} */ public Element appendElement(String tagName) { return appendElement(tagName, tag.namespace()); } /** * Create a new element by tag name and namespace, add it as this Element's last child. * * @param tagName the name of the tag (e.g. {@code div}). * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) * @return the new element, in the specified namespace */ public Element appendElement(String tagName, String namespace) { Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri()); appendChild(child); return child; } /** * Create a new element by tag name, and add it as this Element's first child. * * @param tagName the name of the tag (e.g. {@code div}). * @return the new element, to allow you to add content to it, e.g.: * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} */ public Element prependElement(String tagName) { return prependElement(tagName, tag.namespace()); } /** * Create a new element by tag name and namespace, and add it as this Element's first child. * * @param tagName the name of the tag (e.g. {@code div}). * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) * @return the new element, in the specified namespace */ public Element prependElement(String tagName, String namespace) { Element child = new Element(Tag.valueOf(tagName, namespace, NodeUtils.parser(this).settings()), baseUri()); prependChild(child); return child; } /** * Create and append a new TextNode to this element. * * @param text the (un-encoded) text to add * @return this element */ public Element appendText(String text) { Validate.notNull(text); TextNode node = new TextNode(text); appendChild(node); return this; } /** * Create and prepend a new TextNode to this element. 
* * @param text the decoded text to add * @return this element */ public Element prependText(String text) { Validate.notNull(text); TextNode node = new TextNode(text); prependChild(node); return this; } /** * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. * @param html HTML to add inside this element, after the existing HTML * @return this element * @see #html(String) */ public Element append(String html) { Validate.notNull(html); List nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); addChildren(nodes.toArray(new Node[0])); return this; } /** * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. * @param html HTML to add inside this element, before the existing HTML * @return this element * @see #html(String) */ public Element prepend(String html) { Validate.notNull(html); List nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); addChildren(0, nodes.toArray(new Node[0])); return this; } /** * Insert the specified HTML into the DOM before this element (as a preceding sibling). * * @param html HTML to add before this element * @return this element, for chaining * @see #after(String) */ @Override public Element before(String html) { return (Element) super.before(html); } /** * Insert the specified node into the DOM before this node (as a preceding sibling). * @param node to add before this element * @return this Element, for chaining * @see #after(Node) */ @Override public Element before(Node node) { return (Element) super.before(node); } /** * Insert the specified HTML into the DOM after this element (as a following sibling). 
* * @param html HTML to add after this element * @return this element, for chaining * @see #before(String) */ @Override public Element after(String html) { return (Element) super.after(html); } /** * Insert the specified node into the DOM after this node (as a following sibling). * @param node to add after this element * @return this element, for chaining * @see #before(Node) */ @Override public Element after(Node node) { return (Element) super.after(node); } /** * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to * {@code null}. * @return this element */ @Override public Element empty() { // Detach each of the children -> parent links: for (Node child : childNodes) { child.parentNode = null; } childNodes.clear(); return this; } /** * Wrap the supplied HTML around this element. * * @param html HTML to wrap around this element, e.g. {@code
}. Can be arbitrarily deep.
     * @return this element, for chaining.
     */
    @Override
    public Element wrap(String html) {
        // delegate to the Node implementation, narrowing the return type for chaining
        Node self = super.wrap(html);
        return (Element) self;
    }

    /**
     * Get a CSS selector that will uniquely select this element.
     *

* If the element has an ID, returns {@code #id}; * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, * followed by a unique selector for the element ({@code tag.class.class:nth-child(n)}). *

* * @return the CSS Path that can be used to retrieve the element in a selector. */ public String cssSelector() { if (id().length() > 0) { // prefer to return the ID - but check that it's actually unique first! String idSel = "#" + escapeCssIdentifier(id()); Document doc = ownerDocument(); if (doc != null) { Elements els = doc.select(idSel); if (els.size() == 1 && els.get(0) == this) // otherwise, continue to the nth-child impl return idSel; } else { return idSel; // no ownerdoc, return the ID selector } } StringBuilder selector = StringUtil.borrowBuilder(); Element el = this; while (el != null && !(el instanceof Document)) { selector.insert(0, el.cssSelectorComponent()); el = el.parent(); } return StringUtil.releaseBuilder(selector); } private String cssSelectorComponent() { // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|"); StringBuilder selector = StringUtil.borrowBuilder().append(tagName); // String classes = StringUtil.join(classNames().stream().map(TokenQueue::escapeCssIdentifier).iterator(), "."); // todo - replace with ^^ in 1.16.1 when we enable Android support for stream etc StringUtil.StringJoiner escapedClasses = new StringUtil.StringJoiner("."); for (String name : classNames()) escapedClasses.add(escapeCssIdentifier(name)); String classes = escapedClasses.complete(); if (classes.length() > 0) selector.append('.').append(classes); if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node return StringUtil.releaseBuilder(selector); selector.insert(0, " > "); if (parent().select(selector.toString()).size() > 1) selector.append(String.format( ":nth-child(%d)", elementSiblingIndex() + 1)); return StringUtil.releaseBuilder(selector); } /** * Get sibling elements. If the element has no sibling elements, returns an empty list. 
An element is not a sibling
     * of itself, so will not be included in the returned list.
     * @return sibling elements
     */
    public Elements siblingElements() {
        if (parentNode == null)
            return new Elements(0);

        List<Element> elements = parent().childElementsList();
        // presize for every child element of the parent except this one
        Elements siblings = new Elements(elements.size() - 1);
        for (Element el : elements) {
            if (el != this)
                siblings.add(el);
        }
        return siblings;
    }

    /**
     * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s,
     * the {@code nextElementSibling} of the first {@code p} is the second {@code p}.
     *

* This is similar to {@link #nextSibling()}, but specifically finds only Elements *

* @return the next element, or null if there is no next element * @see #previousElementSibling() */ public @Nullable Element nextElementSibling() { Node next = this; while ((next = next.nextSibling()) != null) { if (next instanceof Element) return (Element) next; } return null; } /** * Get each of the sibling elements that come after this element. * * @return each of the element siblings after this element, or an empty list if there are no next sibling elements */ public Elements nextElementSiblings() { return nextElementSiblings(true); } /** * Gets the previous element sibling of this element. * @return the previous element, or null if there is no previous element * @see #nextElementSibling() */ public @Nullable Element previousElementSibling() { Node prev = this; while ((prev = prev.previousSibling()) != null) { if (prev instanceof Element) return (Element) prev; } return null; } /** * Get each of the element siblings before this element. * * @return the previous element siblings, or an empty list if there are none. */ public Elements previousElementSiblings() { return nextElementSiblings(false); } private Elements nextElementSiblings(boolean next) { Elements els = new Elements(); if (parentNode == null) return els; els.add(this); return next ? els.nextAll() : els.prevAll(); } /** * Gets the first Element sibling of this element. That may be this element. * @return the first sibling that is an element (aka the parent's first element child) */ public Element firstElementSibling() { if (parent() != null) { //noinspection DataFlowIssue (not nullable, would be this is no other sibs) return parent().firstElementChild(); } else return this; // orphan is its own first sibling } /** * Get the list index of this element in its element sibling list. I.e. if this is the first element * sibling, returns 0. 
* @return position in element sibling list */ public int elementSiblingIndex() { if (parent() == null) return 0; return indexInList(this, parent().childElementsList()); } /** * Gets the last element sibling of this element. That may be this element. * @return the last sibling that is an element (aka the parent's last element child) */ public Element lastElementSibling() { if (parent() != null) { //noinspection DataFlowIssue (not nullable, would be this if no other sibs) return parent().lastElementChild(); } else return this; } private static int indexInList(Element search, List elements) { final int size = elements.size(); for (int i = 0; i < size; i++) { if (elements.get(i) == search) return i; } return 0; } /** Gets the first child of this Element that is an Element, or {@code null} if there is none. @return the first Element child node, or null. @see #firstChild() @see #lastElementChild() @since 1.15.2 */ public @Nullable Element firstElementChild() { Node child = firstChild(); while (child != null) { if (child instanceof Element) return (Element) child; child = child.nextSibling(); } return null; } /** Gets the last child of this Element that is an Element, or @{code null} if there is none. @return the last Element child node, or null. @see #lastChild() @see #firstElementChild() @since 1.15.2 */ public @Nullable Element lastElementChild() { Node child = lastChild(); while (child != null) { if (child instanceof Element) return (Element) child; child = child.previousSibling(); } return null; } // DOM type methods /** * Finds elements, including and recursively under this element, with the specified tag name. * @param tagName The tag name to search for (case insensitively). * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. 
*/
    public Elements getElementsByTag(String tagName) {
        Validate.notEmpty(tagName);
        // the evaluator is given the normalized form of the name
        String normalName = normalize(tagName);
        Evaluator byTag = new Evaluator.Tag(normalName);
        return Collector.collect(byTag, this);
    }

    /**
     * Find an element by ID, including or under this element.
     *

* Note that this finds the first matching ID, starting with this element. If you search down from a different
     * starting point, it is possible to find a different element by ID. For unique element by ID within a Document,
     * use {@link Document#getElementById(String)}
     * @param id The ID to search for.
     * @return The first matching element by ID, starting with this element, or null if none found.
     */
    public @Nullable Element getElementById(String id) {
        Validate.notEmpty(id);

        Elements matches = Collector.collect(new Evaluator.Id(id), this);
        // only the first hit is reported, even if the ID is duplicated below this element
        return matches.size() > 0 ? matches.get(0) : null;
    }

    /**
     * Find elements that have this class, including or under this element. Case-insensitive.
     *

* Elements can have multiple classes (e.g. {@code <div class="header round first">

}). This method * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. * * @param className the name of the class to search for. * @return elements with the supplied class name, empty if none * @see #hasClass(String) * @see #classNames() */ public Elements getElementsByClass(String className) { Validate.notEmpty(className); return Collector.collect(new Evaluator.Class(className), this); } /** * Find elements that have a named attribute set. Case-insensitive. * * @param key name of the attribute, e.g. {@code href} * @return elements that have this attribute, empty if none */ public Elements getElementsByAttribute(String key) { Validate.notEmpty(key); key = key.trim(); return Collector.collect(new Evaluator.Attribute(key), this); } /** * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements * that have HTML5 datasets. * @param keyPrefix name prefix of the attribute e.g. {@code data-} * @return elements that have attribute names that start with the prefix, empty if none. */ public Elements getElementsByAttributeStarting(String keyPrefix) { Validate.notEmpty(keyPrefix); keyPrefix = keyPrefix.trim(); return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); } /** * Find elements that have an attribute with the specific value. Case-insensitive. * * @param key name of the attribute * @param value value of the attribute * @return elements that have this attribute with this value, empty if none */ public Elements getElementsByAttributeValue(String key, String value) { return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); } /** * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive. 
* * @param key name of the attribute * @param value value of the attribute * @return elements that do not have a matching attribute */ public Elements getElementsByAttributeValueNot(String key, String value) { return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); } /** * Find elements that have attributes that start with the value prefix. Case-insensitive. * * @param key name of the attribute * @param valuePrefix start of attribute value * @return elements that have attributes that start with the value prefix */ public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); } /** * Find elements that have attributes that end with the value suffix. Case-insensitive. * * @param key name of the attribute * @param valueSuffix end of the attribute value * @return elements that have attributes that end with the value suffix */ public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); } /** * Find elements that have attributes whose value contains the match string. Case-insensitive. * * @param key name of the attribute * @param match substring of value to search for * @return elements that have attributes containing this text */ public Elements getElementsByAttributeValueContaining(String key, String match) { return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); } /** * Find elements that have an attribute whose value matches the supplied regular expression. 
* @param key name of the attribute * @param pattern compiled regular expression to match against attribute values * @return elements that have attributes matching this regular expression */ public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); } /** * Find elements that have attributes whose values match the supplied regular expression. * @param key name of the attribute * @param regex regular expression to match against attribute values. You can use embedded flags (such as (?i) and (?m) to control regex options. * @return elements that have attributes matching this regular expression */ public Elements getElementsByAttributeValueMatching(String key, String regex) { Pattern pattern; try { pattern = Pattern.compile(regex); } catch (PatternSyntaxException e) { throw new IllegalArgumentException("Pattern syntax error: " + regex, e); } return getElementsByAttributeValueMatching(key, pattern); } /** * Find elements whose sibling index is less than the supplied index. * @param index 0-based index * @return elements less than index */ public Elements getElementsByIndexLessThan(int index) { return Collector.collect(new Evaluator.IndexLessThan(index), this); } /** * Find elements whose sibling index is greater than the supplied index. * @param index 0-based index * @return elements greater than index */ public Elements getElementsByIndexGreaterThan(int index) { return Collector.collect(new Evaluator.IndexGreaterThan(index), this); } /** * Find elements whose sibling index is equal to the supplied index. * @param index 0-based index * @return elements equal to index */ public Elements getElementsByIndexEquals(int index) { return Collector.collect(new Evaluator.IndexEquals(index), this); } /** * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly * in the element, or in any of its descendants. 
* @param searchText to look for in the element's text * @return elements that contain the string, case-insensitive. * @see Element#text() */ public Elements getElementsContainingText(String searchText) { return Collector.collect(new Evaluator.ContainsText(searchText), this); } /** * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly * in the element, not in any of its descendants. * @param searchText to look for in the element's own text * @return elements that contain the string, case-insensitive. * @see Element#ownText() */ public Elements getElementsContainingOwnText(String searchText) { return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); } /** * Find elements whose text matches the supplied regular expression. * @param pattern regular expression to match text against * @return elements matching the supplied regular expression. * @see Element#text() */ public Elements getElementsMatchingText(Pattern pattern) { return Collector.collect(new Evaluator.Matches(pattern), this); } /** * Find elements whose text matches the supplied regular expression. * @param regex regular expression to match text against. You can use embedded flags (such as (?i) and (?m) to control regex options. * @return elements matching the supplied regular expression. * @see Element#text() */ public Elements getElementsMatchingText(String regex) { Pattern pattern; try { pattern = Pattern.compile(regex); } catch (PatternSyntaxException e) { throw new IllegalArgumentException("Pattern syntax error: " + regex, e); } return getElementsMatchingText(pattern); } /** * Find elements whose own text matches the supplied regular expression. * @param pattern regular expression to match text against * @return elements matching the supplied regular expression. 
* @see Element#ownText()
     */
    public Elements getElementsMatchingOwnText(Pattern pattern) {
        Evaluator matchesOwnText = new Evaluator.MatchesOwn(pattern);
        return Collector.collect(matchesOwnText, this);
    }

    /**
     * Find elements whose own text matches the supplied regular expression.
     * @param regex regular expression to match text against. You can use embedded flags (such as (?i) and (?m) to control regex options.
     * @return elements matching the supplied regular expression.
     * @throws IllegalArgumentException if the regex does not compile
     * @see Element#ownText()
     */
    public Elements getElementsMatchingOwnText(String regex) {
        final Pattern compiled;
        try {
            compiled = Pattern.compile(regex);
        } catch (PatternSyntaxException e) {
            // surface a bad pattern as an argument error, keeping the original cause
            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
        }
        return getElementsMatchingOwnText(compiled);
    }

    /**
     * Find all elements under this element (including self, and children of children).
     *
     * @return all elements
     */
    public Elements getAllElements() {
        Evaluator everything = new Evaluator.AllElements();
        return Collector.collect(everything, this);
    }

    /**
     Gets the normalized, combined text of this element and all its children. Whitespace is normalized and trimmed.

For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there now!"}

If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not children), use {@link #ownText()}

Note that this method returns the textual content that would be presented to a reader. The contents of data nodes (such as {@code