All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.itextpdf.styledxmlparser.jsoup.nodes.Element Maven / Gradle / Ivy

There is a newer version: 9.0.0
Show newest version
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2023 Apryse Group NV
    Authors: Apryse Software.

    This program is offered under a commercial and under the AGPL license.
    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.

    AGPL licensing:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package com.itextpdf.styledxmlparser.jsoup.nodes;

import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.styledxmlparser.jsoup.helper.ChangeNotifyingArrayList;
import com.itextpdf.styledxmlparser.jsoup.helper.Validate;
import com.itextpdf.styledxmlparser.jsoup.internal.Normalizer;
import com.itextpdf.styledxmlparser.jsoup.internal.StringUtil;
import com.itextpdf.styledxmlparser.jsoup.parser.ParseSettings;
import com.itextpdf.styledxmlparser.jsoup.parser.Tag;
import com.itextpdf.styledxmlparser.jsoup.select.Collector;
import com.itextpdf.styledxmlparser.jsoup.select.Elements;
import com.itextpdf.styledxmlparser.jsoup.select.Evaluator;
import com.itextpdf.styledxmlparser.jsoup.select.NodeFilter;
import com.itextpdf.styledxmlparser.jsoup.select.NodeTraversor;
import com.itextpdf.styledxmlparser.jsoup.select.NodeVisitor;
import com.itextpdf.styledxmlparser.jsoup.select.QueryParser;
import com.itextpdf.styledxmlparser.jsoup.select.Selector;

import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * A HTML element consists of a tag name, attributes, and child nodes (including text nodes and
 * other elements).
 * 
 * From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
 * 
 * @author Jonathan Hedley, jonathan@hedley.net
 */
public class Element extends Node {
    private static final List EmptyChildren = Collections.emptyList();
    private static final Pattern ClassSplit = Pattern.compile("\\s+");
    private static final String BaseUriKey = Attributes.internalKey("baseUri");
    private Tag tag;
    private WeakReference> shadowChildrenRef; // points to child elements shadowed from node children
    List childNodes;
    private Attributes attributes; // field is nullable but all methods for attributes are non null

    /**
     * Create a new, standalone element.
     * <p>
     * Delegates to {@link #Element(Tag, String, Attributes)} with the tag resolved through
     * {@link Tag#valueOf(String)}, an empty base URI and no initial attributes.
     *
     * @param tag tag name
     */
    public Element(String tag) {
        this(Tag.valueOf(tag), "", null);
    }

    /**
     * Builds a standalone Element, i.e. one that has no parent yet.
     *
     * @param tag tag of this element; checked with {@code Validate.notNull}
     * @param baseUri the base URI (optional). When {@code null} no base URI is recorded on this element, so it is
     * inherited from the parent; pass {@code ""} to clear the parent's
     * @param attributes initial attributes (optional, may be null)
     * @see #appendChild(Node)
     * @see #appendElement(String)
     */
    public Element(Tag tag, String baseUri, Attributes attributes) {
        Validate.notNull(tag);
        this.tag = tag;
        // attributes must be assigned before the base URI is stored, as the base URI is kept as an attribute
        this.attributes = attributes;
        this.childNodes = EmptyNodes;
        if (baseUri != null) {
            setBaseUri(baseUri);
        }
    }

    /**
     * Create a new Element from a Tag and a base URI.
     * <p>
     * Equivalent to calling {@link #Element(Tag, String, Attributes)} with {@code null} attributes.
     *
     * @param tag element tag
     * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any.
     * @see Tag#valueOf(String, ParseSettings)
     */
    public Element(Tag tag, String baseUri) {
        this(tag, baseUri, null);
    }

    /**
     * Internal test to check whether a dedicated child node list has been created for this element,
     * i.e. whether {@code childNodes} no longer refers to the shared {@code EmptyNodes} placeholder.
     *
     * @return {@code true} once {@code childNodes} is something other than {@code EmptyNodes}
     */
    protected boolean hasChildNodes() {
        final boolean stillPlaceholder = childNodes == EmptyNodes;
        return !stillPlaceholder;
    }

    /**
     * Returns the child node list, first swapping the shared {@code EmptyNodes} placeholder for a fresh
     * {@code NodeList} owned by this element if that has not happened yet.
     *
     * @return this element's child node list
     */
    protected List ensureChildNodes() {
        if (childNodes != EmptyNodes) {
            return childNodes;
        }
        childNodes = new NodeList(this, 4);
        return childNodes;
    }

    /**
     * Reports whether an {@link Attributes} object has been allocated for this element.
     *
     * @return {@code true} if the {@code attributes} field is non-null; this alone does not tell whether any
     * attribute is actually set
     */
    @Override
    protected boolean hasAttributes() {
        return attributes != null;
    }

    /**
     * Get this element's attributes, creating an empty {@link Attributes} object on first access.
     *
     * @return the attributes of this element, never {@code null}
     */
    @Override
    public Attributes attributes() {
        // Test the field directly rather than via hasAttributes(), as that doesn't clear the nullability warning.
        if (attributes != null) {
            return attributes;
        }
        attributes = new Attributes();
        return attributes;
    }

    /**
     * Get the base URI that applies to this element.
     *
     * @return the value stored under the internal base URI key on this element or, failing that, on its nearest
     * ancestor that has one; an empty string if none is found
     */
    @Override
    public String baseUri() {
        return searchUpForAttribute(this, BaseUriKey);
    }

    /**
     * Walks from {@code start} up through its parents and returns the value of the first attribute found under
     * {@code key}.
     *
     * @param start element to begin the search at (may be null, in which case nothing is searched)
     * @param key attribute key to look for
     * @return the attribute value from the closest element that has the key, or an empty string if none has it
     */
    private static String searchUpForAttribute(final Element start, final String key) {
        for (Element current = start; current != null; current = (Element) current.parent()) {
            final Attributes attrs = current.attributes;
            if (attrs != null && attrs.hasKey(key)) {
                return attrs.get(key);
            }
        }
        return "";
    }

    /**
     * Stores the base URI on this element as an attribute under the internal base URI key, creating the
     * attributes object first if needed.
     *
     * @param baseUri the base URI to record
     */
    @Override
    protected void doSetBaseUri(String baseUri) {
        attributes().put(BaseUriKey, baseUri);
    }

    /**
     * Get the number of child nodes held by this element.
     *
     * @return the size of the {@code childNodes} list
     */
    @Override
    public int childNodeSize() {
        return childNodes.size();
    }

    /**
     * Get the node name of this element, which is the name of its tag.
     *
     * @return the tag's name
     */
    @Override
    public String nodeName() {
        return tag.getName();
    }

    /**
     * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase
     * case preserving parsing}, this will return the source's original case.
     * <p>
     * Returns the same value as {@link #nodeName()}.
     *
     * @return the tag name
     */
    public String tagName() {
        return tag.getName();
    }

    /**
     * Get the normalized name of this Element's tag. This will always be the lowercased version of the tag, regardless
     * of the tag case preserving setting of the parser. For e.g., {@code 
} and {@code
} both have a * normal name of {@code div}. * @return normal name */ public String normalName() { return tag.normalName(); } /** * Change (rename) the tag of this element. For example, convert a {@code } to a {@code
} with * {@code el.tagName("div");}. * * @param tagName new tag name for this element * @return this element, for chaining * @see Elements#tagName(String) */ public Element tagName(String tagName) { Validate.notEmpty(tagName, "Tag name must not be empty."); tag = Tag.valueOf(tagName, NodeUtils.parser(this).settings()); // maintains the case option of the original parse return this; } /** * Get the Tag for this element. * * @return the tag object */ public Tag tag() { return tag; } /** * Test if this element is a block-level element. (E.g. {@code
== true} or an inline element * {@code == false}). * * @return true if block, false if not (and thus inline) */ public boolean isBlock() { return tag.isBlock(); } /** * Get the {@code id} attribute of this element. * * @return The id attribute, if present, or an empty string if not. */ public String id() { return attributes != null ? attributes.getIgnoreCase("id") :""; } /** Set the {@code id} attribute of this element. @param id the ID value to use @return this Element, for chaining */ public Element id(String id) { Validate.notNull(id); attr("id", id); return this; } /** * Set an attribute value on this element. If this element already has an attribute with the * key, its value is updated; otherwise, a new attribute is added. * * @return this element */ public Node attr(String attributeKey, String attributeValue) { super.attr(attributeKey, attributeValue); return this; } /** * Set a boolean attribute value on this element. Setting to true sets the attribute value to "" and * marks the attribute as boolean so no value is written out. Setting to false removes the attribute * with the same key if it exists. * * @param attributeKey the attribute key * @param attributeValue the attribute value * * @return this element */ public Element attr(String attributeKey, boolean attributeValue) { attributes().put(attributeKey, attributeValue); return this; } /** * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key * starting with "data-" is included the dataset. *

* E.g., the element {@code

...} has the dataset * {@code package=jsoup, language=java}. *

* This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected * in the other map. *

* You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector. * @return a map of {@code key=value} custom data attributes. */ public Map dataset() { return attributes().dataset(); } @Override public final Node parent() { return parentNode; } /** * Get this element's parent and ancestors, up to the document root. * @return this element's stack of parents, closest first. */ public Elements parents() { Elements parents = new Elements(); accumulateParents(this, parents); return parents; } private static void accumulateParents(Element el, Elements parents) { Element parent = (Element) el.parent(); if (parent != null && !parent.tagName().equals("#root")) { parents.add(parent); accumulateParents(parent, parents); } } /** * Get a child element of this element, by its 0-based index number. *

* Note that an element can have both mixed Nodes and Elements as children. This method inspects * a filtered list of children that are elements, and the index is based on that filtered list. * * @param index the index number of the element to retrieve * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException} * @see #childNode(int) */ public Element child(int index) { return childElementsList().get(index); } /** * Get the number of child nodes of this element that are elements. *

* This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.) * * @return the number of child nodes that are elements * @see #children() * @see #child(int) */ public int childrenSize() { return childElementsList().size(); } /** * Get this element's child elements. *

* This is effectively a filter on {@link #childNodes()} to get Element nodes. * * @return child elements. If this element has no children, returns an empty list. * @see #childNodes() */ public Elements children() { return new Elements(childElementsList()); } /** * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated. * * @return a list of child elements */ List childElementsList() { if (childNodeSize() == 0) return EmptyChildren; // short circuit creating empty List children; if (shadowChildrenRef == null || (children = (List) shadowChildrenRef.get()) == null) { final int size = childNodes.size(); children = new ArrayList<>(size); //noinspection ForLoopReplaceableByForEach (beacause it allocates an Iterator which is wasteful here) for (int i = 0; i < size; i++) { final Node node = childNodes.get(i); if (node instanceof Element) children.add((Element) node); } shadowChildrenRef = new WeakReference<>(children); } return children; } /** * Clears the cached shadow child elements. */ @Override void nodelistChanged() { super.nodelistChanged(); shadowChildrenRef = null; } /** * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. *

* This is effectively a filter on {@link #childNodes()} to get Text nodes.
     * <p>
     * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p}
     * element selected:
     * <ul>
     *     <li>{@code p.text()} = {@code "One Two Three Four"}</li>
     *     <li>{@code p.ownText()} = {@code "One Three Four"}</li>
     *     <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
     *     <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
     *     <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
     * </ul>
     *
     * @return child text nodes. If this element has no text nodes, returns an
     * empty list.
     */
    public List<TextNode> textNodes() {
        List<TextNode> textNodes = new ArrayList<>();
        for (Node node : childNodes) {
            if (node instanceof TextNode)
                textNodes.add((TextNode) node);
        }
        return Collections.unmodifiableList(textNodes);
    }

    /**
     * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
     * <p>
     * This is effectively a filter on {@link #childNodes()} to get Data nodes.
     *
     * @return child data nodes. If this element has no data nodes, returns an
     * empty list.
     * @see #data()
     */
    public List<DataNode> dataNodes() {
        List<DataNode> dataNodes = new ArrayList<>();
        for (Node node : childNodes) {
            if (node instanceof DataNode)
                dataNodes.add((DataNode) node);
        }
        return Collections.unmodifiableList(dataNodes);
    }

    /**
     * Find elements that match the {@link Selector} CSS query, with this element as the starting context.
     * Matched elements may include this element, or any of its children.
     * <p>

This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because * multiple filters can be combined, e.g.: * *

    *
  • {@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes) *
  • {@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely) *
* *

* See the query syntax documentation in {@link com.itextpdf.styledxmlparser.jsoup.select.Selector}. *

* Also known as {@code querySelectorAll()} in the Web DOM. * * @param cssQuery a {@link Selector} CSS-like query * @return an {@link Elements} list containing elements that match the query (empty if none match) * @see Selector selector query syntax * @see QueryParser#parse(String) * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. */ public Elements select(String cssQuery) { return Selector.select(cssQuery, this); } /** * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but * may be useful if you are running the same query many times (on many documents) and want to save the overhead of * repeatedly parsing the CSS query. * @param evaluator an element evaluator * @return an {@link Elements} list containing elements that match the query (empty if none match) */ public Elements select(Evaluator evaluator) { return Selector.select(evaluator, this); } /** * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context. *

* This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query * execution stops on the first hit. *

* Also known as {@code querySelector()} in the Web DOM. * @param cssQuery cssQuery a {@link Selector} CSS-like query * @return the first matching element, or {@code null} if there is no match. */ public Element selectFirst(String cssQuery) { return Selector.selectFirst(cssQuery, this); } /** * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or * {@code null} if none match. * * @param evaluator an element evaluator * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none * matchn. */ public Element selectFirst(Evaluator evaluator) { return Collector.findFirst(evaluator, this); } /** * Checks if this element matches the given {@link Selector} CSS query. Also knows as {@code matches()} in the Web * DOM. * * @param cssQuery a {@link Selector} CSS query * @return if this element matches the query */ public boolean is(String cssQuery) { return is(QueryParser.parse(cssQuery)); } /** * Check if this element matches the given evaluator. * @param evaluator an element evaluator * @return if this element matches */ public boolean is(Evaluator evaluator) { return evaluator.matches((Element) this.root(), this); } /** * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an * ancestor, or {@code null} if there is no such matching element. * @param cssQuery a {@link Selector} CSS query * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not * found. */ public Element closest(String cssQuery) { return closest(QueryParser.parse(cssQuery)); } /** * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an * ancestor, or {@code null} if there is no such matching element. * @param evaluator a query evaluator * @return the closest ancestor element (possibly itself) that matches the provided evaluator. 
{@code null} if not * found. */ public Element closest(Evaluator evaluator) { Validate.notNull(evaluator); Element el = this; final Element root = (Element) root(); do { if (evaluator.matches(root, el)) return el; el = (Element) el.parent(); } while (el != null); return null; } /** * Insert a node to the end of this Element's children. The incoming node will be re-parented. * * @param child node to add. * @return this Element, for chaining * @see #prependChild(Node) * @see #insertChildren(int, Collection) */ public Element appendChild(Node child) { Validate.notNull(child); // was - Node#addChildren(child). short-circuits an array create and a loop. reparentChild(child); ensureChildNodes(); childNodes.add(child); child.setSiblingIndex(childNodes.size() - 1); return this; } /** * Insert the given nodes to the end of this Element's children. * * @param children nodes to add * @return this Element, for chaining * @see #insertChildren(int, Collection) */ public Element appendChildren(Collection children) { insertChildren(-1, children); return this; } /** * Add this element to the supplied parent element, as its next child. * * @param parent element to which this element will be appended * @return this element, so that you can continue modifying the element */ public Element appendTo(Element parent) { Validate.notNull(parent); parent.appendChild(this); return this; } /** * Add a node to the start of this element's children. * * @param child node to add. * @return this element, so that you can add more child nodes or elements. */ public Element prependChild(Node child) { Validate.notNull(child); addChildren(0, child); return this; } /** * Insert the given nodes to the start of this Element's children. 
* * @param children nodes to add * @return this Element, for chaining * @see #insertChildren(int, Collection) */ public Element prependChildren(Collection children) { insertChildren(0, children); return this; } /** * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. * * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the * end * @param children child nodes to insert * @return this element, for chaining. */ public Element insertChildren(int index, Collection children) { Validate.notNull(children, "Children collection to be inserted must not be null."); int currentSize = childNodeSize(); if (index < 0) index += currentSize +1; // roll around Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); ArrayList nodes = new ArrayList<>(children); Node[] nodeArray = nodes.toArray(new Node[0]); addChildren(index, nodeArray); return this; } /** * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. * * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the * end * @param children child nodes to insert * @return this element, for chaining. */ public Element insertChildren(int index, Node... children) { Validate.notNull(children, "Children collection to be inserted must not be null."); int currentSize = childNodeSize(); if (index < 0) index += currentSize +1; // roll around Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); addChildren(index, children); return this; } /** * Inserts the given child node into this element at the specified index. 
Current node will be shifted to the * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the node first. * * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the * end * @param child child node to insert * @return this element, for chaining. */ public Element insertChild(int index, Node child) { return insertChildren(index, child); } /** * Create a new element by tag name, and add it as the last child. * * @param tagName the name of the tag (e.g. {@code div}). * @return the new element, to allow you to add content to it, e.g.: * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} */ public Element appendElement(String tagName) { Element child = new Element(Tag.valueOf(tagName, NodeUtils.parser(this).settings()), baseUri()); appendChild(child); return child; } /** * Create a new element by tag name, and add it as the first child. * * @param tagName the name of the tag (e.g. {@code div}). * @return the new element, to allow you to add content to it, e.g.: * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} */ public Element prependElement(String tagName) { Element child = new Element(Tag.valueOf(tagName, NodeUtils.parser(this).settings()), baseUri()); prependChild(child); return child; } /** * Create and append a new TextNode to this element. * * @param text the unencoded text to add * @return this element */ public Element appendText(String text) { Validate.notNull(text); TextNode node = new TextNode(text); appendChild(node); return this; } /** * Create and prepend a new TextNode to this element. * * @param text the unencoded text to add * @return this element */ public Element prependText(String text) { Validate.notNull(text); TextNode node = new TextNode(text); prependChild(node); return this; } /** * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. 
* @param html HTML to add inside this element, after the existing HTML * @return this element * @see #html(String) */ public Element append(String html) { Validate.notNull(html); List nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); addChildren(nodes.toArray(new Node[0])); return this; } /** * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. * @param html HTML to add inside this element, before the existing HTML * @return this element * @see #html(String) */ public Element prepend(String html) { Validate.notNull(html); List nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); addChildren(0, nodes.toArray(new Node[0])); return this; } /** * Insert the specified HTML into the DOM before this element (as a preceding sibling). * * @param html HTML to add before this element * @return this element, for chaining * @see #after(String) */ @Override public Node before(String html) { return super.before(html); } /** * Insert the specified node into the DOM before this node (as a preceding sibling). * @param node to add before this element * @return this Element, for chaining * @see #after(Node) */ @Override public Node before(Node node) { return super.before(node); } /** * Insert the specified HTML into the DOM after this element (as a following sibling). * * @param html HTML to add after this element * @return this element, for chaining * @see #before(String) */ @Override public Node after(String html) { return super.after(html); } /** * Insert the specified node into the DOM after this node (as a following sibling). * @param node to add after this element * @return this element, for chaining * @see #before(Node) */ @Override public Node after(Node node) { return super.after(node); } /** * Remove all of the element's child nodes. Any attributes are left as-is. 
* @return this element */ @Override public Node empty() { childNodes.clear(); return this; } /** * Wrap the supplied HTML around this element. * * @param html HTML to wrap around this element, e.g. {@code

}. Can be arbitrarily deep. * @return this element, for chaining. */ @Override public Node wrap(String html) { return super.wrap(html); } /** * Get a CSS selector that will uniquely select this element. *

* If the element has an ID, returns #id; * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, * followed by a unique selector for the element (tag.class.class:nth-child(n)). * * @return the CSS Path that can be used to retrieve the element in a selector. */ public String cssSelector() { if (id().length() > 0) { // prefer to return the ID - but check that it's actually unique first! String idSel = "#" + id(); Document doc = ownerDocument(); if (doc != null) { Elements els = doc.select(idSel); if (els.size() == 1 && els.get(0) == this) // otherwise, continue to the nth-child impl return idSel; } else { return idSel; // no ownerdoc, return the ID selector } } // Translate HTML namespace ns:tag to CSS namespace syntax ns|tag String tagName = tagName().replace(':', '|'); StringBuilder selector = new StringBuilder(tagName); String classes = StringUtil.join(classNames(), "."); if (classes.length() > 0) selector.append('.').append(classes); if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node return selector.toString(); selector.insert(0, " > "); Element parent = (Element) parent(); if (parent.select(selector.toString()).size() > 1) selector.append(MessageFormatUtil.format(":nth-child({0})", elementSiblingIndex() + 1)); return parent.cssSelector() + selector.toString(); } /** * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling * of itself, so will not be included in the returned list. * @return sibling elements */ public Elements siblingElements() { if (parentNode == null) return new Elements(0); Element parent = (Element) parent(); List elements = parent.childElementsList(); Elements siblings = new Elements(elements.size() - 1); for (Element el: elements) if (el != this) siblings.add(el); return siblings; } /** * Gets the next sibling element of this element. 
E.g., if a {@code div} contains two {@code p}s, * the {@code nextElementSibling} of the first {@code p} is the second {@code p}. *

* This is similar to {@link #nextSibling()}, but specifically finds only Elements * * @return the next element, or null if there is no next element * @see #previousElementSibling() */ public Element nextElementSibling() { if (parentNode == null) return null; Element parent = (Element) parent(); List siblings = parent.childElementsList(); int index = indexInList(this, siblings); if (siblings.size() > index+1) return siblings.get(index+1); else return null; } /** * Get each of the sibling elements that come after this element. * * @return each of the element siblings after this element, or an empty list if there are no next sibling elements */ public Elements nextElementSiblings() { return nextElementSiblings(true); } /** * Gets the previous element sibling of this element. * @return the previous element, or null if there is no previous element * @see #nextElementSibling() */ public Element previousElementSibling() { if (parentNode == null) return null; Element parent = (Element) parent(); List siblings = parent.childElementsList(); int index = indexInList(this, siblings); if (index > 0) return siblings.get(index-1); else return null; } /** * Get each of the element siblings before this element. * * @return the previous element siblings, or an empty list if there are none. */ public Elements previousElementSiblings() { return nextElementSiblings(false); } private Elements nextElementSiblings(boolean next) { Elements els = new Elements(); if (parentNode == null) return els; els.add(this); return next ? els.nextAll() : els.prevAll(); } /** * Gets the first Element sibling of this element. That may be this element. * @return the first sibling that is an element (aka the parent's first element child) */ public Element firstElementSibling() { Element parent = (Element) parent(); if (parent != null) { List siblings = parent.childElementsList(); return siblings.size() > 1 ? 
siblings.get(0) : this; } else return this; // orphan is its own first sibling } /** * Get the list index of this element in its element sibling list. I.e. if this is the first element * sibling, returns 0. * @return position in element sibling list */ public int elementSiblingIndex() { Element parent = (Element) parent(); if (parent == null) return 0; return indexInList(this, parent.childElementsList()); } /** * Gets the last element sibling of this element. That may be this element. * @return the last sibling that is an element (aka the parent's last element child) */ public Element lastElementSibling() { Element parent = (Element) parent(); if (parent != null) { List siblings = parent.childElementsList(); return siblings.size() > 1 ? siblings.get(siblings.size() - 1) : this; } else return this; } private static int indexInList(Element search, List elements) { final int size = elements.size(); for (int i = 0; i < size; i++) { if (elements.get(i) == search) return i; } return 0; } // DOM type methods /** * Finds elements, including and recursively under this element, with the specified tag name. * @param tagName The tag name to search for (case insensitively). * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. */ public Elements getElementsByTag(String tagName) { Validate.notEmpty(tagName); tagName = Normalizer.normalize(tagName); return Collector.collect(new Evaluator.Tag(tagName), this); } /** * Find an element by ID, including or under this element. *

* Note that this finds the first matching ID, starting with this element. If you search down from a different * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, * use {@link Document#getElementById(String)} * @param id The ID to search for. * @return The first matching element by ID, starting with this element, or null if none found. */ public Element getElementById(String id) { Validate.notEmpty(id); Elements elements = Collector.collect(new Evaluator.Id(id), this); if (elements.size() > 0) return elements.get(0); else return null; } /** * Find elements that have this class, including or under this element. Case insensitive. *

* Elements can have multiple classes (e.g. {@code

}. This method * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. * * @param className the name of the class to search for. * @return elements with the supplied class name, empty if none * @see #hasClass(String) * @see #classNames() */ public Elements getElementsByClass(String className) { Validate.notEmpty(className); return Collector.collect(new Evaluator.Class(className), this); } /** * Find elements that have a named attribute set. Case insensitive. * * @param key name of the attribute, e.g. {@code href} * @return elements that have this attribute, empty if none */ public Elements getElementsByAttribute(String key) { Validate.notEmpty(key); key = key.trim(); return Collector.collect(new Evaluator.Attribute(key), this); } /** * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements * that have HTML5 datasets. * @param keyPrefix name prefix of the attribute e.g. {@code data-} * @return elements that have attribute names that start with with the prefix, empty if none. */ public Elements getElementsByAttributeStarting(String keyPrefix) { Validate.notEmpty(keyPrefix); keyPrefix = keyPrefix.trim(); return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); } /** * Find elements that have an attribute with the specific value. Case insensitive. * * @param key name of the attribute * @param value value of the attribute * @return elements that have this attribute with this value, empty if none */ public Elements getElementsByAttributeValue(String key, String value) { return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); } /** * Find elements that either do not have this attribute, or have it with a different value. Case insensitive. 
* * @param key name of the attribute * @param value value of the attribute * @return elements that do not have a matching attribute */ public Elements getElementsByAttributeValueNot(String key, String value) { return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); } /** * Find elements that have attributes that start with the value prefix. Case insensitive. * * @param key name of the attribute * @param valuePrefix start of attribute value * @return elements that have attributes that start with the value prefix */ public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); } /** * Find elements that have attributes that end with the value suffix. Case insensitive. * * @param key name of the attribute * @param valueSuffix end of the attribute value * @return elements that have attributes that end with the value suffix */ public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); } /** * Find elements that have attributes whose value contains the match string. Case insensitive. * * @param key name of the attribute * @param match substring of value to search for * @return elements that have attributes containing this text */ public Elements getElementsByAttributeValueContaining(String key, String match) { return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); } /** * Find elements that have attributes whose values match the supplied regular expression. 
* @param key name of the attribute * @param pattern compiled regular expression to match against attribute values * @return elements that have attributes matching this regular expression */ public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); } /** * Find elements that have attributes whose values match the supplied regular expression. * * @param key name of the attribute * @param regex regular expression to match against attribute values. * You can use embedded flags * (such as (?i) and (?m) to control regex options. * @return elements that have attributes matching this regular expression */ public Elements getElementsByAttributeValueMatching(String key, String regex) { Pattern pattern; try { pattern = Pattern.compile(regex); } catch (PatternSyntaxException e) { throw new IllegalArgumentException("Pattern syntax error: " + regex, e); } return getElementsByAttributeValueMatching(key, pattern); } /** * Find elements whose sibling index is less than the supplied index. * @param index 0-based index * @return elements less than index */ public Elements getElementsByIndexLessThan(int index) { return Collector.collect(new Evaluator.IndexLessThan(index), this); } /** * Find elements whose sibling index is greater than the supplied index. * @param index 0-based index * @return elements greater than index */ public Elements getElementsByIndexGreaterThan(int index) { return Collector.collect(new Evaluator.IndexGreaterThan(index), this); } /** * Find elements whose sibling index is equal to the supplied index. * @param index 0-based index * @return elements equal to index */ public Elements getElementsByIndexEquals(int index) { return Collector.collect(new Evaluator.IndexEquals(index), this); } /** * Find elements that contain the specified string. The search is case insensitive. The text may appear directly * in the element, or in any of its descendants. 
* @param searchText to look for in the element's text * @return elements that contain the string, case insensitive. * @see Element#text() */ public Elements getElementsContainingText(String searchText) { return Collector.collect(new Evaluator.ContainsText(searchText), this); } /** * Find elements that directly contain the specified string. The search is case insensitive. The text must appear directly * in the element, not in any of its descendants. * @param searchText to look for in the element's own text * @return elements that contain the string, case insensitive. * @see Element#ownText() */ public Elements getElementsContainingOwnText(String searchText) { return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); } /** * Find elements whose text matches the supplied regular expression. * @param pattern regular expression to match text against * @return elements matching the supplied regular expression. * @see Element#text() */ public Elements getElementsMatchingText(Pattern pattern) { return Collector.collect(new Evaluator.Matches(pattern), this); } /** * Find elements whose text matches the supplied regular expression. * * @param regex regular expression to match text against. * You can use embedded flags * (such as (?i) and (?m) to control regex options. * @return elements matching the supplied regular expression. * @see Element#text() */ public Elements getElementsMatchingText(String regex) { Pattern pattern; try { pattern = Pattern.compile(regex); } catch (PatternSyntaxException e) { throw new IllegalArgumentException("Pattern syntax error: " + regex, e); } return getElementsMatchingText(pattern); } /** * Find elements whose own text matches the supplied regular expression. * @param pattern regular expression to match text against * @return elements matching the supplied regular expression. 
* @see Element#ownText()
     */
    public Elements getElementsMatchingOwnText(Pattern pattern) {
        Evaluator ownTextMatches = new Evaluator.MatchesOwn(pattern);
        return Collector.collect(ownTextMatches, this);
    }

    /**
     * Collects the elements whose own text matches the given regular expression. The expression is
     * compiled here and the search is delegated to the {@link Pattern} overload.
     *
     * @param regex regular expression, in source form, to test the own text against.
     *              Embedded flags (such as {@code (?i)} and {@code (?m)}) can be used to control regex options.
     * @return the elements matching the supplied regular expression.
     * @throws IllegalArgumentException if {@code regex} is not a valid regular expression
     * @see Element#ownText()
     */
    public Elements getElementsMatchingOwnText(String regex) {
        final Pattern compiled;
        try {
            compiled = Pattern.compile(regex);
        } catch (PatternSyntaxException syntaxError) {
            // Surface a bad expression as an argument error while keeping the original cause attached.
            throw new IllegalArgumentException("Pattern syntax error: " + regex, syntaxError);
        }

        return getElementsMatchingOwnText(compiled);
    }

    /**
     * Collects every element under this element (including self, and children of children).
     *
     * @return all elements
     */
    public Elements getAllElements() {
        Evaluator everything = new Evaluator.AllElements();
        return Collector.collect(everything, this);
    }

    /**
     * Gets the normalized, combined text of this element and all its children. Whitespace is normalized and
     * trimmed.
     *

* <p>For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns
* {@code "Hello there now!"}
* <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not
* children), use {@link #ownText()}
* <p>
Note that this method returns the textual content that would be presented to a reader. The contents of data * nodes (such as {@code