All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.itextpdf.styledxmlparser.jsoup.nodes.Element Maven / Gradle / Ivy

There is a newer version: 9.0.0
Show newest version
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2021 iText Group NV
    Authors: iText Software.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License version 3
    as published by the Free Software Foundation with the addition of the
    following permission added to Section 15 as permitted in Section 7(a):
    FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
    ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
    OF THIRD PARTY RIGHTS

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE.
    See the GNU Affero General Public License for more details.
    You should have received a copy of the GNU Affero General Public License
    along with this program; if not, see http://www.gnu.org/licenses or write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA, 02110-1301 USA, or download the license from the following URL:
    http://itextpdf.com/terms-of-use/

    The interactive user interfaces in modified source and object code versions
    of this program must display Appropriate Legal Notices, as required under
    Section 5 of the GNU Affero General Public License.

    In accordance with Section 7(b) of the GNU Affero General Public License,
    a covered work must retain the producer line in every PDF that is created
    or manipulated using iText.

    You can be released from the requirements of the license by purchasing
    a commercial license. Buying such a license is mandatory as soon as you
    develop commercial activities involving the iText software without
    disclosing the source code of your own applications.
    These activities include: offering paid services to customers as an ASP,
    serving PDFs on the fly in a web application, shipping iText with a closed
    source product.

    For more information, please contact iText Software Corp. at this
    address: sales@itextpdf.com
 */
package com.itextpdf.styledxmlparser.jsoup.nodes;

import com.itextpdf.styledxmlparser.jsoup.helper.Validate;
import com.itextpdf.styledxmlparser.jsoup.helper.StringUtil;
import com.itextpdf.styledxmlparser.jsoup.parser.Parser;
import com.itextpdf.styledxmlparser.jsoup.parser.Tag;
import com.itextpdf.styledxmlparser.jsoup.select.Collector;
import com.itextpdf.styledxmlparser.jsoup.select.Elements;
import com.itextpdf.styledxmlparser.jsoup.select.Evaluator;
import com.itextpdf.styledxmlparser.jsoup.select.NodeTraversor;
import com.itextpdf.styledxmlparser.jsoup.select.NodeVisitor;
import com.itextpdf.styledxmlparser.jsoup.select.Selector;

import java.io.IOException;
import com.itextpdf.io.util.MessageFormatUtil;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * An HTML element consists of a tag name, attributes, and child nodes (including text nodes and
 * other elements).
 *
 * From an Element, you can extract data, traverse the node graph, and manipulate the HTML.
 *
 * @author Jonathan Hedley, jonathan@hedley.net
 */
public class Element extends Node {
    private Tag tag;

    private static final Pattern classSplit = Pattern.compile("\\s+");

    /**
     * Create a new, standalone Element. (Standalone in that it has no parent.)
     *
     * @param tag tag of this element; rejected by {@code Validate.notNull} when null
     * @param baseUri the base URI, handed to the superclass constructor
     * @param attributes initial attributes, handed to the superclass constructor
     * @see #appendChild(Node)
     * @see #appendElement(String)
     */
    public Element(Tag tag, String baseUri, Attributes attributes) {
        super(baseUri, attributes);

        // The superclass constructor call must come first, so the tag is only checked afterwards.
        Validate.notNull(tag);
        this.tag = tag;
    }

    /**
     * Create a new Element from a tag and a base URI. The element starts with a fresh, empty
     * {@code Attributes} instance.
     *
     * @param tag element tag
     * @param baseUri the base URI of this element. It is acceptable for the base URI to be an empty
     *            string, but not null.
     * @see Tag#valueOf(String)
     */
    public Element(Tag tag, String baseUri) {
        this(tag, baseUri, new Attributes());
    }

    /**
     * Get the node name of this element, which is the name of its tag.
     *
     * @return the name reported by this element's tag
     */
    @Override
    public String nodeName() {
        return tag.getName();
    }

    /**
     * Get the name of the tag for this element. E.g. {@code div}. Returns the same value as
     * {@link #nodeName()}.
     *
     * @return the tag name
     */
    public String tagName() {
        return tag.getName();
    }

    /**
     * Change the tag of this element. For example, convert a {@code <span>} to a {@code <div>} with
     * {@code el.tagName("div");}.
     *
     * @param tagName new tag name for this element
     * @return this element, for chaining
     */
    public Element tagName(String tagName) {
        Validate.notEmpty(tagName, "Tag name must not be empty.");
        this.tag = Tag.valueOf(tagName);
        return this;
    }

    /**
     * Get the Tag for this element.
     *
     * @return the tag object
     */
    public Tag tag() {
        return tag;
    }

    /**
     * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
     * {@code <p> == false}).
     *
     * @return true if block, false if not (and thus inline)
     */
    public boolean isBlock() {
        return tag.isBlock();
    }

    /**
     * Get the {@code id} attribute of this element.
     *
     * @return The id attribute, if present, or an empty string if not.
     */
    public String id() {
        return attributes.get("id");
    }

    /**
     * Set an attribute value on this element. If this element already has an attribute with the
     * key, its value is updated; otherwise, a new attribute is added.
     *
     * @param attributeKey the attribute key
     * @param attributeValue the attribute value
     * @return this element
     */
    public Node attr(String attributeKey, String attributeValue) {
        super.attr(attributeKey, attributeValue);
        return this;
    }

    /**
     * Set a boolean attribute value on this element. Setting to true sets the attribute value to "" and
     * marks the attribute as boolean so no value is written out. Setting to false removes the attribute
     * with the same key if it exists.
     *
     * @param attributeKey the attribute key
     * @param attributeValue the attribute value
     *
     * @return this element
     */
    public Element attr(String attributeKey, boolean attributeValue) {
        attributes.put(attributeKey, attributeValue);
        return this;
    }

    /**
     * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
     * starting with "data-" is included the dataset.
     * <p>
* E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
     * {@code package=jsoup, language=java}.
     * <p>
     * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
     * in the other map.
     * <p>
     * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
     * @return a map of {@code key=value} custom data attributes.
     */
    public Map<String, String> dataset() {
        return attributes.dataset();
    }

    /**
     * Get the parent node of this element.
     *
     * @return the parent node, or null if this element has no parent
     */
    @Override
    public final Node parent() {
        return parentNode;
    }

    /**
     * Get this element's parent and ancestors, up to the document root.
     * @return this element's stack of parents, closest first.
     */
    public Elements parents() {
        Elements parents = new Elements();
        accumulateParents(this, parents);
        return parents;
    }

    /**
     * Appends the ancestors of {@code el} to {@code parents}, closest first. The walk stops at the
     * first missing parent or at a parent whose tag name is {@code #root}, which is not added.
     */
    private static void accumulateParents(Element el, Elements parents) {
        Element parent = (Element) el.parent();
        if (parent != null && !parent.tagName().equals("#root")) {
            parents.add(parent);
            accumulateParents(parent, parents);
        }
    }

    /**
     * Get a child element of this element, by its 0-based index number.
     * <p>
     * Note that an element can have both mixed Nodes and Elements as children. This method inspects
     * a filtered list of children that are elements, and the index is based on that filtered list.
     *
     * @param index the index number of the element to retrieve
     * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException}
     * @see #childNode(int)
     */
    public Element child(int index) {
        return children().get(index);
    }

    /**
     * Get this element's child elements.
     * <p>
* This is effectively a filter on {@link #childNodes()} to get Element nodes. * * @return child elements. If this element has no children, returns an * empty list. * @see #childNodes() */ public Elements children() { // create on the fly rather than maintaining two lists. if gets slow, memoize, and mark dirty on change List elements = new ArrayList(childNodes.size()); for (Node node : childNodes) { if (node instanceof Element) elements.add((Element) node); } return new Elements(elements); } /** * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. *

* This is effectively a filter on {@link #childNodes()} to get Text nodes. * @return child text nodes. If this element has no text nodes, returns an * empty list. *

* For example, with the input HTML: {@code

One Two Three
Four

} with the {@code p} element selected: *
    *
  • {@code p.text()} = {@code "One Two Three Four"} *
  • {@code p.ownText()} = {@code "One Three Four"} *
  • {@code p.children()} = {@code Elements[,
    ]} *
  • {@code p.childNodes()} = {@code List["One ", , " Three ",
    , " Four"]} *
  • {@code p.textNodes()} = {@code List["One ", " Three ", " Four"]} *
*/ public List textNodes() { List textNodes = new ArrayList(); for (Node node : childNodes) { if (node instanceof TextNode) textNodes.add((TextNode) node); } return Collections.unmodifiableList(textNodes); } /** * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated. *

* This is effectively a filter on {@link #childNodes()} to get Data nodes. * * @return child data nodes. If this element has no data nodes, returns an * empty list. * @see #data() */ public List dataNodes() { List dataNodes = new ArrayList(); for (Node node : childNodes) { if (node instanceof DataNode) dataNodes.add((DataNode) node); } return Collections.unmodifiableList(dataNodes); } /** * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements * may include this element, or any of its children. *

* This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because * multiple filters can be combined, e.g.: * *

    *
  • {@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes) *
  • {@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely) *
* * See the query syntax documentation in {@link Selector}. * * @param cssQuery a {@link Selector} CSS-like query * @return elements that match the query (empty if none match) * @see Selector * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query. */ public Elements select(String cssQuery) { return Selector.select(cssQuery, this); } /** * Add a node child node to this element. * * @param child node to add. * @return this element, so that you can add more child nodes or elements. */ public Element appendChild(Node child) { Validate.notNull(child); // was - Node#addChildren(child). short-circuits an array create and a loop. reparentChild(child); ensureChildNodes(); childNodes.add(child); child.setSiblingIndex(childNodes.size() - 1); return this; } /** * Add a node to the start of this element's children. * * @param child node to add. * @return this element, so that you can add more child nodes or elements. */ public Element prependChild(Node child) { return insertChild(0, child); } /** * Inserts the given child node into this element at the specified index. Current node will be shifted to the * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the node first. * * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the * end * @param child child node to insert * @return this element, for chaining. */ public Element insertChild(int index, Node child) { if (index == -1) { return appendChild(child); } Validate.notNull(child); addChildren(index, child); return this; } /** * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. * * @param index 0-based index to insert children at. 
Specify {@code 0} to insert at the start, {@code -1} at the * end * @param children child nodes to insert * @return this element, for chaining. */ public Element insertChildren(int index, Collection children) { Validate.notNull(children, "Children collection to be inserted must not be null."); int currentSize = childNodeSize(); if (index < 0) index += currentSize +1; // roll around Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); ArrayList nodes = new ArrayList(children); Node[] nodeArray = nodes.toArray(new Node[nodes.size()]); addChildren(index, nodeArray); return this; } /** * Create a new element by tag name, and add it as the last child. * * @param tagName the name of the tag (e.g. {@code div}). * @return the new element, to allow you to add content to it, e.g.: * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} */ public Element appendElement(String tagName) { Element child = new Element(Tag.valueOf(tagName), baseUri()); appendChild(child); return child; } /** * Create a new element by tag name, and add it as the first child. * * @param tagName the name of the tag (e.g. {@code div}). * @return the new element, to allow you to add content to it, e.g.: * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} */ public Element prependElement(String tagName) { Element child = new Element(Tag.valueOf(tagName), baseUri()); prependChild(child); return child; } /** * Create and append a new TextNode to this element. * * @param text the unencoded text to add * @return this element */ public Element appendText(String text) { Validate.notNull(text); TextNode node = new TextNode(text, baseUri()); appendChild(node); return this; } /** * Create and prepend a new TextNode to this element. 
* * @param text the unencoded text to add * @return this element */ public Element prependText(String text) { Validate.notNull(text); TextNode node = new TextNode(text, baseUri()); prependChild(node); return this; } /** * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. * @param html HTML to add inside this element, after the existing HTML * @return this element * @see #html(String) */ public Element append(String html) { Validate.notNull(html); List nodes = Parser.parseFragment(html, this, baseUri()); addChildren(nodes.toArray(new Node[nodes.size()])); return this; } /** * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. * @param html HTML to add inside this element, before the existing HTML * @return this element * @see #html(String) */ public Element prepend(String html) { Validate.notNull(html); List nodes = Parser.parseFragment(html, this, baseUri()); addChildren(0, nodes.toArray(new Node[nodes.size()])); return this; } /** * Insert the specified HTML into the DOM before this element (as a preceding sibling). * * @param html HTML to add before this element * @return this element, for chaining * @see #after(String) */ @Override public Node before(String html) { return super.before(html); } /** * Insert the specified node into the DOM before this node (as a preceding sibling). * @param node to add before this element * @return this Element, for chaining * @see #after(Node) */ @Override public Node before(Node node) { return super.before(node); } /** * Insert the specified HTML into the DOM after this element (as a following sibling). * * @param html HTML to add after this element * @return this element, for chaining * @see #before(String) */ @Override public Node after(String html) { return (Element) super.after(html); } /** * Insert the specified node into the DOM after this node (as a following sibling). 
* @param node to add after this element * @return this element, for chaining * @see #before(Node) */ @Override public Node after(Node node) { return (Element) super.after(node); } /** * Remove all of the element's child nodes. Any attributes are left as-is. * @return this element */ public Element empty() { childNodes.clear(); return this; } /** * Wrap the supplied HTML around this element. * * @param html HTML to wrap around this element, e.g. {@code
}. Can be arbitrarily deep. * @return this element, for chaining. */ @Override public Node wrap(String html) { return super.wrap(html); } /** * Get a CSS selector that will uniquely select this element. *

* If the element has an ID, returns #id; * otherwise returns the parent (if any) CSS selector, followed by {@literal '>'}, * followed by a unique selector for the element (tag.class.class:nth-child(n)). * * @return the CSS Path that can be used to retrieve the element in a selector. */ public String cssSelector() { if (id().length() > 0) return "#" + id(); // Translate HTML namespace ns:tag to CSS namespace syntax ns|tag String tagName = tagName().replace(':', '|'); StringBuilder selector = new StringBuilder(tagName); String classes = StringUtil.join(classNames(), "."); if (classes.length() > 0) selector.append('.').append(classes); if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node return selector.toString(); selector.insert(0, " > "); if (((Element) parent()).select(selector.toString()).size() > 1) selector.append(MessageFormatUtil.format( ":nth-child({0})", elementSiblingIndex() + 1)); return ((Element) parent()).cssSelector() + selector.toString(); } /** * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling * of itself, so will not be included in the returned list. * @return sibling elements */ public Elements siblingElements() { if (parentNode == null) return new Elements(0); List elements = ((Element) parent()).children(); Elements siblings = new Elements(elements.size() - 1); for (Element el: elements) if (el != this) siblings.add(el); return siblings; } /** * Gets the next sibling element of this element. E.g., if a {@code div} contains two {@code p}s, * the {@code nextElementSibling} of the first {@code p} is the second {@code p}. *

* This is similar to {@link #nextSibling()}, but specifically finds only Elements * * @return the next element, or null if there is no next element * @see #previousElementSibling() */ public Element nextElementSibling() { if (parentNode == null) return null; List siblings = ((Element) parent()).children(); int index = indexInList(this, siblings); Validate.isTrue(index >= 0); //Validate.notNull(index); if (siblings.size() > index+1) return siblings.get(index+1); else return null; } /** * Gets the previous element sibling of this element. * @return the previous element, or null if there is no previous element * @see #nextElementSibling() */ public Element previousElementSibling() { if (parentNode == null) return null; List siblings = ((Element) parent()).children(); int index = indexInList(this, siblings); Validate.isTrue(index >= 0); if (index > 0) return siblings.get(index-1); else return null; } /** * Gets the first element sibling of this element. * @return the first sibling that is an element (aka the parent's first element child) */ public Element firstElementSibling() { // todo: should firstSibling() exclude this? List siblings = ((Element) parent()).children(); return siblings.size() > 1 ? siblings.get(0) : null; } /** * Get the list index of this element in its element sibling list. I.e. if this is the first element * sibling, returns 0. * @return position in element sibling list */ public int elementSiblingIndex() { if (parent() == null) return 0; return indexInList(this, ((Element) parent()).children()); } /** * Gets the last element sibling of this element * @return the last sibling that is an element (aka the parent's last element child) */ public Element lastElementSibling() { List siblings = ((Element) parent()).children(); return siblings.size() > 1 ? 
siblings.get(siblings.size() - 1) : null; } private static int indexInList(Element search, List elements) { Validate.notNull(search); Validate.notNull(elements); for (int i = 0; i < elements.size(); i++) { E element = elements.get(i); if (element == search) return i; } return -1; } // DOM type methods /** * Finds elements, including and recursively under this element, with the specified tag name. * @param tagName The tag name to search for (case insensitively). * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. */ public Elements getElementsByTag(String tagName) { Validate.notEmpty(tagName); tagName = tagName.toLowerCase().trim(); return Collector.collect(new Evaluator.Tag(tagName), this); } /** * Find an element by ID, including or under this element. *

* Note that this finds the first matching ID, starting with this element. If you search down from a different * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, * use {@link Document#getElementById(String)} * @param id The ID to search for. * @return The first matching element by ID, starting with this element, or null if none found. */ public Element getElementById(String id) { Validate.notEmpty(id); Elements elements = Collector.collect(new Evaluator.Id(id), this); if (elements.size() > 0) return elements.get(0); else return null; } /** * Find elements that have this class, including or under this element. Case insensitive. *

* Elements can have multiple classes (e.g. {@code

}. This method * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. * * @param className the name of the class to search for. * @return elements with the supplied class name, empty if none * @see #hasClass(String) * @see #classNames() */ public Elements getElementsByClass(String className) { Validate.notEmpty(className); return Collector.collect(new Evaluator.Class(className), this); } /** * Find elements that have a named attribute set. Case insensitive. * * @param key name of the attribute, e.g. {@code href} * @return elements that have this attribute, empty if none */ public Elements getElementsByAttribute(String key) { Validate.notEmpty(key); key = key.trim().toLowerCase(); return Collector.collect(new Evaluator.Attribute(key), this); } /** * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements * that have HTML5 datasets. * @param keyPrefix name prefix of the attribute e.g. {@code data-} * @return elements that have attribute names that start with with the prefix, empty if none. */ public Elements getElementsByAttributeStarting(String keyPrefix) { Validate.notEmpty(keyPrefix); keyPrefix = keyPrefix.trim().toLowerCase(); return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); } /** * Find elements that have an attribute with the specific value. Case insensitive. * * @param key name of the attribute * @param value value of the attribute * @return elements that have this attribute with this value, empty if none */ public Elements getElementsByAttributeValue(String key, String value) { return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); } /** * Find elements that either do not have this attribute, or have it with a different value. Case insensitive. 
* * @param key name of the attribute * @param value value of the attribute * @return elements that do not have a matching attribute */ public Elements getElementsByAttributeValueNot(String key, String value) { return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); } /** * Find elements that have attributes that start with the value prefix. Case insensitive. * * @param key name of the attribute * @param valuePrefix start of attribute value * @return elements that have attributes that start with the value prefix */ public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); } /** * Find elements that have attributes that end with the value suffix. Case insensitive. * * @param key name of the attribute * @param valueSuffix end of the attribute value * @return elements that have attributes that end with the value suffix */ public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); } /** * Find elements that have attributes whose value contains the match string. Case insensitive. * * @param key name of the attribute * @param match substring of value to search for * @return elements that have attributes containing this text */ public Elements getElementsByAttributeValueContaining(String key, String match) { return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); } /** * Find elements that have attributes whose values match the supplied regular expression. 
* @param key name of the attribute * @param pattern compiled regular expression to match against attribute values * @return elements that have attributes matching this regular expression */ public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); } /** * Find elements that have attributes whose values match the supplied regular expression. * @param key name of the attribute * @param regex regular expression to match against attribute values. * You can use embedded flags * (such as (?i) and (?m) to control regex options. * @return elements that have attributes matching this regular expression */ public Elements getElementsByAttributeValueMatching(String key, String regex) { Pattern pattern; try { pattern = Pattern.compile(regex); } catch (PatternSyntaxException e) { throw new IllegalArgumentException("Pattern syntax error: " + regex, e); } return getElementsByAttributeValueMatching(key, pattern); } /** * Find elements whose sibling index is less than the supplied index. * @param index 0-based index * @return elements less than index */ public Elements getElementsByIndexLessThan(int index) { return Collector.collect(new Evaluator.IndexLessThan(index), this); } /** * Find elements whose sibling index is greater than the supplied index. * @param index 0-based index * @return elements greater than index */ public Elements getElementsByIndexGreaterThan(int index) { return Collector.collect(new Evaluator.IndexGreaterThan(index), this); } /** * Find elements whose sibling index is equal to the supplied index. * @param index 0-based index * @return elements equal to index */ public Elements getElementsByIndexEquals(int index) { return Collector.collect(new Evaluator.IndexEquals(index), this); } /** * Find elements that contain the specified string. The search is case insensitive. The text may appear directly * in the element, or in any of its descendants. 
* @param searchText to look for in the element's text * @return elements that contain the string, case insensitive. * @see Element#text() */ public Elements getElementsContainingText(String searchText) { return Collector.collect(new Evaluator.ContainsText(searchText), this); } /** * Find elements that directly contain the specified string. The search is case insensitive. The text must appear directly * in the element, not in any of its descendants. * @param searchText to look for in the element's own text * @return elements that contain the string, case insensitive. * @see Element#ownText() */ public Elements getElementsContainingOwnText(String searchText) { return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); } /** * Find elements whose text matches the supplied regular expression. * @param pattern regular expression to match text against * @return elements matching the supplied regular expression. * @see Element#text() */ public Elements getElementsMatchingText(Pattern pattern) { return Collector.collect(new Evaluator.Matches(pattern), this); } /** * Find elements whose text matches the supplied regular expression. * @param regex regular expression to match text against. * You can use embedded flags * (such as (?i) and (?m) to control regex options. * @return elements matching the supplied regular expression. * @see Element#text() */ public Elements getElementsMatchingText(String regex) { Pattern pattern; try { pattern = Pattern.compile(regex); } catch (PatternSyntaxException e) { throw new IllegalArgumentException("Pattern syntax error: " + regex, e); } return getElementsMatchingText(pattern); } /** * Find elements whose own text matches the supplied regular expression. * @param pattern regular expression to match text against * @return elements matching the supplied regular expression. 
* @see Element#ownText()
     */
    public Elements getElementsMatchingOwnText(Pattern pattern) {
        return Collector.collect(new Evaluator.MatchesOwn(pattern), this);
    }

    /**
     * Find elements whose text matches the supplied regular expression.
     * @param regex regular expression to match text against.
     * You can use embedded flags (such as (?i) and (?m) to control regex options.
     * @return elements matching the supplied regular expression.
     * @throws IllegalArgumentException if {@code regex} is not a valid pattern
     * @see Element#ownText()
     */
    public Elements getElementsMatchingOwnText(String regex) {
        final Pattern compiled;
        try {
            compiled = Pattern.compile(regex);
        } catch (PatternSyntaxException e) {
            // surface a bad pattern as an argument error, keeping the original cause
            throw new IllegalArgumentException("Pattern syntax error: " + regex, e);
        }
        return getElementsMatchingOwnText(compiled);
    }

    /**
     * Find all elements under this element (including self, and children of children).
     *
     * @return all elements
     */
    public Elements getAllElements() {
        return Collector.collect(new Evaluator.AllElements(), this);
    }

    /**
     * Gets the combined text of this element and all its children. Whitespace is normalized and trimmed.
     * <p>
     * For example, given HTML {@code <p>Hello  <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there now!"}
     *
     * @return unencoded text, or empty string if none.
     * @see #ownText()
     * @see #textNodes()
     */
    public String text() {
        final StringBuilder accum = new StringBuilder();
        NodeVisitor textCollector = new NodeVisitor() {
            public void head(Node node, int depth) {
                if (node instanceof TextNode) {
                    appendNormalisedText(accum, (TextNode) node);
                } else if (node instanceof Element) {
                    Element element = (Element) node;
                    // separate block-level elements and line breaks from preceding text with one space
                    if (accum.length() > 0
                            && (element.isBlock() || element.tag.getName().equals("br"))
                            && !TextNode.lastCharIsWhitespace(accum)) {
                        accum.append(" ");
                    }
                }
            }

            public void tail(Node node, int depth) {
            }
        };
        new NodeTraversor(textCollector).traverse(this);
        return accum.toString().trim();
    }

    /**
     * Gets the text owned by this element only; does not get the combined text of all children.
     * <p>
* For example, given HTML {@code

Hello there now!

}, {@code p.ownText()} returns {@code "Hello now!"}, * whereas {@code p.text()} returns {@code "Hello there now!"}. * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element. * * @return unencoded text, or empty string if none. * @see #text() * @see #textNodes() */ public String ownText() { StringBuilder sb = new StringBuilder(); ownText(sb); return sb.toString().trim(); } private void ownText(StringBuilder accum) { for (Node child : childNodes) { if (child instanceof TextNode) { TextNode textNode = (TextNode) child; appendNormalisedText(accum, textNode); } else if (child instanceof Element) { appendWhitespaceIfBr((Element) child, accum); } } } private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { String text = textNode.getWholeText(); if (preserveWhitespace(textNode.parentNode)) accum.append(text); else StringUtil.appendNormalisedWhitespace(accum, text, TextNode.lastCharIsWhitespace(accum)); } private static void appendWhitespaceIfBr(Element element, StringBuilder accum) { if (element.tag.getName().equals("br") && !TextNode.lastCharIsWhitespace(accum)) accum.append(" "); } static boolean preserveWhitespace(Node node) { // looks only at this element and one level up, to prevent recursion & needless stack searches if (node != null && node instanceof Element) { Element element = (Element) node; return element.tag.preserveWhitespace() || element.parent() != null && ((Element) element.parent()).tag.preserveWhitespace(); } return false; } /** * Set the text of this element. Any existing contents (text or elements) will be cleared * @param text unencoded text * @return this element */ public Element text(String text) { Validate.notNull(text); empty(); TextNode textNode = new TextNode(text, baseUri); appendChild(textNode); return this; } /** Test if this element has any text content (that is not just whitespace). @return true if element has non-blank text content. 
*/
    public boolean hasText() {
        for (Node child : childNodes) {
            if (child instanceof TextNode) {
                if (!((TextNode) child).isBlank()) {
                    return true;
                }
                continue;
            }
            // descend into child elements; any other node type carries no text
            if (child instanceof Element && ((Element) child).hasText()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects the data held by this element and its descendant elements, in document order.
     * Data is e.g. the inside of a {@code script} tag.
     * @return the concatenated data, or empty string if none
     *
     * @see #dataNodes()
     */
    public String data() {
        StringBuilder combined = new StringBuilder();
        for (Node childNode : childNodes) {
            if (childNode instanceof DataNode) {
                combined.append(((DataNode) childNode).getWholeData());
                continue;
            }
            if (childNode instanceof Element) {
                combined.append(((Element) childNode).data());
            }
        }
        return combined.toString();
    }

    /**
     * Returns the trimmed literal value of this element's "class" attribute, which may hold several
     * space separated class names. (E.g. on {@code <div class="header gray">} returns, "header gray")
     * @return The literal class attribute, or empty string if no class attribute set.
     */
    public String className() {
        String literal = attr("class");
        return literal.trim();
    }

    /**
     * Get all of the element's class names. E.g. on element {@code <div class="header gray">
}, * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them. * @return set of classnames, empty if no class attribute */ public Set classNames() { String[] names = classSplit.split(className()); Set classNames = new LinkedHashSet(Arrays.asList(names)); classNames.remove(""); // if classNames() was empty, would include an empty class return classNames; } /** Set the element's {@code class} attribute to the supplied class names. @param classNames set of classes @return this element, for chaining */ public Element classNames(Set classNames) { Validate.notNull(classNames); attributes.put("class", StringUtil.join(classNames, " ")); return this; } /** * Tests if this element has a class. Case insensitive. * @param className name of class to check for * @return true if it does, false if not */ /* Used by common .class selector, so perf tweaked to reduce object creation vs hitting classnames(). Wiki: 71, 13 (5.4x) CNN: 227, 91 (2.5x) Alterslash: 59, 4 (14.8x) Jsoup: 14, 1 (14x) */ public boolean hasClass(String className) { String classAttr = attributes.get("class"); if (classAttr.equals("") || classAttr.length() < className.length()) return false; final String[] classes = classSplit.split(classAttr); for (String name : classes) { if (className.equalsIgnoreCase(name)) return true; } return false; } /** Add a class name to this element's {@code class} attribute. @param className class name to add @return this element */ public Element addClass(String className) { Validate.notNull(className); Set classes = classNames(); classes.add(className); classNames(classes); return this; } /** Remove a class name from this element's {@code class} attribute. 
@param className class name to remove @return this element */ public Element removeClass(String className) { Validate.notNull(className); Set classes = classNames(); classes.remove(className); classNames(classes); return this; } /** Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it. @param className class name to toggle @return this element */ public Element toggleClass(String className) { Validate.notNull(className); Set classes = classNames(); if (classes.contains(className)) classes.remove(className); else classes.add(className); classNames(classes); return this; } /** * Get the value of a form element (input, textarea, etc). * @return the value of the form element, or empty string if not set. */ public String val() { if (tagName().equals("textarea")) return text(); else return attr("value"); } /** * Set the value of a form element (input, textarea, etc). * @param value value to set * @return this element (for chaining) */ public Element val(String value) { if (tagName().equals("textarea")) text(value); else attr("value", value); return this; } void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException { if (out.prettyPrint() && (tag.formatAsBlock() || (parent() != null && ((Element) parent()).tag().formatAsBlock()) || out.outline())) { if (accum instanceof StringBuilder) { if (((StringBuilder) accum).length() > 0) indent(accum, depth, out); } else { indent(accum, depth, out); } } accum .append("<") .append(tagName()); attributes.html(accum, out); // selfclosing includes unknown tags, isEmpty defines tags that are always empty if (childNodes.isEmpty() && tag.isSelfClosing()) { if (out.syntax() == Document.OutputSettings.Syntax.html && tag.isEmpty()) accum.append('>'); else accum.append(" />"); // in html, in xml } else accum.append(">"); } void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) throws IOException { if (!(childNodes.isEmpty() && 
tag.isSelfClosing())) { if (out.prettyPrint() && (!childNodes.isEmpty() && ( tag.formatAsBlock() || (out.outline() && (childNodes.size()>1 || (childNodes.size()==1 && !(childNodes.get(0) instanceof TextNode)))) ))) indent(accum, depth, out); accum.append(""); } } /** * Retrieves the element's inner HTML. E.g. on a {@code
} with one empty {@code <p>}, would return
     * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.)
     *
     * @return String of HTML.
     * @see #outerHtml()
     */
    public String html() {
        StringBuilder buffer = new StringBuilder();
        html(buffer);
        boolean pretty = getOutputSettings().prettyPrint();
        String rendered = buffer.toString();
        // pretty printed output is trimmed, otherwise it is returned untouched
        return pretty ? rendered.trim() : rendered;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Appendable html(Appendable appendable) {
        // the inner HTML is the outer HTML of every child, in order
        for (Node child : childNodes) {
            child.outerHtml(appendable);
        }
        return appendable;
    }

    /**
     * Replaces this element's inner HTML: the existing children are cleared, then the given HTML is appended.
     * @param html HTML to parse and set into this element
     * @return this element
     * @see #append(String)
     */
    public Element html(String html) {
        empty();
        append(html);
        return this;
    }

    /**
     * @return this element's outer HTML, as produced by {@code outerHtml()}
     */
    public String toString() {
        return outerHtml();
    }

    /**
     * @return the result of the superclass {@code clone()}
     */
    @Override
    public Object clone() {
        return super.clone();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy