All downloads are free. The search and download functionality uses the official Maven repository.

com.itextpdf.styledxmlparser.jsoup.select.Elements Maven / Gradle / Ivy

There is a newer version: 9.0.0
Show newest version
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2024 Apryse Group NV
    Authors: Apryse Software.

    This program is offered under a commercial and under the AGPL license.
    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.

    AGPL licensing:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package com.itextpdf.styledxmlparser.jsoup.select;

import com.itextpdf.styledxmlparser.jsoup.helper.Validate;
import com.itextpdf.styledxmlparser.jsoup.internal.StringUtil;
import com.itextpdf.styledxmlparser.jsoup.nodes.Comment;
import com.itextpdf.styledxmlparser.jsoup.nodes.DataNode;
import com.itextpdf.styledxmlparser.jsoup.nodes.Element;
import com.itextpdf.styledxmlparser.jsoup.nodes.FormElement;
import com.itextpdf.styledxmlparser.jsoup.nodes.Node;
import com.itextpdf.styledxmlparser.jsoup.nodes.TextNode;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

/**
 * A list of {@link Element}s, with methods that act on every element in the list.
 * 

* To get an {@code Elements} object, use the {@link Element#select(String)} method. * * @author Jonathan Hedley, [email protected] */ public class Elements extends ArrayList { public Elements() { } public Elements(int initialCapacity) { super(initialCapacity); } public Elements(Collection elements) { super(elements); } public Elements(List elements) { super(elements); } public Elements(Element... elements) { super(Arrays.asList(elements)); } /** * Creates a deep copy of these elements. * * @return a deep copy */ @Override public Object clone() { Elements clone = new Elements(size()); for(Element e : this) clone.add((Element) e.clone()); return clone; } // attribute methods /** * Get an attribute value from the first matched element that has the attribute. * * @param attributeKey The attribute key. * @return The attribute value from the first matched element that has the attribute.. If no elements were matched (isEmpty() == true), * or if the no elements have the attribute, returns empty string. * @see #hasAttr(String) */ public String attr(String attributeKey) { for (Element element : this) { if (element.hasAttr(attributeKey)) return element.attr(attributeKey); } return ""; } /** * Checks if any of the matched elements have this attribute defined. * * @param attributeKey attribute key * @return true if any of the elements have the attribute; false if none do. */ public boolean hasAttr(String attributeKey) { for (Element element : this) { if (element.hasAttr(attributeKey)) return true; } return false; } /** * Get the attribute value for each of the matched elements. If an element does not have this attribute, no value is * included in the result set for that element. * @param attributeKey the attribute name to return values for. You can add the {@code abs:} prefix to the key to * get absolute URLs from relative URLs, e.g.: {@code doc.select("a").eachAttr("abs:href")} . 
* @return a list of each element's attribute value for the attribute */ public List eachAttr(String attributeKey) { List attrs = new ArrayList<>(size()); for (Element element : this) { if (element.hasAttr(attributeKey)) attrs.add(element.attr(attributeKey)); } return attrs; } /** * Set an attribute on all matched elements. * @param attributeKey attribute key * @param attributeValue attribute value * @return this */ public Elements attr(String attributeKey, String attributeValue) { for (Element element : this) { element.attr(attributeKey, attributeValue); } return this; } /** * Remove an attribute from every matched element. * @param attributeKey The attribute to remove. * @return this (for chaining) */ public Elements removeAttr(String attributeKey) { for (Element element : this) { element.removeAttr(attributeKey); } return this; } /** * Add the class name to every matched element's {@code class} attribute. * * @param className class name to add * @return this */ public Elements addClass(String className) { for (Element element : this) { element.addClass(className); } return this; } /** * Remove the class name from every matched element's {@code class} attribute, if present. * * @param className class name to remove * @return this */ public Elements removeClass(String className) { for (Element element : this) { element.removeClass(className); } return this; } /** * Toggle the class name on every matched element's {@code class} attribute. * * @param className class name to add if missing, or remove if present, from every element. * @return this */ public Elements toggleClass(String className) { for (Element element : this) { element.toggleClass(className); } return this; } /** * Determine if any of the matched elements have this class name set in their {@code class} attribute. 
* * @param className class name to check for * @return true if any do, false if none do */ public boolean hasClass(String className) { for (Element element : this) { if (element.hasClass(className)) return true; } return false; } /** * Get the form element's value of the first matched element. * @return The form element's value, or empty if not set. * @see Element#val() */ public String val() { if (size() > 0) //noinspection ConstantConditions return first().val(); // first() != null as size() > 0 else return ""; } /** * Set the form element's value in each of the matched elements. * @param value The value to set into each matched element * @return this (for chaining) */ public Elements val(String value) { for (Element element : this) element.val(value); return this; } /** * Get the combined text of all the matched elements. *

* Note that it is possible to get repeats if the matched elements contain both parent elements and their own * children, as the Element.text() method returns the combined text of a parent and all its children. * @return string of all text: unescaped and no HTML. * @see Element#text() * @see #eachText() */ public String text() { StringBuilder sb = StringUtil.borrowBuilder(); for (Element element : this) { if (sb.length() != 0) sb.append(" "); sb.append(element.text()); } return StringUtil.releaseBuilder(sb); } /** * Test if any matched Element has any text content, that is not just whitespace. * * @return true if any element has non-blank text content. * @see Element#hasText() */ public boolean hasText() { for (Element element: this) { if (element.hasText()) return true; } return false; } /** * Get the text content of each of the matched elements. If an element has no text, then it is not included in the * result. * @return A list of each matched element's text content. * @see Element#text() * @see Element#hasText() * @see #text() */ public List eachText() { ArrayList texts = new ArrayList<>(size()); for (Element el: this) { if (el.hasText()) texts.add(el.text()); } return texts; } /** * Get the combined inner HTML of all matched elements. * @return string of all element's inner HTML. * @see #text() * @see #outerHtml() */ public String html() { StringBuilder sb = StringUtil.borrowBuilder(); for (Element element : this) { if (sb.length() != 0) sb.append("\n"); sb.append(element.html()); } return StringUtil.releaseBuilder(sb); } /** * Get the combined outer HTML of all matched elements. * @return string of all element's outer HTML. * @see #text() * @see #html() */ public String outerHtml() { StringBuilder sb = StringUtil.borrowBuilder(); for (Element element : this) { if (sb.length() != 0) sb.append("\n"); sb.append(element.outerHtml()); } return StringUtil.releaseBuilder(sb); } /** * Get the combined outer HTML of all matched elements. Alias of {@link #outerHtml()}. 
* @return string of all element's outer HTML. * @see #text() * @see #html() */ @Override public String toString() { return outerHtml(); } /** * Update (rename) the tag name of each matched element. For example, to change each {@code } to a {@code }, do * {@code doc.select("i").tagName("em");} * * @param tagName the new tag name * @return this, for chaining * @see Element#tagName(String) */ public Elements tagName(String tagName) { for (Element element : this) { element.tagName(tagName); } return this; } /** * Set the inner HTML of each matched element. * @param html HTML to parse and set into each matched element. * @return this, for chaining * @see Element#html(String) */ public Elements html(String html) { for (Element element : this) { element.html(html); } return this; } /** * Add the supplied HTML to the start of each matched element's inner HTML. * @param html HTML to add inside each element, before the existing HTML * @return this, for chaining * @see Element#prepend(String) */ public Elements prepend(String html) { for (Element element : this) { element.prepend(html); } return this; } /** * Add the supplied HTML to the end of each matched element's inner HTML. * @param html HTML to add inside each element, after the existing HTML * @return this, for chaining * @see Element#append(String) */ public Elements append(String html) { for (Element element : this) { element.append(html); } return this; } /** * Insert the supplied HTML before each matched element's outer HTML. * @param html HTML to insert before each element * @return this, for chaining * @see Element#before(String) */ public Elements before(String html) { for (Element element : this) { element.before(html); } return this; } /** * Insert the supplied HTML after each matched element's outer HTML. 
* @param html HTML to insert after each element * @return this, for chaining * @see Element#after(String) */ public Elements after(String html) { for (Element element : this) { element.after(html); } return this; } /** * Wrap the supplied HTML around each matched elements. For example, with HTML * {@code

This is Jsoup

}, * doc.select("b").wrap("<i></i>"); * becomes {@code

This is jsoup

} * * @param html HTML to wrap around each element, e.g. {@code
}. Can be arbitrarily deep. * @return this (for chaining) * @see Element#wrap */ public Elements wrap(String html) { Validate.notEmpty(html); for (Element element : this) { element.wrap(html); } return this; } /** * Removes the matched elements from the DOM, and moves their children up into their parents. This has the effect of * dropping the elements but keeping their children. *

* This is useful for e.g removing unwanted formatting elements but keeping their contents. * * E.g. with HTML:

* {@code

One Two
} *

* {@code doc.select("font").unwrap();} *

* HTML = {@code

One Two
} * * @return this (for chaining) * @see Node#unwrap */ public Elements unwrap() { for (Element element : this) { element.unwrap(); } return this; } /** * Empty (remove all child nodes from) each matched element. This is similar to setting the inner HTML of each * element to nothing. *

* E.g. HTML: {@code

Hello there

now

}
* doc.select("p").empty();
* HTML = {@code

} * * @return this, for chaining * @see Element#empty() * @see #remove() */ public Elements empty() { for (Element element : this) { element.empty(); } return this; } /** * Remove each matched element from the DOM. This is similar to setting the outer HTML of each element to nothing. *

* E.g. HTML: {@code

Hello

there

}
* doc.select("p").remove();
* HTML = {@code
} *

* Note that this method should not be used to clean user-submitted HTML; rather, use {@link com.itextpdf.styledxmlparser.jsoup.safety.Cleaner} to clean HTML. * * @return this, for chaining * @see Element#empty() * @see #empty() */ public Elements remove() { for (Element element : this) { element.remove(); } return this; } // filters /** * Find matching elements within this element list. * @param query A {@link Selector} query * @return the filtered list of elements, or an empty list if none match. */ public Elements select(String query) { return Selector.select(query, this); } /** * Remove elements from this list that match the {@link Selector} query. *

* E.g. HTML: {@code

Two
}
* Elements divs = doc.select("div").not(".logo");
* Result: {@code divs: [
Two
]} *

* @param query the selector query whose results should be removed from these elements * @return a new elements list that contains only the filtered results */ public Elements not(String query) { Elements out = Selector.select(query, this); return Selector.filterOut(this, out); } /** * Get the nth matched element as an Elements object. *

* See also {@link #get(int)} to retrieve an Element. * @param index the (zero-based) index of the element in the list to retain * @return Elements containing only the specified element, or, if that element did not exist, an empty list. */ public Elements eq(int index) { return size() > index ? new Elements(get(index)) : new Elements(); } /** * Test if any of the matched elements match the supplied query. * @param query A selector * @return true if at least one element in the list matches the query. */ public boolean is(String query) { Evaluator eval = QueryParser.parse(query); for (Element e : this) { if (e.is(eval)) return true; } return false; } /** * Get the immediate next element sibling of each element in this list. * @return next element siblings. */ public Elements next() { return siblings(null, true, false); } /** * Get the immediate next element sibling of each element in this list, filtered by the query. * @param query CSS query to match siblings against * @return next element siblings. */ public Elements next(String query) { return siblings(query, true, false); } /** * Get each of the following element siblings of each element in this list. * @return all following element siblings. */ public Elements nextAll() { return siblings(null, true, true); } /** * Get each of the following element siblings of each element in this list, that match the query. * @param query CSS query to match siblings against * @return all following element siblings. */ public Elements nextAll(String query) { return siblings(query, true, true); } /** * Get the immediate previous element sibling of each element in this list. * @return previous element siblings. */ public Elements prev() { return siblings(null, false, false); } /** * Get the immediate previous element sibling of each element in this list, filtered by the query. * @param query CSS query to match siblings against * @return previous element siblings. 
*/ public Elements prev(String query) { return siblings(query, false, false); } /** * Get each of the previous element siblings of each element in this list. * @return all previous element siblings. */ public Elements prevAll() { return siblings(null, false, true); } /** * Get each of the previous element siblings of each element in this list, that match the query. * @param query CSS query to match siblings against * @return all previous element siblings. */ public Elements prevAll(String query) { return siblings(query, false, true); } private Elements siblings(String query, boolean next, boolean all) { Elements els = new Elements(); Evaluator eval = query != null? QueryParser.parse(query) : null; for (Element e : this) { Element temp = e; do { Element sib = next ? temp.nextElementSibling() : temp.previousElementSibling(); if (sib == null) break; if (eval == null) els.add(sib); else if (sib.is(eval)) els.add(sib); temp = sib; } while (all); } return els; } /** * Get all of the parents and ancestor elements of the matched elements. * @return all of the parents and ancestor elements of the matched elements */ public Elements parents() { Set combo = new LinkedHashSet<>(); for (Element e: this) { combo.addAll(e.parents()); } return new Elements(combo); } // list-like methods /** Get the first matched element. @return The first matched element, or null if contents is empty. */ public Element first() { return isEmpty() ? null : get(0); } /** Get the last matched element. @return The last matched element, or null if contents is empty. */ public Element last() { return isEmpty() ? null : get(size() - 1); } /** * Perform a depth-first traversal on each of the selected elements. * @param nodeVisitor the visitor callbacks to perform on each node * @return this, for chaining */ public Elements traverse(NodeVisitor nodeVisitor) { NodeTraversor.traverse(nodeVisitor, this); return this; } /** * Perform a depth-first filtering on each of the selected elements. 
* @param nodeFilter the filter callbacks to perform on each node * @return this, for chaining */ public Elements filter(NodeFilter nodeFilter) { NodeTraversor.filter(nodeFilter, this); return this; } /** * Get the {@link FormElement} forms from the selected elements, if any. * @return a list of {@link FormElement}s pulled from the matched elements. The list will be empty if the elements contain * no forms. */ public List forms() { ArrayList forms = new ArrayList<>(); for (Element el: this) if (el instanceof FormElement) forms.add((FormElement) el); return forms; } /** * Get {@link Comment} nodes that are direct child nodes of the selected elements. * @return Comment nodes, or an empty list if none. */ public List comments() { return childNodesOfType(Comment.class); } /** * Get {@link TextNode} nodes that are direct child nodes of the selected elements. * @return TextNode nodes, or an empty list if none. */ public List textNodes() { return childNodesOfType(TextNode.class); } /** * Get {@link DataNode} nodes that are direct child nodes of the selected elements. DataNode nodes contain the * content of tags such as {@code script}, {@code style} etc and are distinct from {@link TextNode}s. * @return Comment nodes, or an empty list if none. */ public List dataNodes() { return childNodesOfType(DataNode.class); } private List childNodesOfType(Class tClass) { ArrayList nodes = new ArrayList<>(); for (Element el: this) { for (int i = 0; i < el.childNodeSize(); i++) { Node node = el.childNode(i); if (tClass.isInstance(node)) nodes.add(tClass.cast(node)); } } return nodes; } }