All downloads are free. Search and download functionality uses the official Maven repository.

it.skrape.selects.DomTreeElement.kt Maven / Gradle / Ivy

Go to download

A Kotlin-based testing/scraping/parsing library providing the ability to analyze and extract data from HTML (server & client-side rendered). It places particular emphasis on ease of use and a high level of readability by providing an intuitive DSL. First and foremost it aims to be a testing lib, but it can also be used to scrape websites in a convenient fashion.

There is a newer version: 1.3.0-alpha.2
Show newest version
package it.skrape.selects

import org.jsoup.nodes.Element

/**
 * Base class for wrappers around a jsoup [Element] that can be queried with CSS selectors.
 *
 * Most derived values ([text], [html], [outerHtml], [allElements], [children], [eachHref], [eachSrc])
 * are `by lazy` properties: they are computed on first access and the result is reused afterwards.
 */
public abstract class DomTreeElement : CssSelectable() {
    /** The wrapped jsoup element that every property and query of this class reads from. */
    public abstract val element: Element

    /**
     * Selects the behavior for lookups that match nothing.
     *
     * When `true`, [applySelector] returns an empty list and [makeDefault] delegates to [makeDefaultElement].
     * When `false`, both throw [ElementNotFoundException].
     */
    public abstract val relaxed: Boolean

    /**
     * The combined text of this element and all its children. Whitespace is normalized and trimmed.
     *
     * For example, given the HTML `<p>Hello  <b>there</b> now! </p>`, the text of the `p` element
     * is `"Hello there now!"`.
     *
     * Backed by jsoup's `Element.text()`; see also jsoup's `Element.wholeText()` (non-normalized text),
     * `Element.ownText()` and `Element.textNodes()`.
     *
     * @return unencoded, normalized text, or an empty string if there is none.
     */
    public val text: String by lazy { element.text().orEmpty() }

    /**
     * The element's inner HTML.
     *
     * For example, on a `<div>` with one empty `<p>` this is `<p></p>`,
     * whereas [outerHtml] would be `<div><p></p></div>`.
     *
     * @return the inner HTML as a string, or an empty string if there is none.
     * @see outerHtml
     */
    public val html: String by lazy { element.html().orEmpty() }

    /**
     * The outer HTML of this node. For example, on a `p` element this may be `<p>Para</p>`.
     *
     * @return the outer HTML as a string, or an empty string if there is none.
     * @see html
     * @see text
     */
    public val outerHtml: String by lazy { element.outerHtml().orEmpty() }

    /**
     * Every element reported by jsoup's `Element.getAllElements()` for [element], each wrapped in a [DocElement].
     *
     * NOTE(review): the wrappers are built with `DocElement(it)`, while [applySelector] uses
     * `DocElement(it, relaxed)`; presumably this drops the [relaxed] setting for these elements -
     * confirm against `DocElement`'s constructors whether that is intended.
     *
     * @return the wrapped elements.
     */
    public val allElements: List<DocElement> by lazy {
        element.allElements.map { DocElement(it) }
    }

    /**
     * This element's child elements, each wrapped in a [DocElement].
     *
     * @return the child elements; an empty list if this element has no children.
     */
    public val children: List<DocElement> by lazy {
        element.children().map { DocElement(it) }
    }

    /**
     * Runs [init] with the list of [children] as its receiver and returns whatever [init] returns.
     *
     * @param init block evaluated against the list of child elements.
     * @return the result of [init].
     */
    public fun <T> children(init: List<DocElement>.() -> T): T = children.init()

    /**
     * Collects the value of the attribute named [attributeKey] from each of [allElements],
     * leaving out empty values.
     *
     * @param attributeKey name of the attribute to read from each element.
     * @return the non-empty attribute values, in the order of [allElements].
     */
    public fun eachAttribute(attributeKey: String): List<String> =
        allElements
            .map { it.attribute(attributeKey) }
            .filter { it.isNotEmpty() }

    /** All non-empty `href` attribute values found via [eachAttribute]. */
    // eachAttribute already removes empty values, so no second filter is needed here.
    public val eachHref: List<String> by lazy { eachAttribute("href") }

    /** All non-empty `src` attribute values found via [eachAttribute]. */
    // eachAttribute already removes empty values, so no second filter is needed here.
    public val eachSrc: List<String> by lazy { eachAttribute("src") }

    /**
     * Maps the text of every element in [allElements] that has an `href` attribute to that attribute's value.
     *
     * Recomputed on every access. Because the result is built with `associate`, elements sharing
     * the same text collapse into one entry holding the last such element's `href`.
     */
    public val eachLink: Map<String, String>
        get() = allElements
            .filter { it.hasAttribute("href") }
            .associate { it.text to it.attribute("href") }

    /**
     * Maps the `alt` attribute of every `img` element in [allElements] that has a `src` attribute
     * to that `src` value.
     *
     * Recomputed on every access. Because the result is built with `associate`, images sharing
     * the same `alt` value collapse into one entry holding the last such image's `src`.
     */
    public val eachImage: Map<String, String>
        get() = allElements
            .filter { it.tagName == "img" && it.hasAttribute("src") }
            .associate { it.attribute("alt") to it.attribute("src") }

    /**
     * Produces the fallback element for [cssSelector] by delegating to the superclass `makeDefault`.
     *
     * Open so that subclasses can supply a different fallback.
     *
     * @param cssSelector the selector that yielded no element.
     * @return the fallback element.
     */
    public open fun makeDefaultElement(cssSelector: String): DocElement = super.makeDefault(cssSelector)

    /**
     * Returns the fallback element for [cssSelector] when [relaxed] is `true`.
     *
     * @param cssSelector the selector that yielded no element.
     * @return the result of [makeDefaultElement] in relaxed mode.
     * @throws ElementNotFoundException if [relaxed] is `false`.
     */
    override fun makeDefault(cssSelector: String): DocElement =
        if (relaxed) makeDefaultElement(cssSelector) else throw ElementNotFoundException(cssSelector)

    /**
     * Resolves [rawCssSelector] against this element.
     *
     * An empty selector short-circuits to [allElements]. Otherwise the selector is run through
     * jsoup's `Elements.select` on [element]'s children, and each hit is wrapped in a [DocElement]
     * that carries this element's [relaxed] setting.
     *
     * @param rawCssSelector the CSS selector to evaluate; may be empty.
     * @return the matching elements; an empty list if nothing matched and [relaxed] is `true`.
     * @throws ElementNotFoundException if nothing matched and [relaxed] is `false`.
     */
    override fun applySelector(rawCssSelector: String): List<DocElement> {
        if (rawCssSelector.isEmpty()) {
            return allElements
        }

        val matches = element.children().select(rawCssSelector).map { DocElement(it, relaxed) }
        // Only strict mode treats "no match" as an error; relaxed mode hands back the empty list.
        if (matches.isEmpty() && !relaxed) {
            throw ElementNotFoundException(rawCssSelector)
        }
        return matches
    }

    /** Returns the string representation of the wrapped [element]. */
    override fun toString(): String = element.toString()
}

/**
 * Thrown when a lookup matches no element and relaxed mode is off.
 *
 * The message has the form `Could not find element "<tag><selector>"`.
 *
 * @param selector the CSS selector that matched nothing.
 * @param tag optional text placed directly before [selector] in the message; empty by default.
 */
public open class ElementNotFoundException(selector: String, tag: String = "") :
    Exception("Could not find element \"$tag$selector\"")




© 2015 - 2025 Weber Informatics LLC | Privacy Policy