it.skrape.selects.CssSelectable.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of skrapeit-html-parser Show documentation
Show all versions of skrapeit-html-parser Show documentation
A Kotlin-based testing/scraping/parsing library providing the ability to analyze and extract data from HTML (server & client-side rendered). It places particular emphasis on ease of use and a high level of readability by providing an intuitive DSL. First and foremost it aims to be a testing lib, but it can also be used to scrape websites in a convenient fashion.
package it.skrape.selects
import it.skrape.SkrapeItDsl
import org.jsoup.nodes.Element
import java.util.*
/**
 * Base type for everything that can be queried with CSS selectors.
 *
 * Implementations provide [applySelector]; every `find*` function in this class is
 * built on top of it. Positional finders (first, second, last, ...) never index out of
 * bounds themselves: when the requested position does not exist they return whatever
 * [makeDefault] produces.
 */
@Suppress("TooManyFunctions")
@SkrapeItDsl
public abstract class CssSelectable {

    /** The CSS selector string of this selectable, supplied by the implementing class. */
    public abstract val toCssSelector: String

    /**
     * Resolves [rawCssSelector] to the list of matching elements.
     * All `find*` functions of this class ultimately delegate to this function.
     * @param rawCssSelector the CSS selector to evaluate
     * @return the matching elements
     */
    internal abstract fun applySelector(rawCssSelector: String): List<DocElement>

    /**
     * Creates a [CssSelector] for [cssSelector] that is bound to this selectable and
     * runs [init] on it.
     * @param cssSelector the raw CSS selector the scope starts from
     * @param init block executed with the created [CssSelector] as receiver
     * @return the result of [init]
     */
    public fun <T> selection(cssSelector: String, init: CssSelector.() -> T): T =
        CssSelector(rawCssSelector = cssSelector, doc = this).init()

    /**
     * Will create a CssSelector scope to calculate a css selector.
     * The receiver string is used as the raw CSS selector; shorthand for [selection].
     * @param init block for configuring the CSS-Selector that will be considered during calculation
     * @return the result of [init]
     */
    public operator fun <T> String.invoke(init: CssSelector.() -> T): T =
        // Inside a String extension `this` is the String, so the enclosing instance must be qualified.
        this@CssSelectable.selection(this, init)

    /**
     * Produces the fallback element returned by the positional finders when the
     * requested element does not exist. This base implementation wraps a new jsoup
     * [Element] constructed from a random UUID string; subclasses may override it.
     * @param cssSelector the selector that yielded no element at the requested position
     *   (not used by this base implementation)
     * @return the fallback element
     */
    public open fun makeDefault(cssSelector: String): DocElement =
        DocElement(Element(UUID.randomUUID().toString()))

    /**
     * Will pick all occurrences of elements that are matching the CSS-Selector.
     * See the [overview of CSS selectors](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_selectors)
     * for further information.
     * @param cssSelector that represents a CSS-Selector
     * @return all matching elements
     */
    public infix fun findAll(cssSelector: String): List<DocElement> =
        applySelector(cssSelector)

    /**
     * Picks the element at position [index] among all matches of [cssSelector].
     * @param index zero-based position within the matches
     * @param cssSelector that represents a CSS-Selector
     * @return the element at [index], or the result of [makeDefault] if there is none
     */
    public fun findByIndex(index: Int, cssSelector: String = ""): DocElement =
        findAll(cssSelector).getOrElse(index) { makeDefault(cssSelector) }

    /**
     * Shorthand for [findByIndex] where the receiver integer is the index.
     * @param cssSelector that represents a CSS-Selector
     * @return the element at the receiver index, or the result of [makeDefault] if there is none
     */
    public operator fun Int.invoke(cssSelector: String = ""): DocElement =
        findByIndex(this, cssSelector)

    /**
     * Picks every element (selected via `*`) whose `ownCssSelector` matches [regex].
     * @param regex pattern the element's own CSS selector has to match
     * @return the elements whose own selector matches
     */
    public fun findBySelectorMatching(regex: Regex): List<DocElement> =
        this@CssSelectable.findAll("*").filter { it.ownCssSelector.matches(regex) }

    /**
     * Shorthand for [findBySelectorMatching] with the receiver as the pattern.
     * @return the elements whose own selector matches the receiver regex
     */
    public operator fun Regex.invoke(): List<DocElement> =
        findBySelectorMatching(this)

    /**
     * Will pick the first occurrence of an element that is matching the CSS-Selector.
     * See the [overview of CSS selectors](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_selectors)
     * for further information.
     * @param cssSelector that represents a CSS-Selector
     * @return the first match, or the result of [makeDefault] if there is none
     */
    public infix fun findFirst(cssSelector: String): DocElement =
        findByIndex(0, cssSelector)

    /**
     * Picks the second match of [cssSelector].
     * @return the second match, or the result of [makeDefault] if there is none
     */
    public fun findSecond(cssSelector: String = ""): DocElement =
        findByIndex(1, cssSelector)

    /**
     * Picks the third match of [cssSelector].
     * @return the third match, or the result of [makeDefault] if there is none
     */
    public fun findThird(cssSelector: String = ""): DocElement =
        findByIndex(2, cssSelector)

    /**
     * Picks the last match of [cssSelector].
     * @return the last match, or the result of [makeDefault] if there is none
     */
    public fun findLast(cssSelector: String = ""): DocElement =
        // lastOrNull + fallback keeps an empty result consistent with the other positional
        // finders instead of surfacing a NoSuchElementException from last().
        findAll(cssSelector).lastOrNull() ?: makeDefault(cssSelector)

    /**
     * Picks the match directly before the last match of [cssSelector].
     * @return the second-to-last match, or the result of [makeDefault] if there are fewer than two matches
     */
    public fun findSecondLast(cssSelector: String = ""): DocElement =
        findAll(cssSelector).let { matches ->
            // With fewer than two matches the index is negative, so getOrElse falls back.
            matches.getOrElse(matches.lastIndex - 1) { makeDefault(cssSelector) }
        }

    /**
     * Will pick all occurrences of elements that are matching the CSS-Selector
     * and invoke [init] with the resulting list as receiver.
     * See the [overview of CSS selectors](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_selectors)
     * for further information.
     * @param cssSelector that represents a CSS-Selector
     * @param init block executed with the list of matches as receiver
     * @return the result of [init]
     */
    public fun <T> findAll(cssSelector: String = "", init: List<DocElement>.() -> T): T =
        findAll(cssSelector).init()

    /**
     * Runs [init] on the element returned by [findByIndex].
     * @return the result of [init]
     */
    public fun <T> findByIndex(index: Int, cssSelector: String = "", init: DocElement.() -> T): T =
        findByIndex(index, cssSelector).init()

    /**
     * Runs [init] on the element at the receiver index among the matches of [cssSelector].
     * @return the result of [init]
     */
    public operator fun <T> Int.invoke(cssSelector: String = "", init: DocElement.() -> T): T =
        this(cssSelector).init()

    /**
     * Runs [init] on the list returned by [findBySelectorMatching].
     * @return the result of [init]
     */
    public fun <T> findBySelectorMatching(regex: Regex, init: List<DocElement>.() -> T): T =
        findBySelectorMatching(regex).init()

    /**
     * Runs [init] on the elements whose own selector matches the receiver regex.
     * @return the result of [init]
     */
    public operator fun <T> Regex.invoke(init: List<DocElement>.() -> T): T =
        this().init()

    /**
     * Will pick the first occurrence of an element that is matching the CSS-Selector
     * and invoke [init] with it as receiver.
     * See the [overview of CSS selectors](https://developer.mozilla.org/en-US/docs/Web/CSS/CSS_selectors)
     * for further information.
     * @param cssSelector that represents a CSS-Selector
     * @param init block executed with the picked element as receiver
     * @return the result of [init]
     */
    public fun <T> findFirst(cssSelector: String = "", init: DocElement.() -> T): T =
        findFirst(cssSelector).init()

    /**
     * Runs [init] on the second match of [cssSelector] (see [findSecond]).
     * @return the result of [init]
     */
    public fun <T> findSecond(cssSelector: String = "", init: DocElement.() -> T): T =
        findSecond(cssSelector).init()

    /**
     * Runs [init] on the third match of [cssSelector] (see [findThird]).
     * @return the result of [init]
     */
    public fun <T> findThird(cssSelector: String = "", init: DocElement.() -> T): T =
        findThird(cssSelector).init()

    /**
     * Runs [init] on the last match of [cssSelector] (see [findLast]).
     * @return the result of [init]
     */
    public fun <T> findLast(cssSelector: String = "", init: DocElement.() -> T): T =
        findLast(cssSelector).init()

    /**
     * Runs [init] on the second-to-last match of [cssSelector] (see [findSecondLast]).
     * @return the result of [init]
     */
    public fun <T> findSecondLast(cssSelector: String = "", init: DocElement.() -> T): T =
        findSecondLast(cssSelector).init()
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy