All downloads are free. Search and download functionality uses the official Maven repository.

ai.platon.pulsar.examples._7_RPA.kt Maven / Gradle / Ivy

The newest version!
package ai.platon.pulsar.examples

import ai.platon.pulsar.skeleton.common.options.LoadOptions
import ai.platon.pulsar.skeleton.context.PulsarContexts.createSession
import ai.platon.pulsar.skeleton.crawl.fetch.driver.WebDriver
import ai.platon.pulsar.skeleton.session.PulsarSession
import org.slf4j.LoggerFactory

/**
 * Example crawler that performs simple RPA-style interactions on a page.
 *
 * The load options built by [options] register browse-event handlers: once the document is
 * ready, every selector in [fieldSelectors] is clicked (if present) and a short fragment of its
 * text is typed into the search box.
 *
 * @param session the session used to create load options; a new session is created by default.
 */
internal class RPACrawler(private val session: PulsarSession = createSession()) {
    private val logger = LoggerFactory.getLogger(this.javaClass)

    /** CSS selector of the search input that [interact] types into. */
    private val searchBoxSelector = ".form input[type=text]"

    /** CSS selector of the search submit button; not referenced by any code in this class. */
    private val searchBoxSubmit = ".form input[type=submit]"

    /** Maps a field name to the CSS selector of the element it is read from. */
    val fieldSelectors = mutableMapOf(
        "title" to "#productTitle",
        "reviews" to "#acrCustomerReviewText",
        "prodDetails" to "#prodDetails"
    )

    /**
     * Builds load options from [args] and attaches the interaction handlers to them.
     *
     * @param args load arguments passed to the session's option factory.
     * @return the options with the browse-event handlers appended.
     */
    fun options(args: String): LoadOptions {
        val options = session.options(args)

        val be = options.event.browseEventHandlers

        // Interact with every configured field as soon as the document is ready.
        be.onDocumentActuallyReady.addLast { _, driver ->
            fieldSelectors.values.forEach { interact(it, driver) }
        }

        be.onDidInteract.addLast { _, _ ->
            logger.info("Did the interaction")
        }

        return options
    }

    /**
     * Clicks the element matching [selector], if it exists, and types a fragment of its text
     * (characters at index 1 to 3) into the search box.
     *
     * @param selector CSS selector of the element to interact with.
     * @param driver the driver used to query and manipulate the page.
     */
    private suspend fun interact(selector: String, driver: WebDriver) {
        if (!driver.exists(selector)) {
            return
        }

        driver.click(selector)
        val text = driver.selectFirstTextOrNull(selector) ?: "no-text"
        // drop(1).take(3) equals substring(1, 4) for texts of four or more characters, but
        // yields a shorter (possibly empty) fragment instead of throwing when the text is shorter.
        val fragment = text.drop(1).take(3)
        driver.type(searchBoxSelector, fragment)
        logger.info("{} clicked", selector)
    }
}

/**
 * Entry point of the RPA example.
 *
 * Creates a session and an [RPACrawler] bound to it, scrapes one product page using the crawler's
 * load options and field selectors, and prints the result.
 */
fun main() {
    val session = createSession()
    val crawler = RPACrawler(session)

    val loadOptions = crawler.options("-refresh -parse")
    val productUrl = "https://www.amazon.com/dp/B0C1H26C46"

    println(session.scrape(productUrl, loadOptions, crawler.fieldSelectors))
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy