All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.platon.pulsar.examples.sites.spa.wemix.AjaxCrawler.kt Maven / Gradle / Ivy

The newest version!
package ai.platon.pulsar.examples.sites.spa.wemix

import ai.platon.pulsar.common.AppPaths
import ai.platon.pulsar.common.getLogger
import ai.platon.pulsar.common.stringify
import ai.platon.pulsar.skeleton.context.PulsarContexts
import ai.platon.pulsar.skeleton.crawl.event.WebPageWebDriverEventHandler
import ai.platon.pulsar.skeleton.crawl.fetch.driver.WebDriver
import ai.platon.pulsar.persist.WebPage
import ai.platon.pulsar.skeleton.session.PulsarSession
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.StandardOpenOption

/**
 * Browser event handler that pages through the Wemix account-transaction API and saves each response body.
 *
 * When invoked, it requests pages `initPageNumber + 1` through `initPageNumber + 100` (page size 20) via the
 * session returned by [WebDriver.newJsoupSession] and writes every response body to its own file under
 * [exportDirectory].
 *
 * @property initPageNumber offset added to the loop counter to obtain the requested page number;
 *   it is also embedded in the exported file names.
 * @property exportDirectory directory that receives the exported files.
 */
private class AjaxFetchHandler(
    val initPageNumber: Int,
    val exportDirectory: Path
): WebPageWebDriverEventHandler() {
    private val logger = getLogger(this)

    /**
     * Issues the 100 API requests and exports each response.
     *
     * A failure on one page is logged as a warning and does not stop the remaining pages.
     * Coroutine cancellation is the exception: it is rethrown so the loop stops promptly.
     *
     * @param page the page being processed (not used by this handler).
     * @param driver the web driver used to open the request session.
     * @return always `null`.
     */
    override suspend fun invoke(page: WebPage, driver: WebDriver): Any? {
        val session = driver.newJsoupSession()

        val u = "https://scopi.wemixnetwork.com/api/v1/chain/1003/account/0xcb7615cb4322cddc518f670b4da042dbefc69500/tx"
        for (i in 1..100) {
            try {
                val pageNo = initPageNumber + i
                val json = session.newRequest()
                    .url(u)
                    .data("page", "$pageNo")
                    .data("pagesize", "20")
                    .execute()
                    .body()

                println(json)
                export(pageNo, json, isJson = true)
            } catch (e: kotlin.coroutines.cancellation.CancellationException) {
                // Never swallow cancellation inside a suspend function: propagate it to the caller.
                throw e
            } catch (e: Exception) {
                // Best effort: report the failed iteration and continue with the next page.
                logger.warn(e.stringify("$i.\t"))
            }
        }

        return null
    }

    /**
     * Writes [text] to a file named `transaction.b<initPageNumber>.t<millis>.p<i><postfix>` in [exportDirectory].
     *
     * @param i page number embedded in the file name.
     * @param text content to write.
     * @param isJson selects the `.json` extension when true, `.html` otherwise.
     */
    private fun export(i: Int, text: String, isJson: Boolean = false) {
        val postfix = if (isJson) ".json" else ".html"
        val timestamp = System.currentTimeMillis()
        val fileName = "transaction.b${initPageNumber}.t$timestamp.p$i$postfix"
        val file = exportDirectory.resolve(fileName)

        // Make sure the target directory exists, then let the write itself create the file if needed.
        // CREATE + APPEND replaces the former exists-check followed by createFile.
        file.parent?.let { Files.createDirectories(it) }
        Files.writeString(file, text, StandardOpenOption.CREATE, StandardOpenOption.APPEND)
    }
}

/**
 * Loads the Wemix token page in a browser session and, while the page is being processed,
 * lets an [AjaxFetchHandler] download one batch of API result pages.
 *
 * @property initPageNumber page-number offset of the batch handled by this crawler.
 * @property session the session used to build load options and load the page.
 */
private class AjaxCrawler(
    var initPageNumber: Int = 1,
    val session: PulsarSession
) {
    private val logger = getLogger(this)

    private val url = "https://scope.wemixnetwork.com/1003/token/0xcb7615cb4322cddc518f670b4da042dbefc69500"

    /** Output directory of this batch: `<report dir>/wemix/b<initPageNumber>`. */
    val reportDirectory = AppPaths.REPORT_DIR.resolve("wemix").resolve("b$initPageNumber")

    /**
     * Runs the crawl for this batch.
     *
     * Does nothing when [reportDirectory] already exists. Otherwise it registers an [AjaxFetchHandler]
     * on the `onWillComputeFeature` browse event and loads the page with the `-refresh` option.
     * Any exception raised by the load is logged as a warning instead of being propagated.
     */
    fun crawl() {
        val batchAlreadyExported = Files.exists(reportDirectory)
        if (!batchAlreadyExported) {
            val fetchHandler = AjaxFetchHandler(initPageNumber, reportDirectory)
            val loadOptions = session.options("-refresh")
            loadOptions.event.browseEventHandlers.onWillComputeFeature.addLast(fetchHandler)

            try {
                session.load(url, loadOptions)
            } catch (e: Exception) {
                logger.warn("Unexpected exception", e)
            }
        }
    }
}

/**
 * Entry point: creates a single session and runs 80 crawlers one after another,
 * with initial page numbers 100, 200, ..., 8000.
 */
fun main() {
    val session = PulsarContexts.createSession()

    for (batch in 1..80) {
        AjaxCrawler(initPageNumber = 100 * batch, session = session).crawl()
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy