All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.platon.pulsar.skeleton.crawl.common.url.ParsableHyperlink.kt Maven / Gradle / Ivy

There is a newer version: 2.1.0
Show newest version
package ai.platon.pulsar.skeleton.crawl.common.url

import ai.platon.pulsar.common.urls.Hyperlink
import ai.platon.pulsar.dom.FeaturedDocument
import ai.platon.pulsar.persist.WebPage
import ai.platon.pulsar.skeleton.crawl.PageEventHandlers
import ai.platon.pulsar.skeleton.crawl.event.impl.DefaultPageEventHandlers
import java.util.function.BiConsumer

/**
 * A hyperlink that contains a `onParse` event handler. The `onParse` event handler
 * will be called when the page is parsed.
 * */
open class ParsableHyperlink(
    /**
     * The url of this hyperlink
     * */
    url: String,
    /**
     * A event handler that will be called when the page is parsed.
     * */
    val onParse: (WebPage, FeaturedDocument) -> Any?
): Hyperlink(url, args = "-parse"), ListenableUrl {

    /**
     * Java compatible constructor
     * */
    constructor(url: String, onParse: BiConsumer):
            this(url, { page, document -> onParse.accept(page, document) })

    /**
     * The PageEvent handlers of this hyperlink.
     * */
    override var event: PageEventHandlers = DefaultPageEventHandlers().also {
        it.loadEventHandlers.onHTMLDocumentParsed.addLast { page, document ->
            onParse(page, document)
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy