ai.platon.pulsar.skeleton.crawl.common.FetchEntry.kt Maven / Gradle / Ivy
package ai.platon.pulsar.skeleton.crawl.common
import ai.platon.pulsar.common.PulsarParams
import ai.platon.pulsar.common.config.VolatileConfig
import ai.platon.pulsar.persist.WebPage
import ai.platon.pulsar.skeleton.common.options.LoadOptions
import ai.platon.pulsar.skeleton.common.urls.NormURL
class FetchEntry(val page: WebPage, val options: LoadOptions) {
constructor(url: String, options: LoadOptions, href: String? = null) :
this(createPageShell(url, options, href), options)
companion object {
fun createPageShell(normURL: NormURL): WebPage {
return createPageShell(normURL.spec, normURL.options, normURL.hrefSpec, normURL.referrer)
}
fun createPageShell(url: String, conf: VolatileConfig, href: String? = null, referrer: String? = null): WebPage {
val page = WebPage.newWebPage(url, conf, href)
initWebPage(page, null, href, referrer)
return page
}
fun createPageShell(url: String, options: LoadOptions, href: String? = null, referrer: String? = null): WebPage {
val page = WebPage.newWebPage(url, options.conf, href)
initWebPage(page, options, href, referrer)
return page
}
fun initWebPage(page: WebPage, options: LoadOptions?, href: String? = null, referrer: String? = null) {
page.href = href
if (referrer != null) {
page.referrer = referrer
}
if (options != null) {
page.args = options.toString()
page.fetchMode = options.fetchMode
page.conf = options.conf
page.maxRetries = options.nMaxRetry
page.isResource = options.isResource
// since LoadOptions is not visible by WebPage, we use an unsafe method to pass the load options
page.setVar(PulsarParams.VAR_LOAD_OPTIONS, options)
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy