All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.platon.pulsar.protocol.browser.emulator.context.BasicPrivacyContextManager.kt Maven / Gradle / Ivy

There is a newer version: 2.0.2
Show newest version
package ai.platon.pulsar.protocol.browser.emulator.context

import ai.platon.pulsar.common.browser.Fingerprint
import ai.platon.pulsar.common.config.CapabilityTypes
import ai.platon.pulsar.common.config.ImmutableConfig
import ai.platon.pulsar.common.proxy.ProxyPoolManager
import ai.platon.pulsar.skeleton.crawl.CoreMetrics
import ai.platon.pulsar.skeleton.crawl.fetch.FetchResult
import ai.platon.pulsar.skeleton.crawl.fetch.FetchTask
import ai.platon.pulsar.skeleton.crawl.fetch.driver.WebDriver
import ai.platon.pulsar.skeleton.crawl.fetch.privacy.PrivacyAgent
import ai.platon.pulsar.skeleton.crawl.fetch.privacy.PrivacyContext
import ai.platon.pulsar.skeleton.crawl.fetch.privacy.PrivacyManager
import ai.platon.pulsar.persist.WebPage
import ai.platon.pulsar.protocol.browser.driver.WebDriverPoolManager
import com.google.common.collect.Iterables
import org.slf4j.LoggerFactory

class BasicPrivacyContextManager(
    val driverPoolManager: WebDriverPoolManager,
    val proxyPoolManager: ProxyPoolManager? = null,
    val coreMetrics: CoreMetrics? = null,
    config: ImmutableConfig
): PrivacyManager(config) {
    private val logger = LoggerFactory.getLogger(BasicPrivacyContextManager::class.java)
    private val numPrivacyContexts: Int get() = conf.getInt(CapabilityTypes.PRIVACY_CONTEXT_NUMBER, 2)

    private val iterator = Iterables.cycle(temporaryContexts.values).iterator()

    constructor(driverPoolManager: WebDriverPoolManager, config: ImmutableConfig)
            : this(driverPoolManager, null, null, config)

    override suspend fun run(task: FetchTask, fetchFun: suspend (FetchTask, WebDriver) -> FetchResult): FetchResult {
        return run0(computeNextContext(task.page, task.fingerprint, task), task, fetchFun)
    }

    override fun createUnmanagedContext(privacyAgent: PrivacyAgent): BrowserPrivacyContext {
        val context = BrowserPrivacyContext(proxyPoolManager, driverPoolManager, coreMetrics, conf, privacyAgent)
        logger.info("Privacy context is created #{}", context.display)
        return context
    }
    override fun computeNextContext(fingerprint: Fingerprint): PrivacyContext {
        val task = FetchTask.create(WebPage.NIL, fingerprint)
        return computeNextContext(task.page, fingerprint, task)
    }
    override fun computeNextContext(page: WebPage, fingerprint: Fingerprint, task: FetchTask): PrivacyContext {
        val context = computeIfNecessary(page, fingerprint, task)
        return context.takeIf { it.isActive } ?: run { close(context); computeIfAbsent(privacyAgentGenerator(fingerprint)) }
    }
    override fun computeIfNecessary(fingerprint: Fingerprint): PrivacyContext {
        val task = FetchTask.create(WebPage.NIL, fingerprint)
        return computeIfNecessary(task.page, fingerprint, task)
    }

    override fun computeIfNecessary(page: WebPage, fingerprint: Fingerprint, task: FetchTask): PrivacyContext {
        synchronized(contextLifeCycleMonitor) {
            if (temporaryContexts.size < numPrivacyContexts) {
                val generator = privacyAgentGeneratorFactory.generator
                computeIfAbsent(generator(fingerprint))
            }

            return iterator.next()
        }
    }

    override fun computeIfAbsent(privacyAgent: PrivacyAgent) =
        temporaryContexts.computeIfAbsent(privacyAgent) { createUnmanagedContext(it) }

    private suspend fun run0(
        privacyContext: PrivacyContext, task: FetchTask, fetchFun: suspend (FetchTask, WebDriver) -> FetchResult
    ): FetchResult {
        return takeIf { isActive } ?.run1(privacyContext, task, fetchFun) ?:
        FetchResult.crawlRetry(task, "Inactive privacy context")
    }

    private suspend fun run1(privacyContext: PrivacyContext, task: FetchTask,
                             fetchFun: suspend (FetchTask, WebDriver) -> FetchResult): FetchResult {
        if (privacyContext !is BrowserPrivacyContext) {
            throw ClassCastException("The privacy context should be a BrowserPrivacyContext")
        }

        return try {
            task.markReady()
            privacyContext.run(task) { _, driver ->
                task.startWork()
                fetchFun(task, driver)
            }
        } finally {
            task.done()
            task.page.variables["privacyContext"] = formatPrivacyContext(privacyContext)
        }
    }

    private fun formatPrivacyContext(privacyContext: PrivacyContext): String {
        return String.format("%s(%.2f)", privacyContext.privacyAgent.display, privacyContext.meterSuccesses.fiveMinuteRate)
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy