// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download or modify it as often as you want.
package ai.platon.pulsar.skeleton.crawl.fetch.driver
import ai.platon.pulsar.browser.driver.chrome.NetworkResourceResponse
import ai.platon.pulsar.common.urls.Hyperlink
import ai.platon.pulsar.common.urls.UrlUtils
import ai.platon.pulsar.common.warnForClose
import ai.platon.pulsar.dom.nodes.GeoAnchor
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import com.fasterxml.jackson.module.kotlin.readValue
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.withContext
import org.jsoup.Connection
import org.jsoup.Jsoup
import java.io.IOException
import java.time.Duration
import java.time.Instant
import java.util.*
import java.util.concurrent.atomic.AtomicBoolean
import java.util.concurrent.atomic.AtomicInteger
import java.util.concurrent.atomic.AtomicReference
import kotlin.random.Random
import kotlin.random.nextInt
abstract class AbstractWebDriver(
override val browser: AbstractBrowser,
override val id: Int = ID_SUPPLIER.incrementAndGet()
): Comparable, AbstractJvmWebDriver(), WebDriver, JvmWebDriver {
companion object {
    /**
     * Shared counter behind the default value of the `id` constructor parameter.
     * It starts at zero and is advanced with `incrementAndGet()`, so the first default id is 1.
     */
    private val ID_SUPPLIER: AtomicInteger = AtomicInteger(0)
}
/**
 * The lifecycle state of a driver.
 */
enum class State {
    /** The driver is initialized. */
    INIT,

    /** The driver is ready to work. */
    READY,

    /** The driver is working. */
    WORKING,

    /** The driver is retired and should be quit as soon as possible. */
    RETIRED,

    /** The driver is quit. */
    QUIT;

    /** `true` only when this state is [INIT]. */
    val isInit: Boolean get() = this === INIT

    /** `true` only when this state is [READY]. */
    val isReady: Boolean get() = this === READY

    /** `true` only when this state is [WORKING]. */
    val isWorking: Boolean get() = this === WORKING

    /** `true` only when this state is [QUIT]. */
    val isQuit: Boolean get() = this === QUIT

    /** `true` only when this state is [RETIRED]. */
    val isRetired: Boolean get() = this === RETIRED
}
/**
 * The current lifecycle state of the driver; a new driver starts in [State.INIT].
 *
 * * [State.INIT]: The driver is initialized.
 * * [State.READY]: The driver is ready to work.
 * * [State.WORKING]: The driver is working.
 * * [State.RETIRED]: The driver is retired and should be quit as soon as possible.
 * * [State.QUIT]: The driver is quit.
 */
private val state: AtomicReference<State> = AtomicReference(State.INIT)

/** Cancellation flag: raised by [cancel], cleared again by [free] and [startWork]. */
private val canceled: AtomicBoolean = AtomicBoolean(false)

/** Crash flag: cleared by [free] and [startWork]; the code that raises it is not in this part of the file. */
private val crashed: AtomicBoolean = AtomicBoolean(false)

// Monitor object; judging by its name it guards creation/destruction of the jsoup session — TODO confirm at the use site.
private val jsoupCreateDestroyMonitor: Any = Any()

// Jsoup connection held by this driver; starts out as null.
private var jsoupSession: Connection? = null

/** How long the driver may stay inactive before [isIdle] reports true; 10 minutes by default. */
var idleTimeout: Duration = Duration.ofMinutes(10)

/** The instant [isIdle] measures inactivity from; initialised to the moment the driver is constructed. */
var lastActiveTime: Instant = Instant.now()
/**
 * Whether the driver is idle, i.e. the time elapsed since [lastActiveTime] is strictly longer than [idleTimeout].
 */
val isIdle: Boolean
    get() = Duration.between(lastActiveTime, Instant.now()).compareTo(idleTimeout) > 0

/** Whether the driver is currently in [State.INIT]. */
val isInit: Boolean get() = state.get().isInit

/** Whether the driver is currently in [State.READY]. */
val isReady: Boolean get() = state.get().isReady

/** Whether the driver is currently in [State.WORKING]. */
val isWorking: Boolean get() = state.get().isWorking

/** Whether the driver is currently in [State.RETIRED]. */
val isRetired: Boolean get() = state.get().isRetired

/** Whether the driver is currently in [State.QUIT]. */
val isQuit: Boolean get() = state.get().isQuit

/** Whether the canceled flag is currently set. */
val isCanceled: Boolean get() = canceled.get()

/** Whether the crashed flag is currently set. */
val isCrashed: Boolean get() = crashed.get()

/** Whether the driver supports JavaScript; true unless a subclass overrides it. */
open val supportJavascript: Boolean = true

/** Whether the page source is mocked; false unless a subclass overrides it. */
open val isMockedPageSource: Boolean = false

// Plain flags that start as false; nothing in this part of the file sets them.
var isRecovered: Boolean = false
var isReused: Boolean = false
/**
* Whether the driver is recyclable; defaults to true.
*
* If a driver is recyclable, it should be closed after the task is done and added to a standby pool,
* so it can be reused for other tasks.
*
* If a driver is not recyclable, it should be kept open, not be closed, and not be used by any other tasks.
* */
var isRecyclable: Boolean = true
/**
* Whether the driver should ignore (skip) the calculation of the DOM features; defaults to false.
*
* NOTE(review): the previous comment read "If true, the DOM features should be calculated using
* __pulsar_utils__.compute()", which contradicts the property name. Presumably the features are
* calculated when this flag is false — confirm against the code that reads it.
* */
var ignoreDOMFeatures: Boolean = false
/**
 * The name of the driver: the simple name of the runtime class, a dash, and the driver [id].
 */
open val name: String get() = "${javaClass.simpleName}-$id"

/**
 * The navigate entry of the current page; starts as an entry created from an empty string.
 */
override var navigateEntry: NavigateEntry = NavigateEntry("")

/**
 * The navigate history of this driver.
 */
override val navigateHistory = NavigateHistory()

/**
 * The delay policy of the driver, generated from the browser's interact settings on first access.
 */
override val delayPolicy by lazy {
    val interactSettings = browser.browserSettings.interactSettings
    interactSettings.generateRestrictedDelayPolicy()
}

/**
 * The timeout policy of the driver, generated from the browser's interact settings on first access.
 */
override val timeoutPolicy by lazy {
    val interactSettings = browser.browserSettings.interactSettings
    interactSettings.generateRestrictedTimeoutPolicy()
}
/**
* The frames of the current page; starts as an empty mutable list.
*
* NOTE(review): `MutableList` has no type argument here — it looks lost in extraction; restore it from upstream.
* */
override val frames: MutableList = mutableListOf()
/**
* The opener of the current page, or null (the initial value) when there is none.
* */
override var opener: WebDriver? = null
/**
* The outgoing opened pages from the current page; starts as an empty mutable set.
*
* NOTE(review): `MutableSet` has no type argument here — it looks lost in extraction; restore it from upstream.
* */
override val outgoingPages: MutableSet = mutableSetOf()
/**
* The associated data; starts as an empty mutable map.
*
* NOTE(review): `MutableMap` has no type arguments here — they look lost in extraction; restore them from upstream.
* */
override val data: MutableMap = mutableMapOf()
/**
 * Marks the driver as free, so it can be used to fetch a new page.
 *
 * Clears the canceled and crashed flags and then moves the driver to [State.READY],
 * whatever state it was in before.
 */
fun free() {
    canceled.set(false)
    crashed.set(false)

    // The driver is expected to be INIT or WORKING at this point, but an unexpected state is tolerated on
    // purpose: throwing here (e.g. an IllegalWebDriverStateException) is a bad idea because it leads to
    // inconsistency within the ConcurrentStatefulDriverPool.
    state.set(State.READY)
}
/**
 * Marks the driver as working, so it cannot be used for any other task.
 *
 * Clears the canceled and crashed flags and then moves the driver to [State.WORKING],
 * whatever state it was in before.
 */
fun startWork() {
    canceled.set(false)
    crashed.set(false)

    // The driver is expected to be INIT or READY at this point, but an unexpected state is tolerated on
    // purpose: throwing here (e.g. an IllegalWebDriverStateException) is a bad idea because it leads to
    // inconsistency within the ConcurrentStatefulDriverPool.
    state.set(State.WORKING)
}
/**
 * Marks the driver as retired: it must not be used to fetch any page
 * and should be quit as soon as possible.
 */
fun retire() {
    state.set(State.RETIRED)
}
/**
 * Marks the driver as canceled: the fetch process should return as soon as possible
 * and the fetch result should be dropped.
 */
fun cancel() = canceled.set(true)
/**
 * Returns this driver itself, viewed as a [JvmWebDriver].
 */
override fun jvm(): JvmWebDriver {
    return this
}
/**
* The main request headers, read from the current [navigateEntry] on every access.
*
* NOTE(review): `Map` has no type arguments here — they look lost in extraction; restore them from upstream.
* */
val mainRequestHeaders: Map get() = navigateEntry.mainRequestHeaders
val mainRequestCookies: List