All downloads are free. The search and download functionality uses the official Maven repository.

ai.platon.pulsar.protocol.browser.driver.cdt.ChromeDevtoolsDriver.kt Maven / Gradle / Ivy

The newest version!
package ai.platon.pulsar.protocol.browser.driver.cdt

import ai.platon.pulsar.browser.common.BrowserSettings
import ai.platon.pulsar.browser.driver.chrome.*
import ai.platon.pulsar.browser.driver.chrome.impl.ChromeImpl
import ai.platon.pulsar.browser.driver.chrome.util.ChromeDriverException
import ai.platon.pulsar.common.*
import ai.platon.pulsar.common.browser.BrowserType
import ai.platon.pulsar.common.config.AppConstants
import ai.platon.pulsar.common.math.geometric.OffsetD
import ai.platon.pulsar.common.math.geometric.PointD
import ai.platon.pulsar.common.math.geometric.RectD
import ai.platon.pulsar.skeleton.common.message.MiscMessageWriter
import ai.platon.pulsar.common.urls.UrlUtils
import ai.platon.pulsar.skeleton.crawl.common.URLUtil
import ai.platon.pulsar.skeleton.crawl.fetch.driver.*
import ai.platon.pulsar.protocol.browser.driver.cdt.detail.*
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import com.fasterxml.jackson.module.kotlin.readValue
import com.github.kklisura.cdt.protocol.v2023.events.network.RequestWillBeSent
import com.github.kklisura.cdt.protocol.v2023.events.network.ResponseReceived
import com.github.kklisura.cdt.protocol.v2023.events.page.WindowOpen
import com.github.kklisura.cdt.protocol.v2023.types.fetch.RequestPattern
import com.github.kklisura.cdt.protocol.v2023.types.network.Cookie
import com.github.kklisura.cdt.protocol.v2023.types.network.ErrorReason
import com.github.kklisura.cdt.protocol.v2023.types.network.LoadNetworkResourceOptions
import com.github.kklisura.cdt.protocol.v2023.types.network.ResourceType
import com.github.kklisura.cdt.protocol.v2023.types.runtime.Evaluate
import kotlinx.coroutines.*
import kotlinx.coroutines.channels.Channel
import org.apache.commons.lang3.SystemUtils
import org.apache.http.client.utils.URIBuilder
import org.jetbrains.kotlin.utils.addToStdlib.ifFalse
import java.nio.file.Files
import java.text.MessageFormat
import java.time.Duration
import java.time.Instant
import java.util.*
import java.util.concurrent.atomic.AtomicBoolean
import kotlin.random.Random

class ChromeDevtoolsDriver(
    val chromeTab: ChromeTab,
    val devTools: RemoteDevTools,
    private val browserSettings: BrowserSettings,
    override val browser: ChromeDevtoolsBrowser,
) : AbstractWebDriver(browser) {

    // Logger obtained for this driver instance.
    private val logger = getLogger(this)

    // Yields the logger only while trace level is enabled, so `tracer?.trace(...)` is skipped otherwise.
    private val tracer get() = logger.takeIf { it.isTraceEnabled }

    // This driver always reports the PULSAR_CHROME browser type.
    override val browserType: BrowserType = BrowserType.PULSAR_CHROME

    /**
     * The probability to block a resource request whose url matches one of [probabilisticBlockedURLs].
     * The probability is expected to be between 0 and 1 (inclusive).
     * */
    val resourceBlockProbability get() = browserSettings.resourceBlockProbability

    // Mutable backing lists; only the read-only views below are exposed.
    // The element type is String: entries are compared with request urls and compiled with `toRegex()`.
    private val _blockedURLs = mutableListOf<String>()
    private val _probabilityBlockedURLs = mutableListOf<String>()

    /** URL patterns handed to the network domain as blocked URLs before navigation. */
    val blockedURLs: List<String> get() = _blockedURLs

    /** Regular expressions; a matching request is blocked with [resourceBlockProbability]. */
    val probabilisticBlockedURLs: List<String> get() = _probabilityBlockedURLs

    /**
     * The url this driver most recently navigated to or observed; it starts as the tab's url,
     * or an empty string when the tab has none.
     *
     * TODO: distinguish the navigateUrl, currentUrl, chromeTab.url, mainFrameAPI.url, dom.document.documentURL, dom.document.baseURL
     * */
    private var navigateUrl = chromeTab.url ?: ""

    // DevTools domain accessors. Each one yields null once the driver is no longer active,
    // so call sites use safe calls (`pageAPI?.…`) instead of checking the state themselves.
    private val browserAPI get() = devTools.browser.takeIf { isActive }
    private val pageAPI get() = devTools.page.takeIf { isActive }
    private val targetAPI get() = devTools.target.takeIf { isActive }
    private val domAPI get() = devTools.dom.takeIf { isActive }
    private val cssAPI get() = devTools.css.takeIf { isActive }
    private val inputAPI get() = devTools.input.takeIf { isActive }
    // The main frame is read from the page's frame tree on every access.
    private val mainFrameAPI get() = pageAPI?.frameTree?.frame
    private val networkAPI get() = devTools.network.takeIf { isActive }
    private val fetchAPI get() = devTools.fetch.takeIf { isActive }
    private val runtimeAPI get() = devTools.runtime.takeIf { isActive }
    private val emulationAPI get() = devTools.emulation.takeIf { isActive }

    // RPC helper created for this driver; most remote calls in this class go through it.
    private val rpc = RobustRPC(this)
    // Page-level helper built from the DevTools session and the configured confuser.
    private val page = PageHandler(devTools, browserSettings.confuser)
    // Input devices are only handed out while the driver is active.
    private val mouse get() = page.mouse.takeIf { isActive }
    private val keyboard get() = page.keyboard.takeIf { isActive }
    private val screenshot = Screenshot(page, devTools)

    // Proxy credentials; assigned during navigation when the fingerprint carries a proxy username.
    private var credentials: Credentials? = null

    // Created on first use.
    private val networkManager by lazy { NetworkManager(this, rpc) }
    private val messageWriter = MiscMessageWriter()

    private val enableStartupScript get() = browserSettings.isStartupScriptEnabled
    // Scripts registered through addInitScript(), which only accepts String.
    private val initScriptCache = mutableListOf<String>()
    // Flipped exactly once by doClose().
    private val closed = AtomicBoolean()

    // The driver is gone when it was closed or quit, the application is no longer active,
    // or the DevTools connection is not open.
    val isGone get() = closed.get() || isQuit || !AppContext.isActive || !devTools.isOpen
    // Exact negation of isGone.
    val isActive get() = !isGone

    /**
     * Expose the underlying implementation (the DevTools session), used for diagnosis purposes.
     * */
    val implementation get() = devTools

    init {
        // Apply the browser's user agent override, when one is configured and non-empty.
        browser.userAgentOverride
            ?.takeIf { it.isNotEmpty() }
            ?.let { emulationAPI?.setUserAgentOverride(it) }
    }

    /**
     * Remember [script] in the init script cache; nothing is sent to the browser here.
     * */
    override suspend fun addInitScript(script: String) {
        initScriptCache += script
    }

    /**
     * Append [urlPatterns] to the list of blocked URL patterns.
     *
     * The element type is String: the patterns are later compared with request urls.
     * */
    override suspend fun addBlockedURLs(urlPatterns: List<String>) {
        _blockedURLs.addAll(urlPatterns)
    }

    /**
     * Append [urlPatterns] to the list of probabilistically blocked URL patterns.
     *
     * The element type is String: each pattern is later compiled with `toRegex()`.
     * */
    override suspend fun addProbabilityBlockedURLs(urlPatterns: List<String>) {
        _probabilityBlockedURLs.addAll(urlPatterns)
    }

    // No-op: this driver does not apply anything from the given settings here.
    override suspend fun setTimeouts(browserSettings: BrowserSettings) {
    }

    /**
     * Record [entry] as the current navigation, announce it to the browser, enable the
     * DevTools domains and finally navigate, with or without startup scripts.
     * */
    @Throws(WebDriverException::class)
    override suspend fun navigateTo(entry: NavigateEntry) {
        navigateHistory.add(entry)
        navigateEntry = entry

        browser.emit(BrowserEvents.willNavigate, entry)

        try {
            enableAPIAgents()

            rpc.invokeDeferred("navigateTo") {
                when {
                    enableStartupScript -> navigateInvaded(entry)
                    else -> navigateNonInvaded(entry)
                }
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "navigateTo", entry.url)
        }
    }

    /**
     * Get the cookies of the current page, or an empty list if the call yields nothing.
     *
     * NOTE(review): the return type was garbled to `List>` in this copy of the file and is
     * restored to `List<Map<String, String>>` — confirm against the WebDriver interface.
     * */
    @Throws(WebDriverException::class)
    override suspend fun getCookies(): List<Map<String, String>> {
        return invokeOnPage("getCookies") { getCookies0() } ?: listOf()
    }

    /**
     * Delete the cookies named [name] through the network domain.
     * */
    override suspend fun deleteCookies(name: String) {
        invokeOnPage("deleteCookies") {
            networkAPI?.deleteCookies(name)
        }
    }

    /**
     * Delete the cookies named [name], passing the optional [url], [domain] and [path]
     * on to the network domain unchanged.
     * */
    override suspend fun deleteCookies(name: String, url: String?, domain: String?, path: String?) {
        invokeOnPage("deleteCookies") {
            networkAPI?.deleteCookies(name, url, domain, path)
        }
    }

    /**
     * Clear the browser cookies through the network domain.
     * */
    override suspend fun clearBrowserCookies() {
        invokeOnPage("clearBrowserCookies") {
            networkAPI?.clearBrowserCookies()
        }
    }

    /**
     * Read attribute [attrName] from the element resolved for [selector].
     * */
    override suspend fun selectFirstAttributeOrNull(selector: String, attrName: String): String? =
        invokeOnElement(selector, "selectFirstAttributeOrNull") { nodeId ->
            page.getAttribute(nodeId, attrName)
        }

    // Unittest failed
//    override suspend fun selectAttributeAll(selector: String, attrName: String, start: Int, limit: Int): List {
//        val name = "selectAttributeAll"
//        return invokeOnPage(name) { page.getAttributeAll(selector, attrName, start, limit) } ?: listOf()
//    }

    /**
     * Evaluate [expression] in the page and return whatever the page handler yields.
     * */
    @Throws(WebDriverException::class)
    override suspend fun evaluate(expression: String): Any? =
        invokeOnPage("evaluate") { page.evaluate(expression) }

    /**
     * Evaluate [expression] in the page and convert the detailed result with `createJsEvaluate`.
     * */
    @Throws(WebDriverException::class)
    override suspend fun evaluateDetail(expression: String): JsEvaluation? =
        invokeOnPage("evaluateDetail") {
            val detail = page.evaluateDetail(expression)
            createJsEvaluate(detail)
        }

    /**
     * Return the main frame's url, remembering it as the navigate url.
     * When the frame url cannot be obtained, the last known navigate url is returned.
     * */
    @Throws(WebDriverException::class)
    override suspend fun currentUrl(): String {
        val frameUrl = invokeOnPage("currentUrl") { mainFrameAPI?.url }
        if (frameUrl != null) {
            navigateUrl = frameUrl
        }
        return navigateUrl
    }

    /**
     * Check the element for [selector]: the predicate passes when its node id is positive.
     * */
    @Throws(WebDriverException::class)
    override suspend fun exists(selector: String) =
        predicateOnElement(selector, "exists") { nodeId -> nodeId > 0 }

    /**
     * Wait until an element matching [selector] exists, for [timeout] at most.
     * [action] runs after every check that did not find the element.
     *
     * @return the remaining time, as computed by `waitUntil`
     * */
    @Throws(WebDriverException::class)
    override suspend fun waitForSelector(selector: String, timeout: Duration, action: suspend () -> Unit): Duration {
        return waitUntil("waitForSelector", timeout) {
            val found = exists(selector)
            if (!found) {
                action()
            }
            found
        }
    }

    /**
     * Poll until `isNavigated(oldUrl)` holds, for [timeout] at most.
     *
     * @return the remaining time, as computed by `waitUntil`
     * */
    @Throws(WebDriverException::class)
    override suspend fun waitForNavigation(oldUrl: String, timeout: Duration): Duration =
        // TODO: listen to the navigation event
        waitUntil("waitForNavigation", timeout) { isNavigated(oldUrl) }

    /**
     * Experimental, event-based variant of [waitForNavigation]: waits for a document-opened
     * event whose frame url differs from [oldUrl], for [timeout] at most.
     *
     * @param oldUrl the url the page is expected to navigate away from
     * @param timeout the maximum time to wait
     * @return [timeout] minus the time spent in this method
     * */
    @Throws(WebDriverException::class)
    private suspend fun waitForNavigationExperimental(oldUrl: String, timeout: Duration): Duration {
        val startTime = Instant.now()

        try {
            // Conflated, so a signal sent before receive() is reached is kept rather than dropped.
            val channel = Channel<String>(Channel.CONFLATED)

            // NOTE(review): the handler is never unregistered — confirm whether the API offers a way to do so.
            pageAPI?.onDocumentOpened {
                // Only a document whose url differs from the old one counts as a navigation.
                if (it.frame.url != oldUrl) {
                    channel.trySend("navigated")
                }
            }

            // Give up once the timeout elapses instead of suspending forever.
            withTimeoutOrNull(timeout.toMillis()) { channel.receive() }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "waitForNavigation $timeout")
        }

        return timeout - DateTimes.elapsedTime(startTime)
    }

    /**
     * Wait until the browser can find a driver for [url], for [timeout] at most.
     *
     * @return the driver found, or null if none showed up
     * */
    @Throws(WebDriverException::class)
    override suspend fun waitForPage(url: String, timeout: Duration): WebDriver? =
        waitFor("waitForPage", timeout) { browser.findDriver(url) }

    /**
     * Wait until [predicate] holds, for [timeout] at most.
     *
     * The type label handed to the polling loop is "waitUntil"; it used to be misspelled "waitUtil".
     * NOTE(review): the label is passed on to `gap(type)` — confirm nothing relies on the old spelling.
     *
     * @return the remaining time, as computed by the polling loop
     * */
    override suspend fun waitUntil(timeout: Duration, predicate: suspend () -> Boolean) =
        waitUntil("waitUntil", timeout, predicate)

    /**
     * Poll [predicate] until it holds or [timeout] has elapsed, pausing with `gap(type)` between checks.
     *
     * @return [timeout] minus the elapsed time measured after the last pause; this is the full
     * [timeout] when the predicate holds on the first check
     * */
    private suspend fun waitUntil(type: String, timeout: Duration, predicate: suspend () -> Boolean): Duration {
        val startedAt = Instant.now()
        var waited = Duration.ZERO

        // Polling in a loop is fine here: every step is a suspending call.
        while (true) {
            if (waited >= timeout || predicate()) {
                break
            }
            gap(type)
            waited = DateTimes.elapsedTime(startedAt)
        }

        return timeout - waited
    }

    /**
     * Call [supplier] repeatedly until it returns a non-null value or [timeout] has elapsed,
     * pausing with `gap(type)` between attempts. The supplier is always called at least once.
     *
     * The type parameter `T` was missing from the declaration and is restored here.
     *
     * @return the first non-null value supplied, or null if none arrived in time
     * */
    private suspend fun <T> waitFor(type: String, timeout: Duration, supplier: suspend () -> T): T? {
        val startTime = Instant.now()
        var elapsedTime = Duration.ZERO
        var result: T? = supplier()

        // it's OK to wait using a while loop, because all the operations are coroutines
        while (elapsedTime < timeout && result == null) {
            gap(type)
            result = supplier()
            elapsedTime = DateTimes.elapsedTime(startTime)
        }

        return result
    }

    /**
     * Send [count] wheel events with the given deltas. Between consecutive events the driver
     * pauses for [delayMillis] when it is positive, or for the "mouseWheel" gap otherwise.
     * */
    @Throws(WebDriverException::class)
    override suspend fun mouseWheelDown(count: Int, deltaX: Double, deltaY: Double, delayMillis: Long) {
        try {
            rpc.invokeDeferred("mouseWheelDown", 1) {
                for (round in 0 until count) {
                    // pause between events, never before the first one
                    if (round > 0) {
                        when {
                            delayMillis > 0 -> gap(delayMillis)
                            else -> gap("mouseWheel")
                        }
                    }

                    mouse?.wheel(deltaX, deltaY)
                }
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "mouseWheelDown")
        }
    }

    /**
     * Send [count] wheel events with the given deltas; the deltas are forwarded as given, so the
     * scroll direction is determined by the caller. Between consecutive events the driver pauses
     * for [delayMillis] when it is positive, or for the "mouseWheel" gap otherwise.
     * */
    @Throws(WebDriverException::class)
    override suspend fun mouseWheelUp(count: Int, deltaX: Double, deltaY: Double, delayMillis: Long) {
        try {
            rpc.invokeDeferred("mouseWheelUp", 1) {
                for (round in 0 until count) {
                    // pause between events, never before the first one
                    if (round > 0) {
                        when {
                            delayMillis > 0 -> gap(delayMillis)
                            else -> gap("mouseWheel")
                        }
                    }

                    mouse?.wheel(deltaX, deltaY)
                }
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "mouseWheelUp")
        }
    }

    /**
     * Move the mouse to the point ([x], [y]).
     *
     * TODO: test is required
     * */
    @Throws(WebDriverException::class)
    override suspend fun moveMouseTo(x: Double, y: Double) {
        invokeOnPage("moveMouseTo") {
            mouse?.moveTo(x, y)
        }
    }

    /**
     * Scroll the element for [selector] into view, then move the mouse to its clickable point
     * shifted by ([deltaX], [deltaY]). Nothing happens when the element cannot be resolved
     * or the page/DOM domains are unavailable.
     * */
    @Throws(WebDriverException::class)
    override suspend fun moveMouseTo(selector: String, deltaX: Int, deltaY: Int) {
        try {
            val nodeId = rpc.invokeDeferred("scrollIntoViewIfNeeded") {
                page.scrollIntoViewIfNeeded(selector)
            } ?: return

            val offset = OffsetD(4.0, 4.0)
            val pageDomain = pageAPI
            val domDomain = domAPI
            if (pageDomain == null || domDomain == null) {
                return
            }

            rpc.invokeDeferred("moveMouseTo") {
                ClickableDOM(pageDomain, domDomain, nodeId, offset).clickablePoint().value?.let { anchor ->
                    mouse?.moveTo(PointD(anchor.x + deltaX, anchor.y + deltaY))
                }
                gap()
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "moveMouseTo")
        }
    }

    /**
     * Click the element matching [selector].
     *
     * The element is resolved with scrolling into view enabled and then clicked with the mouse.
     * The click itself is performed by the node-based `click` overload.
     *
     * Bear in mind that if the click triggers a navigation and there is a separate
     * `driver.waitForNavigation()` to be resolved, you may end up with a race condition that
     * yields unexpected results. The pattern given for click-and-wait is:
     * ```kotlin
     * driver.waitForNavigation()
     * driver.click(selector)
     * ```
     *
     * @param selector the selector of the element to click; the element is resolved by `invokeOnElement`
     * @param count the click count
     */
    @Throws(WebDriverException::class)
    override suspend fun click(selector: String, count: Int) {
        invokeOnElement(selector, "click", scrollIntoView = true) { click(it, count) }
    }

    /**
     * Click the node [nodeId] [count] times.
     *
     * The click point starts at the node's clickable point and is shifted horizontally
     * according to [position] ("left", "right", anything else means center) plus a small
     * random jitter. The shift is kept at least 2 units away from both horizontal edges
     * of the bounding box.
     * Nothing happens when the page/DOM domains are unavailable or no clickable point exists.
     * */
    private suspend fun click(nodeId: Int, count: Int, position: String = "center") {
        val jitterX = 4.0 + Random.nextInt(4)
        val clickOffset = OffsetD(jitterX, 4.0)
        val edgeMargin = 2.0

        val pageDomain = pageAPI
        val domDomain = domAPI
        if (pageDomain == null || domDomain == null) {
            return
        }

        val target = ClickableDOM(pageDomain, domDomain, nodeId, clickOffset)
        val point = target.clickablePoint().value ?: return
        val width = target.boundingBox()?.width ?: 0.0

        // For an input element, clicking on the right side puts the cursor at the tail of the text.
        val rawShift = when (position) {
            "left" -> 0.0 + jitterX
            "right" -> width - jitterX
            else -> width / 2 + jitterX
        }
        // Clamp the upper bound first, then the lower bound, so the lower bound wins on tiny boxes.
        val shift = rawShift.coerceAtMost(width - edgeMargin).coerceAtLeast(edgeMargin)

        point.x += shift

        mouse?.click(point.x, point.y, count, randomDelayMillis("click"))
    }

    /**
     * Focus on the element matching [selector], using the silent RPC variant.
     * */
    @Throws(WebDriverException::class)
    override suspend fun focus(selector: String) {
        // TODO: handle the minor exception: 0.	[focus] (3/5) | code: -32000, Element is not focusable
        // (returning false for a non-focusable element would be an option)
        rpc.invokeDeferredSilently("focus") {
            page.focusOnSelector(selector)
        }
    }

    /**
     * Focus on the element matching [selector], click it once and type [text] with a
     * randomized delay, followed by a "type" gap. Nothing is typed when focusing does
     * not yield a positive node id.
     * */
    @Throws(WebDriverException::class)
    override suspend fun type(selector: String, text: String) {
        try {
            rpc.invokeDeferred("type") {
                page.focusOnSelector(selector).takeIf { it > 0 }?.let { nodeId ->
                    click(nodeId, 1)
                    keyboard?.type(text, randomDelayMillis("type"))
                    gap("type")
                }
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "type")
        }
    }

    /**
     * Replace the content of the element matching [selector] with [text].
     *
     * When the element has a `value` attribute, the driver clicks on its right side and
     * deletes as many characters as the attribute is long. It then clicks the element
     * and types [text] without any delay between key presses.
     * */
    @Throws(WebDriverException::class)
    override suspend fun fill(selector: String, text: String) {
        invokeOnElement(selector, "fill", focus = true) { nodeId ->
            page.getAttribute(nodeId, "value")?.let { existing ->
                // click near the right edge so the cursor appears at the tail of the text
                click(nodeId, 1, "right")
                keyboard?.delete(existing.length, randomDelayMillis("delete"))
            }

            click(nodeId, 1)
            // unlike type(), fill uses no delay between key presses
            keyboard?.type(text, 0)
        }
    }

    /**
     * Resolve the element for [selector] with focus enabled, then press [key] with a randomized delay.
     * */
    @Throws(WebDriverException::class)
    override suspend fun press(selector: String, key: String) {
        // the node id is not needed: the key goes to whatever holds the focus
        invokeOnElement(selector, "press", focus = true) { _ ->
            keyboard?.press(key, randomDelayMillis("press"))
        }
    }

    /**
     * Scroll the element matching [selector] into view if needed, using the silent RPC variant.
     * */
    @Throws(WebDriverException::class)
    override suspend fun scrollTo(selector: String) {
        rpc.invokeDeferredSilently("scrollTo") {
            page.scrollIntoViewIfNeeded(selector)
        }
    }

    /**
     * Scroll the element for [selector] into view, then drag from its clickable point to that
     * point shifted by ([deltaX], [deltaY]). Nothing happens when the element cannot be
     * resolved or the page/DOM domains are unavailable.
     * */
    @Throws(WebDriverException::class)
    override suspend fun dragAndDrop(selector: String, deltaX: Int, deltaY: Int) {
        try {
            val nodeId = rpc.invokeDeferred("scrollIntoViewIfNeeded") {
                page.scrollIntoViewIfNeeded(selector)
            } ?: return

            val offset = OffsetD(4.0, 4.0)
            val pageDomain = pageAPI
            val domDomain = domAPI
            if (pageDomain == null || domDomain == null) {
                return
            }

            rpc.invokeDeferred("dragAndDrop") {
                ClickableDOM(pageDomain, domDomain, nodeId, offset).clickablePoint().value?.let { from ->
                    val to = PointD(from.x + deltaX, from.y + deltaY)
                    mouse?.dragAndDrop(from, to, randomDelayMillis("dragAndDrop"))
                }
                gap()
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "dragAndDrop")
        }
    }

    /**
     * Get the outer HTML as reported by the DOM domain.
     * */
    @Throws(WebDriverException::class)
    override suspend fun outerHTML(): String? =
        invokeOnPage("outerHTML") { domAPI?.outerHTML }

    /**
     * Get the outer HTML of the element matching [selector].
     * */
    @Throws(WebDriverException::class)
    override suspend fun outerHTML(selector: String): String? =
        invokeOnElement(selector, "outerHTML") { domAPI?.getOuterHTML(it, null, null) }

    /**
     * Scroll the element matching [selector] into view and compute its clickable point.
     *
     * @return the point, or null when it cannot be determined
     * */
    @Throws(WebDriverException::class)
    override suspend fun clickablePoint(selector: String): PointD? {
        return try {
            rpc.invokeDeferred("clickablePoint") {
                val nodeId = page.scrollIntoViewIfNeeded(selector)
                ClickableDOM.create(pageAPI, domAPI, nodeId)?.clickablePoint()?.value
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "clickablePoint")
            null
        }
    }

    /**
     * Scroll the element matching [selector] into view and compute its bounding box.
     *
     * @return the box, or null when it cannot be determined
     * */
    @Throws(WebDriverException::class)
    override suspend fun boundingBox(selector: String): RectD? {
        return try {
            rpc.invokeDeferred("boundingBox") {
                val nodeId = page.scrollIntoViewIfNeeded(selector)
                ClickableDOM.create(pageAPI, domAPI, nodeId)?.boundingBox()
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "boundingBox")
            null
        }
    }

    /**
     * Stop loading the page, then capture a screenshot without restricting it to an
     * element or a rectangle.
     *
     * @return the screenshot as returned by the screenshot helper, or null on a Chrome driver error
     */
    @Throws(WebDriverException::class)
    override suspend fun captureScreenshot(): String? {
        try {
            // Force the page to stop all navigations and pending resource fetches.
            rpc.invokeDeferred("stopLoading") { pageAPI?.stopLoading() }
            return rpc.invokeDeferred("captureScreenshot") { screenshot.captureScreenshot() }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "captureScreenshot")
        }

        return null
    }
    
    /**
     * Scroll the element matching [selector] into view if needed, stop loading the page,
     * and then capture a screenshot of the element.
     *
     * The scroll step now goes through `rpc.invokeDeferred`, like the same step in
     * `moveMouseTo` and `dragAndDrop`; it used to call the page handler directly and kept
     * an unused node id.
     *
     * @param selector the selector of the element to capture
     * @return the screenshot as returned by the screenshot helper, or null when the element
     * cannot be scrolled into view or a Chrome driver error occurs
     */
    @Throws(WebDriverException::class)
    override suspend fun captureScreenshot(selector: String): String? {
        return try {
            // Without a node to scroll to there is nothing to capture.
            rpc.invokeDeferred("scrollIntoViewIfNeeded") {
                page.scrollIntoViewIfNeeded(selector)
            } ?: return null
            // Force the page to stop all navigations and pending resource fetches.
            rpc.invokeDeferred("stopLoading") { pageAPI?.stopLoading() }
            rpc.invokeDeferred("captureScreenshot") { screenshot.captureScreenshot(selector) }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "captureScreenshot")
            null
        }
    }

    /**
     * Stop loading the page, then capture a screenshot of the area [rect].
     *
     * @return the screenshot as returned by the screenshot helper, or null on a Chrome driver error
     * */
    @Throws(WebDriverException::class)
    override suspend fun captureScreenshot(rect: RectD): String? {
        try {
            // Force the page to stop all navigations and pending resource fetches.
            rpc.invokeDeferred("stopLoading") { pageAPI?.stopLoading() }
            return rpc.invokeDeferred("captureScreenshot") { screenshot.captureScreenshot(rect) }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "captureScreenshot")
        }

        return null
    }

    /**
     * Check whether the driver can still be used.
     *
     * Returns false (it does not throw) when the driver is inactive or canceled.
     * Otherwise, a non-blank [action] refreshes the last active time and the navigate entry.
     *
     * @param action the name of the action about to run; blank means "just check"
     * @return whether the driver is active at the end of the check
     * */
    internal fun checkState(action: String = ""): Boolean {
        // An inactive or canceled driver fails the check; throwing here was considered but is not done.
        if (!isActive || isCanceled) {
            return false
        }

        if (action.isNotBlank()) {
            lastActiveTime = Instant.now()
            navigateEntry.refresh(action)
        }

        // re-read the state: it may have changed in the meantime
        return isActive
    }

    /**
     * Get the outer HTML of the document node.
     * */
    @Throws(WebDriverException::class)
    override suspend fun pageSource(): String? =
        invokeOnPage("pageSource") {
            domAPI?.getOuterHTML(domAPI?.document?.nodeId, null, null)
        }

    /**
     * Bring the page to the front, using the silent RPC variant.
     * */
    override suspend fun bringToFront() {
        rpc.invokeDeferredSilently("bringToFront") { pageAPI?.bringToFront() }
    }

    /**
     * Delegate to the DevTools session's `awaitTermination()`.
     * */
    override fun awaitTermination() {
        devTools.awaitTermination()
    }

    /**
     * Load the resource at [url] through the network domain, in the context of the main frame,
     * with the cache not disabled and credentials not included.
     *
     * @return the converted response, or an empty [NetworkResourceResponse] when nothing was loaded
     * */
    override suspend fun loadResource(url: String): NetworkResourceResponse {
        val options = LoadNetworkResourceOptions()
        options.disableCache = false
        options.includeCredentials = false

        val frameId = pageAPI?.frameTree?.frame?.id
        val response = rpc.invokeDeferred("loadNetworkResource") {
            networkAPI?.loadNetworkResource(frameId, url, options)?.let { NetworkResourceResponse.from(it) }
        }

        return response ?: NetworkResourceResponse()
    }

    /**
     * Close the tab held by this driver: the browser is asked to destroy the driver first,
     * then the driver itself is closed.
     * */
    override fun close() {
        // NOTE: the driver state is not verified here; a check for READY/WORKING states is currently disabled.
        browser.destroyDriver(this)
        doClose()
    }

    /**
     * Close the driver itself. The superclass close runs on every call, while the DevTools
     * session is closed on the first call only. A failure to close the session is reported,
     * not thrown.
     * */
    fun doClose() {
        super.close()

        val firstClose = closed.compareAndSet(false, true)
        if (!firstClose) {
            return
        }

        devTools.runCatching { close() }.onFailure { warnForClose(this, it) }
    }

    /**
     * Pause the page by asking it to stop loading.
     * */
    @Throws(WebDriverException::class)
    override suspend fun pause() {
        invokeOnPage("pause") {
            pageAPI?.stopLoading()
        }
    }

    /**
     * Stop the current navigation.
     *
     * The navigate entry is always marked as stopped. When the driver is still active,
     * redirects are handled first; then the page either stops loading (GUI mode) or is
     * navigated to the blank page.
     *
     * A Chrome driver error is reported under the action name "stop", matching this
     * method; it used to be reported as "terminate".
     * */
    @Throws(WebDriverException::class)
    override suspend fun stop() {
        navigateEntry.stopped = true
        if (!isActive) {
            return
        }

        try {
            handleRedirect()

            if (browser.isGUI) {
                // in gui mode, just stop the loading, so we can diagnose
                pageAPI?.stopLoading()
            } else {
                // go to about:blank, so the browser stops the previous page and releases all resources
                navigateTo(ChromeImpl.ABOUT_BLANK_PAGE)
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, "stop")
        }
    }

    /** A short label for this driver: "Driver#" followed by its id. */
    override fun toString(): String = "Driver#${id}"

    /**
     * Enable the DevTools domains used by this driver.
     *
     * The page, DOM, runtime, network and CSS domains are always enabled. The fetch domain is
     * enabled when resources may be blocked probabilistically, and enabled with a catch-all
     * request pattern when the fingerprint carries a proxy username.
     * */
    fun enableAPIAgents() {
        pageAPI?.enable()
        domAPI?.enable()
        runtimeAPI?.enable()
        networkAPI?.enable()
        cssAPI?.enable()

        val mayBlockResources = resourceBlockProbability > 1e-6
        if (mayBlockResources) {
            fetchAPI?.enable()
        }

        val usesProxyAuth = !browser.id.fingerprint.proxyUsername.isNullOrBlank()
        if (usesProxyAuth) {
            // a default RequestPattern allows all url patterns
            // NOTE(review): the `true` flag presumably turns on auth request handling — confirm against the fetch API
            fetchAPI?.enable(listOf(RequestPattern()), true)
        }
    }

    /**
     * Navigate to the page and inject scripts.
     *
     * Before navigating, this method installs the startup scripts, applies the blocked URL
     * list, registers the network and page event handlers for [entry] and sets up proxy
     * authentication when the fingerprint carries a proxy username.
     *
     * NOTE(review): the event handlers are registered on every call; whether earlier
     * registrations are replaced or accumulate cannot be told from here — verify.
     * */
    private fun navigateInvaded(entry: NavigateEntry) {
        val url = entry.url

        addScriptToEvaluateOnNewDocument()

        if (blockedURLs.isNotEmpty()) {
            // Blocks URLs from loading.
            networkAPI?.setBlockedURLs(blockedURLs)
        }

        // Forward network events to this driver, bound to the entry being navigated.
        networkManager.on(NetworkEvents.RequestWillBeSent) { event: RequestWillBeSent ->
            onRequestWillBeSent(entry, event)
        }
        networkManager.on(NetworkEvents.ResponseReceived) { event: ResponseReceived ->
            onResponseReceived(entry, event)
        }

        // Capture the cookies at the time the document is opened.
        pageAPI?.onDocumentOpened { entry.mainRequestCookies = getCookies0() }
        // TODO: not working
        pageAPI?.onWindowOpen { onWindowOpen(it) }
        // pageAPI?.onFrameAttached {  }
//        pageAPI?.onDomContentEventFired {  }

        // Authenticate against the proxy when the fingerprint provides a username.
        val proxyUsername = browser.id.fingerprint.proxyUsername
        if (!proxyUsername.isNullOrBlank()) {
            credentials = Credentials(proxyUsername, browser.id.fingerprint.proxyPassword)
            credentials?.let { networkManager.authenticate(it) }
        }

        navigateUrl = url
        // TODO: This is a temporary solution to serve local file, for example, file:///tmp/example.html
        if (AppConstants.LOCAL_FILE_SERVE_PREFIX in url) {
            openLocalFile(url)
        } else {
            page.navigate(url, referrer = navigateEntry.pageReferrer)
        }
    }

    /**
     * Navigate to the entry's url directly through the page domain, without any of the
     * script injection or handler setup done by the invaded variant. This is only for debugging.
     * */
    private fun navigateNonInvaded(entry: NavigateEntry) {
        val target = entry.url
        navigateUrl = target

        pageAPI?.navigate(target)
    }

    /**
     * Open a local file addressed by a local-file-serve [url].
     *
     * When the url contains `?path=`, the `path` query parameter is decoded as URL-safe
     * Base64 (UTF-8) and navigated to; if the parameter is missing, nothing is opened.
     * Otherwise the serve prefix is removed and the rest is navigated to, as is on Windows
     * and prefixed with `file:///` elsewhere.
     * */
    private fun openLocalFile(url: String) {
        if ("?path=" in url) {
            URIBuilder(url).queryParams
                .firstOrNull { it.name == "path" }
                ?.value
                ?.let { encoded ->
                    val decoded = String(Base64.getUrlDecoder().decode(encoded), Charsets.UTF_8)
                    page.navigate(decoded)
                }
            return
        }

        val localPath = url.removePrefix(AppConstants.LOCAL_FILE_SERVE_PREFIX)
        val target = if (SystemUtils.IS_OS_WINDOWS) localPath else "file:///$localPath"
        page.navigate(target)
    }

    /**
     * Handle a window-open event: create a driver for the opened url and link it to this
     * driver as one of its outgoing pages.
     *
     * The event is reported through the class logger rather than printed to standard output.
     * A failure to create the driver is reported by `warnInterruptible` and otherwise ignored.
     *
     * @param event the window-open event received from the page domain
     * */
    private fun onWindowOpen(event: WindowOpen) {
        logger.info("Window opened | {} | {}", event.url, outgoingPages.size)

        // TODO: handle BrowserUnavailableException
        val driver = browser.runCatching { newDriver(event.url) }.onFailure { warnInterruptible(this, it) }.getOrNull()
        if (driver != null) {
            driver.opener = this
            this.outgoingPages.add(driver)
        }
    }

    /**
     * Handles a request-will-be-sent event for the navigation described by [entry].
     *
     * The event is ignored when the entry's url does not start with `http` or is not a standard url.
     * Otherwise the entry's state is updated and, if the request is a minor resource whose url
     * is blocked (see [isBlocked]), the request is failed with [ErrorReason.ABORTED].
     *
     * @param entry the navigate entry the listener was registered for
     * @param event the network event
     */
    private fun onRequestWillBeSent(entry: NavigateEntry, event: RequestWillBeSent) {
        val pageUrl = entry.url
        if (!pageUrl.startsWith("http")) {
            // This can happen for the following cases:
            // 1. non-http resources, for example, ftp, ws, etc.
            // 2. chrome's internal page, for example, about:blank, chrome://settings/, chrome://settings/system, etc.
            return
        }

        if (!UrlUtils.isStandard(pageUrl)) {
            logger.warn("Not a valid url | {}", pageUrl)
            return
        }

        tracer?.trace("onRequestWillBeSent | driver | {}", event.requestId)

        // Update the same entry that was validated above (as onResponseReceived does), rather than
        // whatever entry the driver currently points at.
        val chromeNavigateEntry = ChromeNavigateEntry(entry)
        chromeNavigateEntry.updateStateBeforeRequestSent(event)

        // perform blocking logic
        val isMinor = chromeNavigateEntry.isMinorResource(event)
        if (isMinor && isBlocked(event.request.url)) {
            fetchAPI?.failRequest(event.requestId, ErrorReason.ABORTED)
        }

        // handle user-defined events
    }

    /**
     * Decides whether a request to [url] should be blocked.
     *
     * A url listed in [blockedURLs] is always blocked. A url matching one of the
     * [probabilisticBlockedURLs] patterns is blocked at random, with a chance governed by
     * [resourceBlockProbability]; probabilities not greater than 1e-6 disable this path.
     *
     * @return true if the request should be blocked
     */
    private fun isBlocked(url: String): Boolean {
        if (url in blockedURLs) return true

        val isProbabilisticCandidate = resourceBlockProbability > 1e-6 &&
            probabilisticBlockedURLs.any { pattern -> url.matches(pattern.toRegex()) }

        // The random draw only happens for candidates, in steps of 0.01.
        return isProbabilisticCandidate && Random.nextInt(100) / 100.0f < resourceBlockProbability
    }

    /**
     * Handles a response-received event: updates the state of [entry] and, when debug logging
     * is enabled, reports the resource via [reportInterestingResources].
     */
    private fun onResponseReceived(entry: NavigateEntry, event: ResponseReceived) {
        val wrappedEntry = ChromeNavigateEntry(entry)
        tracer?.trace("onResponseReceived | driver | {}", event.requestId)
        wrappedEntry.updateStateAfterResponseReceived(event)

        // Resource reporting is a debugging aid only.
        val debugging = logger.isDebugEnabled
        if (debugging) {
            reportInterestingResources(entry, event)
        }

        // handle user-defined events
    }

    /**
     * Runs [traceInterestingResources0] and routes any throwable it raises to [warnInterruptible]
     * instead of letting it propagate.
     */
    private fun reportInterestingResources(entry: NavigateEntry, event: ResponseReceived) {
        try {
            traceInterestingResources0(entry, event)
        } catch (t: Throwable) {
            warnInterruptible(this, t)
        }
    }

    /**
     * Appends a line describing a JSON response to a per-host trace file.
     *
     * Only responses whose mime type is exactly `application/json` are traced. The trace file lives
     * under `<reportDir>/trace/<host>`; once that directory holds more than 2000 entries nothing
     * more is written. Saving the response body is currently disabled via `alwaysFalse()`.
     *
     * @param entry the navigate entry whose page url determines the host directory and the file id
     * @param event the response event to trace
     */
    private fun traceInterestingResources0(entry: NavigateEntry, event: ResponseReceived) {
        val mimeType = event.response.mimeType
        if (mimeType != "application/json") {
            return
        }

        // NOTE: there is no filtering by resource type (FETCH, XHR, SCRIPT, ...); every resource type is traced.

        // page url is normalized
        val pageUrl = entry.pageUrl
        val resourceUrl = event.response.url
        val host = URLUtil.getHost(pageUrl) ?: "unknown"
        val reportDir = messageWriter.reportDir.resolve("trace").resolve(host)

        if (!Files.exists(reportDir)) {
            Files.createDirectories(reportDir)
        }

        // Files.list keeps the directory open until the stream is closed, so close it right after counting.
        val count = Files.list(reportDir).use { it.count() }
        if (count > 2_000) {
            // TOO MANY tracing
            return
        }

        val typeName = event.type.name.lowercase()
        val urlsPath = reportDir.resolve(AppPaths.fileId(pageUrl) + "-$typeName-urls.txt")
        messageWriter.writeTo("$mimeType\t$resourceUrl", urlsPath)

        // configurable
        val saveResourceBody = mimeType == "application/json"
            && event.response.encodedDataLength < 1_000_000
            && alwaysFalse()
        if (saveResourceBody) {
            val body = rpc.invokeSilently("getResponseBody") {
                fetchAPI?.enable()
                fetchAPI?.getResponseBody(event.requestId)?.body
            }
            if (!body.isNullOrBlank()) {
                val bodyFilename = AppPaths.fromUri(resourceUrl, suffix = "-$typeName-body.txt")
                messageWriter.writeTo(body, reportDir.resolve(bodyFilename))
            }
        }
    }

    /**
     * Reads the current url and checks whether it differs from [navigateUrl].
     *
     * The reaction to a detected redirect is commented out, so apart from calling
     * [currentUrl] this function currently has no effect.
     */
    private suspend fun handleRedirect() {
        val landedUrl = currentUrl()
        val redirected = landedUrl.isNotBlank() && landedUrl != navigateUrl
        if (redirected) {
            // browser.addHistory(NavigateEntry(finalUrl))
        }
    }

    /**
     * Registers every script in [initScriptCache] to be evaluated on each new document.
     *
     * The preload script is put at the head of the cache if it is not there yet, each script is
     * passed through the configured confuser before registration, and the cache is emptied afterwards.
     * When trace logging is enabled the injected scripts are also written out by [reportInjectedJs].
     */
    private fun addScriptToEvaluateOnNewDocument() {
        val preloadJs = browserSettings.scriptLoader.getPreloadJs(false)
        if (!initScriptCache.contains(preloadJs)) {
            // utils comes first
            initScriptCache.add(0, preloadJs)
        }

        val confuser = browserSettings.confuser
        for (script in initScriptCache) {
            pageAPI?.addScriptToEvaluateOnNewDocument(confuser.confuse(script))
        }

        // Must run before the cache is cleared, the report is built from the cached scripts.
        if (logger.isTraceEnabled) {
            reportInjectedJs()
        }

        // the cache is used for a single document, so we have to clear it
        initScriptCache.clear()
    }

    /**
     * Returns the cookies reported by the network API, each converted to a string map by [serialize].
     *
     * @return one map per cookie, or an empty list when the network API or its cookies are unavailable
     */
    @Throws(WebDriverException::class)
    private fun getCookies0(): List<Map<String, String>> {
        return networkAPI?.cookies?.map { serialize(it) }.orEmpty()
    }

    /**
     * Converts a [Cookie] into a flat string map by writing it to JSON with Jackson and reading
     * the JSON back as a map.
     *
     * @param cookie the cookie to convert
     * @return the cookie's properties keyed by JSON property name; properties with a null value are omitted
     */
    private fun serialize(cookie: Cookie): Map<String, String> {
        val mapper = jacksonObjectMapper()
        val json = mapper.writeValueAsString(cookie)
        val properties: Map<String, String?> = mapper.readValue(json)

        // Keep the value itself: the lambda of mapValues receives the whole Map.Entry, and calling
        // toString() on an entry yields "key=value" rather than the value.
        return properties.mapNotNull { (key, value) -> value?.let { key to it } }.toMap()
    }

    /**
     * Writes all scripts currently held in [initScriptCache], joined and passed through the confuser,
     * to `driver.<id>/js/preload.all.js` under the browser's context directory, then traces the file location.
     */
    private fun reportInjectedJs() {
        val joinedScripts = initScriptCache.joinToString("\n;\n\n\n;\n")
        val script = browserSettings.confuser.confuse(joinedScripts)

        val jsDir = browser.id.contextDir.resolve("driver.$id/js")
        Files.createDirectories(jsDir)

        val target = jsDir.resolve("preload.all.js")
        val report = Files.writeString(target, script)
        tracer?.trace("All injected js: file://{}", report)
    }

    /**
     * Delays the coroutine for a given time without blocking a thread and resumes it after a specified time.
     *
     * This suspending function is cancellable. If the Job of the current coroutine is cancelled or completed while
     * this suspending function is waiting, this function immediately resumes with CancellationException.
     * */
    private suspend fun gap() {
        val active = isActive
        if (!active) {
            // The strict check is currently switched off:
            // throw IllegalWebDriverStateException("WebDriver is not active #$id | $navigateUrl", this)
        }

        // Suspend, without blocking a thread, for the duration randomDelayMillis returns for "gap".
        val pauseMillis = randomDelayMillis("gap")
        delay(pauseMillis)
    }

    /**
     * Delays the coroutine for a given time without blocking a thread and resumes it after a specified time.
     *
     * This suspending function is cancellable. If the Job of the current coroutine is cancelled or completed while
     * this suspending function is waiting, this function immediately resumes with CancellationException.
     * */
    private suspend fun gap(type: String) {
        val active = isActive
        if (!active) {
            // The strict check is currently switched off:
            // throw IllegalWebDriverStateException("WebDriver is not active #$id | $navigateUrl", this)
        }

        // The pause length is whatever randomDelayMillis returns for the given type.
        val pauseMillis = randomDelayMillis(type)
        delay(pauseMillis)
    }

    /**
     * Delays the coroutine for a given time without blocking a thread and resumes it after a specified time.
     *
     * This suspending function is cancellable. If the Job of the current coroutine is cancelled or completed while
     * this suspending function is waiting, this function immediately resumes with CancellationException.
     * */
    private suspend fun gap(millis: Long) {
        val active = isActive
        if (!active) {
            // The strict check is currently switched off:
            // throw IllegalWebDriverStateException("WebDriver is not active #$id | $navigateUrl", this)
        }

        // Suspend for exactly the requested number of milliseconds.
        delay(timeMillis = millis)
    }

    /**
     * Runs [action] through `rpc.invokeDeferred` under the given call [name].
     *
     * A [ChromeDriverException] raised by the call is handed to `rpc.handleChromeException`
     * together with [name] and [message].
     *
     * @param name the name of the call, passed to the rpc layer
     * @param message optional extra text passed to the exception handler
     * @param action the suspending action to run
     * @return the result of the rpc call, or null if a [ChromeDriverException] was handled
     *         and the handler returned normally
     */
    private suspend fun <T> invokeOnPage(name: String, message: String? = null, action: suspend () -> T): T? {
        try {
            return rpc.invokeDeferred(name) {
                action()
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, name, message)
        }

        return null
    }

    /**
     * Locates the element matching [selector] and runs [action] with its node id through
     * `rpc.invokeDeferred`.
     *
     * The element is located by focusing on it when [focus] is true, otherwise by scrolling it into
     * view when [scrollIntoView] is true, otherwise by a plain query. [focus] takes precedence over
     * [scrollIntoView]. [action] only runs when the resulting node id is non-null and positive.
     *
     * A [ChromeDriverException] raised by the call is handed to `rpc.handleChromeException`.
     *
     * @param selector the selector of the target element
     * @param name the name of the call, passed to the rpc layer
     * @param focus whether to focus on the element while locating it
     * @param scrollIntoView whether to scroll the element into view while locating it
     * @param action the suspending action that receives the node id
     * @return the result of [action], or null if no valid node was found or a
     *         [ChromeDriverException] was handled and the handler returned normally
     */
    private suspend fun <T> invokeOnElement(
        selector: String, name: String, focus: Boolean = false, scrollIntoView: Boolean = false,
        action: suspend (Int) -> T
    ): T? {
        try {
            return rpc.invokeDeferred(name) {
                val nodeId = when {
                    focus -> page.focusOnSelector(selector)
                    scrollIntoView -> page.scrollIntoViewIfNeeded(selector)
                    else -> page.querySelector(selector)
                }

                if (nodeId != null && nodeId > 0) {
                    action(nodeId)
                } else {
                    null
                }
            }
        } catch (e: ChromeDriverException) {
            rpc.handleChromeException(e, name, "selector: [$selector], focus: $focus, scrollIntoView: $scrollIntoView")
        }

        return null
    }

    /**
     * Evaluates [predicate] on the element matching [selector] via [invokeOnElement].
     *
     * @return true only when the predicate ran and returned true; false when it returned false
     *         or when [invokeOnElement] produced null
     */
    private suspend fun predicateOnElement(
        selector: String, name: String, focus: Boolean = false, scrollIntoView: Boolean = false,
        predicate: suspend (Int) -> Boolean
    ): Boolean {
        val outcome = invokeOnElement(selector, name, focus, scrollIntoView, predicate)
        // A null outcome counts as "predicate not satisfied".
        return outcome == true
    }

    /**
     * Tells whether the page's current url differs from [oldUrl].
     */
    @Throws(WebDriverException::class)
    private suspend fun isNavigated(oldUrl: String): Boolean {
        val urlNow = currentUrl()
        return urlNow != oldUrl
    }

    /**
     * Tells whether [nodeId] is usable: it must be non-null and strictly positive.
     */
    private fun isValidNodeId(nodeId: Int?): Boolean = (nodeId ?: 0) > 0

    /**
     * Converts a devtools [Evaluate] response into a [JsEvaluation].
     *
     * When the response carries exception details, the result holds only a [JsException] built
     * from them; otherwise it holds the value, unserializable value, class name and description
     * of the evaluation result.
     *
     * @param evaluate the devtools response, may be null
     * @return the converted evaluation, or null when [evaluate] is null
     */
    private fun createJsEvaluate(evaluate: Evaluate?): JsEvaluation? {
        if (evaluate == null) {
            return null
        }

        val details = evaluate.exceptionDetails
        if (details != null) {
            return JsEvaluation(
                exception = JsException(
                    text = details.text,
                    lineNumber = details.lineNumber,
                    columnNumber = details.columnNumber,
                    url = details.url,
                )
            )
        }

        val remoteObject = evaluate.result
        return JsEvaluation(
            value = remoteObject.value,
            unserializableValue = remoteObject.unserializableValue,
            className = remoteObject.className,
            description = remoteObject.description
        )
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy