All Downloads are FREE. Search and download functionalities are using the official Maven repository.

xyz.cssxsh.selenium.SeleniumTool.kt Maven / Gradle / Ivy

There is a newer version: 2.6.0
Show newest version
package xyz.cssxsh.selenium

import io.github.karlatemp.mxlib.selenium.*
import io.ktor.client.*
import io.ktor.client.engine.okhttp.*
import io.ktor.client.request.*
import io.ktor.http.*
import kotlinx.coroutines.*
import org.openqa.selenium.*
import org.openqa.selenium.chrome.*
import org.openqa.selenium.chromium.*
import org.openqa.selenium.edge.*
import org.openqa.selenium.firefox.*
import org.openqa.selenium.print.*
import org.openqa.selenium.remote.*
import java.io.*
import java.time.*
import java.util.*
import java.util.function.*
import java.util.logging.*
import java.util.zip.*
import kotlin.properties.*
import kotlin.reflect.*

// region Setup Selenium

/**
 * An [OutputStream] sink that silently discards everything written to it.
 *
 * Every operation, including [close], is a no-op, so the single shared instance can be handed to
 * any number of driver services.
 */
private object AllIgnoredOutputStream : OutputStream() {
    override fun write(b: Int) = Unit
    override fun write(b: ByteArray) = Unit
    override fun write(b: ByteArray, off: Int, len: Int) = Unit
    override fun flush() = Unit
    override fun close() = Unit
}

/**
 * Creates a property delegate backed by Java reflection.
 *
 * The delegate looks up the field declared on [T] whose name equals the delegated property's name,
 * makes it accessible, and reads or writes it on the receiver. It is used to reach fields that the
 * owning class does not expose.
 *
 * @param T the class that declares the field (looked up with `getDeclaredField`, so inherited
 * fields are not found)
 * @param R the type the field value is cast to
 * @throws NoSuchFieldException (on access) if [T] declares no field with the property's name
 */
private inline fun <reified T : Any, reified R> reflect() = object : ReadWriteProperty<T, R> {

    override fun getValue(thisRef: T, property: KProperty<*>): R {
        val field = T::class.java.getDeclaredField(property.name)
        field.isAccessible = true
        return field.get(thisRef) as R
    }

    override fun setValue(thisRef: T, property: KProperty<*>, value: R) {
        val field = T::class.java.getDeclaredField(property.name)
        field.isAccessible = true
        field.set(thisRef, value)
    }
}

/**
 * The `java.util.logging` logger registered under the name `org.openqa.selenium`.
 *
 * The classes listed below are related Selenium types; presumably they log beneath this
 * namespace — verify against the Selenium sources.
 *
 * @see org.openqa.selenium.chromium.ChromiumDriver
 * @see org.openqa.selenium.devtools.CdpVersionFinder
 * @see org.openqa.selenium.devtools.CdpEndpointFinder
 * @see org.openqa.selenium.devtools.Connection
 * @see org.openqa.selenium.devtools.idealized.Network
 * @see org.openqa.selenium.devtools.v95.V95Network
 * @see org.openqa.selenium.remote.ErrorCodes
 * @see org.openqa.selenium.remote.ProtocolHandshake
 * @see org.openqa.selenium.remote.RemoteLogs
 * @see org.openqa.selenium.remote.RemoteWebDriver
 * @see org.openqa.selenium.remote.codec.w3c.W3CHttpResponseCodec
 * @see org.openqa.selenium.remote.http.netty.NettyWebSocket
 * @see org.openqa.selenium.net.UrlChecker
 * @see org.openqa.selenium.json.JsonOutput
 * @see org.openqa.selenium.os.OsProcess
 */
internal val logger: Logger = Logger.getLogger("org.openqa.selenium")

/**
 * Registry key that stores the current user's handler association for `https` URLs on Windows.
 */
internal const val USER_CHOICE_KEY =
    "HKEY_CURRENT_USER\\SOFTWARE\\Microsoft\\Windows\\Shell\\Associations\\URLAssociations\\https\\UserChoice"

/**
 * Reads the `ProgId` value of [USER_CHOICE_KEY] by running `reg query`.
 *
 * @return the trimmed text following the `REG_SZ` marker in the command output, or an empty string
 * when not running on Windows or when the output contains no `REG_SZ` marker
 * @throws IOException if the `reg` command cannot be started
 */
internal fun queryUserChoice(): String {
    val osName = System.getProperty("os.name").orEmpty()
    if (!osName.startsWith("Windows")) return ""

    val process = ProcessBuilder("reg", "query", USER_CHOICE_KEY, "/v", "ProgId")
        // stderr is never read here; discard it so the child cannot block on a full pipe
        .redirectError(ProcessBuilder.Redirect.DISCARD)
        .start()
    return try {
        process.inputStream.use { it.reader().readText() }
            // Without the marker there is no value to report; do not return the raw output.
            .substringAfter("REG_SZ", missingDelimiterValue = "")
            .trim()
    } finally {
        // Release the process handle and its remaining pipes.
        process.destroy()
    }
}

/** Default installation directory of Microsoft Edge on Windows. */
internal const val EDGE_APPLICATION = "C:\\Program Files (x86)\\Microsoft\\Edge\\Application"

/** Default installation directory of Google Chrome on Windows. */
internal const val CHROME_APPLICATION = "C:\\Program Files (x86)\\Google\\Chrome\\Application"

/**
 * Matches a dotted numeric version such as `96.0.1054.62`.
 *
 * The separator is an escaped, literal dot; an unescaped `.` would accept any character there.
 */
internal val VERSION = """\d+(\.\d+)*""".toRegex()

/**
 * Matches the text of a `<Url>` element that ends in `zip`; the lookbehind keeps the opening tag
 * out of the match.
 */
internal val ZIP_URL = "(?<=<Url>).{16,256}zip".toRegex()

/**
 * Factory signature used to build a driver: takes an optional user-agent string and an optional
 * [Capabilities] configurer, and returns the started [RemoteWebDriver].
 */
internal typealias DriverSupplier = BiFunction<String?, Consumer<Capabilities>?, RemoteWebDriver>

/** Lazily created [MxSelenium] instance used as the receiver for the reflected properties below. */
private val MxSeleniumInstance by lazy { MxSelenium() }

// The properties below are read and written through reflection on the MxSelenium field of the
// same name (see `reflect`).

private var MxSelenium.initialized: Boolean by reflect<MxSelenium, Boolean>()

private var MxSelenium.driverClass: Class<out RemoteWebDriver> by reflect<MxSelenium, Class<out RemoteWebDriver>>()

private var MxSelenium.driverSupplier: DriverSupplier by reflect<MxSelenium, DriverSupplier>()

private val MxSelenium.data: File by reflect<MxSelenium, File>()

/**
 * Downloads (if not already cached) the msedgedriver matching the locally installed Edge and
 * registers an [EdgeDriver] supplier with MxSelenium. Windows only.
 *
 * Steps:
 * 1. Take the first entry of [EDGE_APPLICATION] that looks like a version number.
 * 2. Fetch the driver storage listing for that version and cache it as XML in the MxSelenium data directory.
 * 3. Pick the first listed zip URL containing `win32`, download it, and extract `msedgedriver.exe`.
 * 4. Point the Edge driver system property at the extracted executable and install the supplier.
 *
 * Files that already exist in the data directory are reused rather than downloaded again.
 *
 * @throws IllegalArgumentException if no Edge version directory is found, or the downloaded
 * archive does not contain `msedgedriver.exe`
 * @throws NoSuchElementException if the listing contains no `win32` zip URL
 */
internal fun setupEdgeDriver() {
    val version = requireNotNull(File(EDGE_APPLICATION).list()?.firstOrNull { it matches VERSION }) { "Edge 版本获取失败" }
    val folder = MxSeleniumInstance.data
    val archive = folder.resolve("msedgedriver-${version}.zip")

    val client = HttpClient(OkHttp)
    try {
        val xml = folder.resolve("msedgedriver-${version}.xml")
        if (xml.exists().not()) {
            xml.writeBytes(runBlocking(KtorContext) {
                client.get<ByteArray>("https://msedgewebdriverstorage.blob.core.windows.net/edgewebdriver") {
                    parameter("prefix", version)
                    parameter("comp", "list")
                    parameter("timeout", 60_000)
                }
            })
        }

        val url = ZIP_URL.findAll(xml.readText()).first { "win32" in it.value }.value

        if (archive.exists().not()) {
            archive.writeBytes(runBlocking(KtorContext) {
                client.get<ByteArray>(url)
            })
        }
    } finally {
        // The client is only needed for the two downloads; always release its engine resources.
        client.close()
    }

    val driver = folder.resolve("msedgedriver-${version}.exe")
    if (driver.exists().not()) {
        // Close the archive once the executable has been copied out.
        ZipFile(archive).use { zip ->
            val entry = requireNotNull(zip.getEntry("msedgedriver.exe")) {
                "msedgedriver.exe not found in ${archive.name}"
            }
            zip.getInputStream(entry).use { input ->
                driver.writeBytes(input.readAllBytes())
            }
        }
    }

    System.setProperty(EdgeDriverService.EDGE_DRIVER_EXE_PROPERTY, driver.absolutePath)

    setMxSelenium(EdgeDriver::class.java) { agent, consumer ->
        val options = EdgeOptions()
        if (agent != null) options.addArguments("user-agent=$agent")
        consumer?.accept(options)
        val service = EdgeDriverService.createDefaultService()
        // Discard the driver process output.
        service.sendOutputTo(AllIgnoredOutputStream)
        EdgeDriver(service, options)
    }
}

/**
 * Marks MxSelenium as initialized and installs the driver class and supplier it should use.
 *
 * @param driverClass the concrete driver type that [driverSupplier] creates
 * @param driverSupplier factory invoked to create each new driver
 */
internal fun setMxSelenium(driverClass: Class<out RemoteWebDriver>, driverSupplier: DriverSupplier) {
    MxSeleniumInstance.initialized = true
    MxSeleniumInstance.driverClass = driverClass
    MxSeleniumInstance.driverSupplier = driverSupplier
}

/**
 * Initializes the Selenium / MxLib configuration.
 *
 * Silences the Selenium logger, publishes the requested browser and HTTP factory as system
 * properties, and then either sets up Edge directly or lets MxSelenium detect the browser and
 * replaces the Firefox / Chrome supplier with one that discards driver output.
 *
 * @param browser browser type, e.g. Chrome, Firefox ...; blank leaves the system property untouched
 * @param factory [org.openqa.selenium.remote.http.HttpClient.Factory] name, e.g. ktor, netty
 * @see setupEdgeDriver
 */
internal fun setupSelenium(browser: String = "", factory: String = "ktor") {

    logger.level = Level.OFF

    if (factory == "ktor") {
        System.setProperty("io.ktor.random.secure.random.provider", "DRBG")
    }
    // Clear a previous initialization so that setup runs again.
    if (MxSeleniumInstance.initialized) {
        MxSeleniumInstance.initialized = false
    }

    browser.takeIf { it.isNotBlank() }?.let { System.setProperty("mxlib.selenium.browser", it) }
    factory.takeIf { it.isNotBlank() }?.let { System.setProperty("webdriver.http.factory", it) }

    // Edge is handled separately; the registry is only queried when it was not requested by name.
    val useEdge = browser.startsWith("Edge") || queryUserChoice().startsWith("Edge")
    if (useEdge) {
        setupEdgeDriver()
        return
    }

    // Switch the thread context class loader while MxSelenium loads its configuration.
    val current = Thread.currentThread()
    val previousLoader = current.contextClassLoader
    try {
        current.contextClassLoader = KtorHttpClient.Factory::class.java.classLoader
        MxSelenium.initialize()
    } finally {
        current.contextClassLoader = previousLoader
    }

    when (MxSeleniumInstance.driverClass) {
        FirefoxDriver::class.java -> setMxSelenium(FirefoxDriver::class.java) { agent, consumer ->
            val firefoxOptions = FirefoxOptions()
            if (agent != null) firefoxOptions.addPreference("general.useragent.override", agent)
            consumer?.accept(firefoxOptions)
            val gecko = GeckoDriverService.Builder().usingFirefoxBinary(firefoxOptions.binary).build()
            gecko.sendOutputTo(AllIgnoredOutputStream)
            FirefoxDriver(gecko, firefoxOptions)
        }
        ChromeDriver::class.java -> setMxSelenium(ChromeDriver::class.java) { agent, consumer ->
            val chromeOptions = ChromeOptions()
            if (agent != null) chromeOptions.addArguments("user-agent=$agent")
            consumer?.accept(chromeOptions)
            val chromeService = ChromeDriverService.createServiceWithConfig(chromeOptions)
            chromeService.sendOutputTo(AllIgnoredOutputStream)
            ChromeDriver(chromeService, chromeOptions)
        }
    }
}

// endregion

// region RemoteWebDriver

/**
 * Builds the callback that applies this configuration to a browser options object.
 *
 * Chromium-based options and [FirefoxOptions] are supported; any other [Capabilities]
 * implementation is rejected with [UnsupportedOperationException].
 */
private fun RemoteWebDriverConfig.toConsumer(): (Capabilities) -> Unit = { capabilities ->
    val config = this
    when (capabilities) {
        is ChromiumOptions<*> -> {
            capabilities.setHeadless(config.headless)
            capabilities.setPageLoadStrategy(PageLoadStrategy.NORMAL)
            capabilities.setAcceptInsecureCerts(true)
            capabilities.addArguments("--silent")
            capabilities.setExperimentalOption(
                "excludeSwitches",
                listOf("enable-automation", "ignore-certificate-errors")
            )
            capabilities.addArguments("--hide-scrollbars")
            if (config.proxy.isNotBlank()) {
                capabilities.addArguments("--proxy-server=${config.proxy}")
            }
            // Viewport size and user agent are passed through the "mobileEmulation" option.
            val deviceMetrics = mapOf(
                "width" to config.width,
                "height" to config.height,
                "pixelRatio" to config.pixelRatio
            )
            val emulation = mapOf(
                "deviceMetrics" to deviceMetrics,
                "userAgent" to config.userAgent
            )
            capabilities.setExperimentalOption("mobileEmulation", emulation)
        }
        is FirefoxOptions -> {
            capabilities.setHeadless(config.headless)
            capabilities.setPageLoadStrategy(PageLoadStrategy.NORMAL)
            capabilities.setLogLevel(FirefoxDriverLogLevel.FATAL)
            capabilities.setAcceptInsecureCerts(true)
            if (config.proxy.isNotBlank()) {
                val proxyUrl = Url(config.proxy)
                capabilities.addPreference("network.proxy.type", 1)
                capabilities.addPreference("network.proxy.http", proxyUrl.host)
                capabilities.addPreference("network.proxy.http_port", proxyUrl.port)
                capabilities.addPreference("network.proxy.share_proxy_settings", true)
            }

            // XXX manually disable webgl
            capabilities.addPreference("webgl.disabled", true)
            capabilities.addPreference("devtools.responsive.touchSimulation.enabled", true)
            capabilities.addPreference("devtools.responsive.viewport.width", config.width)
            capabilities.addPreference("devtools.responsive.viewport.height", config.height)
            capabilities.addPreference("devtools.responsive.viewport.pixelRatio", config.pixelRatio)
            capabilities.addPreference("devtools.responsive.userAgent", config.userAgent)
            // XXX the responsive preferences cannot be invoked, so override the user agent directly
            capabilities.addPreference("general.useragent.override", config.userAgent)
            capabilities.addArguments("--width=${config.width}", "--height=${config.height}")
        }
        else -> throw UnsupportedOperationException("不支持设置参数的浏览器 ${capabilities::class}")
    }
}

/** System property key: initial wait in milliseconds (default 10 000). */
internal const val INIT = "xyz.cssxsh.selenium.timeout.init"

/** System property key: page-load timeout in milliseconds (default 180 000). */
internal const val PAGE = "xyz.cssxsh.selenium.timeout.page"

/** System property key: polling interval in milliseconds (default 10 000). */
internal const val INTERVAL = "xyz.cssxsh.selenium.timeout.interval"

/**
 * Reads the system property [key] as a millisecond count, using [defaultMillis] when the property
 * is unset or is not a valid long.
 */
private fun millisProperty(key: String, defaultMillis: Long): Duration {
    val configured = System.getProperty(key)?.toLongOrNull()
    return Duration.ofMillis(configured ?: defaultMillis)
}

private val Init: Duration by lazy { millisProperty(INIT, 10_000) }

private val PageLoad: Duration by lazy { millisProperty(PAGE, 180_000) }

private val Interval: Duration by lazy { millisProperty(INTERVAL, 10_000) }

/**
 * Creates a [RemoteWebDriver] through MxSelenium.
 *
 * When [RemoteWebDriverConfig.log] is set, the driver log system property for the detected browser
 * is pointed at a file in the MxSelenium data directory first. The new driver gets its page-load
 * and script timeouts set before it is returned.
 *
 * @param config configuration applied to the browser options
 */
fun RemoteWebDriver(config: RemoteWebDriverConfig): RemoteWebDriver {

    if (config.log) {
        // Map the detected driver class to (system property, log file name).
        val target = when (MxSeleniumInstance.driverClass) {
            ChromeDriver::class.java -> ChromeDriverService.CHROME_DRIVER_LOG_PROPERTY to "chromedriver.log"
            EdgeDriver::class.java -> EdgeDriverService.EDGE_DRIVER_LOG_PROPERTY to "msedgedriver.log"
            FirefoxDriver::class.java -> FirefoxDriver.SystemProperty.BROWSER_LOGFILE to "geckodriver.log"
            else -> null
        }
        if (target != null) {
            val (property, fileName) = target
            val logFile = MxSeleniumInstance.data.resolve(fileName)
            System.setProperty(property, logFile.absolutePath)
        }
    }

    // Switch the thread context class loader while the driver is created.
    val current = Thread.currentThread()
    val previousLoader = current.contextClassLoader

    try {
        current.contextClassLoader = KtorHttpClient.Factory::class.java.classLoader

        val driver = MxSelenium.newDriver(null, config.toConsumer())
        val timeouts = driver.manage().timeouts()
        timeouts.pageLoadTimeout(PageLoad)
        timeouts.scriptTimeout(Interval)
        return driver
    } finally {
        current.contextClassLoader = previousLoader
    }
}

// endregion

// region Screenshot

/**
 * Creates a driver from [config], runs [block] with it, and always quits the driver afterwards.
 *
 * @param config configuration used to create the driver
 * @param block work to perform with the driver
 * @return the value returned by [block]
 */
inline fun <T> useRemoteWebDriver(config: RemoteWebDriverConfig, block: (RemoteWebDriver) -> T): T {
    val driver = RemoteWebDriver(config)
    return try {
        block(driver)
    } finally {
        // Quit even when block throws, so the browser process is not left running.
        driver.quit()
    }
}

/**
 * Checks whether the page has finished loading.
 *
 * Runs a script in the page that requires `document.readyState` to be `complete` and every `img`
 * element to report `complete`; the script also logs the image progress to the browser console.
 *
 * @return `true` when the document and all of its images are loaded
 */
fun RemoteWebDriver.isReady(): Boolean {
    val script = """
        function imagesComplete() {
            const images = document.getElementsByTagName('img');
            let complete = true;
            let count = 0;
            try {
                for (const image of images) {
                    complete = complete && image.complete;
                    image.complete && count++;
                }
            } finally {
                console.log(`ImagesComplete: ${'$'}{count}/${'$'}{images.length}`);
            }
            return complete;
        }
        return document.readyState === 'complete' && imagesComplete()
    """.trimIndent()
    val ready = executeScript(script)
    return ready as Boolean
}

/**
 * Hides every element matched by the given CSS selectors by setting its `display` style to `none`.
 *
 * @param css CSS selectors; when none are given, no script is run
 * @return the elements that were hidden, as returned by the in-page script
 */
fun RemoteWebDriver.hide(vararg css: String): List<RemoteWebElement> {
    if (css.isEmpty()) return emptyList()
    // Cast to the List interface rather than a concrete class: the element type cannot be checked
    // at runtime anyway, and this does not depend on which list implementation Selenium returns.
    @Suppress("UNCHECKED_CAST")
    return executeScript(
        """
        const nodes = Array.from(arguments).flatMap((selector) => Array.from(document.querySelectorAll(selector)));
        for (const node of nodes) node.style.display = 'none';
        return nodes;
    """.trimIndent(), *css
    ) as List<RemoteWebElement>
}

/**
 * Closes the current window/tab while the thread context class loader is temporarily switched to
 * the one that loaded [KtorHttpClient.Factory]; the previous loader is always restored.
 *
 * @see [WebDriver.close]
 * @see [FirefoxDriver.maybeGetDevTools]
 */
fun RemoteWebDriver.closeTab() {
    val current = Thread.currentThread()
    val previousLoader = current.contextClassLoader
    try {
        // Switch the thread context class loader for the duration of close().
        current.contextClassLoader = KtorHttpClient.Factory::class.java.classLoader
        close()
    } finally {
        current.contextClassLoader = previousLoader
    }
}

/**
 * Opens [url] in a new tab, waits for it to load, and takes a screenshot.
 *
 * Loading is bounded by the page-load timeout: after an initial delay the page is polled with
 * [isReady] until it reports ready. If the timeout elapses, the screenshot is taken of whatever
 * has loaded so far. The tab is closed and the driver switched back to the original window in
 * every case, including when navigation fails or the coroutine is cancelled.
 *
 * @param url page to open
 * @param hide CSS selectors of elements to hide before capturing
 * @return the screenshot image data, in PNG format
 */
suspend fun RemoteWebDriver.getScreenshot(url: String, vararg hide: String): ByteArray {
    val home = windowHandle
    val tab = switchTo().newWindow(WindowType.TAB) as RemoteWebDriver

    return try {
        try {
            withTimeout(PageLoad.toMillis()) {
                tab.get(url)
                delay(Init.toMillis())
                while (!tab.isReady()) {
                    delay(Interval.toMillis())
                }
            }
        } catch (_: TimeoutCancellationException) {
            // Deliberate best effort: capture whatever has loaded once the time budget is spent.
        }

        tab.hide(css = hide)
        tab.getScreenshotAs(OutputType.BYTES)
    } finally {
        // Always clean up, and return to the original window even if closing the tab fails.
        try {
            tab.closeTab()
        } finally {
            switchTo().window(home)
        }
    }
}

/**
 * Prints the current page to PDF.
 *
 * @param consumer configures the [PrintOptions] before printing; defaults to no changes
 * @return the PDF bytes, decoded from the Base64 content returned by the driver
 */
fun RemoteWebDriver.printToPDF(consumer: PrintOptions.() -> Unit = {}): ByteArray {
    val options = PrintOptions()
    options.consumer()
    val encoded = print(options).content
    return Base64.getMimeDecoder().decode(encoded)
}

// endregion




© 2015 - 2025 Weber Informatics LLC | Privacy Policy