All downloads are FREE. Search and download functionality uses the official Maven repository.

ai.platon.pulsar.common.proxy.LoadingProxyPool.kt Maven / Gradle / Ivy

There is a newer version: 2.1.0
Show newest version
package ai.platon.pulsar.common.proxy

import ai.platon.pulsar.common.AppPaths
import ai.platon.pulsar.common.config.ImmutableConfig
import ai.platon.pulsar.common.readable
import org.slf4j.LoggerFactory
import java.io.IOException
import java.nio.file.Files
import java.time.Duration
import java.time.Instant
import java.util.concurrent.TimeUnit

/**
 * A [ProxyPool] that fills itself on demand from a [ProxyLoader].
 *
 * When [take] finds no free proxy, it asks [proxyLoader] for proxies and offers every entry
 * that is not already known to the pool and whose out IP / out segment is not banned.
 * The banned IPs and banned segments are the collections owned by [proxyLoader]; this class
 * reads them, adds to them in [retire], and writes them to disk in [dump].
 *
 * [take] may block while polling the free-proxy queue and while the loader updates proxies.
 *
 * @property proxyLoader the source of new proxies and the owner of the ban lists and ban strategy.
 * @param conf the configuration passed on to [ProxyPool].
 */
class LoadingProxyPool(
        val proxyLoader: ProxyLoader,
        conf: ImmutableConfig
): ProxyPool(conf) {

    private val logger = LoggerFactory.getLogger(LoadingProxyPool::class.java)

    // Views onto the loader's ban lists; they are re-read from the loader on every access.
    private val bannedIps get() = proxyLoader.bannedIps
    private val bannedSegments get() = proxyLoader.bannedSegments

    /**
     * Try to take a proxy from the pool; if the pool has no free proxy, load proxies using the [ProxyLoader].
     *
     * Makes at most 10 attempts, and stops early when the pool is no longer active or the current
     * thread is interrupted. Each attempt blocks for up to the polling timeout.
     *
     * @return a proxy that is not considered banned, or `null` if none could be obtained.
     * @throws ProxyException if loading proxies fails.
     * @throws InterruptedException if the thread is interrupted while waiting for a free proxy.
     */
    @Throws(ProxyException::class, InterruptedException::class)
    override fun take(): ProxyEntry? {
        lastActiveTime = Instant.now()

        var i = 0
        val maxRetry = 10
        var proxy: ProxyEntry? = null
        while (isActive && proxy == null && i++ < maxRetry && !Thread.currentThread().isInterrupted) {
            if (freeProxies.isEmpty()) {
                load()
            }

            // Block until timeout, thread interrupted or an available proxy entry returns
            // TODO: no need to block, just try to get a item from the queue
            proxy = poll0()
        }

        return proxy
    }

    /**
     * Retire the given proxy and, if it is marked as banned, record its out IP and out segment
     * in the ban lists.
     *
     * The proxy may be recovered later.
     *
     * @param proxyEntry the proxy to retire.
     */
    override fun retire(proxyEntry: ProxyEntry) {
        proxyEntry.retire()

        if (proxyEntry.isBanned) {
            ban(proxyEntry)
        }
    }

    /**
     * Log the ban of the given proxy together with the current ban statistics,
     * followed by the full list of banned segments (20 per line).
     *
     * @param proxyEntry the proxy that has just been banned.
     */
    override fun report(proxyEntry: ProxyEntry) {
        logger.info("Ban proxy <{}> after {} pages served in {} | total ban: {}, banned ips: {} | {}",
                proxyEntry.outIp, proxyEntry.numSuccessPages, proxyEntry.elapsedTime.readable(),
                numProxyBanned, bannedIps.size, proxyEntry)
        val s = bannedSegments.chunked(20).joinToString("\n") { it.joinToString() }
        logger.info("Banned segments ({}): {}", bannedSegments.size, s)
    }

    /**
     * Add the proxy's out IP and out segment to the ban lists if they are not there yet.
     * The ban counter is increased and the ban is reported only when at least one of them is new.
     */
    private fun ban(proxyEntry: ProxyEntry) {
        // local proxy server
        if (proxyEntry.host == "127.0.0.1") {
            return
        }

        var banned = false

        if (proxyEntry.outIp !in bannedIps) {
            bannedIps.add(proxyEntry.outIp)
            banned = true
        }

        if (proxyEntry.outSegment !in bannedSegments) {
            bannedSegments.add(proxyEntry.outSegment)
            banned = true
        }

        if (banned) {
            ++numProxyBanned
            report(proxyEntry)
            // ban speed, proxy change speed
        }
    }

    /**
     * Ask the loader for proxies and offer every entry that is neither already in the pool
     * nor banned by out IP or out segment.
     *
     * @throws ProxyException if the loader fails to update its proxies.
     */
    @Throws(ProxyException::class)
    private fun load() {
        // synchronize proxyLoader to fix issue 41: https://github.com/platonai/PulsarRPA/issues/41
        val loadedProxies = synchronized(proxyLoader) {
            proxyLoader.updateProxies(Duration.ZERO)
        }

        loadedProxies.asSequence()
            .filterNot { it in proxyEntries }
            .filterNot { it.outIp in bannedIps }
            .filterNot { it.outSegment in bannedSegments }
            .forEach { offer(it) }
    }

    /**
     * Block until timeout, thread interrupted or an available proxy entry returns.
     *
     * A polled proxy that is considered banned under the current ban strategy is counted,
     * logged and dropped: it has already been removed from the free queue and `null` is returned.
     *
     * @return the polled proxy, or `null` on timeout or when the polled proxy is banned.
     * @throws InterruptedException if the thread is interrupted while waiting.
     */
    @Throws(InterruptedException::class)
    private fun poll0(): ProxyEntry? {
        // Retrieves and removes the head of the queue
        val proxy = freeProxies.poll(pollingTimeout.toMillis(), TimeUnit.MILLISECONDS) ?: return null

        val banState = handleBanState(proxy)
        if (!banState.isBanned) {
            return proxy
        }

        numProxyBanned++
        logger.info("Proxy is banned <{}> | bp: {}, bh: {}, bs: {} | {}",
                banState, numProxyBanned, bannedIps.size, bannedSegments.size, proxy.display)
        return null
    }

    /**
     * Decide whether the proxy counts as banned under the loader's ban strategy.
     *
     * - no strategy, `""` or `"none"`: never banned;
     * - `"clear"`: never banned, and both ban lists are cleared as a side effect;
     * - a strategy starting with `"segment"`: banned if the out segment is in the banned segments;
     * - a strategy starting with `"host"`: banned if the out IP is in the banned IPs;
     * - otherwise the proxy's own `isBanned` flag decides.
     */
    private fun handleBanState(proxyEntry: ProxyEntry): ProxyEntry.BanState {
        val banStrategy = proxyLoader.banStrategy ?: ""
        return when {
            banStrategy == "" || banStrategy == "none" -> ProxyEntry.BanState.OK
            banStrategy == "clear" -> {
                // This strategy wipes the ban lists every time a proxy is checked.
                bannedSegments.clear()
                bannedIps.clear()
                ProxyEntry.BanState.OK
            }
            banStrategy.startsWith("segment") && proxyEntry.outSegment in bannedSegments -> ProxyEntry.BanState.SEGMENT
            banStrategy.startsWith("host") && proxyEntry.outIp in bannedIps -> ProxyEntry.BanState.HOST
            proxyEntry.isBanned -> ProxyEntry.BanState.OTHER
            else -> ProxyEntry.BanState.OK
        }
    }

    /**
     * Write the banned IPs and the banned segments, one entry per line, to their files
     * and then run the dump of the parent pool.
     *
     * The two files are written independently: an I/O failure on one is logged with its
     * stack trace and does not prevent the other from being written.
     */
    override fun dump() {
        synchronized(AppPaths.PROXY_ARCHIVE_DIR) {
            dumpQuietly("banned proxy hosts") {
                Files.writeString(AppPaths.PROXY_BANNED_HOSTS_FILE, bannedIps.joinToString("\n"))
            }
            dumpQuietly("banned proxy segments") {
                Files.writeString(AppPaths.PROXY_BANNED_SEGMENTS_FILE, bannedSegments.joinToString("\n"))
            }
        }

        super.dump()
    }

    /**
     * Run a single best-effort write; an [IOException] is logged as a warning, never propagated.
     */
    private fun dumpQuietly(description: String, write: () -> Unit) {
        try {
            write()
        } catch (e: IOException) {
            // Pass the throwable itself so that the stack trace is kept in the log.
            logger.warn("Failed to dump $description", e)
        }
    }

    /**
     * A short summary of the pool: total and free proxy counts, banned IP count and banned segment count.
     * Built with a string template so that the digits do not depend on the default locale.
     */
    override fun toString(): String =
            "total ${proxyEntries.size}, free: ${freeProxies.size}, banH: ${bannedIps.size} banS: ${bannedSegments.size}"
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy