All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.platon.pulsar.common.proxy.ProxyLoader.kt Maven / Gradle / Ivy

There is a newer version: 2.1.0
Show newest version
package ai.platon.pulsar.common.proxy

import ai.platon.pulsar.common.AppPaths
import ai.platon.pulsar.common.concurrent.ConcurrentPassiveExpiringSet
import ai.platon.pulsar.common.config.ImmutableConfig
import ai.platon.pulsar.common.readable
import org.slf4j.LoggerFactory
import java.io.IOException
import java.nio.file.Files
import java.nio.file.Path
import java.time.Duration
import java.time.Instant
import java.util.concurrent.ThreadLocalRandom
import java.util.concurrent.atomic.AtomicBoolean

/**
 * Load proxies from proxy vendors
 */
abstract class ProxyLoader(conf: ImmutableConfig): AutoCloseable {
    companion object {
        val TEST_PROXY_FILE = AppPaths.PROXY_BASE_DIR.resolve("test-ip")
    }

    private val logger = LoggerFactory.getLogger(ProxyLoader::class.java)
    protected val startTime = Instant.now()
    protected val closed = AtomicBoolean()

    /**
     * The minimum time to live for proxy IPs, the implementation might drop short-live proxy IPs.
     * */
    var minimumProxyTTL = Duration.ofMinutes(5)
    var testProxyBeforeUse = false
    // TODO: configurable
    var testUrl = "https://www.amazon.com/"
    val fileWatchInterval = Duration.ofSeconds(30)
    val lastModifiedTimes = mutableMapOf()

    var banStrategy = conf.get("proxy.ban.strategy", "segment")
    var ipTimeToBan = conf.getDuration("proxy.ip.time.to.ban", Duration.ofHours(1))
    var segmentTimeToBan = conf.getDuration("proxy.segment.time.to.ban", Duration.ofHours(2))

    val bannedIps = ConcurrentPassiveExpiringSet(ipTimeToBan)
    val bannedSegments = ConcurrentPassiveExpiringSet(segmentTimeToBan)
    /**
     * The probability to choose a test ip if absent
     * */
    var testIpRate = 0.3

    val isActive get() = !closed.get()

    @Throws(ProxyException::class)
    abstract fun updateProxies(reloadInterval: Duration): List

    @Synchronized
    fun updateBanStrategy() {
        val path = AppPaths.PROXY_BAN_STRATEGY
        loadIfModified(path, fileWatchInterval) { Files.readAllLines(it) }
                .firstOrNull()?.let { banStrategy = it }?.also { logger.info("Proxy ban strategy: $it") }
    }

    /**
     * Get a test ip by a probability if exist in file [TEST_PROXY_FILE]
     * */
    fun loadTestProxyIfAbsent(): ProxyEntry? {
        return TEST_PROXY_FILE.takeIf { isActive && testIpRate > 0 && ThreadLocalRandom.current().nextDouble() <= testIpRate }
                ?.takeIf { Files.exists(it) }
                ?.let { Files.readString(it).trim() }
                ?.let { ProxyEntry.parse(it) }
                ?.also { it.isTestIp = true }
    }

    protected fun  loadIfModified(path: Path, expires: Duration, loader: (Path) -> List): List {
        if (!Files.exists(path)) {
            return listOf()
        }
        val lastModified = lastModifiedTimes.getOrDefault(path, Instant.EPOCH)

        try {
            val modified = Files.getLastModifiedTime(path).toInstant()
            val elapsed = Duration.between(lastModified, modified)

            if (elapsed > expires) {
                logger.info("Reload from file, last modified: {}, elapsed: {} | {}", lastModified, elapsed.readable(), path)
                return loader(path).also { lastModifiedTimes[path] = modified }
            }
        } catch (e: IOException) {
            logger.warn("Failed to load - {}", e.message)
        }

        return listOf()
    }

    override fun close() {
        if (closed.compareAndSet(false, true)) {
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy