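// NOTE: the following lines are download-site boilerplate captured with this file, not part of the source.
// All downloads are free. Search and download functionality uses the official Maven repository.
//
// ai.platon.pulsar.common.collect.UrlLoaders.kt (Maven / Gradle / Ivy)
//
// There is a newer version: 2.1.0
// Show newest version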
package ai.platon.pulsar.common.collect

import ai.platon.pulsar.common.AppPaths
import ai.platon.pulsar.common.UrlExtractor
import ai.platon.pulsar.common.urls.Hyperlink
import ai.platon.pulsar.common.urls.HyperlinkDatum
import ai.platon.pulsar.common.urls.UrlAware
import ai.platon.pulsar.common.warnInterruptible
import com.google.gson.GsonBuilder
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.StandardOpenOption
import kotlin.random.Random

/**
 * A URL loader backed by a single local text file.
 *
 * [save] appends one record per line in the form `<topic.group><TAB><json>`, where the JSON is the
 * Gson serialization of the hyperlink's data. When loading, lines that are not such records are
 * handed to [UrlExtractor], so a plain list of URLs can be read as well.
 *
 * @property path the file that records are appended to and read from
 */
open class LocalFileUrlLoader(val path: Path): OneLoadExternalUrlLoader() {
    private val delimiter = "\t"
    private val gson = GsonBuilder().create()
    private val urlExtractor = UrlExtractor()

    /**
     * Appends [url] to the file as a single record tagged with the group of [topic].
     *
     * The file (and its parent directories, if any) is created on first use.
     *
     * @param url the url to persist; wrapped into a [Hyperlink] if it is not one already
     * @param topic the topic whose group is written in front of the record
     */
    override fun save(url: UrlAware, topic: UrlTopic) {
        val hyperlink = url as? Hyperlink ?: Hyperlink(url)
        val json = gson.toJson(hyperlink.data())
        if (!Files.exists(path)) {
            // A bare file name has no parent component; there is no directory to create in that case.
            path.parent?.let { Files.createDirectories(it) }
        }
        // CREATE + APPEND creates the file on demand, so a concurrent creator between the
        // existence check and the write can no longer cause FileAlreadyExistsException.
        Files.writeString(
            path,
            "${topic.group}$delimiter$json\n",
            StandardOpenOption.CREATE,
            StandardOpenOption.APPEND
        )
    }

    /**
     * Reads the whole file and adds every hyperlink that [parse] accepts for [topic] to [sink].
     *
     * NOTE(review): the element type is restored as [UrlAware] to match [save] and the transforming
     * overload — confirm against the base class declaration.
     *
     * @param sink the collection that receives the loaded urls
     * @param size not used by this implementation; all lines of the file are read
     * @param topic only records tagged with this topic's group are decoded as records
     * @return [sink], or a new empty list if the file does not exist
     */
    override fun loadToNow(sink: MutableCollection<UrlAware>, size: Int, topic: UrlTopic): Collection<UrlAware> {
        if (!Files.exists(path)) {
            return listOf()
        }

        val group = "${topic.group}"
        runCatching {
            Files.readAllLines(path).mapNotNullTo(sink) { line -> parse(line, group) }
        }.onFailure { warnInterruptible(this, it, "Failed to load urls from $path") }

        return sink
    }

    /**
     * Reads the whole file, converts every accepted hyperlink with [transformer] and adds the
     * results to [sink].
     *
     * @param sink the collection that receives the transformed items
     * @param size not used by this implementation; all lines of the file are read
     * @param topic only records tagged with this topic's group are decoded as records
     * @param transformer converts each loaded url into the element type of [sink]
     * @return [sink], or a new empty list if the file does not exist
     */
    override fun <T> loadToNow(sink: MutableCollection<T>, size: Int, topic: UrlTopic, transformer: (UrlAware) -> T): Collection<T> {
        if (!Files.exists(path)) {
            return listOf()
        }

        val group = "${topic.group}"
        runCatching {
            Files.readAllLines(path)
                .mapNotNull { line -> parse(line, group) }
                .mapTo(sink) { hyperlink -> transformer(hyperlink) }
        }.onFailure { warnInterruptible(this, it, "Failed to load urls from $path") }

        return sink
    }

    /**
     * Deletion is not supported by this loader: nothing is removed from the file.
     *
     * @return always `0`
     */
    override fun deleteAll(topic: UrlTopic): Long {
        return 0
    }

    /**
     * Turns one line of the file into a [Hyperlink], or `null` if the line yields nothing.
     *
     * @param line a raw line from the file
     * @param group the group tag a record must carry to be decoded
     */
    private fun parse(line: String, group: String): Hyperlink? {
        val parts = line.split(delimiter)
        val hasTwoParts = parts.size == 2

        return when {
            hasTwoParts && parts[0] == group -> {
                // One corrupt record must not abort the whole load: report it and skip the line.
                // Gson may also return null (e.g. for an empty payload), which is skipped as well.
                runCatching { gson.fromJson(parts[1], HyperlinkDatum::class.java) }
                    .onFailure { warnInterruptible(this, it, "Skipping malformed url record in $path") }
                    .getOrNull()
                    ?.let { Hyperlink(it) }
            }
            // A JSON record tagged with another group belongs to a different topic. Passing it on
            // to the extractor could pull the url out of its JSON payload and load it under the
            // wrong topic (NOTE(review): assumes UrlExtractor finds urls embedded in text — confirm).
            hasTwoParts && parts[1].trimStart().startsWith("{") -> null
            // Anything else is treated as free text that may contain a url.
            else -> urlExtractor.extract(line)?.let { Hyperlink(it) }
        }
    }
}

/**
 * A [LocalFileUrlLoader] whose backing file is `hyperlink.<random long>.txt`, resolved against
 * [AppPaths.PROC_TMP_TMP_DIR]. The random part of the name is drawn once, when the loader is constructed.
 */
open class TemporaryLocalFileUrlLoader :
    LocalFileUrlLoader(AppPaths.PROC_TMP_TMP_DIR.resolve("hyperlink.${Random.nextLong()}.txt"))




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy