// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// ai.platon.pulsar.skeleton.common.message.WebPageInfoFormatters.kt Maven / Gradle / Ivy

// The newest version!
package ai.platon.pulsar.skeleton.common.message

import ai.platon.pulsar.common.*
import ai.platon.pulsar.common.PulsarParams.*
import ai.platon.pulsar.common.config.Params
import ai.platon.pulsar.common.emoji.PopularEmoji
import ai.platon.pulsar.skeleton.common.persist.ext.options
import ai.platon.pulsar.skeleton.crawl.common.FetchState
import ai.platon.pulsar.persist.PageCounters
import ai.platon.pulsar.persist.WebPage
import ai.platon.pulsar.persist.metadata.Name
import ai.platon.pulsar.persist.model.ActiveDOMStat
import org.apache.commons.lang3.StringUtils
import org.apache.commons.lang3.time.DurationFormatUtils
import java.text.DecimalFormat
import java.time.Duration
import java.time.Instant

/**
 * Renders the fetch-related state of a [WebPage] as one line of `key:value` pairs.
 *
 * Every value is read from [page] when [toString] is called, so the output reflects the state of
 * the page at formatting time rather than at construction time.
 *
 * The emitted keys are:
 * - `T`  — previous fetch time `->` fetch time `,` fetch interval
 * - `DC` — distance `,` fetch count
 * - `PT` — content publish time `,` referred content publish time
 * - `C`  — referred item count `,` referred character count
 * - `S`  — content score `,` score `,` cash, each with one decimal digit
 * - the page category symbol — the url, cut to at most 80 characters
 *
 * @param page the page to describe
 */
class FetchStatusFormatter(val page: WebPage) {
    private val prevFetchTime get() = page.prevFetchTime
    private val fetchTime get() = page.fetchTime
    private val fetchInterval get() = page.fetchInterval
    private val distance get() = page.distance
    private val fetchCount get() = page.fetchCount
    private val contentPublishTime get() = page.contentPublishTime
    private val refContentPublishTime get() = page.refContentPublishTime
    private val pageCategory get() = page.pageCategory
    private val refItems get() = page.pageCounters.get(PageCounters.Ref.item)
    private val refChars get() = page.pageCounters.get(PageCounters.Ref.ch)
    private val contentScore get() = page.contentScore.toDouble()
    private val score get() = page.score.toDouble()
    private val cash get() = page.cash.toDouble()
    private val url get() = page.url

    /**
     * Builds the status line described in the class documentation.
     *
     * @return the formatted `key:value` line produced by [Params.formatAsLine]
     */
    override fun toString(): String {
        // java.text.DecimalFormat is not synchronized, so a formatter shared through the companion
        // object could produce corrupted output when pages are formatted from several threads.
        // A fresh instance per call is cheap and keeps this method safe to call concurrently.
        val scoreFormat = DecimalFormat(SCORE_PATTERN)

        val fetchTimeString = (DateTimes.format(prevFetchTime, DATE_TIME_PATTERN) + "->" +
                DateTimes.format(fetchTime, DATE_TIME_PATTERN) +
                "," + DurationFormatUtils.formatDuration(fetchInterval.toMillis(), "DdTH:mm:ss"))

        val params = Params.of(
                "T", fetchTimeString,
                "DC", "$distance,$fetchCount",
                "PT", DateTimes.isoInstantFormat(contentPublishTime) + "," + DateTimes.isoInstantFormat(refContentPublishTime),
                "C", "$refItems,$refChars",
                "S", scoreFormat.format(contentScore) + "," + scoreFormat.format(score) + "," + scoreFormat.format(cash),
                pageCategory.symbol(), StringUtils.substring(url, 0, 80)
        ).withKVDelimiter(":")

        return params.formatAsLine()
    }

    companion object {
        /** Pattern used for the three score values: at least one integer digit, exactly one decimal. */
        private const val SCORE_PATTERN = "0.0"

        /** Pattern used for the previous and current fetch times. */
        private const val DATE_TIME_PATTERN = "yyyy-MM-dd HH:mm:ss"
    }
}

/**
 * A named value that can be rendered as text, optionally left-padded to a fixed width.
 *
 * Only [value], [width] and [padding] take part in rendering; [name], [prefix] and [postfix]
 * are carried along but are not used by [format] or [toString].
 *
 * @property name a label for the value
 * @property value the value to render; rendered via its own `toString()`
 * @property prefix text associated with the value (not used when rendering)
 * @property postfix text associated with the value (not used when rendering)
 * @property width the minimum rendered width; values `<= 0` disable padding
 * @property padding the character used to pad on the left
 */
data class Record(
    val name: String,
    val value: Any,
    val prefix: String = "",
    val postfix: String = "",
    val width: Int = 0,
    val padding: Char = ' ',
) {
    /**
     * Renders the value, left-padded with [padding] up to [width] characters when [width] is positive.
     *
     * @return the rendered text
     */
    fun format(): String {
        val text = toString()
        if (width <= 0) {
            return text
        }
        return StringUtils.leftPad(text, width, padding)
    }

    /** Returns the textual form of [value] only, replacing the generated data-class representation. */
    override fun toString(): String {
        return value.toString()
    }
}

/**
 * Builds a one-line, human-readable report describing how a [WebPage] was loaded.
 *
 * The line produced by [toString] has the layout
 *
 * ```
 * <id>. <status> <category> <fetch reason> got <minor code> <content bytes> in <response time>, <details> | <location>
 * ```
 *
 * where `<details>` holds the optional parts: time since the previous fetch, fetch count, failure
 * text, DOM statistics, field counts, additional status, proxy, privacy context name and label.
 *
 * Some parts (the label, the previous fetch time and the whole `<details>` template) are captured
 * when the formatter is constructed; the remaining parts are read from [page] when [toString] runs.
 *
 * @param page the page to describe
 * @param prefix text shown instead of the status symbols; when empty the page status is shown
 * @param withOptions append the page arguments to the location
 * @param withNormUrl append the page url to the location
 * @param withReferer append the page referrer to the location
 * @param withSymbolicLink prepend the symbolic link for fetched pages regardless of the page id
 */
class PageLoadStatusFormatter(
        private val page: WebPage,
        private val prefix: String = "",
        private val withOptions: Boolean = false,
        private val withNormUrl: Boolean = false,
        private val withReferer: Boolean = false,
        private val withSymbolicLink: Boolean = false
) {
    /** Pages with an id below this threshold are reported more verbosely. */
    private val verboseCount = 200
    private val url get() = page.url
    private val href get() = page.href
    private val location get() = page.location
    /** The raw response time stored in the page metadata, or an empty string when absent. */
    private val responseTime get() = page.metadata[Name.RESPONSE_TIME]?:""
    private val proxy get() = page.proxy
    private val protocolStatus get() = page.protocolStatus
    private val activeDOMStatTrace = page.activeDOMStatTrace
    private val m = page.pageModel

    /**
     * True for the lines that carry extra explanatory text: every 10th page while the page id is
     * below [verboseCount], and every [verboseCount]-th page from then on.
     *
     * Both the page status text and the DOM statistics legend follow this single rule, so a page
     * whose id equals [verboseCount] is treated the same way by both.
     */
    private val isVerboseLine: Boolean
        get() {
            val divisor = if (page.id < verboseCount) 10 else verboseCount
            return page.id % divisor == 0
        }

    /** Symbol for the task outcome; empty when a non-blank [prefix] is supplied. */
    private val taskStatusSymbol: String get() = when {
        prefix.isNotBlank() -> ""
        page.isCanceled -> "${PopularEmoji.CANCELLATION_X} "
        protocolStatus.isFailed -> "${PopularEmoji.BROKEN_HEART} "
        protocolStatus.isSuccess -> "${PopularEmoji.HUNDRED_POINTS} "
        else -> "${PopularEmoji.SKULL_CROSSBONES} "
    }
    private val pageStatusSymbol get() = when {
        page.isCanceled -> PopularEmoji.CANCELLATION_X // canceled
        page.isFetched && page.fetchCount == 1 -> PopularEmoji.LIGHTNING // fetched new
        page.isFetched -> PopularEmoji.CIRCLE_ARROW_1 // fetched, reload
        page.isCached -> PopularEmoji.HOT_BEVERAGE // cached
        page.isLoaded -> PopularEmoji.OPTICAL_DISC   // load from db
        else -> PopularEmoji.BUG  // BUG symbol
    }
    private val pageStatusText get() = when {
        page.isCanceled -> "Canceled"
        page.isFetched && page.fetchCount == 1 -> "New"
        page.isFetched -> "Updated"
        page.isCached -> "Cached"
        page.isLoaded -> "Loaded"
        else -> "Unknown"
    }
    /** The status symbol, preceded by the status text on verbose lines. */
    private val pageStatus: String get() = when {
        isVerboseLine -> "$pageStatusText $pageStatusSymbol"
        else -> pageStatusSymbol.toString()
    }
    private val loadMessagePrefix get() = prefix.takeIf { it.isNotEmpty() } ?: pageStatus
    private val category get() = page.pageCategory.symbol()
    private val fetchReason get() = buildFetchReason()
    /** The label from the page options, abbreviated in the middle to at most 20 characters. */
    private val label = StringUtils.abbreviateMiddle(page.options.label, "..", 20)
    private val formattedLabel get() = if (label.isBlank()) "" else " | $label"
    /** The previous fetch time recorded before the page was updated, falling back to the page's own value. */
    private val prevFetchTimeBeforeUpdate = page.getVar(PulsarParams.VAR_PREV_FETCH_TIME_BEFORE_UPDATE) as? Instant ?: page.prevFetchTime
    private val prevFetchTimeDuration: Duration get() = Duration.between(prevFetchTimeBeforeUpdate, Instant.now())
    private val prevFetchTimeReport: String get() = when {
        // Durations longer than 20 * 360 days are not reported.
        // NOTE(review): presumably such a value means "never fetched before" — confirm.
        prevFetchTimeDuration.toDays() > 20 * 360 -> ""
        else -> " last fetched ${prevFetchTimeDuration.readable()} ago,"
    }
    /**
     * The last recorded DOM statistics as `ni/na/nnm/nst/h`, or an empty string when all five
     * values sum to zero. Verbose lines are prefixed with a legend.
     */
    private val jsSate: String
        get() {
            // The fifth component (w) is not reported.
            val (ni, na, nnm, nst, _, h) = activeDOMStatTrace["lastStat"]?: ActiveDOMStat()
            if (ni + na + nnm + nst + h == 0) {
                return ""
            }
            val legend = if (isVerboseLine) "i/a/nm/st/h:" else ""
            return String.format("$legend%d/%d/%d/%d/%d", ni, na, nnm, nst, h)
        }
    /** Non-blank / non-null / total field counts of the page model, or empty when there are no fields. */
    private val fieldCount: String get() = when {
        m == null -> ""
        m.numFields == 0 -> ""
        else -> String.format("%d/%d/%d", m.numNonBlankFields, m.numNonNullFields, m.numFields)
    }
    // The *Fmt properties are format fragments: each consumes exactly one argument in toString()
    // and only adds its " | " separator when the corresponding value is present.
    private val proxyFmt get() = if (proxy.isNullOrBlank()) "%s" else " | %s"
    private val jsFmt get() = if (jsSate.isBlank()) "%s" else " | %s"
    private val fetchCount get() = when {
        page.fetchRetries > 0 -> String.format("%d/%d", page.fetchRetries, page.fetchCount)
        else -> String.format("%d", page.fetchCount)
    }
    private val fieldCountFmt get() = if (m == null || m.numFields == 0) "%s" else " | nf:%-10s"
    private val failure get() = when {
        page.isCanceled -> String.format(" %s", page.protocolStatus.reason)
        protocolStatus.isFailed -> String.format(" %s", page.protocolStatus.toString())
        else -> ""
    }
    private val contextName get() = page.variables[VAR_PRIVACY_CONTEXT_NAME]?.let { " | $it" } ?: ""
    private val additionalStatus: String get() = page.getVar(VAR_ADD_LOAD_STATUS)?.toString()?.let { " | $it" } ?: ""
    private val symbolicLink get() = AppPaths.uniqueSymbolicLinkForUri(page.url)

    /**
     * The `<details>` part of the format string, built once at construction time.
     *
     * It mixes real format fragments ([jsFmt], [fieldCountFmt], [proxyFmt]) with plain text taken
     * from the page. The plain text is escaped so that a `%` inside it (for example in a failure
     * reason or a label) is printed literally instead of being parsed as a format specifier.
     */
    private val formattedMessage = "${prevFetchTimeReport.escapePercent()} fc:${fetchCount.escapePercent()}" +
            failure.escapePercent() +
            "$jsFmt$fieldCountFmt${additionalStatus.escapePercent()}$proxyFmt" +
            "${contextName.escapePercent()}${formattedLabel.escapePercent()}"

    /** The complete format string; it expects exactly the nine arguments supplied by [toString]. */
    private val fmt get() = "%3d. ${taskStatusSymbol.escapePercent()}${loadMessagePrefix.escapePercent()} %s " +
            "${fetchReason.escapePercent()} got %d %s in %s, $formattedMessage | %s"

    /**
     * Renders the load status line.
     *
     * @return the formatted report, laid out as described in the class documentation
     */
    override fun toString(): String {
        return String.format(fmt,
                page.id,
                category,
                page.protocolStatus.minorCode,
                buildContentBytes(),
                DateTimes.readableDuration(responseTime),
                jsSate,
                fieldCount,
                proxy?:"",
                buildLocation()
        )
    }

    /**
     * Not implemented: collects the individual report parts as [Record]s, discards the result and
     * then always fails.
     *
     * @throws NotImplementedError always
     */
    fun explain() {
        listOf(
            Record("id", page.id, width = 3),
            Record("category", category),
            Record("taskStatusSymbol", taskStatusSymbol, width = 1),
            Record("pageStatusSymbol", pageStatusSymbol, width = 1),
            Record("pageStatusText", pageStatusText),
            Record("pageStatus", pageStatus),
            Record("loadMessagePrefix", loadMessagePrefix),
            Record("fetchReason", fetchReason, width = 1),

            Record("contentBytes", buildContentBytes()),
            Record("minorCode", page.protocolStatus.minorCode),
            Record("responseTime", DateTimes.readableDuration(responseTime)),
            Record("prevFetchTime", prevFetchTimeReport),
            Record("fetchCount", fetchCount),
            Record("failure", failure),
            Record("jsSate", jsSate),
            Record("fieldCount", fieldCount),
            // The proxy may be absent; use the same empty-string fallback as toString().
            Record("proxy", proxy ?: ""),
            Record("contextName", contextName),
            Record("symbolicLink", symbolicLink),
            Record("location", buildLocation()),
        )
            .filter { it.width > 0 }
            .joinToString(" ") { it.format() }

        TODO("NOT IMPLEMENTED")
    }

    /** Doubles every `%` so the text is printed literally when embedded in a format string. */
    private fun String.escapePercent(): String = replace("%", "%%")

    /**
     * Describes why the page was fetched.
     *
     * @return `"for <symbol>"` when the recorded fetch state maps to a non-blank symbol, otherwise
     * an empty string; a missing state is treated as [FetchState.DO_NOT_FETCH]
     */
    private fun buildFetchReason(): String {
        val state = page.getVar(VAR_FETCH_STATE) as? CheckState
        val code = state?.code ?: FetchState.DO_NOT_FETCH
        return FetchState.toSymbol(code).takeIf { it.isNotBlank() }?.let { "for $it" } ?: ""
    }

    /**
     * Describes the content size of the page.
     *
     * The current length is followed by `<- <last length>` when the last length is non-zero and
     * differs, prefixed with `0 <-` when the page has no content, and followed by the persisted
     * length in brackets when that is positive.
     */
    private fun buildContentBytes(): String {
        var contentLength = if (page.lastContentLength == 0L || page.lastContentLength == page.contentLength) {
            compactFormat(page.contentLength).trim()
        } else {
            compactFormat(page.contentLength).trim() + " <- " + compactFormat(page.lastContentLength).trim()
        }

        if (page.content == null) {
            contentLength = "0 <- $contentLength"
        }

        return if (page.persistedContentLength > 0) {
            contentLength + " [" + PopularEmoji.OPTICAL_DISC + compactFormat(page.persistedContentLength).trim() + "]"
        } else {
            contentLength
        }
    }

    /** Formats a byte count compactly; zero is rendered as the plain string `"0"`. */
    private fun compactFormat(bytes: Long): String {
        return if (bytes == 0L) "0" else Strings.compactFormat(bytes, 7, false)
    }

    /**
     * Describes where the page was loaded from.
     *
     * The result is marked with `[R]` when the page location differs from the href, and with `[N]`
     * when the href differs from the url. Depending on the constructor flags the page arguments,
     * the url and the referrer are appended. For fetched pages the symbolic link is prepended when
     * the page id is below [verboseCount] or [withSymbolicLink] is set.
     */
    private fun buildLocation(): String {
        val expectedLocation = href ?: url
        val redirected = href != null && href != location
        val normalized = href != null && href != url
        // Named differently from the `location` property to make clear which one is being read.
        var shownLocation = if (redirected) location else expectedLocation
        if (withOptions) shownLocation += " ${page.args}"
        val readableLocation0 = if (redirected) "[R] $shownLocation <- $expectedLocation" else shownLocation
        var readableLocation = if (normalized) "[N] $readableLocation0" else readableLocation0
        if (withNormUrl) readableLocation = "$readableLocation <- $url"
        if (withReferer) readableLocation = "$readableLocation <- ${page.referrer}"
        val doWithSymbolicLink = page.isFetched && (page.id < verboseCount || withSymbolicLink)
        return if (doWithSymbolicLink) "file://$symbolicLink | $readableLocation" else readableLocation
    }
}

/**
 * Formats a report for a batch of loaded pages: a header line with the number of pages and the
 * time elapsed since [startTime], followed by one numbered [PageLoadStatusFormatter] line per page.
 *
 * @param pages the pages to report, listed in iteration order
 * @param startTime the instant from which the elapsed time in the header is measured
 * @param withSymbolicLink passed through to each [PageLoadStatusFormatter]
 */
class LoadedPagesStatusFormatter(
        // The element type is required: Kotlin has no raw types, and each element is handed to
        // PageLoadStatusFormatter, which takes a WebPage.
        val pages: Collection<WebPage>,
        val startTime: Instant,
        val withSymbolicLink: Boolean = false
) {
    /**
     * Renders the report.
     *
     * @return the header line followed by one line per page, each terminated by a newline
     */
    override fun toString(): String {
        val elapsed = DateTimes.elapsedTime(startTime)
        return buildString {
            append(String.format("Fetched total %d pages in %s:\n", pages.size, elapsed.readable()))
            pages.forEachIndexed { index, page ->
                // Lines are numbered from 1.
                append(index + 1).append(".\t")
                append(PageLoadStatusFormatter(page, withSymbolicLink = withSymbolicLink))
                append('\n')
            }
        }
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy