// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// com.tfowl.jsoup.ktx.OpenGraph.kt Maven / Gradle / Ivy
//
// The newest version!
package com.tfowl.jsoup.ktx

import org.jsoup.Jsoup
import org.jsoup.nodes.Document

// Map key under which OpenGraph.from stores a root property's own content while it is
// collecting that property's structured sub-properties; the key is removed again before
// the values are wrapped in OpenGraphPropertyValue.
private const val ROOT_PROPERTY_KEY = "."

/**
 * One value of an Open Graph property together with its structured sub-properties.
 *
 * @property content the value of the property itself.
 * @property extended structured sub-properties of this value, keyed by sub-property name.
 */
data class OpenGraphPropertyValue(val content: String, val extended: Map<String, String>) {
    /** `true` when at least one structured sub-property is attached to this value. */
    val hasStructuredData = extended.isNotEmpty()

    /**
     * Renders [content] alone when there is no structured data, otherwise
     * `content (key=value, key=value)` in the iteration order of [extended].
     */
    override fun toString(): String {
        if (extended.isEmpty()) return content
        return buildString {
            append(content)
            // Iterate the entries directly; no intermediate list of pairs is needed.
            extended.entries.joinTo(this, prefix = " (", postfix = ")") { (k, v) -> "$k=$v" }
        }
    }
}

/**
 * Read-only view over Open Graph metadata, grouped by property name.
 *
 * Each property maps to the list of values found for it, in the iteration order of [data].
 * Use [from] to build an instance from a parsed document.
 *
 * @param data property name (without the `og:` prefix) to the values recorded for it.
 */
class OpenGraph(private val data: Map<String, List<OpenGraphPropertyValue>>) {
    /** Names of all properties that are present. */
    val keys: Set<String> = data.keys

    /** Content of the first value of [key], or `null` when the property is absent. */
    fun getContent(key: String): String? = data[key]?.firstOrNull()?.content

    /** Content of every value of [key]; empty when the property is absent. */
    fun getAllContent(key: String): List<String> = data[key]?.map { it.content } ?: emptyList()

    /** First value of [key] including its structured sub-properties, or `null` when absent. */
    fun getStructured(key: String): OpenGraphPropertyValue? = data[key]?.firstOrNull()

    /** Every value of [key] including structured sub-properties; empty when absent. */
    fun getAllStructured(key: String): List<OpenGraphPropertyValue> = data[key] ?: emptyList()

    /**
     * Multi-line, column-aligned dump of all properties. Repeated values of a property are
     * listed below each other, and structured sub-properties are indented beneath their value.
     */
    override fun toString(): String = buildString {
        fun indent(level: Int): String = " ".repeat(4 * level)

        appendLine("OpenGraph {")

        // maxOfOrNull: an instance without any property must still be printable.
        val maxRootPropertyLength = data.keys.maxOfOrNull { it.length } ?: 0
        // Prefix that lines continuation rows up with the value column (the two spaces stand in for ": ").
        val continuationPrefix = indent(level = 1) + " ".repeat(maxRootPropertyLength) + "  "

        for ((property, items) in data) {
            append(indent(level = 1))
            // padEnd/padStart instead of "%-Ns"/"%Ns": a width of 0 is not a valid format specifier.
            append(property.padEnd(maxRootPropertyLength))
            append(": ")

            if (items.isEmpty()) {
                // Terminate the line so the next property does not run into this one.
                appendLine()
                continue
            }

            val maxNestedPropertyLength =
                items.maxOfOrNull { item -> item.extended.keys.maxOfOrNull { it.length } ?: 0 } ?: 0

            for ((index, item) in items.withIndex()) {
                val nestedKeys = item.extended.keys.minus(ROOT_PROPERTY_KEY)

                if (index > 0) append(continuationPrefix)
                appendLine(item.content)

                for (key in nestedKeys) {
                    append(continuationPrefix + indent(level = 1))
                    append(key.padStart(maxNestedPropertyLength))
                    append(": ")
                    append(item.extended[key])
                    appendLine()
                }
            }
        }

        append("}")
    }

    companion object {
        /**
         * Collects every `meta` element inside `head` whose `property` attribute starts with
         * `og:` and groups the contents by property name.
         *
         * A property without a further `:` (e.g. `og:image`) starts a new value. A property
         * with one (e.g. `og:image:width`) is attached as a structured sub-property to the
         * most recent value of its base property. When a structured property appears before
         * any value of its base property, a value with empty content is created to carry it.
         *
         * @param document the parsed document to read the metadata from.
         * @return the collected metadata; empty when the document has no matching elements.
         */
        fun from(document: Document): OpenGraph {
            val data = mutableMapOf<String, MutableList<MutableMap<String, String>>>()

            for (meta in document.select("head meta[property^='og:']")) {
                val property = meta.attr("property").removePrefix("og:")
                val content = meta.attr("content")

                if (":" !in property) {
                    // Root property: stored under a special key next to its sub-properties.
                    data.getOrPut(property) { mutableListOf() }
                        .add(mutableMapOf(ROOT_PROPERTY_KEY to content))
                } else {
                    // Structured property: belongs to the latest value of its base property.
                    val base = property.substringBefore(":")
                    val structuredProperty = property.substringAfter(":")

                    val current = data.getOrPut(base) { mutableListOf() }
                        .lastOrInsertIfEmpty { mutableMapOf() }
                    current[structuredProperty] = content
                }
            }

            return OpenGraph(
                data.mapValues { (_, values) ->
                    values.map { value ->
                        OpenGraphPropertyValue(
                            // The root entry is missing when only structured properties were seen.
                            content = value[ROOT_PROPERTY_KEY].orEmpty(),
                            extended = value.minus(ROOT_PROPERTY_KEY),
                        )
                    }
                },
            )
        }
    }
}

/**
 * Returns the last element of this list. When the list is empty, the result of [f] is
 * appended first and then returned; [f] is not invoked for a non-empty list.
 */
private fun <T> MutableList<T>.lastOrInsertIfEmpty(f: () -> T): T {
    if (isEmpty()) add(f())
    return last()
}

/** Extracts the Open Graph metadata of this document by delegating to [OpenGraph.from]. */
fun Document.getOpenGraphData(): OpenGraph {
    return OpenGraph.from(this)
}

/**
 * Demo entry point: parses an inline sample document, extracts its Open Graph data and
 * prints the full dump followed by the plain and structured lookups for the "image" property.
 */
fun main() {
    // NOTE(review): the sample literal below contains only text and blank lines, so the
    // markup it was presumably meant to hold looks stripped in this copy. As written there
    // is no `meta` element for the extraction to find; restore the sample HTML to confirm.
    val html = """

The Rock (1996)































"""
    val og = Jsoup.parse(html).getOpenGraphData()

    println(og)

    with(og) {
        println(getContent("image"))
        println(getAllContent("image"))
        println(getStructured("image"))
        println(getAllStructured("image"))
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy