All downloads are free. Search and download functionality uses the official Maven repository.

commonMain.decoder.internal.HtmlTreeDecoder.kt Maven / Gradle / Ivy

package dev.burnoo.kspoon.decoder.internal

import com.fleeksoft.ksoup.nodes.Document
import com.fleeksoft.ksoup.nodes.Element
import com.fleeksoft.ksoup.select.Elements
import dev.burnoo.kspoon.HtmlTextMode
import dev.burnoo.kspoon.annotation.Selector
import dev.burnoo.kspoon.configuration.KspoonConfiguration
import dev.burnoo.kspoon.decoder.KspoonDecoder
import dev.burnoo.kspoon.exception.KspoonParseException
import dev.burnoo.kspoon.exception.kspoonError
import dev.burnoo.kspoon.serializer.DocumentSerializer
import dev.burnoo.kspoon.serializer.ElementSerializer
import dev.burnoo.kspoon.serializer.ElementsSerializer
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.descriptors.SerialDescriptor
import kotlinx.serialization.descriptors.StructureKind
import kotlinx.serialization.descriptors.elementNames
import kotlinx.serialization.encoding.CompositeDecoder
import kotlinx.serialization.modules.EmptySerializersModule
import kotlinx.serialization.modules.SerializersModule
import kotlinx.serialization.modules.contextual
import kotlinx.serialization.modules.overwriteWith
import kotlin.contracts.ExperimentalContracts
import kotlin.contracts.contract

/**
 * Decoder that maps a set of HTML [elements] onto serializable Kotlin structures.
 *
 * Every serial element is addressed by an [HtmlTag]: either a [HtmlTag.Selector] built from a [Selector]
 * annotation, or a [HtmlTag.Index] derived from a numeric element name. Entering a nested structure creates
 * a child decoder scoped to the elements selected by the current tag.
 *
 * @param elements elements that the tags of this decoder are resolved against.
 * @param configuration source of the default text mode and of the `coerceInputValues` flag.
 * @param extraSerializersModule module layered over the built-in contextual serializers via `overwriteWith`.
 * @param tagHierarchy tags of the enclosing decoders; used only to render selector paths in error messages.
 */
@OptIn(ExperimentalSerializationApi::class)
internal class HtmlTreeDecoder(
    private val elements: Elements,
    private val configuration: KspoonConfiguration,
    extraSerializersModule: SerializersModule = EmptySerializersModule(),
    private val tagHierarchy: List<HtmlTag> = emptyList(),
) : TaggedDecoder<HtmlTag>(), KspoonDecoder {

    private val textMode = configuration.defaultTextMode
    private val coerceInputValues: Boolean = configuration.coerceInputValues

    override val serializersModule = SerializersModule {
        contextual(ElementSerializer)
        contextual(ElementsSerializer)
        contextual(DocumentSerializer)
    } overwriteWith extraSerializersModule

    // Index of the next element to hand out from decodeElementIndex.
    private var elementIndex = 0

    /**
     * Resolves the tag for the element at [index].
     *
     * A [Selector] annotation wins; otherwise the element name is used as an index if it parses as an Int.
     * Fails with a kspoon error when neither is available.
     */
    override fun SerialDescriptor.getTag(index: Int): HtmlTag {
        val selectorAnnotation = getElementSelectorAnnotation(index)
        val newIndex = if (selectorAnnotation == null) {
            getElementName(index).toIntOrNull()
        } else {
            null
        }
        return when {
            selectorAnnotation != null -> selectorAnnotation.toHtmlTag()
            newIndex != null -> HtmlTag.Index(newIndex)
            else -> kspoonError(
                "Selector annotation not found for ${getElementDescriptor(index).serialName}," +
                    " parent selector: ${getSelectorFullPath(tag = null)}",
            )
        }
    }

    /**
     * Returns the first [Selector] annotation found on the element at [index], looking at the element's own
     * annotations first and then at the annotations of the element's descriptor.
     */
    private fun SerialDescriptor.getElementSelectorAnnotation(index: Int): Selector? {
        val annotations = getElementAnnotations(index) + getElementDescriptor(index).annotations
        return annotations.filterIsInstance<Selector>().firstOrNull()
    }

    /**
     * Returns the next element index, or [CompositeDecoder.DECODE_DONE] when the structure is exhausted.
     *
     * For lists the bound is the number of [elements]; for other kinds it is the descriptor's element count.
     * Elements for which [shouldCoerceInputValue] holds are skipped.
     */
    override fun decodeElementIndex(descriptor: SerialDescriptor): Int {
        val isList = descriptor.kind == StructureKind.LIST
        val maxCount = if (isList) elements.size else descriptor.elementsCount
        while (shouldCoerceInputValue(maxCount, descriptor)) {
            elementIndex++
        }
        if (elementIndex == maxCount) return CompositeDecoder.DECODE_DONE
        return elementIndex++
    }

    /**
     * Tells whether the element at the current [elementIndex] should be skipped so that its Kotlin default
     * value is used instead of a decoded one.
     */
    private fun shouldCoerceInputValue(maxCount: Int, descriptor: SerialDescriptor): Boolean {
        // ensure coerceInputValues from config is enabled
        if (!coerceInputValues) return false
        // ensure current structure hasn't ended
        if (elementIndex == maxCount) return false
        // ensure current structure is a class (and not a list)
        if (descriptor.kind != StructureKind.CLASS) return false
        // check if the *current* element is optional - if the instance can be created with Kotlin default value
        if (!descriptor.isElementOptional(elementIndex)) return false
        val tag = descriptor.getTag(elementIndex)
        if (tag !is HtmlTag.Selector) return false
        // check if default value was set in selector - then we skip selecting element here, as default value
        // will be returned instead
        if (tag.defaultValue != null) return false
        // finally checking if the selected element is empty
        return selectElement(tag) == null
    }

    /**
     * Returns this decoder when there is no current tag; otherwise returns a child decoder over the elements
     * selected by the current tag, with that tag appended to the hierarchy.
     */
    override fun beginStructure(descriptor: SerialDescriptor): CompositeDecoder {
        val tag = currentTagOrNull ?: return this
        val selectedElements = selectElements(tag)
        return HtmlTreeDecoder(
            elements = selectedElements,
            configuration = configuration,
            // Forward the effective module so serializers from extraSerializersModule stay available
            // inside nested structures.
            extraSerializersModule = serializersModule,
            tagHierarchy = tagHierarchy + tag,
        )
    }

    override fun decodeCollectionSize(descriptor: SerialDescriptor): Int = elements.size

    /**
     * Reports a value as present for index tags, for selectors carrying a default value, and for selectors
     * that match an element.
     */
    override fun decodeTaggedNotNullMark(tag: HtmlTag): Boolean {
        if (tag is HtmlTag.Index) return true
        if (tag is HtmlTag.Selector && tag.defaultValue != null) return true
        return selectElement(tag) != null
    }

    // Sequential decoding is reported only when coercion is off; the skipping of coerced elements is
    // implemented in decodeElementIndex.
    override fun decodeSequentially() = !coerceInputValues

    override fun decodeTaggedLong(tag: HtmlTag) = getTextAndMap(tag) { toLong() }
    override fun decodeTaggedShort(tag: HtmlTag) = getTextAndMap(tag) { toShort() }
    override fun decodeTaggedByte(tag: HtmlTag) = getTextAndMap(tag) { toByte() }
    override fun decodeTaggedFloat(tag: HtmlTag) = getTextAndMap(tag) { toFloat() }
    override fun decodeTaggedDouble(tag: HtmlTag) = getTextAndMap(tag) { toDouble() }
    override fun decodeTaggedBoolean(tag: HtmlTag) = getTextAndMap(tag) { toBoolean() }
    override fun decodeTaggedChar(tag: HtmlTag) = getText(tag).first()
    override fun decodeTaggedInt(tag: HtmlTag) = getTextAndMap(tag) { toInt() }
    override fun decodeTaggedString(tag: HtmlTag) = getText(tag)

    /**
     * Returns the index of the enum entry whose serial name equals the text for [tag]; fails with a kspoon
     * error when no entry matches.
     */
    override fun decodeTaggedEnum(tag: HtmlTag, enumDescriptor: SerialDescriptor): Int {
        val text = getText(tag)
        val index = enumDescriptor.elementNames.indexOfFirst { it == text }
        return if (index == -1) {
            kspoonError(
                "Can't parse value '$text' for enum '${enumDescriptor.serialName}' at selector: ${getSelectorFullPath(tag)}",
            )
        } else index
    }

    /**
     * Reads the text for [tag] and converts it with [map], reporting any conversion failure as a kspoon error
     * that includes the offending text, the target type and the selector path.
     */
    private inline fun <reified T> getTextAndMap(tag: HtmlTag, map: String.() -> T): T {
        val text = getText(tag)
        return try {
            text.map()
        } catch (e: Throwable) {
            kspoonError("Error while converting '$text' to '${T::class}' for selector ${getSelectorFullPath(tag)}", e)
        }
    }

    /** Selects all elements addressed by [tag]; an out-of-range index tag yields an empty [Elements]. */
    private fun selectElements(tag: HtmlTag): Elements {
        return when (tag) {
            is HtmlTag.Selector -> elements.select(tag.selector)
            is HtmlTag.Index -> elements.getAtAsElements(tag.index)
        }
    }

    /**
     * Selects the single element addressed by [tag].
     *
     * Only selector tags can produce `null` (declared in the contract); index tags access [elements] directly.
     */
    @OptIn(ExperimentalContracts::class)
    private fun selectElement(tag: HtmlTag): Element? {
        contract { returns(null) implies (tag is HtmlTag.Selector) }
        return when (tag) {
            is HtmlTag.Selector -> elements.select(tag.selector).getOrNull(tag.index)
            is HtmlTag.Index -> elements[tag.index]
        }
    }

    /**
     * Extracts the string value for [tag]: the attribute value when the selector names an attribute, otherwise
     * the element content in the selector's text mode (falling back to [currentTextMode]).
     *
     * On any failure a selector's default value is returned if present; otherwise the failure is rethrown as
     * (or wrapped into) a [KspoonParseException].
     */
    private fun getText(tag: HtmlTag, currentTextMode: HtmlTextMode = textMode): String {
        return try {
            val element = selectElementOrThrow(tag)
            val attribute = (tag as? HtmlTag.Selector)?.attribute
            val textModeFromSelector = (tag as? HtmlTag.Selector)?.textMode
            if (attribute != null) {
                element.attr(attribute)
            // NOTE(review): withRegexIfPresent binds to the `when` below, so the regex is not applied to
            // attribute values - confirm this is intended.
            } else when (textModeFromSelector ?: currentTextMode) {
                HtmlTextMode.Text -> element.text()
                HtmlTextMode.InnerHtml -> element.html()
                HtmlTextMode.OuterHtml -> element.outerHtml()
                HtmlTextMode.Data -> element.data()
            }.withRegexIfPresent(tag)
        } catch (e: Exception) {
            // A default value on the selector takes precedence over any failure.
            if (tag is HtmlTag.Selector && tag.defaultValue != null) return tag.defaultValue
            if (e is KspoonParseException) {
                throw e
            } else {
                kspoonError("Error getting text for selector: ${getSelectorFullPath(tag)}", e)
            }
        }
    }

    /** Like [selectElement], but fails with a kspoon error when nothing is selected. */
    private fun selectElementOrThrow(tag: HtmlTag): Element {
        return when (val element = selectElement(tag)) {
            null -> kspoonError("Element not found for selector: ${getSelectorFullPath(tag)}")
            else -> element
        }
    }

    /**
     * Applies the selector's regex, if any, to the receiver. Returns the first capture group when the match
     * has one, otherwise the whole match; fails with a kspoon error when the regex does not match.
     */
    private fun String.withRegexIfPresent(tag: HtmlTag): String {
        if (tag !is HtmlTag.Selector) return this
        if (tag.regex == null) return this
        val matchResult = tag.regex.find(this)
            ?: kspoonError("Regex '${tag.regex}' not found for current selector: ${getSelectorFullPath(tag)}")
        return if (matchResult.groupValues.size > 1) matchResult.groupValues[1] else matchResult.value
    }

    private fun Elements.getAtAsElements(index: Int) = getOrNull(index)?.let(::Elements) ?: Elements()

    // Renders the enclosing tags followed by [tag] (when non-null) as "[a -> b -> c]".
    private fun getSelectorFullPath(tag: HtmlTag?) = (tagHierarchy + tag)
        .filterNotNull()
        .joinToString(" -> ", prefix = "[", postfix = "]")

    // KspoonDecoder implementation
    override fun decodeElement(): Element? = selectElement(tag = currentTag)

    override fun decodeElementOrThrow(): Element = selectElementOrThrow(tag = currentTag)

    override fun decodeElements(): Elements = selectElements(tag = currentTag)

    override fun decodeDocument() = elements.firstOrNull() as? Document
        ?: kspoonError("Current Element is not a Document. Document type works only on root")

    override fun getSelectorFullPath(): String = getSelectorFullPath(tag = currentTagOrNull)
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy