All Downloads are FREE. Search and download functionalities are using the official Maven repository.

commonMain.Kspoon.kt Maven / Gradle / Ivy

Go to download

Annotation based HTML to Kotlin class parser with KMP support, jspoon successor

The newest version!
package dev.burnoo.kspoon

import com.fleeksoft.ksoup.Ksoup
import com.fleeksoft.ksoup.select.Elements
import dev.burnoo.kspoon.configuration.KspoonBuilder
import dev.burnoo.kspoon.configuration.KspoonConfiguration
import dev.burnoo.kspoon.decoder.internal.HtmlTreeDecoder
import dev.burnoo.kspoon.exception.KspoonParseException
import dev.burnoo.kspoon.exception.kspoonError
import kotlinx.serialization.DeserializationStrategy
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerializationException
import kotlinx.serialization.StringFormat
import kotlinx.serialization.descriptors.PrimitiveKind
import kotlinx.serialization.modules.SerializersModule
import kotlinx.serialization.serializer

/**
 * The main entry point to work with HTML parsing.
 * It is typically used by constructing an application-specific instance, with configured behaviour
 * and, if necessary, registered in [SerializersModule] custom serializers.
 * `Kspoon` instance can be configured in its `Kspoon {}` factory function using [KspoonBuilder].
 * For demonstration purposes or trivial usages, Kspoon [companion][Kspoon.Default] can be used instead.
 *
 * Then constructed instance can be used in context of [StringFormat], by using function [Kspoon.toFormat].
 *
 * Example:
 * ```
 * @Serializable
 * data class GitHubProfile(
 *     @Selector("h1", regex = "(.*) - GitHub")
 *     val username: String,
 *     @Selector("img[id=avatar]", attr = "abs:src")
 *     val avatarUrl: String,
 *     @Selector("li")
 *     val interests: List,
 *     @Selector(".pinned-repositories")
 *     val pinnedRepositories: List = emptyList(),
 * )
 *
 * val htmlContent = """
 *     

burnoo - GitHub

* *
    *
  • KMP
  • *
  • Android
  • *
* """ * * val kspoon = Kspoon { * parse = { html -> parse(html, baseUri = "https://github.com/") } * coerceInputValues = true * } * * val profile = kspoon.parse(htmlContent) * ``` * * Kspoon format configuration can be refined using the corresponding constructor: * ``` * val defaultKspoon = Kspoon { * coerceInputValues = true * } * // Will inherit the properties of defaultKspoon * val debugEndpointKspoon = Kspoon(defaultKspoon) { * // coerceInputValues is set to true * defaultTextMode = HtmlTextMode.OuterHtml * } * ``` */ @OptIn(ExperimentalSerializationApi::class) public sealed class Kspoon( @PublishedApi internal val configuration: KspoonConfiguration, ) { /** * The default instance of [Kspoon] with default configuration. * * Example: * ``` * @Serializable * data class Page(@Selector("p") val text: String) * * val html = "

Header

Text

" * val page = Kspoon.parse(html) * // Prints "Text" * println(page.text) * ``` */ public companion object Default : Kspoon(KspoonConfiguration()) /** * Parses the given HTML string [html] to the value of type [T] using deserializer * retrieved from the reified type parameter. * * Example: * ``` * @Serializable * data class Page(@Selector("p") val text: String) * * val html = "

Header

Text

" * val page = Kspoon.parse(html) * // Prints "Text" * println(page.text) * ``` * * @throws KspoonParseException in case of any parsing-specific error * @throws IllegalArgumentException if the parsed input is not a valid instance of [T] */ public inline fun parse(html: String): T = parse(configuration.serializersModule.serializer(), html) /** * Parses the given HTML string [html] to the value of type [T] using the given [deserializer]. * * Example: * ``` * @Serializable * data class Page(@Selector("p") val text: String) * * val html = "

Header

Text

" * val page = Kspoon.parse(Page.serializer(), html) * // Prints "Text" * println(page.text) * ``` * * @throws [SerializationException] in case of any parsing-specific error * @throws [IllegalArgumentException] if the parsed input cannot be represented as a valid instance of type [T] */ public fun parse(deserializer: DeserializationStrategy, html: String): T { if (deserializer.descriptor.kind is PrimitiveKind) { kspoonError("Parsing is not supported for primitive types. Use class instead") } val document = configuration.parse(Ksoup, html) val decoder = HtmlTreeDecoder( elements = Elements(document), configuration = configuration, extraSerializersModule = configuration.serializersModule, ) return decoder.decodeSerializableValue(deserializer) } /** * Converts this [Kspoon] instance to a [StringFormat] instance. This function can be used to make [Kspoon] work * with external libraries that uses serialization formats (e.g. ktor's content negotiation for kotlinx serialization * or Retrofit's kotlinx serialization converter). By default, the [StringFormat.encodeToString] crashes with * [KspoonParseException], but encoding can be delegated to the given [encodeStringFormatDelegate]. * * Example (ktor): * ``` * val client = HttpClient { * install(ContentNegotiation) { * register(ContentType.Text.Html, KotlinxSerializationConverter(Kspoon.toFormat())) * } * } * * val profile = client.get("https://github.com/burnoo").body() */ public fun toFormat(encodeStringFormatDelegate: StringFormat? = null): StringFormat = KspoonFormat( kspoon = this, serializersModule = configuration.serializersModule, encodeStringFormatDelegate = encodeStringFormatDelegate, ) } /** * Creates an instance of [Kspoon] configured from the optionally given [Kspoon instance][from] and adjusted with [builderAction]. * * Example: * ``` * val defaultKspoon = Kspoon { * coerceInputValues = true * } * // Will inherit the properties of defaultKspoon * val debugEndpointKspoon = Kspoon(defaultKspoon) { * // coerceInputValues is set to true * defaultTextMode = HtmlTextMode.OuterHtml * } * ``` */ public fun Kspoon(from: Kspoon = Kspoon.Default, builderAction: KspoonBuilder.() -> Unit): Kspoon { val builder = KspoonBuilder(from) builder.builderAction() val configuration = builder.build() return KspoonImpl(configuration) } private class KspoonImpl(configuration: KspoonConfiguration) : Kspoon(configuration)




© 2015 - 2024 Weber Informatics LLC | Privacy Policy