All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.gitlab.mvysny.konsumexml.Konsumer.kt Maven / Gradle / Ivy

There is a newer version: 1.2
Show newest version
package com.gitlab.mvysny.konsumexml

import com.gitlab.mvysny.konsumexml.stax.*
import java.io.Closeable
import javax.xml.namespace.QName
import javax.xml.stream.events.XMLEvent

/**
 * DSL marker annotation; in this file it is applied to [Konsumer] and to its parsing functions.
 */
@DslMarker
@Target(
    AnnotationTarget.CLASS,
    AnnotationTarget.TYPE,
    AnnotationTarget.FUNCTION
)
annotation class KonsumerDsl

/**
 * Signals a failure that happened while consuming XML at [location].
 *
 * The exception message is built as `"$location, in element <$elementName>: $msg"`.
 *
 * @property location the location supplied by the code raising the exception.
 * @property elementName the name of the element being consumed, or `null` if none was supplied.
 * @param msg the failure description; appended to the location/element prefix.
 * @param cause the underlying failure, if any.
 */
class KonsumerException(
    @Suppress("MemberVisibilityCanBePrivate") val location: Location,
    @Suppress("CanBeParameter") val elementName: QName?,
    msg: String,
    cause: Throwable? = null
) : RuntimeException("$location, in element <$elementName>: $msg", cause)

/**
 * The settings for [Konsumer]. Mutable, so the values can be changed while the XML is being parsed.
 * @property failOnUnconsumedAttributes defaults to false. If true, any unconsumed attributes (attributes not queried
 * via [AttributeKonsumer.getValue], with the exception of `xml:` and `xmlns:` attributes) will fail the parsing.
 * Enabling this allows stricter validation since unknown attributes will be discovered and reported.
 */
data class KonsumerSettings(var failOnUnconsumedAttributes: Boolean = false) {
    /**
     * Creates the attribute konsumer for [stax]: a [StaxAttributeKonsumer], additionally wrapped in an
     * [AttributeKonsumerWatchdog] when [failOnUnconsumedAttributes] is currently true.
     */
    fun newAttributeKonsumer(stax: StaxParser): AttributeKonsumer {
        val plain: AttributeKonsumer = StaxAttributeKonsumer(stax)
        return if (failOnUnconsumedAttributes) AttributeKonsumerWatchdog(plain, stax) else plain
    }
}

/**
 * Upon calling a method, say [child], konsumer reads (consumes) next event from [reader] and matches it to the expected content.
 *
 * All methods throw [KonsumerException] on any parsing error, [javax.xml.stream.XMLStreamException] on any I/O errors and XML parsing errors.
 *
 * It is not necessary to [close] child konsumers - in fact you must not do so. See [close] for more info.
 * @property name the name of the current element we're in (the contents of which we're consuming). If we haven't consumed root element yet then this is null.
 */
@KonsumerDsl
class Konsumer(private val reader: StaxReader, val name: QName?, val settings: KonsumerSettings) : Closeable {
    /**
     * Set to true once this konsumer has read its element to the end. A finished konsumer can no longer be used;
     * the parent konsumer is then allowed to continue consuming.
     */
    private var finished: Boolean = false

    /**
     * Text contents collected while crawling through this element. Text of sub-elements is not included.
     */
    private val textContents: StringBuilder = StringBuilder()

    /** The most recently created child konsumer, or `null` when no child has been created yet. */
    private var currentChildKonsumer: Konsumer? = null

    // Created lazily, so that [settings] can still be changed before the attributes are first accessed.
    private val attributesLazy: AttributeKonsumer by lazy(LazyThreadSafetyMode.NONE) {
        if (name == null) {
            NullElementKonsumer(reader.stax)
        } else {
            settings.newAttributeKonsumer(reader.stax)
        }
    }
    /**
     * Gives access to the attributes of the element [name].
     *
     * Every access first verifies that this konsumer is not finished and that no child konsumer is ongoing;
     * otherwise [IllegalStateException] is thrown.
     *
     * By default unconsumed attributes (attributes not queried via [AttributeKonsumer.getValue], with the exception
     * of `xml:` and `xmlns:` attributes) do not fail the parsing; this can be changed through the
     * [KonsumerSettings.failOnUnconsumedAttributes] setting.
     */
    val attributes: AttributeKonsumer
        get(): AttributeKonsumer {
            // Guards come first; the lazily created konsumer is only handed out in a valid state.
            checkNotFinished()
            checkNoChildKonsumer()
            return attributesLazy
        }

    /**
     * The current location in the XML file, as reported by the underlying stax parser.
     */
    val location: Location
        get() = reader.stax.location

    /**
     * Fails with [IllegalStateException] if this konsumer has already finished reading its element.
     */
    private fun checkNotFinished() {
        if (finished) {
            throw IllegalStateException("finished - cannot be used anymore")
        }
    }

    /**
     * Fails with [IllegalStateException] if the most recently created child konsumer has not finished yet.
     * Passes when no child konsumer has been created at all.
     */
    private fun checkNoChildKonsumer() {
        val child: Konsumer = currentChildKonsumer ?: return
        check(child.finished) { "A child konsumer of '${child.name}' is ongoing, cannot use this consumer of '$name' until the child konsumer finishes" }
    }

    /**
     * Verifies that the local part of the current element's name equals [name].
     * @throws KonsumerException if this konsumer is not inside an element, or the element is named differently.
     */
    @KonsumerDsl
    fun checkCurrent(name: String) {
        val current: QName = this.name
            ?: throw KonsumerException(location, this.name, "Expected current element '$name' but I'm not currently in an element")
        // Only the local part is compared.
        if (current.localPart != name) {
            throw KonsumerException(location, current, "Expected current element '$name' but I'm in '$current'")
        }
    }

    /**
     * Expects the next element to be named [name] and consumes it. Runs [block] on a child konsumer which is
     * responsible for consuming the contents of that element, then finishes the child. This konsumer cannot be used
     * while the child konsumer hasn't finished consuming its tokens.
     *
     * The function is able to skip over text, thus supporting mixed text + element contents. Use the [text] function to
     * retrieve the text contents encountered up until now.
     * @param name the name of the expected element.
     * @param block consumes the contents of the element; its result is returned.
     * @return the value produced by [block].
     * @throws KonsumerException if the next element has a different name, or there is no next element
     * (the current element ends), or on end-of-stream, or if [block] fails.
     */
    @KonsumerDsl
    fun <T> child(name: String, block: Konsumer.() -> T): T {
        // With requireElement = true, nextElement() either throws or returns a konsumer - never null.
        val childKonsumer: Konsumer = nextElement(Names.of(name), requireElement = true)!!
        val result: T = childKonsumer.runProtected(block)
        childKonsumer.finish()
        return result
    }

    /**
     * Runs [block] on this konsumer and returns its result. A [KonsumerException] thrown by [block] propagates
     * unchanged; any other [Exception] is wrapped in a [KonsumerException] carrying the current [location] and [name].
     */
    private fun <T> runProtected(block: Konsumer.() -> T): T = try {
        block()
    } catch (e: KonsumerException) {
        // Already carries location information - do not wrap it again.
        throw e
    } catch (e: Exception) {
        throw KonsumerException(location, name, e.message ?: "", e)
    }

    /**
     * Consumes the next element if it is named [name]; returns `null` without running [block] if the next element
     * has a different name or the current element ends.
     * If the element is present, runs [block] on a child konsumer which is responsible for consuming the contents of
     * that element, then finishes the child. This konsumer cannot be used while the child konsumer hasn't finished
     * consuming its tokens.
     *
     * The function is able to skip over text, thus supporting mixed text + element contents. Use the [text] function to
     * retrieve the text contents encountered up until now.
     * @param name the name of the optional element.
     * @param block consumes the contents of the element; its result is returned.
     * @return the value produced by [block], or `null` if there is no such next element.
     * @throws KonsumerException on end-of-document/end-of-stream, or if [block] fails.
     */
    @KonsumerDsl
    fun <T> childOpt(name: String, block: Konsumer.() -> T): T? {
        val childKonsumer: Konsumer = nextElement(Names.of(name)) ?: return null
        val result: T = childKonsumer.runProtected(block)
        childKonsumer.finish()
        return result
    }

    /**
     * Consumes all follow-up [XMLEvent.START_ELEMENT]s, up to [maxCount], of an element with given [name]. Runs a
     * child consumer which is responsible for consuming the contents of each element. This consumer cannot be used
     * while the child consumer hasn't finished consuming its tokens. The consumption stops when another element is
     * encountered or when the current element ends.
     *
     * The function is able to skip over text, thus supporting mixed text + element contents. Use the [text] function to
     * retrieve the text contents encountered up until now.
     *
     * Beware - this function loads all [T] items into memory and returns them as a list. If the number of items is
     * potentially huge and they're processed in a stream fashion, try using [childrenSequence] to save memory.
     *
     * Delegates to the [Names]-based overload with `Names.of(name)`.
     *
     * @param minCount minimum elements to match, 0 or greater. If fewer elements are matched, [KonsumerException] is thrown. Defaults to 0.
     * @param maxCount maximum elements to match, 0 or greater. The function returns as soon as this amount is reached, therefore
     * it could leave some elements unconsumed. Defaults to [Int.MAX_VALUE].
     * @throws KonsumerException on end-of-stream, or if fewer elements than [minCount] have been encountered.
     * @return a list of values as returned by [block], one per every element encountered. Contains at most [maxCount] items.
     */
    @KonsumerDsl
    fun <T> children(name: String, minCount: Int = 0, maxCount: Int = Int.MAX_VALUE, block: Konsumer.() -> T): List<T> =
            children(Names.of(name), minCount, maxCount, block)

    /**
     * Consumes all follow-up [XMLEvent.START_ELEMENT]s, up to [maxCount], of an element with any of given [names].
     * Runs a child consumer which is responsible for consuming the contents of each element. This consumer cannot be
     * used while the child consumer hasn't finished consuming its tokens. The consumption stops when another element
     * is encountered or when the current element ends.
     *
     * The function is able to skip over text, thus supporting mixed text + element contents. Use the [text] function to
     * retrieve the text contents encountered up until now.
     *
     * Beware - this function loads all [T] items into memory and returns them as a list. If the number of items is
     * potentially huge and they're processed in a stream fashion, try using [childrenSequence] to save memory.
     *
     * @param minCount minimum elements to match, 0 or greater. If fewer elements are matched, [KonsumerException] is thrown. Defaults to 0.
     * @param maxCount maximum elements to match, must not be less than [minCount]. The function returns as soon as this
     * amount is reached, therefore it could leave some elements unconsumed. Defaults to [Int.MAX_VALUE].
     * @throws IllegalArgumentException if [minCount] is negative or [maxCount] is less than [minCount].
     * @throws KonsumerException on end-of-stream, or if fewer elements than [minCount] have been encountered,
     * or if [block] fails.
     * @return a list of values as returned by [block], one per every element encountered. Contains at most [maxCount] items.
     */
    @KonsumerDsl
    fun <T> children(names: Names, minCount: Int = 0, maxCount: Int = Int.MAX_VALUE, block: Konsumer.() -> T): List<T> {
        // Validate before the maxCount == 0 shortcut, so that contradictory arguments are never silently accepted.
        require(minCount >= 0) { "minCount must be 0 or greater but was $minCount" }
        require(maxCount >= minCount) { "maxCount must be $minCount or greater but was $maxCount" }
        if (maxCount == 0) return listOf()

        val resultList: MutableList<T> = mutableListOf()

        fun verifyMinCount() {
            if (resultList.size < minCount) {
                throw KonsumerException(location, name, "At least $minCount of element $names was expected, but only ${resultList.size} encountered")
            }
        }

        while (true) {
            val childKonsumer = nextElement(names)
            if (childKonsumer == null) {
                // No further matching element: the minimum must have been reached by now.
                verifyMinCount()
                return resultList
            }
            val result: T = childKonsumer.runProtected(block)
            childKonsumer.finish()
            resultList.add(result)
            if (resultList.size >= maxCount) {
                return resultList
            }
        }
    }

    /**
     * Returns a sequence that lazily polls elements with given [name] from this konsumer, runs [block] and returns the
     * converted items. Intended to be used for stream-processing of XML contents. Uses way less memory than [children]
     * since it does not collect the items into a list.
     *
     * The sequence ends when there are no more elements with given [name]. To upper-limit the number of elements returned,
     * use [Sequence.take]. The sequence can only be iterated once. The sequence does not need to be iterated entirely.
     * Iterating the sequence does not close this konsumer.
     *
     * The function is able to skip over text, thus supporting mixed text + element contents. Use the [text] function to
     * retrieve the text contents encountered up until now.
     *
     * Delegates to the [Names]-based overload with `Names.of(name)`.
     *
     * @param minCount minimum elements to match, 0 or greater. If fewer elements are matched, [KonsumerException] is thrown. Defaults to 0.
     * @throws KonsumerException on end-of-stream, or if fewer elements than [minCount] have been encountered.
     */
    @KonsumerDsl
    fun <T : Any> childrenSequence(name: String, minCount: Int = 0, block: Konsumer.() -> T): Sequence<T> =
            childrenSequence(Names.of(name), minCount, block)

    /**
     * Returns a sequence that lazily polls elements with given [names] from this konsumer, runs [block] and returns the
     * converted items. Intended to be used for stream-processing of XML contents. Uses way less memory than [children]
     * since it does not collect the items into a list.
     *
     * The sequence ends when there are no more elements with one of given [names]. To upper-limit the number of elements returned,
     * use [Sequence.take]. The sequence can only be iterated once. The sequence does not need to be iterated entirely.
     * Iterating the sequence fully does not [finish] this konsumer.
     *
     * The function is able to skip over text, thus supporting mixed text + element contents. Use the [text] function to
     * retrieve the text contents encountered up until now.
     *
     * @param minCount minimum elements to match. The check only happens when the sequence runs out of matching
     * elements; if fewer elements were matched by then, [KonsumerException] is thrown. Defaults to 0.
     * @throws KonsumerException (while iterating) on end-of-stream, or if fewer elements than [minCount] have been encountered.
     */
    @KonsumerDsl
    fun <T : Any> childrenSequence(names: Names, minCount: Int = 0, block: Konsumer.() -> T): Sequence<T> {

        var elementsEncountered = 0

        fun verifyMinCount() {
            if (elementsEncountered < minCount) {
                throw KonsumerException(location, name, "At least $minCount of element $names was expected, but only $elementsEncountered encountered")
            }
        }

        // Returning null from the generator ends the sequence.
        return generateSequence {
            val childKonsumer = nextElement(names)
            if (childKonsumer == null) {
                verifyMinCount()
                null
            } else {
                // NOTE(review): unlike children(), exceptions thrown by block are not wrapped via runProtected()
                // here - confirm whether that is intentional.
                val result: T = block(childKonsumer)
                childKonsumer.finish()
                elementsEncountered++
                result
            }
        }.constrainOnce()
    }

    /**
     * A lower-level API intended for the "streaming" approach. Reads events until an element start, an element end or
     * the end of the document is hit; trimmed text encountered on the way is collected for [text].
     * * If the next element has one of given [names], a child konsumer for that element is returned.
     *   Note: you must call [finish] on the child konsumer after you're done with it.
     * * Otherwise the event is pushed back to the reader and `null` is returned.
     *
     * Generally you should use [childrenSequence] if you wish to stream-process the XML.
     * @param requireElement defaults to `false`. If `true`, the function fails if there is no next element or if the
     * next element has a different name than one of [names]; in that case the function never returns `null`.
     * @throws KonsumerException on end-of-document/end-of-stream, or when [requireElement] is `true` and no matching
     * element follows.
     */
    @KonsumerDsl
    fun nextElement(names: Names, requireElement: Boolean = false): Konsumer? {
        checkNotFinished()
        checkNoChildKonsumer()
        attributes.finalize()

        // Common handling of "no matching element at this position": fail when the element is mandatory,
        // otherwise un-read the event so that the caller can continue from it.
        fun noMatch(message: () -> String): Konsumer? {
            if (requireElement) {
                throw KonsumerException(location, this.name, message())
            }
            reader.pushBack()
            return null
        }

        while (reader.hasNext()) {
            val event = reader.next()
            when (event) {
                StaxEventType.StartElement -> {
                    val startedName = reader.stax.elementName
                    if (!names.accepts(startedName)) {
                        return noMatch { "Expected element $names but got '$startedName'" }
                    }
                    val child = Konsumer(reader, startedName, settings)
                    currentChildKonsumer = child
                    return child
                }
                StaxEventType.EndElement -> return noMatch { "Expected child element $names but got END_ELEMENT" }
                StaxEventType.EndDocument -> throw KonsumerException(location, name, "Expected element $names but got END_DOCUMENT")
                StaxEventType.CData, StaxEventType.Characters -> textContents.append(reader.stax.text!!.trim())
                else -> throw AssertionError("unexpected $event")
            }
        }
        throw KonsumerException(location, this.name, "Expected element $names but got end-of-stream")
    }

    /**
     * Convenience for a mandatory element named [name] which contains text only; returns the trimmed text contents
     * unchanged (identity conversion).
     * @throws KonsumerException if the element is absent, or the element contains child elements.
     */
    @KonsumerDsl
    fun childText(name: String): String {
        return childText(name) { value -> value }
    }

    /**
     * Convenience for an optional element named [name] which contains text only; returns the trimmed text contents
     * unchanged (identity conversion), or `null` if there is no such next element.
     * @throws KonsumerException if the element is present and contains child elements.
     */
    @KonsumerDsl
    fun childTextOpt(name: String): String? {
        return childTextOpt(name) { value -> value }
    }

    /**
     * Utility method for expecting [minCount]..[maxCount] elements with given [name] and text contents only;
     * the trimmed text contents are returned, one item per element.
     * @param minCount minimum elements to match. Defaults to 0.
     * @param maxCount maximum elements to match. Defaults to [Int.MAX_VALUE].
     * @return the text contents of the matched elements.
     * @throws KonsumerException if the [minCount] condition can not be satisfied, or if the elements contain child elements.
     */
    @KonsumerDsl
    fun childrenText(name: String, minCount: Int = 0, maxCount: Int = Int.MAX_VALUE): List<String> =
            childrenText(name, minCount, maxCount) { it }

    /**
     * Utility method for expecting one element with given [name] and text contents only; the trimmed text contents are
     * run through given [converter] and the result is returned. If the converter throws, the exception is wrapped in
     * [KonsumerException] with exact location and thrown. This is perfect for converting the value to e.g. Int.
     * @param converter converts the text contents of the element.
     * @return the value produced by [converter].
     * @throws KonsumerException if the element is absent, or the conversion fails, or the element contains child elements.
     */
    @KonsumerDsl
    fun <T> childText(name: String, converter: (String) -> T): T = child(name) { text(converter) }

    /**
     * Utility method for expecting 0..1 elements with given [name] and text contents only; the trimmed text contents are
     * run through given [converter] and the result is returned. If the converter throws, the exception is wrapped in
     * [KonsumerException] with exact location and thrown. This is perfect for converting the value to e.g. Int.
     *
     * The function does nothing and returns `null` if there is no such next element.
     * @param converter converts the text contents of the element.
     * @return the value produced by [converter], or `null` if the element is absent.
     * @throws KonsumerException if the element is present, and either the conversion fails, or the element contains child elements.
     */
    @KonsumerDsl
    fun <T> childTextOpt(name: String, converter: (String) -> T): T? = childOpt(name) { text(converter) }

    /**
     * Utility method for expecting [minCount]..[maxCount] elements with given [name] and text contents only;
     * the trimmed text contents of every element are run through given [converter] and the results are returned.
     * If the converter throws, the exception is wrapped in [KonsumerException] with exact location and thrown.
     * This is perfect for converting the values to e.g. Int.
     * @param minCount minimum elements to match. Defaults to 0.
     * @param maxCount maximum elements to match. Defaults to [Int.MAX_VALUE].
     * @param converter converts the text contents of each element.
     * @return the converted values, one per matched element.
     * @throws KonsumerException if the [minCount] condition can not be satisfied, or if the elements contain child elements,
     * or if the conversion fails for any of the children elements.
     */
    @KonsumerDsl
    fun <T> childrenText(name: String, minCount: Int = 0, maxCount: Int = Int.MAX_VALUE, converter: (String) -> T): List<T> =
            children(name, minCount, maxCount) { text(converter) }

    /**
     * Reads all remaining text nodes of this element up to its [XMLEvent.END_ELEMENT] and returns all of the text
     * collected for this element (each text node trimmed). Consuming the end of the element marks this konsumer
     * as finished.
     * @throws KonsumerException if an element is started, or end-of-document/end-of-stream is encountered.
     */
    @KonsumerDsl
    fun text(): String {
        checkNotFinished()
        checkNoChildKonsumer()
        attributes.finalize()
        while (reader.hasNext()) {
            val event = reader.next()
            when (event) {
                StaxEventType.EndElement -> {
                    // The element is closed: nothing more to read for this konsumer.
                    finished = true
                    return textContents.toString()
                }
                StaxEventType.CData, StaxEventType.Characters -> textContents.append(reader.stax.text!!.trim())
                StaxEventType.StartElement -> throw KonsumerException(location, name, "Expected text but got START_ELEMENT: ${reader.stax.elementName}")
                StaxEventType.EndDocument -> throw KonsumerException(location, name, "Expected text but got END_DOCUMENT")
                else -> throw AssertionError("unexpected $event")
            }
        }
        throw KonsumerException(location, name, "Expected text but got end of stream")
    }

    /**
     * Exactly as [text] but runs the text through given [converter]. If the converter fails, the exception is automatically
     * wrapped in [KonsumerException] with appropriate location. Handy for converting values to e.g. integers:
     * ```kotlin
     * text { it.toInt() }
     * ```
     * @param converter converts the text contents of this element.
     * @return the value produced by [converter].
     * @throws KonsumerException if the conversion fails, an element is started or end-of-stream is encountered.
     */
    @KonsumerDsl
    fun <T> text(converter: (String) -> T): T {
        val t: String = text()
        return try {
            converter(t)
        } catch (e: Exception) {
            throw KonsumerException(location, name, "Failed to convert '$t': ${e.message}", e)
        }
    }

    /**
     * Consumes the nearest [XMLEvent.END_ELEMENT] to finalize this Konsumer, which allows the parent konsumer to
     * continue. Does nothing if this konsumer is already finished.
     * Fails if there is unconsumed non-blank text ([XMLEvent.CDATA], [XMLEvent.CHARACTERS]) or an
     * [XMLEvent.START_ELEMENT].
     *
     * A low-level API, only to be used with the [nextElement] function.
     * @throws KonsumerException on unconsumed content, or on end-of-document/end-of-stream.
     */
    @KonsumerDsl
    fun finish() {
        if (finished) return
        attributes.finalize()
        // Text collected earlier but never retrieved counts as unconsumed content.
        if (textContents.isNotBlank()) {
            throw KonsumerException(location, name, "Expected END_ELEMENT but there is unconsumed text: '$textContents'")
        }
        while (reader.hasNext()) {
            val event: StaxEventType = reader.next()
            when (event) {
                StaxEventType.EndElement -> {
                    finished = true
                    return
                }
                StaxEventType.CData, StaxEventType.Characters -> {
                    // Blank text is tolerated, anything else is an error.
                    val pending = reader.stax.text
                    if (!pending.isNullOrBlank()) {
                        throw KonsumerException(location, name, "Expected END_ELEMENT but there is unconsumed text: '$pending'")
                    }
                }
                StaxEventType.StartElement -> throw KonsumerException(location, name, "Expected END_ELEMENT but got START_ELEMENT: '${reader.stax.elementName}'")
                StaxEventType.EndDocument -> throw KonsumerException(location, name, "Expected END_ELEMENT but got END_DOCUMENT")
                else -> throw AssertionError("unexpected $event")
            }
        }
        throw KonsumerException(location, name, "Expected END_ELEMENT but got end of stream")
    }

    /**
     * Closes the underlying [reader], which makes it impossible to continue with XML parsing.
     *
     * Konsumer only implements [Closeable] for convenience, so that you can parse the file contents easily,
     * simply by writing:
     * ```kotlin
     * File("in.xml").konsumeXml().use { k ->
     *   k.child("root") {}
     * }
     * ```
     *
     * You must not close child konsumers: they share the same [reader] with their parent, so closing a child
     * would make any further attempts to read the XML fail.
     */
    override fun close() {
        reader.close()
    }

    /**
     * Skips over the rest of the contents of this element: all text nodes and all child elements (recursively).
     * Text collected so far is discarded as well. The closing event of this element is left unread, therefore
     * [finish] still needs to be called on this konsumer.
     *
     * WARNING: This effectively disables validation for the contents of this element, since no child elements are
     * expressed as expected. Please see the README on how to skip contents while having support for validation.
     *
     * See the `README.md` file for examples on how to use this function most effectively.
     * @throws KonsumerException on end-of-stream.
     */
    @KonsumerDsl
    fun skipContents() {
        checkNotFinished()
        checkNoChildKonsumer()
        attributes.finalize()

        textContents.clear()
        while (reader.hasNext()) {
            val event = reader.next()
            when (event) {
                StaxEventType.StartElement -> {
                    // Skip the nested element entirely, including its own end event.
                    val nested = Konsumer(reader, reader.stax.elementName, settings)
                    currentChildKonsumer = nested
                    nested.skipContents()
                    nested.finish()
                }
                StaxEventType.EndElement, StaxEventType.EndDocument -> {
                    // Un-read the event, so that finish() can be called afterwards.
                    reader.pushBack()
                    return
                }
                StaxEventType.CData, StaxEventType.Characters -> {
                    // text is ignored
                }
                else -> throw AssertionError("unexpected $event")
            }
        }
        throw KonsumerException(location, this.name, "Expected END_ELEMENT but got end-of-stream")
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy