All Downloads are FREE. Search and download functionality uses the official Maven repository.

commonMain.korlibs.io.serialization.yaml.Yaml.kt Maven / Gradle / Ivy

package korlibs.io.serialization.yaml

import kotlin.collections.set

object Yaml {
    /**
     * Parses the YAML text in [str] and returns the decoded value.
     *
     * Behaves exactly like [read]: the text is tokenized and parsed starting at indentation level 0.
     */
    fun decode(str: String): Any? = read(str)
    /**
     * Parses the YAML text in [str] and returns the decoded value.
     *
     * The text is first split into tokens with [tokenize]; the token stream is then parsed
     * starting at indentation level 0.
     */
    fun read(str: String): Any? {
        val tokens = tokenize(str)
        return read(ListReader(tokens), level = 0)
    }

    /**
     * Converts the tokens that make up one scalar into a value.
     *
     * A scalar consisting of exactly one [Token.STR] is returned as its unquoted text with no
     * type coercion. Otherwise the unquoted text of every token is concatenated and handed to
     * the string overload of `parseStr`, which applies null/boolean/number coercion.
     */
    private fun parseStr(toks: List<Token>): Any? {
        // singleOrNull() is non-null only when the list holds exactly one element.
        val only = toks.singleOrNull()
        if (only is Token.STR) return only.ustr
        return parseStr(toks.joinToString("") { it.ustr })
    }

    /**
     * Coerces an unquoted scalar to a value.
     *
     * The exact texts `null`, `true` and `false` map to `null`, `true` and `false`.
     * Anything else becomes an `Int` if it parses as one, otherwise a `Double` if it parses
     * as one, otherwise the text itself is returned unchanged.
     */
    private fun parseStr(str: String) =
        if (str == "null") {
            null
        } else if (str == "true") {
            true
        } else if (str == "false") {
            false
        } else {
            // Numeric coercion is attempted in order: Int first, then Double.
            val asInt = str.toIntOrNull()
            asInt ?: (str.toDoubleOrNull() ?: str)
        }

    // Compile-time switch for the parser's debug trace (printed with println while reading).
    //const val TRACE = true
    const val TRACE = false

    /** Delimiter set used when a scalar runs to the end of its line (no extra terminators). */
    private val EMPTY_SET: Set<String> = emptySet()

    /** Delimiters that terminate a scalar inside a `[ ... ]` sequence. */
    private val SET_COMMA_END_ARRAY: Set<String> = setOf(",", "]")

    /**
     * Recursive reader for one indentation [level] of the token stream [s].
     *
     * Tokens are consumed until the stream ends or a [Token.LINE] with a lower level is seen.
     * Depending on what is found the result is:
     * - a scalar (returned immediately when the tokens are not followed by a `:` key separator),
     * - a list built from `- item` entries or from a `[a, b]` sequence,
     * - a map built from `key: value` entries (insertion order is kept),
     * - `null` when nothing was read.
     *
     * When both a map and a list were started, the map is returned.
     *
     * @param s token reader, shared with the recursive calls so that they advance the same position.
     * @param level indentation level handled by this call.
     */
    private fun read(s: ListReader<Token>, level: Int): Any? = s.run {
        var list: ArrayList<Any?>? = null
        var map: MutableMap<String, Any?>? = null
        // Last key/value stored in `map`; lets a following `-` list attach to a key whose value was null.
        var lastMapKey: String? = null
        var lastMapValue: Any? = null

        val levelStr = if (TRACE) "  ".repeat(level) else ""

        linehandle@ while (s.hasMore) {
            val token = s.peek()
            val line = token as? Token.LINE
            val lineLevel = line?.level
            if (TRACE && line != null) println("${levelStr}LINE($lineLevel)")
            if (lineLevel != null && lineLevel > level) {
                // child level
                val res = read(s, lineLevel)
                if (list != null) {
                    if (TRACE) println("${levelStr}CHILD.list.add: $res")
                    list.add(res)
                } else {
                    if (TRACE) println("${levelStr}CHILD.return: $res")
                    return res
                }
            } else if (lineLevel != null && lineLevel < level) {
                // parent level
                if (TRACE) println("${levelStr}PARENT: level < line.level")
                break
            } else {
                // current level
                // Consume the LINE marker itself (if any) before looking at the line's content.
                if (line != null) s.read()
                if (s.eof) break
                val item = s.peek()
                when (item.str) {
                    "-" -> {
                        if (s.read().str != "-") invalidOp
                        if (list == null) {
                            list = arrayListOf()
                            // A list that directly follows `key:` with no value becomes that key's value.
                            if (map != null && lastMapKey != null && lastMapValue == null) {
                                map[lastMapKey] = list
                            }
                        }
                        if (TRACE) println("${levelStr}LIST_ITEM...")
                        val res = read(s, level + 1)
                        if (TRACE) println("${levelStr}LIST_ITEM: $res")
                        list.add(res)
                    }
                    "[" -> {
                        if (s.read().str != "[") invalidOp
                        val olist = arrayListOf<Any?>()
                        array@ while (s.peek().str != "]") {
                            olist += readOrString(s, level, SET_COMMA_END_ARRAY, supportNonSpaceSymbols = false)
                            val p = s.peek().str
                            when (p) {
                                "," -> { s.read(); continue@array }
                                "]" -> break@array
                                else -> invalidOp("Unexpected '$p'")
                            }
                        }
                        if (s.read().str != "]") invalidOp
                        return olist
                    }
                    else -> {
                        val keyIds = s.readId()
                        val sp = s.peekOrNull() ?: Token.EOF
                        // Not followed by a `:` that has whitespace after it => plain scalar, not a map key.
                        if (s.eof || (sp.str != ":" || (sp is Token.SYMBOL && !sp.isNextWhite))) {
                            val key = parseStr(keyIds)
                            if (TRACE) println("${levelStr}LIT: $key")
                            return key
                        } else {
                            val key = parseStr(keyIds).toString()
                            if (map == null) map = LinkedHashMap()
                            if (s.read().str != ":") invalidOp
                            if (TRACE) println("${levelStr}MAP[$key]...")
                            val next = s.peekOrNull()
                            val nextStr = next?.str
                            val hasSpaces = next is Token.SYMBOL && next.isNextWhite
                            // `[`, `{` and `- ` start a nested structure instead of an inline scalar.
                            val nextIsSpecialSymbol = nextStr == "[" || nextStr == "{" || (nextStr == "-" && hasSpaces)
                            val value = readOrString(s, level, EMPTY_SET, supportNonSpaceSymbols = !nextIsSpecialSymbol)
                            lastMapKey = key
                            lastMapValue = value
                            map[key] = value
                            // Forget any previous list so that a later `-` run starts a fresh one.
                            list = null
                            if (TRACE) println("${levelStr}MAP[$key]: $value")
                        }
                    }
                }
            }
        }

        if (TRACE) println("${levelStr}RETURN: list=$list, map=$map")

        return map ?: list
    }

    /**
     * Reads the consecutive tokens that together form one key or plain scalar.
     *
     * Accepted tokens are [Token.ID], [Token.STR], a `-` symbol, and a `:` symbol that is not
     * followed by whitespace. Reading stops, without consuming, at the first other token or at
     * the end of the stream.
     *
     * @return the consumed tokens in order; empty if the first token is not accepted.
     */
    private fun ListReader<Token>.readId(): List<Token> {
        val tokens = arrayListOf<Token>()
        while (hasMore) {
            val token = peek()
            val partOfId = when (token) {
                is Token.ID, is Token.STR -> true
                is Token.SYMBOL -> token.str == "-" || (token.str == ":" && !token.isNextWhite)
                else -> false
            }
            if (!partOfId) break
            tokens.add(token)
            read()
        }
        return tokens
    }

    /**
     * Reads the value that follows a `key:` or a sequence element.
     *
     * If the next token is a [Token.ID], or ([supportNonSpaceSymbols] is set and it is a
     * [Token.SYMBOL] not followed by whitespace), the tokens up to the next [Token.LINE] or the
     * next token whose text is in [delimiters] are concatenated and coerced with `parseStr`.
     * Otherwise the value is read as a nested structure at `level + 1`.
     *
     * @param s token reader positioned at the start of the value.
     * @param level indentation level of the caller.
     * @param delimiters token texts that end an inline scalar (they are not consumed).
     * @param supportNonSpaceSymbols whether a symbol glued to the following text may start a scalar.
     */
    private fun readOrString(s: ListReader<Token>, level: Int, delimiters: Set<String>, supportNonSpaceSymbols: Boolean): Any? {
        val sp = s.peek()
        val startsInlineScalar = sp is Token.ID || (supportNonSpaceSymbols && sp is Token.SYMBOL && !sp.isNextWhite)
        if (!startsInlineScalar) return read(s, level + 1)

        // StringBuilder avoids re-copying the accumulated text on every appended token.
        val text = StringBuilder()
        while (s.hasMore) {
            val p = s.peek()
            if (p is Token.LINE || p.str in delimiters) break
            text.append(s.read().str)
        }
        return parseStr(text.toString())
    }

    /**
     * Splits YAML text into the flat token list consumed by the parser.
     *
     * Windows line endings (`\r\n`) are normalized to `\n` before tokenizing.
     */
    fun tokenize(str: String): List<Token> = StrReader(str.replace("\r\n", "\n")).tokenize()

    /**
     * Tokenizes the remaining input of this reader.
     *
     * Every non-empty line produces a [Token.LINE] carrying its indentation level, followed by
     * the line's content as [Token.ID] (trimmed free text), [Token.SYMBOL] (`:`, `-`, `[`, `]`, `,`)
     * and [Token.STR] (quoted literals that directly follow a symbol) tokens.
     * Indentation levels are indices into a stack of the indentation widths seen so far.
     * A `#` starts a comment when it is the first thing on the line or is preceded by whitespace.
     */
    private fun StrReader.tokenize(): List<Token> {
        val out = arrayListOf<Token>()

        val s = this
        // Free text accumulated since the last token was emitted.
        var str = ""
        fun flush() {
            if (str.isNotBlank()) {
                out += Token.ID(str.trim()); str = ""
            }
        }

        // Stack of indentation widths; a line's level is its position in this stack.
        val indents = ArrayList<Int>()
        linestart@ while (hasMore) {
            // Line start
            flush()
            val indentStr = readWhile(kotlin.Char::isWhitespace).replace("\t", "     ")
            if (indentStr.contains('\n')) continue@linestart  // ignore empty lines with possible additional indent
            val indent = indentStr.length
            if (indents.isEmpty() || indent > indents.last()) {
                indents += indent
            } else {
                while (indents.isNotEmpty() && indent < indents.last()) indents.removeAt(indents.size - 1)
                if (indents.isEmpty()) invalidOp
            }
            val indentLevel = indents.size - 1
            // Only the most recent LINE marker is kept when several follow each other.
            while (out.isNotEmpty() && out.last() is Token.LINE) out.removeAt(out.size - 1)
            out += Token.LINE(indentStr, indentLevel)
            while (hasMore) {
                val c = read()
                when (c) {
                    ':', '-', '[', ']', ',' -> {
                        // The character after the symbol is recorded so the parser can tell `a: b` from `a:b`.
                        flush(); out += Token.SYMBOL("$c", peekChar())
                    }
                    '#' -> {
                        if (str.lastOrNull()?.isWhitespaceFast() == true || (str == "" && out.lastOrNull() is Token.LINE)) {
                            // Comment: drop the rest of the line, including its newline.
                            flush(); readUntilLineEnd(); skip(); continue@linestart
                        } else {
                            str += c
                        }
                    }
                    '\n' -> {
                        flush(); continue@linestart
                    }
                    '"', '\'' -> {
                        flush()
                        val last = out.lastOrNull()
                        //println("out=$last, c='$c', reader=$this")
                        if (last is Token.SYMBOL && (last.str == ":" || last.str == "[" || last.str == "{" || last.str == "," || last.str == "-")) {
                            // Step back onto the quote so readStringLit sees the opening delimiter.
                            s.unread()
                            //println(" -> c='$c', reader=$this")
                            out += Token.STR(s.readStringLit())
                        } else {
                            str += c
                        }
                    }
                    else -> str += c
                }
            }
        }
        flush()
        return out
    }

    /** A lexical element produced by the tokenizer. */
    interface Token {
        /** Text of the token as stored by the tokenizer. */
        val str: String

        /** Text to use as a value; defaults to [str]. */
        val ustr: String get() = str

        /** Placeholder used when the token stream is exhausted. */
        object EOF : Token {
            override val str = ""
        }

        /** Start of a line; [str] is the indentation text and [level] its nesting depth. */
        data class LINE(override val str: String, val level: Int) : Token {
            override fun toString(): String {
                return "LINE($level)"
            }
        }

        /** Unquoted free text. */
        data class ID(override val str: String) : Token

        /** Quoted string literal; [ustr] is [str] passed through `unquote()`. */
        data class STR(override val str: String) : Token {
            override val ustr: String = str.unquote()
        }

        /** Punctuation token; [next] is the character that followed it in the input. */
        data class SYMBOL(override val str: String, val next: Char) : Token {
            val isNextWhite: Boolean
                get() = when (next) {
                    ' ', '\t', '\n', '\r' -> true
                    else -> false
                }
        }
    }

    /** Reads and returns everything up to, but not including, the next `\n` (or the end of input). */
    private fun StrReader.readUntilLineEnd(): String = readUntil { ch -> ch == '\n' }

    /** Reading this property always throws a message-less [RuntimeException]. */
    private val invalidOp: Nothing
        get() {
            throw RuntimeException()
        }

    /** Always throws a [RuntimeException] carrying [msg]. */
    private fun invalidOp(msg: String): Nothing {
        throw RuntimeException(msg)
    }

    /**
     * Forward cursor over an immutable [list].
     *
     * @param T element type.
     * @property list the elements being read.
     * @property ctx optional value stored alongside the reader; it is not used by the reader itself.
     */
    private class ListReader<T>(val list: List<T>, val ctx: T? = null) {
        /** Thrown by [peek] (and therefore [read]) when [pos] is outside [list]. */
        class OutOfBoundsException(val list: ListReader<*>, val pos: Int) : RuntimeException()

        /** Index of the next element to be read. */
        var position = 0
        val eof: Boolean get() = position >= list.size
        val hasMore: Boolean get() = position < list.size

        /** Returns the current element without consuming it, or `null` when out of range. */
        fun peekOrNull(): T? = list.getOrNull(position)

        /** Returns the current element without consuming it; throws [OutOfBoundsException] when out of range. */
        fun peek(): T = list.getOrNull(position) ?: throw OutOfBoundsException(this, position)

        /** Advances the position by [count] and returns this reader. */
        fun skip(count: Int = 1) = this.apply { this.position += count }

        /** Returns the current element and advances past it. */
        fun read(): T {
            val item = peek()
            position++
            return item
        }

        override fun toString(): String = "ListReader($list)"
    }

    /**
     * Character cursor over [str], starting at [pos].
     *
     * Single-character reads past the end return `'\u0000'` instead of throwing.
     */
    private class StrReader(val str: String, var pos: Int = 0) {
        val length get() = str.length
        val hasMore get() = pos < length

        /** Advances while there is input and [f] accepts the current character. */
        inline fun skipWhile(f: (Char) -> Boolean) { while (hasMore && f(peek())) skip() }

        /** Advances until [f] accepts the current character or the input ends. */
        fun skipUntil(f: (Char) -> Boolean): Unit = skipWhile { !f(it) }

        // @TODO: https://youtrack.jetbrains.com/issue/KT-29577
        // Advances by `count` and returns the position held before advancing.
        private fun posSkip(count: Int): Int {
            val out = this.pos
            this.pos += count
            return out
        }

        fun skip() = skip(1)
        fun peek(): Char = if (hasMore) this.str[this.pos] else '\u0000'
        fun peekChar(): Char = peek()
        fun read(): Char = if (hasMore) this.str[posSkip(1)] else '\u0000'
        fun unread() = skip(-1)

        /**
         * Returns up to [len] characters starting at [start]; both ends are clamped to the string bounds.
         * Note that the default for [len] is measured from the current position, not from [start].
         */
        fun substr(start: Int, len: Int = length - pos): String {
            val from = start.coerceIn(0, length)
            val to = (from + len).coerceIn(0, length)
            return this.str.substring(from, to)
        }

        fun skip(count: Int) = this.apply { this.pos += count }
        fun peek(count: Int): String = this.substr(this.pos, count)
        fun read(count: Int): String = this.peek(count).also { skip(count) }

        // Runs `callback` and returns the text it advanced over.
        private inline fun readBlock(callback: () -> Unit): String {
            val start = pos
            callback()
            val end = pos
            return substr(start, end - start)
        }

        fun readWhile(f: (Char) -> Boolean): String = readBlock { skipWhile(f) }
        fun readUntil(f: (Char) -> Boolean): String = readBlock { skipUntil(f) }

        /**
         * Reads a string literal delimited by `"` or `'` at the current position and returns its
         * content with backslash escapes decoded.
         *
         * @param reportErrors when `true`, a literal that reaches the end of input unclosed throws.
         * @throws RuntimeException if the current character is not a quote, on an unknown escape,
         *   or (with [reportErrors]) when the literal is not closed.
         */
        fun readStringLit(reportErrors: Boolean = true): String {
            val out = StringBuilder()
            val quotec = read()
            when (quotec) {
                '"', '\'' -> Unit
                else -> throw RuntimeException("Invalid string literal")
            }
            var closed = false
            while (hasMore) {
                val c = read()
                if (c == '\\') {
                    val cc = read()
                    out.append(
                        when (cc) {
                            '\\' -> '\\'; '/' -> '/'; '\'' -> '\''; '"' -> '"'
                            'b' -> '\b'; 'f' -> '\u000c'; 'n' -> '\n'; 'r' -> '\r'; 't' -> '\t'
                            'u' -> read(4).toInt(0x10).toChar()
                            else -> throw RuntimeException("Invalid char '$cc'")
                        }
                    )
                } else if (c == quotec) {
                    closed = true
                    break
                } else {
                    out.append(c)
                }
            }
            if (!closed && reportErrors) {
                throw RuntimeException("String literal not closed! '${this.str}'")
            }
            return out.toString()
        }

        /** Debug description showing up to 10 characters of upcoming input. */
        override fun toString(): String {
            // Clamp against 0..length (an exclusive end bound): clamping against `str.indices`
            // throws for an empty string and cuts off the final character of the preview.
            val from = pos.coerceIn(0, str.length)
            val to = (pos + 10).coerceIn(from, str.length)
            val preview = str.substring(from, to).replace("\n", "\\n")
            return "StrReader(str=${str.length}, pos=$pos, range='$preview')"
        }
    }

    /** Returns `true` only for space, tab, carriage return and line feed. */
    private fun Char.isWhitespaceFast(): Boolean = when (this) {
        ' ', '\t', '\r', '\n' -> true
        else -> false
    }
    /**
     * Returns `true` when the string is wrapped in a pair of double quotes.
     *
     * At least two characters are required: a lone `"` both starts and ends with a quote but
     * has no closing partner, and stripping "both" quotes from it would fail.
     */
    private fun String.isQuoted(): Boolean = this.length >= 2 && this.startsWith('"') && this.endsWith('"')
    /**
     * Strips the surrounding double quotes and decodes escapes when the string is quoted;
     * otherwise returns the string unchanged.
     */
    private fun String.unquote(): String {
        if (!isQuoted()) return this
        val inner = this.substring(1, this.length - 1)
        return inner.unescape()
    }
    /**
     * Decodes backslash escapes in this string.
     *
     * Supported escapes: `\\`, `\"`, `\n`, `\r`, `\t`, `\xHH` and `\uHHHH`.
     * Anything that cannot be decoded is kept verbatim instead of failing: unknown escapes,
     * a backslash at the very end of the string, and `\x` / `\u` not followed by the required
     * number of hexadecimal digits.
     */
    private fun String.unescape(): String {
        val out = StringBuilder(this.length)
        var n = 0
        while (n < this.length) {
            val c = this[n++]
            if (c != '\\') {
                out.append(c)
                continue
            }
            if (n >= this.length) {
                // Backslash is the last character: nothing to escape, keep it as-is.
                out.append('\\')
                break
            }
            val c2 = this[n++]
            when (c2) {
                '\\' -> out.append('\\')
                '"' -> out.append('\"')
                'n' -> out.append('\n')
                'r' -> out.append('\r')
                't' -> out.append('\t')
                'x', 'u' -> {
                    val digits = if (c2 == 'u') 4 else 2
                    val end = n + digits
                    val hex = if (end <= this.length) this.substring(n, end) else ""
                    // Explicit digit check: toInt(16) alone would also accept a leading sign.
                    val isHex = hex.isNotEmpty() && hex.all { it in '0'..'9' || it in 'a'..'f' || it in 'A'..'F' }
                    if (isHex) {
                        out.append(hex.toInt(16).toChar())
                        n = end
                    } else {
                        // Malformed numeric escape: keep the introducer, the rest is copied as plain text.
                        out.append('\\').append(c2)
                    }
                }
                else -> out.append('\\').append(c2)
            }
        }
        return out.toString()
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy