All Downloads are FREE. Search and download functionalities are using the official Maven repository.

commonMain.com.akuleshov7.ktoml.utils.SpecialCharacters.kt Maven / Gradle / Ivy

/**
 * This file contains utility methods for correct processing of escaped characters.
 * This logic is used for processing of Chars, Literal Strings and basic Strings.
 *
 * In TOML we need to properly process escaped symbols such as '\t', '\n', unicode escapes and others.
 * For literal strings ('') these symbols should be kept "as is"; for basic strings ("") and chars ('')
 * they should be decoded to the proper characters.
 */

package com.akuleshov7.ktoml.utils

import com.akuleshov7.ktoml.exceptions.UnknownEscapeSymbolsException

/** Number of hexadecimal digits that follow the [COMPLEX_UNICODE_PREFIX] marker. */
internal const val COMPLEX_UNICODE_LENGTH: Int = 8

/** Marker (the symbol right after the backslash) of the long unicode escape form. */
internal const val COMPLEX_UNICODE_PREFIX: Char = 'U'

/** Radix used both for parsing and for printing the digits of unicode escapes. */
internal const val HEX_RADIX: Int = 16

/** Number of hexadecimal digits that follow the [SIMPLE_UNICODE_PREFIX] marker. */
internal const val SIMPLE_UNICODE_LENGTH: Int = 4

/** Marker (the symbol right after the backslash) of the short unicode escape form. */
internal const val SIMPLE_UNICODE_PREFIX: Char = 'u'

/**
 * Decodes the escape sequences of this string (tabs, newlines, quotes, unicode escapes and so on)
 * into the characters they stand for. Symbols that are not part of an escape sequence are copied unchanged.
 *
 * @param lineNo line number of the string, passed to the thrown exception for error reporting
 * @return a copy of this string where every escape sequence is replaced with the decoded character
 * @throws UnknownEscapeSymbolsException if a backslash is followed by an unsupported symbol
 * or if [appendEscapedUnicode] rejects a unicode escape
 */
public fun String.convertSpecialCharacters(lineNo: Int): String {
    val decoded = StringBuilder()
    var position = 0
    while (position < length) {
        val symbol = this[position]
        // a backslash in the very last position has nothing to escape, so it is copied like any other symbol
        if (symbol != '\\' || position == lastIndex) {
            decoded.append(symbol)
            position++
            continue
        }

        val escaped = this[position + 1]
        // the backslash and the symbol right after it are consumed in every case
        position += 2
        val plainReplacement = when (escaped) {
            't' -> '\t'
            'b' -> '\b'
            'r' -> '\r'
            'n' -> '\n'
            'f' -> '\u000C'
            '\\' -> '\\'
            '\'' -> '\''
            '"' -> '"'
            else -> null
        }
        when {
            plainReplacement != null -> decoded.append(plainReplacement)
            // position already points to the first hex digit; skip as many digits as were decoded
            escaped == SIMPLE_UNICODE_PREFIX || escaped == COMPLEX_UNICODE_PREFIX ->
                position += decoded.appendEscapedUnicode(this, escaped, position, lineNo)

            else -> throw UnknownEscapeSymbolsException("\\$escaped", lineNo)
        }
    }
    return decoded.toString()
}

/**
 * Decodes the hexadecimal digits of a unicode escape sequence (`\uXXXX` or `\UXXXXXXXX`) found in
 * [fullString] and appends the resulting code point to this [StringBuilder].
 *
 * @param fullString string that contains the escape sequence
 * @param marker symbol that follows the backslash: [SIMPLE_UNICODE_PREFIX] selects
 * [SIMPLE_UNICODE_LENGTH] hex digits, any other value selects [COMPLEX_UNICODE_LENGTH] hex digits
 * @param codeStartIndex index in [fullString] of the first hex digit (the symbol right after [marker])
 * @param lineNo line number of the string, passed to the thrown exception for error reporting
 * @return number of hex digits that were consumed from [fullString]
 * @throws UnknownEscapeSymbolsException if the sequence is shorter than required, contains anything
 * but ASCII hexadecimal digits, does not fit into an [Int] or is rejected by [appendCodePointCompat]
 */
public fun StringBuilder.appendEscapedUnicode(
    fullString: String,
    marker: Char,
    codeStartIndex: Int,
    lineNo: Int
): Int {
    val nbUnicodeChars = if (marker == SIMPLE_UNICODE_PREFIX) {
        SIMPLE_UNICODE_LENGTH
    } else {
        COMPLEX_UNICODE_LENGTH
    }
    if (codeStartIndex + nbUnicodeChars > fullString.length) {
        // not enough symbols left: report everything starting from the marker
        val invalid = fullString.substring(codeStartIndex - 1)
        throw UnknownEscapeSymbolsException("\\$invalid", lineNo)
    }
    val hexCode = fullString.substring(codeStartIndex, codeStartIndex + nbUnicodeChars)
    // String.toInt(radix) accepts a leading sign and throws NumberFormatException on malformed or
    // overflowing input, so the digits are validated explicitly and parsed with the null-returning variant
    val codePoint = hexCode
        .takeIf { digits -> digits.all { it in '0'..'9' || it in 'a'..'f' || it in 'A'..'F' } }
        ?.toIntOrNull(HEX_RADIX)
        ?: throw UnknownEscapeSymbolsException("\\$marker$hexCode", lineNo)
    try {
        appendCodePointCompat(codePoint)
    } catch (e: IllegalArgumentException) {
        // the number was parsed, but it is not a code point that can be appended
        throw UnknownEscapeSymbolsException("\\$marker$hexCode", lineNo)
    }
    return nbUnicodeChars
}

/**
 * Prepares this string for encoding by escaping it in three consecutive passes:
 * 1. every match of `controlCharacterRegex` becomes its short escape (`\t`, `\b`, `\n`, `\f`, `\r`)
 *    or, for any other matched symbol, a `\uXXXX` sequence with zero-padded hexadecimal digits;
 * 2. inside every match of `unescapedDoubleQuoteRegex` each double quote gets a leading backslash;
 * 3. every match of `unescapedBackslashRegex` is replaced with two backslashes.
 *
 * @return the escaped copy of this string
 */
public fun String.escapeSpecialCharacters(): String =
    replace(controlCharacterRegex) { found ->
        // single() requires each match to be exactly one symbol
        val symbol = found.value.single()
        when (symbol) {
            '\t' -> "\\t"
            '\b' -> "\\b"
            '\n' -> "\\n"
            '\u000C' -> "\\f"
            '\r' -> "\\r"
            else -> {
                // no short form exists: fall back to the numeric escape
                val paddedHex = symbol.code
                    .toString(HEX_RADIX)
                    .padStart(SIMPLE_UNICODE_LENGTH, '0')
                "\\$SIMPLE_UNICODE_PREFIX$paddedHex"
            }
        }
    }
        .replace(unescapedDoubleQuoteRegex) { found -> found.value.replace("\"", "\\\"") }
        // escapeReplacement keeps the backslashes of the replacement from being read as group references
        .replace(unescapedBackslashRegex, Regex.escapeReplacement("\\\\"))




© 2015 - 2025 Weber Informatics LLC | Privacy Policy