All downloads are free. Search and download functionality uses the official Maven repository.

commonMain.com.caesarealabs.searchit.impl.QueryTokenizer.kt Maven / Gradle / Ivy

The newest version!
package com.caesarealabs.searchit.impl

import com.github.michaelbull.result.Err
import com.github.michaelbull.result.Ok
import com.github.michaelbull.result.Result

// Delimiter that opens and closes a literal (quoted) section of a query.
private const val Quote: Char = '"'

// Escape character: a quote directly preceded by it is kept as a literal character.
private const val Backslash: Char = '\\'

/**
 * Splits a raw search query into a flat list of [QueryToken]s.
 *
 * Rules implemented here:
 * - A space ends the current word. The words `and`, `or` and `not` (any letter case) become
 *   [QueryToken.Operator]s; every other word becomes a [QueryToken.Raw].
 * - Text between two unescaped quotes is taken literally (spaces, colons, parentheses and operator
 *   words included) and always becomes a [QueryToken.Raw] or the value of a key-value pair.
 * - A backslash directly before a quote is dropped and the quote is kept as a literal character.
 *   Any other backslash is an ordinary character.
 * - `key:value` becomes a [QueryToken.KeyValue]. The key is either the word attached to the colon or,
 *   if there is none, the preceding [QueryToken.Raw] token.
 * - `(` and `)` end the current word and are emitted as [QueryToken.Parentheses].
 */
internal object QueryTokenizer {

    /**
     * Tokenizes [query].
     *
     * @param query the query text; leading and trailing whitespace is ignored.
     * @return `Ok` with the tokens in source order (empty for an empty or whitespace-only query),
     * or `Err` with a human-readable message when the query is malformed.
     */
    fun tokenize(query: String): Result<List<QueryToken>, String> {
        val trimmed = query.trim()
        if (trimmed.isEmpty()) return Ok(emptyList<QueryToken>())
        return tokenizeImpl(trimmed)
    }

    /**
     * Single left-to-right pass over a non-empty, trimmed [query]. Handles quoting (`"`), quote
     * escaping (`\"`), key-value pairs (`:`), parentheses and operator words.
     */
    private fun tokenizeImpl(query: String): Result<List<QueryToken>, String> {
        val tokens = mutableListOf<QueryToken>()
        // Characters of the expression currently being read.
        val currentString = StringBuilder()
        // True while we are between an opening and a closing quote: everything is taken literally.
        var openedQuote = false
        // Set right after a ':' - the next finished expression is the value belonging to this key.
        var specifiedKey: String? = null

        // Turns the pending characters (if any) into a token and resets the buffer.
        // With allowOperators = false (quoted text) operator words are NOT recognised.
        fun terminateExpression(allowOperators: Boolean) {
            if (currentString.isEmpty()) return
            val text = currentString.toString()
            currentString.clear()

            // Copy to a local so the compiler can smart-cast; the captured var cannot be smart-cast.
            val key = specifiedKey
            val token = when {
                key != null -> {
                    // A ':' preceded this expression - it is the value of that key.
                    specifiedKey = null
                    QueryToken.KeyValue(key, text)
                }
                // Unquoted text - may be an operator word.
                allowOperators -> resolveTokenFromExpression(text)
                // Quoted text - always raw.
                else -> QueryToken.Raw(text)
            }
            tokens.add(token)
        }

        for (i in query.indices) {
            val char = query[i]
            when {
                char == Quote -> {
                    val escaped = i != 0 && query[i - 1] == Backslash
                    when {
                        // Escaped quote - keep it as a literal character.
                        escaped -> currentString.append(char)
                        openedQuote -> {
                            // Closing quote - everything collected since the opening quote is raw.
                            terminateExpression(allowOperators = false)
                            openedQuote = false
                        }
                        else -> {
                            // Opening quote - finish whatever came before it.
                            terminateExpression(allowOperators = true)
                            openedQuote = true
                        }
                    }
                }

                char == Backslash && i != query.length - 1 && query[i + 1] == Quote -> {
                    // Backslash escaping a quote: drop it. The quote branch above looks back at this
                    // backslash on the next iteration and appends the quote literally.
                }

                // Inside quotes - no character is special.
                openedQuote -> currentString.append(char)

                char == ':' -> {
                    if (specifiedKey != null) {
                        return Err("Key-value expression was specified with ':' but no value was given")
                    }
                    specifiedKey = if (currentString.isNotEmpty()) {
                        // ':' is attached directly to a word - that word is the key.
                        currentString.toString().also { currentString.clear() }
                    } else {
                        // Nothing attached - fall back to the previous token, which must be raw text.
                        val lastToken = tokens.lastOrNull()
                            ?: return Err("Key-value expression was specified with ':' but no key was given")
                        if (lastToken !is QueryToken.Raw) {
                            return Err("Non-literal expression given as key with ':'. Use quotes if needed")
                        }
                        // The raw token becomes part of the key-value pair, so take it out of the list.
                        // removeAt(lastIndex) instead of removeLast(): the latter can bind to the
                        // JDK 21 List.removeLast() member and fail on older runtimes.
                        tokens.removeAt(tokens.lastIndex)
                        lastToken.text
                    }
                }

                // Space - ends the current expression.
                char == ' ' -> terminateExpression(allowOperators = true)

                char == '(' || char == ')' -> {
                    // Parenthesis - ends the current expression and is a token of its own.
                    terminateExpression(allowOperators = true)
                    if (specifiedKey != null) {
                        // The key is still pending, i.e. the parenthesis came right where a value belongs.
                        return Err("Parentheses can't be used as value of ':' expression")
                    }
                    tokens.add(
                        if (char == '(') QueryToken.Parentheses.Opening else QueryToken.Parentheses.Closing
                    )
                }

                // Ordinary character.
                else -> currentString.append(char)
            }
        }

        // Flush whatever is left in the buffer.
        terminateExpression(allowOperators = true)

        return when {
            openedQuote -> Err("A quote was not closed")
            specifiedKey != null -> Err("Key-value expression was specified with ':' but no value was given")
            else -> Ok(tokens)
        }
    }

    /**
     * Maps an unquoted word to an operator when it is `and`, `or` or `not` (compared after
     * lowercasing); any other word is returned as [QueryToken.Raw] with its original casing.
     */
    private fun resolveTokenFromExpression(expression: String): QueryToken = when (expression.lowercase()) {
        "and" -> QueryToken.Operator.And
        "or" -> QueryToken.Operator.Or
        "not" -> QueryToken.Operator.Not
        else -> QueryToken.Raw(expression)
    }

}

/**
 * A single lexical unit of a search query.
 */
public sealed interface QueryToken {
    /** Marker for the tokens that carry text: [KeyValue] and [Raw]. */
    public sealed interface WithContent : QueryToken

    /** Boolean operator keyword. */
    public enum class Operator : QueryToken {
        Or,
        And,
        Not
    }

    /** Grouping parenthesis. */
    public enum class Parentheses : QueryToken {
        Opening,
        Closing
    }

    /** A key paired with a value; its string form is `key:value`. */
    public data class KeyValue(val key: String, val value: String) : WithContent {
        override fun toString(): String {
            return "$key:$value"
        }
    }

    /** Plain text; its string form is the text itself. */
    public data class Raw(val text: String) : WithContent {
        override fun toString(): String {
            return text
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy