All downloads are free. The search and download features use the official Maven repository.

dorkbox.collections.ahoCorasick.FiniteStateMachine.kt Maven / Gradle / Ivy

package dorkbox.collections.ahoCorasick

import java.util.*

/**
 * Creates a Finite State Machine for very fast string matching.
 *
 * This is a thin wrapper around [DoubleArrayTrie], since that class is awkward to use directly.
 * Instances are normally created through the `build` factory functions in the companion object.
 *
 * @param V the type of the value associated with each keyword
 */
class FiniteStateMachine<V>(private val trie: DoubleArrayTrie<V>) {
    companion object {
        /**
         * Builds a state machine from a map of keyword to associated value.
         *
         * @param map the keywords (map keys) and the value stored for each of them
         * @return a state machine backed by a new [DoubleArrayTrie] created from [map]
         */
        fun <V> build(map: Map<String, V>): FiniteStateMachine<V> {
            return FiniteStateMachine(DoubleArrayTrie(map))
        }

        /**
         * Builds a state machine from a list of keywords. Every keyword is stored with the placeholder value `true`.
         *
         * @param strings the keywords to match against; must not be empty
         * @return a state machine for the given keywords
         * @throws IllegalArgumentException if [strings] is empty
         */
        fun build(strings: List<String>): FiniteStateMachine<Boolean> {
            require(strings.isNotEmpty()) { "strings cannot be empty" }

            // A TreeMap keeps the keywords sorted (and de-duplicated) before they are handed to the trie.
            val map = strings.associateWithTo(TreeMap<String, Boolean>()) { true }
            return build(map)
        }

        /**
         * Builds a state machine from the given keywords. Every keyword is stored with the placeholder value `true`.
         *
         * @param strings the keywords to match against; at least one is required
         * @return a state machine for the given keywords
         * @throws IllegalArgumentException if no keywords are supplied
         */
        fun build(vararg strings: String): FiniteStateMachine<Boolean> {
            // Delegates to the List overload, which performs the emptiness check and builds the sorted map.
            return build(strings.asList())
        }

//        @JvmStatic
//        fun main(args: Array<String>) {
//            val strings = arrayOf("khanacademy.com", "cnn.com", "google.com", "fun.reddit.com", "reddit.com")
//            val keys = Arrays.asList(*strings)
//            var text: String
//            run {
//                val map = TreeMap<String, String>()
//                for (key in keys) {
//                    map[key] = key
//                }
//                val fsm: FiniteStateMachine<*> = build(map)
//                text = "reddit.google.com"
//                println("Searching : $text")
//                println(fsm.partialMatch(text))
//                println("Found: " + fsm.matches(text))
//                println()
//                text = "reddit.com"
//                println("Searching : $text")
//                println(fsm.partialMatch(text))
//                println("Found: " + fsm.matches(text))
//                println()
//                text = "fun.reddit.com"
//                println("Searching : $text")
//                println(fsm.partialMatch(text))
//                println("Found: " + fsm.matches(text))
//            }
//            println("\n\nTrying with new type\n\n")
//            run {
//                val fsm: FiniteStateMachine<*> = build(keys)
//                text = "reddit.google.com"
//                println("Searching : $text")
//                println(fsm.partialMatch(text))
//                println("Found: " + fsm.matches(text))
//                println()
//                text = "reddit.com"
//                println("Searching : $text")
//                println(fsm.partialMatch(text))
//                println("Found: " + fsm.matches(text))
//                println()
//                text = "fun.reddit.com"
//                println("Searching : $text")
//                println(fsm.partialMatch(text))
//                println("Found: " + fsm.matches(text))
//            }
//            println("\n\nTrying with new type\n\n")
//            run {
//                val fsm: FiniteStateMachine<*> = build(*strings)
//                text = "reddit.google.com"
//                println("Searching : $text")
//                println(fsm.partialMatch(text))
//                println("Found: " + fsm.matches(text))
//                println()
//                text = "reddit.com"
//                println("Searching : $text")
//                println(fsm.partialMatch(text))
//                println("Found: " + fsm.matches(text))
//                println()
//                text = "fun.reddit.com"
//                println("Searching : $text")
//                println(fsm.partialMatch(text))
//                println("Found: " + fsm.matches(text))
//            }
//            val fsm: FiniteStateMachine<*> = build(*strings)
//            run {
//                println("Keywords Orig: " + Arrays.toString(strings))
//                println("Keywords FSM : " + Arrays.toString(fsm.getKeywords()))
//            }
//        }
    }

    /**
     * Checks whether [text] is exactly one of the keywords in this state machine.
     *
     * @param text the string to look up
     * @return true if this string is exactly contained (the trie's exact-match search returns an index > -1). False otherwise
     */
    fun matches(text: String): Boolean = trie.exactMatchSearch(text) > -1

    /**
     * Parses text and finds PARTIALLY matching results. For exact matches only it is better to use [matches].
     *
     * @param text the string to scan
     * @return a list of outputs that contain matches or partial matches. The returned list will specify HOW MUCH of the
     *         text matches (a full match would be from 0 (the start), to N (the length of the text)).
     */
    // NOTE(review): the element type was reconstructed as DoubleArrayTrie.Hit<V>; confirm it matches the declared
    // return type of DoubleArrayTrie.parseText.
    fun partialMatch(text: String): List<DoubleArrayTrie.Hit<V>> = trie.parseText(text)

    /**
     * Parses text and returns true if there are PARTIALLY matching results. For exact matches only it is better to
     * use [matches].
     *
     * @param text the string to scan
     * @return true if there is a match or partial match. "fun.reddit.com" will partially match to "reddit.com"
     */
    fun hasPartialMatch(text: String): Boolean = trie.parseText(text).isNotEmpty()

    /**
     * Returns the backing keywords IN THEIR NATURAL ORDER, in the case that you need access to the original FSM data.
     *
     * NOTE(review): this hands out the trie's internal `v` array directly (no copy), so callers should not modify it.
     * Confirm that `v` holds the keywords rather than the values associated with them.
     *
     * @return for example, if the FSM was populated with [reddit.com, cnn.com], this will return [cnn.com, reddit.com]
     */
    fun getKeywords(): Array<V> = trie.v
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy