
// dorkbox.collections.ahoCorasick.FiniteStateMachine.kt Maven / Gradle / Ivy
package dorkbox.collections.ahoCorasick
import java.util.*
/**
* Creates a Finite State Machine for very fast string matching.
*
* This is a wrapper for DoubleArrayTrie, since that class is awkward to use
*/
/**
 * Finite State Machine for very fast string matching.
 *
 * This is a thin wrapper around [DoubleArrayTrie], since that class is awkward to use directly.
 * Every method of this class delegates to the wrapped trie.
 *
 * NOTE(review): the generic type arguments of this class were missing and have been restored from
 * how the class is used (`FiniteStateMachine<*>` in the sample code below, `String` keywords, and
 * `Boolean` values in the string-based factories). The element type returned by [partialMatch]
 * (`DoubleArrayTrie.Hit<V>`) and the array type returned by [getKeywords] (`Array<V>`) are
 * presumed -- confirm both against `DoubleArrayTrie`.
 *
 * @param V the type of the value associated with each keyword
 * @param trie the trie that performs the actual matching
 */
class FiniteStateMachine<V>(private val trie: DoubleArrayTrie<V>) {
    /**
     * Checks whether [text] is matched exactly.
     *
     * @return true if the trie's exact-match search finds [text] (its result is greater than -1).
     *         False otherwise
     */
    fun matches(text: String): Boolean = trie.exactMatchSearch(text) >= 0

    /**
     * Parses [text] and finds PARTIALLY matching results. For exact matches only, it is better to
     * use [matches].
     *
     * @return a list of outputs that contain matches or partial matches. The returned list
     *         specifies HOW MUCH of the text matches (a full match would be from 0, the start,
     *         to N, the length of the text).
     */
    fun partialMatch(text: String): List<DoubleArrayTrie.Hit<V>> = trie.parseText(text)

    /**
     * Parses [text] and reports whether there are PARTIALLY matching results. For exact matches
     * only, it is better to use [matches].
     *
     * @return true if there is a match or partial match. "fun.reddit.com" will partially match
     *         to "reddit.com"
     */
    fun hasPartialMatch(text: String): Boolean = trie.parseText(text).isNotEmpty()

    /**
     * Returns the backing keywords IN THEIR NATURAL ORDER, in the case that you need access to the
     * original FSM data.
     *
     * NOTE(review): this hands out the trie's `v` array directly (no copy is made here), so
     * callers presumably must not modify it -- confirm against `DoubleArrayTrie`.
     *
     * @return for example, if the FSM was populated with [reddit.com, cnn.com], this will return
     *         [cnn.com, reddit.com]
     */
    fun getKeywords(): Array<V> = trie.v

    companion object {
        /**
         * Builds a state machine from a map of keyword to associated value.
         *
         * @param map the keywords to match, each mapped to the value stored for it
         * @return a state machine backed by a new [DoubleArrayTrie] built from [map]
         */
        fun <V> build(map: Map<String, V>): FiniteStateMachine<V> {
            return FiniteStateMachine(DoubleArrayTrie(map))
        }

        /**
         * Builds a state machine from a list of keywords. Every keyword is stored with the value
         * `true`.
         *
         * @param strings the keywords to match; must not be empty
         * @return a state machine matching the given keywords
         * @throws IllegalArgumentException if [strings] is empty
         */
        fun build(strings: List<String>): FiniteStateMachine<Boolean> {
            require(strings.isNotEmpty()) { "strings cannot be empty" }

            // A TreeMap keeps the keywords sorted in their natural order and drops duplicates.
            val map = strings.associateWithTo(TreeMap<String, Boolean>()) { true }
            return build(map)
        }

        /**
         * Builds a state machine from the given keywords. Every keyword is stored with the value
         * `true`.
         *
         * @param strings the keywords to match; at least one is required
         * @return a state machine matching the given keywords
         * @throws IllegalArgumentException if no keywords are given
         */
        fun build(vararg strings: String): FiniteStateMachine<Boolean> {
            // Same validation and construction as the List overload, so simply delegate to it.
            return build(strings.asList())
        }

        // Sample usage, kept for reference:
        //
        // @JvmStatic
        // fun main(args: Array<String>) {
        //     val strings = arrayOf("khanacademy.com", "cnn.com", "google.com", "fun.reddit.com", "reddit.com")
        //     val keys = Arrays.asList(*strings)
        //     var text: String
        //     run {
        //         val map = TreeMap<String, String>()
        //         for (key in keys) {
        //             map[key] = key
        //         }
        //         val fsm: FiniteStateMachine<*> = build(map)
        //         text = "reddit.google.com"
        //         println("Searching : $text")
        //         println(fsm.partialMatch(text))
        //         println("Found: " + fsm.matches(text))
        //         println()
        //         text = "reddit.com"
        //         println("Searching : $text")
        //         println(fsm.partialMatch(text))
        //         println("Found: " + fsm.matches(text))
        //         println()
        //         text = "fun.reddit.com"
        //         println("Searching : $text")
        //         println(fsm.partialMatch(text))
        //         println("Found: " + fsm.matches(text))
        //     }
        //     println("\n\nTrying with new type\n\n")
        //     run {
        //         val fsm: FiniteStateMachine<*> = build(keys)
        //         text = "reddit.google.com"
        //         println("Searching : $text")
        //         println(fsm.partialMatch(text))
        //         println("Found: " + fsm.matches(text))
        //         println()
        //         text = "reddit.com"
        //         println("Searching : $text")
        //         println(fsm.partialMatch(text))
        //         println("Found: " + fsm.matches(text))
        //         println()
        //         text = "fun.reddit.com"
        //         println("Searching : $text")
        //         println(fsm.partialMatch(text))
        //         println("Found: " + fsm.matches(text))
        //     }
        //     println("\n\nTrying with new type\n\n")
        //     run {
        //         val fsm: FiniteStateMachine<*> = build(*strings)
        //         text = "reddit.google.com"
        //         println("Searching : $text")
        //         println(fsm.partialMatch(text))
        //         println("Found: " + fsm.matches(text))
        //         println()
        //         text = "reddit.com"
        //         println("Searching : $text")
        //         println(fsm.partialMatch(text))
        //         println("Found: " + fsm.matches(text))
        //         println()
        //         text = "fun.reddit.com"
        //         println("Searching : $text")
        //         println(fsm.partialMatch(text))
        //         println("Found: " + fsm.matches(text))
        //     }
        //     val fsm: FiniteStateMachine<*> = build(*strings)
        //     run {
        //         println("Keywords Orig: " + Arrays.toString(strings))
        //         println("Keywords FSM : " + Arrays.toString(fsm.getKeywords()))
        //     }
        // }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy