All Downloads are FREE. Search and download functionalities are using the official Maven repository.

commonMain.com.darkrockstudios.symspellkt.impl.SymSpell.kt Maven / Gradle / Ivy

Go to download

A Kotlin Multiplatform implementation of the SymSpell Spell Checking algorithm.

There is a newer version: 3.1.0
Show newest version
package com.darkrockstudios.symspellkt.impl

import com.darkrockstudios.symspellkt.api.DictionaryHolder
import com.darkrockstudios.symspellkt.api.SpellChecker
import com.darkrockstudios.symspellkt.api.StringDistance
import com.darkrockstudios.symspellkt.common.Composition
import com.darkrockstudios.symspellkt.common.DamerauLevenshteinDistance
import com.darkrockstudios.symspellkt.common.DictionaryItem
import com.darkrockstudios.symspellkt.common.Murmur3HashFunction
import com.darkrockstudios.symspellkt.common.SpellCheckSettings
import com.darkrockstudios.symspellkt.common.SpellHelper
import com.darkrockstudios.symspellkt.common.SuggestionItem
import com.darkrockstudios.symspellkt.common.Verbosity
import com.darkrockstudios.symspellkt.common.addItemSorted
import com.darkrockstudios.symspellkt.exception.SpellCheckException
import com.darkrockstudios.symspellkt.exception.SpellCheckExceptionCode
import kotlin.math.abs
import kotlin.math.log10
import kotlin.math.max
import kotlin.math.min
import kotlin.math.pow

/**
 * Symspell implementation of the Spellchecker
 */
class SymSpell(
	spellCheckSettings: SpellCheckSettings = SpellCheckSettings(),
	stringDistance: StringDistance = DamerauLevenshteinDistance(),
	dictionaryHolder: DictionaryHolder = InMemoryDictionaryHolder(
		spellCheckSettings,
		Murmur3HashFunction()
	),
) : SpellChecker(dictionaryHolder, stringDistance, spellCheckSettings) {
	/**
	 * supports compound aware automatic spelling correction of multi-word input strings with three
	 * cases 1. mistakenly inserted space into a correct word led to two incorrect terms 2. mistakenly
	 * omitted space between two correct words led to one incorrect combined term 3. multiple
	 * independent input terms with/without spelling errors Find suggested spellings for a multi-word
	 * input string (supports word splitting/merging).
	 *
	 * @param word          The string being spell checked.
	 * @param editDistance The maximum edit distance between input and suggested words.
	 * @return A list of [SuggestionItem] object representing suggested correct spellings for
	 * the input string.
	 */
	@Throws(SpellCheckException::class)
	override fun lookupCompound(
		word: String,
		editDistance: Double,
		tokenizeOnWhiteSpace: Boolean
	): List {
		var runningPhrase = word
		if (editDistance > spellCheckSettings.maxEditDistance) {
			throw SpellCheckException(
				SpellCheckExceptionCode.LOOKUP_ERROR,
				"max Edit distance should be less than  global Max i.e" + spellCheckSettings
					.maxEditDistance
			)
		}
		if (runningPhrase.isEmpty()) {
			throw SpellCheckException(
				SpellCheckExceptionCode.LOOKUP_ERROR,
				"Invalid input of string",
			)
		}
		if (spellCheckSettings.lowerCaseTerms) {
			runningPhrase = runningPhrase.lowercase()
		}
		val items = if (tokenizeOnWhiteSpace) {
			SpellHelper.tokenizeOnWhiteSpace(runningPhrase)
		} else {
			arrayOf(runningPhrase)
		}
		var suggestions: MutableList = mutableListOf()
		val suggestionParts: MutableList = mutableListOf()
		var isLastCombi = false

		/*
	      Early exit when in exclusion list
	     */
		if (dictionary.getExclusionItem(runningPhrase)?.isNotEmpty() == true) {
			return SpellHelper
				.earlyExit(
					suggestions,
					dictionary.getExclusionItem(runningPhrase),
					editDistance,
					spellCheckSettings.topK,
					false
				)
		}

		for (i in items.indices) {
			val item = items[i]
			//Normal suggestions
			suggestions = lookup(item, Verbosity.Top, editDistance)

			//combi check, always before split
			if (i > 0 && !isLastCombi
				&& lookupCombineWords(
					item,
					items[i - 1],
					suggestions,
					suggestionParts,
					editDistance,
				)
			) {
				isLastCombi = true
				continue
			}

			isLastCombi = false

			if (
				suggestions.isNotEmpty() &&
				(suggestions[0].distance == 0.0 || item.length == 1)
			) {
				//choose best suggestion
				suggestionParts.add(suggestions[0])
			} else {
				lookupSplitWords(suggestionParts, suggestions, item, editDistance)
			}
		}

		var joinedTerm = ""
		var joinedCount = Double.MAX_VALUE
		for (si in suggestionParts) {
			joinedTerm = joinedTerm + si.term + " "
			joinedCount = min(joinedCount, si.frequency)
		}
		joinedTerm = joinedTerm.trim()
		val dist: Double = stringDistance.getDistance(
			joinedTerm, runningPhrase, 2.0.pow(31.0) - 1.0
		)

		val suggestionItem = SuggestionItem(joinedTerm, dist, joinedCount)
		return listOf(suggestionItem)
	}

	/**
	 * supports compound aware automatic spelling correction of multi-word input strings with
	 * mistakenly omitted space between two correct words led to one incorrect combined term
	 *
	 * @param token           The string being spell checked.
	 * @param previousToken   The string previousToken being spell checked.
	 * @param maxEditDistance The maximum edit distance between input and suggested words.
	 * @param suggestions     Suggestions items List
	 * @param suggestionParts Partial suggestions list.
	 */
	@Throws(SpellCheckException::class)
	private fun lookupCombineWords(
		token: String,
		previousToken: String?,
		suggestions: List,
		suggestionParts: MutableList, maxEditDistance: Double
	): Boolean {
		val suggestionsCombi: List = lookup(
			previousToken + token,
			Verbosity.Top,
			maxEditDistance
		)
		if (suggestionsCombi.isEmpty()) {
			return false
		}
		val best1: SuggestionItem = suggestionParts[suggestionParts.size - 1]
		val best2 = if (suggestions.isNotEmpty()) {
			suggestions[0]
		} else {
			SuggestionItem(
				token,
				maxEditDistance + 1,
				0.0
			)
		}

		val editDistance: Double = stringDistance
			.getDistance(
				"${best1.term} ${best2.term}",
				"$previousToken $token",
				maxEditDistance
			)

		if (editDistance >= 0 && suggestionsCombi[0].distance < editDistance) {
			suggestionsCombi[0].distance = (suggestionsCombi[0].distance + 1)
			suggestionParts.removeAt(suggestionParts.size - 1)
			suggestionParts.add(suggestionsCombi[0])
			return true
		}
		return false
	}


	/**
	 * supports compound aware automatic spelling correction of multi-word input strings with
	 * mistakenly inserted space into a correct word led to two incorrect terms
	 *
	 * @param suggestions     Suggestions items List
	 * @param maxEditDistance The maximum edit distance between input and suggested words.
	 * @param suggestionParts Partial suggestions list.
	 */
	@Throws(SpellCheckException::class)
	private fun lookupSplitWords(
		suggestionParts: MutableList,
		suggestions: List,
		word: String,
		maxEditDistance: Double
	) {
		//if no perfect suggestion, split word into pairs
		var suggestionSplitBest: SuggestionItem? = null
		if (suggestions.isNotEmpty()) {
			suggestionSplitBest = suggestions[0]
		}

		if (word.length <= 1) {
			suggestionParts.add(SuggestionItem(word, maxEditDistance + 1, 0.0))
			return
		}

		for (j in 1 until word.length) {
			val part1 = word.substring(0, j)
			val part2 = word.substring(j)

			val suggestions1 = lookup(
				part1,
				Verbosity.Top,
				maxEditDistance
			)

			if (SpellHelper.continueConditionIfHeadIsSame(suggestions, suggestions1)) {
				continue
			}

			val suggestions2 = lookup(part2, Verbosity.Top, maxEditDistance)

			if (SpellHelper.continueConditionIfHeadIsSame(suggestions, suggestions2)) {
				continue
			}

			val split = suggestions1[0].term + " " + suggestions2[0].term
			var splitDistance = stringDistance.getDistance(word, split, maxEditDistance)
			var count: Double

			if (splitDistance < 0) {
				splitDistance = maxEditDistance + 1
			}

			if (suggestionSplitBest != null) {
				if (splitDistance > suggestionSplitBest.distance) {
					continue
				}
				if (splitDistance < suggestionSplitBest.distance) {
					suggestionSplitBest = null
				}
			}

			val bigramFreq = dictionary.getItemFrequencyBiGram(split)

			//if bigram exists in bigram dictionary
			if (bigramFreq != null) {
				count = bigramFreq

				if (suggestions.isNotEmpty()) {
					if ((suggestions1[0].term + suggestions2[0].term) == word) {
						//make count bigger than count of single term correction
						count = max(count, suggestions[0].frequency + 2)
					} else if ((suggestions1[0].term === suggestions[0].term)
						|| (suggestions2[0].term == suggestions[0].term)
					) {
						//make count bigger than count of single term correction
						count = max(count, suggestions[0].frequency + 1)
					}
				} else if ((suggestions1[0].term + suggestions2[0].term) == word) {
					count = max(
						count,
						max(suggestions1[0].frequency, suggestions2[0].frequency)
					)
				}
			} else {
				count = min(
					spellCheckSettings.bigramCountMin,
					(suggestions1[0].frequency / nMax * suggestions2[0].frequency)
				)
			}

			val suggestionSplit = SuggestionItem(split, splitDistance, count)

			if ((suggestionSplitBest == null) || (suggestionSplit.frequency > suggestionSplitBest.frequency)) {
				suggestionSplitBest = suggestionSplit
			}
		}

		if (suggestionSplitBest != null) {
			suggestionParts.add(suggestionSplitBest)
		} else {
			suggestionParts.add(SuggestionItem(word, maxEditDistance + 1, 0.0))
		}
	}

	/**
	 * @param word          The word being spell checked.
	 * @param verbosity       The value controlling the quantity/closeness of the returned
	 * suggestions
	 * @param editDistance The maximum edit distance between phrase and suggested words.
	 * @return List of [SuggestionItem]
	 */
	@Throws(SpellCheckException::class)
	override fun lookup(
		word: String,
		verbosity: Verbosity,
		editDistance: Double
	): MutableList {
		var curPhrase = word
		var maxEditDistance = editDistance

		if (maxEditDistance > spellCheckSettings.maxEditDistance) {
			throw SpellCheckException(
				SpellCheckExceptionCode.LOOKUP_ERROR,
				"max Edit distance should be less than  global Max i.e" + spellCheckSettings
					.maxEditDistance
			)
		}

		val phraseLen = curPhrase.length
		if (spellCheckSettings.lowerCaseTerms) {
			curPhrase = curPhrase.lowercase()
		}
		var suggestionFrequency: Double
		val consideredDeletes: MutableSet = HashSet()
		val consideredSuggestions: MutableSet = HashSet()
		val suggestionItems: MutableList = ArrayList(spellCheckSettings.topK)

		/*
	      Early exit when in exclusion list
	     */
		val exclusionItem = dictionary.getExclusionItem(curPhrase)
		if (!exclusionItem.isNullOrEmpty()) {
			return SpellHelper
				.earlyExit(
					suggestionItems,
					exclusionItem,
					maxEditDistance,
					spellCheckSettings.topK,
					false
				)
		}

		/*
	    Early exit when word is too big
	     */
		if ((phraseLen - maxEditDistance) > spellCheckSettings.maxLength) {
			return SpellHelper.earlyExit(
				suggestionItems,
				curPhrase,
				maxEditDistance,
				spellCheckSettings.topK,
				spellCheckSettings.ignoreUnknown
			)
		}

		val frequency = dictionary.getItemFrequency(curPhrase)

		if (frequency != null) {
			suggestionFrequency = frequency
			val si = SuggestionItem(curPhrase, 0.0, suggestionFrequency)
			suggestionItems.addItemSorted(si, spellCheckSettings.topK)

			if (verbosity != Verbosity.All) {
				return SpellHelper.earlyExit(
					suggestionItems,
					curPhrase,
					maxEditDistance,
					spellCheckSettings.topK,
					spellCheckSettings.ignoreUnknown
				)
			}
		}

		consideredSuggestions.add(curPhrase)
		var maxEditDistance2 = maxEditDistance
		var phrasePrefixLen: Int = phraseLen
		val candidates: MutableList = ArrayList()

		if (phraseLen > spellCheckSettings.prefixLength) {
			phrasePrefixLen = spellCheckSettings.prefixLength
			candidates.add(curPhrase.substring(0, phrasePrefixLen))
		} else {
			candidates.add(curPhrase)
		}

		while (candidates.isNotEmpty()) {
			val candidate = candidates.removeAt(0)
			val candidateLen = candidate.length
			val lenDiff = phrasePrefixLen - candidateLen

			// Empty candidates cause a bunch of unnecessary suggestions and waste time.
			// I think this is okay? If there are ever problems with single letter word
			// corrections, this could be the culprit, because this causes words like "a"
			// and "I" to be suggested.
			if (candidate.isEmpty()) {
				continue
			}

			/*
      early termination: if candidate distance is already higher than suggestion distance,
      than there are no better suggestions to be expected
       */
			if (lenDiff > maxEditDistance2) {
				if (verbosity == Verbosity.All) {
					continue
				}
				break
			}


			/*
      read candidate entry from dictionary
       */
			val deletes = dictionary.getDeletes(candidate)
			if (deletes != null && deletes.size > 0) {
				for (suggestion in deletes) {
					if (filterOnEquivalance(suggestion, curPhrase, candidate, maxEditDistance2)
						||
						filterOnPrefixLen(
							suggestion.length, spellCheckSettings.prefixLength,
							phrasePrefixLen, candidate.length, maxEditDistance2
						)
					) {
						continue
					}
					/*
            True Damerau-Levenshtein Edit Distance: adjust
                    distance, if both distances>0
                    We allow simultaneous edits (deletes) of
                    max_edit_distance on on both the dictionary and
                    the phrase term. For replaces and adjacent
                    transposes the resulting edit distance stays
                    <= max_edit_distance. For inserts and deletes the
                    resulting edit distance might exceed
                    max_edit_distance. To prevent suggestions of a
                    higher edit distance, we need to calculate the
                    resulting edit distance, if there are
                    simultaneous edits on both sides.
                    Example: (bank==bnak and bank==bink, but
                    bank!=kanb and bank!=xban and bank!=baxn for
                    max_edit_distance=1). Two deletes on each side of
                    a pair makes them all equal, but the first two
                    pairs have edit distance=1, the others edit
                    distance=2.
          */
					var distance: Double
					var minDistance: Int

					if (candidateLen == 0) {
						/*
              suggestions which have no common chars with
                        phrase (phrase_len<=max_edit_distance &&
                        suggestion_len<=max_edit_distance)
            */
						distance = max(phraseLen.toDouble(), suggestion.length.toDouble())
						if (distance > maxEditDistance2 || !consideredSuggestions.add(suggestion)) {
							continue
						}
					} else if (suggestion.length == 1) {
						distance = (if (curPhrase.indexOf(suggestion[0]) < 0) phraseLen else phraseLen - 1).toDouble()
						if (distance > maxEditDistance2 || !consideredSuggestions.add(suggestion)) {
							continue
						}
					} else {
						/*
              handles the shortcircuit of min_distance assignment when first boolean expression
              evaluates to False
             */

						minDistance = getMinDistanceOnPrefixbasis(
							maxEditDistance, candidate,
							curPhrase, suggestion
						)

						if (isDistanceCalculationRequired(
								curPhrase, maxEditDistance, minDistance, suggestion,
								candidate
							)
						) {
							continue
						} else {
							if (verbosity != Verbosity.All
								&& !deleteInSuggestionPrefix(
									candidate, candidateLen,
									suggestion, suggestion.length
								) || !consideredSuggestions
									.add(suggestion)
							) {
								continue
							}
							distance = stringDistance.getDistance(curPhrase, suggestion, maxEditDistance2)
							if (distance < 0) {
								continue
							}
						}
					}

					if (SpellHelper.isLessOrEqualDouble(distance, maxEditDistance2)) {
						suggestionFrequency = dictionary.getItemFrequency(suggestion)!!
						val si = SuggestionItem(suggestion, distance, suggestionFrequency)
						if (suggestionItems.isNotEmpty()) {
							if (verbosity == Verbosity.Closest && distance < maxEditDistance2) {
								suggestionItems.clear()
							} else if (verbosity == Verbosity.Top) {
								if (SpellHelper.isLessDouble(distance, maxEditDistance2)
									|| suggestionFrequency > suggestionItems[0].frequency
								) {
									maxEditDistance2 = distance
									suggestionItems[0] = si
								}
								continue
							}
						}

						if (verbosity != Verbosity.All) {
							maxEditDistance2 = distance
						}
						suggestionItems.addItemSorted(si, spellCheckSettings.topK)
					}
				}
			}

			if (lenDiff < maxEditDistance && candidateLen <= spellCheckSettings.prefixLength) {
				if (verbosity != Verbosity.All && lenDiff >= maxEditDistance2) {
					continue
				}

				for (i in 0 until candidateLen) {
					val delete = candidate.substring(0, i) + candidate.substring(i + 1, candidateLen)
					if (consideredDeletes.add(delete)) {
						candidates.add(delete)
					}
				}
			}
		}

		return suggestionItems
	}

	private fun getMinDistanceOnPrefixbasis(
		maxEditDistance: Double, candidate: String?, phrase: String,
		suggestion: String
	): Int {
		return if ((spellCheckSettings.prefixLength - maxEditDistance) == candidate!!.length.toDouble()) {
			(min(
				phrase.length.toDouble(),
				suggestion.length.toDouble()
			) - spellCheckSettings.prefixLength).toInt()
		} else {
			0
		}
	}

	private fun filterOnPrefixLen(
		suggestionLen: Int,
		prefixLen: Int,
		phrasePrefixLen: Int,
		candidateLen: Int,
		maxEditDistance2: Double
	): Boolean {
		val suggestionPrefixLen = min(suggestionLen.toDouble(), prefixLen.toDouble()).toInt()
		return (suggestionPrefixLen > phrasePrefixLen
				&& (suggestionPrefixLen - candidateLen) > maxEditDistance2)
	}

	private fun filterOnEquivalance(
		delete: String,
		phrase: String,
		candidate: String,
		maxEditDistance2: Double
	): Boolean {
		return (delete == phrase || (abs((delete.length - phrase.length).toDouble()) > maxEditDistance2)
				|| (delete.length < candidate.length) || (delete.length == candidate.length
				&& delete != candidate))
	}

	/**
	 * Check whether all delete chars are present in the suggestion prefix in correct order, otherwise
	 * this is just a hash collision
	 */
	private fun deleteInSuggestionPrefix(
		delete: String,
		deleteLen: Int,
		suggestion: String,
		suggestionLen: Int
	): Boolean {
		var runningSuggestionLen = suggestionLen
		if (deleteLen == 0) {
			return true
		}
		if (spellCheckSettings.prefixLength < runningSuggestionLen) {
			runningSuggestionLen = spellCheckSettings.prefixLength
		}

		var j = 0
		for (i in 0 until deleteLen) {
			val delChar = delete[i]
			while (j < runningSuggestionLen && delChar != suggestion[j]) {
				j++
			}
			if (j == runningSuggestionLen) {
				return false
			}
		}
		return true
	}

	private fun isDistanceCalculationRequired(
		phrase: String,
		maxEditDistance: Double,
		min: Int,
		suggestion: String,
		candidate: String
	): Boolean {
		return (phrase.length - maxEditDistance == candidate.length.toDouble()
				&& (min > 1
				&& phrase.substring(phrase.length + 1 - min) != suggestion.substring(suggestion.length + 1 - min))
				|| (min > 0 && phrase[phrase.length - min] != suggestion[suggestion.length - min] && phrase[phrase.length - min - 1] != suggestion[suggestion.length - min] && phrase[phrase.length - min] != suggestion[suggestion.length - min - 1]))
	}

	/**
	 * word_segmentation` divides a string into words by inserting missing spaces at the appropriate
	 * positions misspelled words are corrected and do not affect segmentation existing spaces are
	 * allowed and considered for optimum segmentation
	 *
	 *
	 * `word_segmentation` uses a novel approach *without* recursion. https://medium.com/@wolfgarbe/fast-word-segmentation-for-noisy-text-2c2c41f9e8da
	 * While each string of length n can be segmented in 2^n−1 possible compositions
	 * https://en.wikipedia.org/wiki/Composition_(combinatorics) `word_segmentation` has a linear
	 * runtime O(n) to find the optimum composition
	 *
	 *
	 * Find suggested spellings for a multi-word input string (supports word splitting/merging).
	 *
	 * @param phrase                    The string being spell checked.
	 * @param maxSegmentationWordLength The maximum word length
	 * @param maxEditDistance           The maximum edit distance
	 * @return The word segmented string
	 */
	@Throws(SpellCheckException::class)
	override fun wordBreakSegmentation(
		phrase: String,
		maxSegmentationWordLength: Int,
		maxEditDistance: Double
	): Composition {
		/*
	    number of all words in the corpus used to generate the
	        frequency dictionary. This is used to calculate the word
	        occurrence probability p from word counts c : p=c/nMax. nMax equals
	        the sum of all counts c in the dictionary only if the
	        dictionary is complete, but not if the dictionary is
	        truncated or filtered
	     */

		var curPhrase = phrase
		if (curPhrase.isEmpty()) {
			return Composition()
		}
		if (spellCheckSettings.lowerCaseTerms) {
			curPhrase = curPhrase.lowercase()
		}

		/*
	      Early exit when in exclusion list
	     */
		val exclusion = dictionary.getExclusionItem(curPhrase)
		if (!exclusion.isNullOrEmpty()) {
			return Composition(curPhrase, dictionary.getExclusionItem(curPhrase))
		}

		val arraySize = min(maxSegmentationWordLength.toDouble(), curPhrase.length.toDouble()).toInt()
		val compositions: Array = arrayOfNulls(arraySize)
		for (i in 0 until arraySize) {
			compositions[i] = Composition()
		}
		var circularIndex = -1
		//outer loop (column): all possible part start positions
		for (j in curPhrase.indices) {
			//inner loop (row): all possible part lengths (from start position): part can't be bigger
			// than longest word in dictionary (other than long unknown word)
			val imax = min((curPhrase.length - j).toDouble(), maxSegmentationWordLength.toDouble())
				.toInt()
			for (i in 1..imax) {
				//get top spelling correction/ed for part
				var part = curPhrase.substring(j, j + i)
				var separatorLength = 0
				var topEd = 0.0
				var topProbabilityLog: Double
				var topResult: String
				if (part[0].isWhitespace()) {
					//remove space for levensthein calculation
					part = part.substring(1)
				} else {
					//add ed+1: space did not exist, had to be inserted
					separatorLength = 1
				}

				//remove space from part1, add number of removed spaces to topEd
				topEd += part.length
				//remove space
				part = part.replace(" ", "")
				//add number of removed spaces to ed
				topEd -= part.length

				val results: List =
					this.lookup(part, Verbosity.Top, maxEditDistance)
				if (results.isNotEmpty()) {
					topResult = results[0].term
					topEd += results[0].distance

					//Naive Bayes Rule
					//we assume the word probabilities of two words to be independent
					//therefore the resulting probability of the word combination is the product of the
					// two word probabilities

					//instead of computing the product of probabilities we are computing the sum of the
					// logarithm of probabilities
					//because the probabilities of words are about 10^-10, the product of many such small
					// numbers could exceed (underflow) the floating number range and become zero
					//log(ab)=log(a)+log(b)
					topProbabilityLog = log10(results[0].frequency / nMax)
				} else {
					topResult = part
					//default, if word not found
					//otherwise long input text would win as long unknown word (with ed=edmax+1 ), although
					// there should many spaces inserted
					topEd += part.length
					topProbabilityLog = log10(10.0 / (nMax * 10.0.pow(part.length.toDouble())))
				}
				val destinationIndex = ((i + circularIndex) % arraySize)

				val destComp =
					compositions[destinationIndex] ?: error("Failed to find destinationIndex: $destinationIndex")

				//set values in first loop
				if (j == 0) {
					destComp.apply {
						segmentedString = part
						correctedString = topResult
						distanceSum = topEd
						logProbSum = topProbabilityLog
					}
				} else {
					val circularComp =
						compositions[circularIndex] ?: error("Failed to find circularIndex: $circularIndex")

					if ((i == maxSegmentationWordLength) //replace values if better probabilityLogSum, if same edit distance OR one
						// space difference
						|| (((circularComp.distanceSum + topEd
								== destComp.distanceSum) || (circularComp.distanceSum + separatorLength + topEd
								== destComp.distanceSum)) && (destComp.logProbSum
								< circularComp.logProbSum + topProbabilityLog)) //replace values if smaller edit distance
						|| (circularComp.distanceSum + separatorLength + topEd
								< destComp.distanceSum)
					) {
						destComp.segmentedString = circularComp.segmentedString + " " + part
						destComp.correctedString = circularComp.correctedString + " " + topResult
						destComp.distanceSum = circularComp.distanceSum + topEd
						destComp.logProbSum = circularComp.logProbSum + topProbabilityLog
					}
				}
			}

			circularIndex++
			if (circularIndex >= arraySize) {
				circularIndex = 0
			}
		}
		return compositions[circularIndex]!!
	}

	override fun createDictionaryEntry(word: String, frequency: Int): Boolean {
		return createDictionaryEntry(word, frequency.toDouble())
	}

	override fun createDictionaryEntry(word: String, frequency: Double): Boolean {
		return dictionary.addItem(DictionaryItem(word, frequency))
	}

	companion object {
		// N equals the sum of all counts c in the dictionary only if the dictionary is complete,
		// but not if the dictionary is truncated or filtered
		private const val nMax = 1024908267229L
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy