All downloads are free. Search and download functionality uses the official Maven repository.

io.github.mightguy.spellcheckconsole.SpellCheckerConsole Maven / Gradle / Ivy

package io.github.mightguy.spellcheckconsole;

import io.github.mightguy.spellcheck.symspell.api.DataHolder;
import io.github.mightguy.spellcheck.symspell.common.Composition;
import io.github.mightguy.spellcheck.symspell.common.DictionaryItem;
import io.github.mightguy.spellcheck.symspell.common.Murmur3HashFunction;
import io.github.mightguy.spellcheck.symspell.common.SpellCheckSettings;
import io.github.mightguy.spellcheck.symspell.common.SuggestionItem;
import io.github.mightguy.spellcheck.symspell.common.WeightedDamerauLevenshteinDistance;
import io.github.mightguy.spellcheck.symspell.exception.SpellCheckException;
import io.github.mightguy.spellcheck.symspell.impl.InMemoryDataHolder;
import io.github.mightguy.spellcheck.symspell.impl.SymSpellCheck;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.List;
import lombok.extern.slf4j.Slf4j;

/**
 * Command-line front end for {@link SymSpellCheck}.
 *
 * <p>On start-up the two frequency dictionary resources are read from the classpath into an
 * {@link InMemoryDataHolder}. The program then prints the results of {@code lookup},
 * {@code lookupCompound} and {@code wordBreakSegmentation} either for the first command-line
 * argument or, when no argument is given, for every term typed on standard input until the user
 * enters {@code q} or the input ends.
 */
@Slf4j
public class SpellCheckerConsole {

  /** Classpath resource holding "term count" lines. */
  private static final String UNIGRAM_RESOURCE = "frequency_dictionary_en_82_765.txt";

  /** Classpath resource holding "word1 word2 count" lines. */
  private static final String BIGRAM_RESOURCE = "frequency_bigramdictionary_en_243_342.txt";

  /** Maximum number of plain lookup suggestions printed per term. */
  private static final int MAX_SUGGESTIONS = 10;

  private DataHolder dataHolder;
  private SymSpellCheck symSpellCheck;


  /**
   * Builds the spell checker with default settings and loads both dictionaries.
   *
   * @return {@code true} once both dictionaries have been loaded; failures are reported by
   *     exception rather than by the return value
   * @throws IOException if a dictionary resource is missing or cannot be read
   * @throws SpellCheckException if the data holder rejects an entry
   */
  private boolean init() throws IOException, SpellCheckException {
    SpellCheckSettings spellCheckSettings = SpellCheckSettings.builder().build();

    WeightedDamerauLevenshteinDistance weightedDamerauLevenshteinDistance =
        new WeightedDamerauLevenshteinDistance(spellCheckSettings.getDeletionWeight(),
            spellCheckSettings.getInsertionWeight(), spellCheckSettings.getReplaceWeight(),
            spellCheckSettings.getTranspositionWeight(), null);
    dataHolder = new InMemoryDataHolder(spellCheckSettings, new Murmur3HashFunction());

    symSpellCheck = new SymSpellCheck(dataHolder, weightedDamerauLevenshteinDistance,
        spellCheckSettings);
    loadUniGramFile(UNIGRAM_RESOURCE);
    loadBiGramFile(BIGRAM_RESOURCE);

    return true;
  }

  /**
   * Opens a classpath resource as a UTF-8 reader.
   *
   * <p>A stream is used instead of {@code getResource(..).getFile()} so that the resource can
   * also be read when it is packaged inside a jar or lives under a path containing characters
   * that are escaped in URLs.
   *
   * @param resourceName name of the resource relative to the classpath root
   * @return a buffered reader the caller must close
   * @throws IOException if the resource is not present on the classpath
   */
  private static BufferedReader openResource(String resourceName) throws IOException {
    InputStream stream =
        SpellCheckerConsole.class.getClassLoader().getResourceAsStream(resourceName);
    if (stream == null) {
      throw new IOException("Dictionary resource not found on classpath: " + resourceName);
    }
    return new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
  }

  /**
   * Loads single-word entries: each line is a term followed by its count, separated by
   * whitespace. Blank lines are ignored and lines with fewer than two fields are skipped with a
   * warning.
   *
   * @param resourceName classpath resource to read
   * @throws IOException if the resource is missing or cannot be read
   * @throws SpellCheckException if the data holder rejects an entry
   */
  private void loadUniGramFile(String resourceName) throws IOException, SpellCheckException {
    try (BufferedReader br = openResource(resourceName)) {
      String line;
      while ((line = br.readLine()) != null) {
        String entry = line.trim();
        if (entry.isEmpty()) {
          continue;
        }
        String[] arr = entry.split("\\s+");
        if (arr.length < 2) {
          log.warn("Skipping malformed unigram entry: {}", entry);
          continue;
        }
        dataHolder.addItem(new DictionaryItem(arr[0], Double.parseDouble(arr[1]), -1.0));
      }
    }
  }

  /**
   * Loads two-word entries: each line is two words followed by a count, separated by whitespace.
   * The two words are stored as one term joined by a single space. Blank lines are ignored and
   * lines with fewer than three fields are skipped with a warning.
   *
   * @param resourceName classpath resource to read
   * @throws IOException if the resource is missing or cannot be read
   * @throws SpellCheckException if the data holder rejects an entry
   */
  private void loadBiGramFile(String resourceName) throws IOException, SpellCheckException {
    try (BufferedReader br = openResource(resourceName)) {
      String line;
      while ((line = br.readLine()) != null) {
        String entry = line.trim();
        if (entry.isEmpty()) {
          continue;
        }
        String[] arr = entry.split("\\s+");
        if (arr.length < 3) {
          log.warn("Skipping malformed bigram entry: {}", entry);
          continue;
        }
        dataHolder
            .addItem(new DictionaryItem(arr[0] + " " + arr[1], Double.parseDouble(arr[2]), -1.0));
      }
    }
  }

  /**
   * Dispatches to one-shot mode when an argument is present, otherwise to interactive mode.
   * Only the first argument is used.
   *
   * @param args command-line arguments as passed to {@link #main(String[])}
   * @throws IOException if reading from standard input fails
   * @throws SpellCheckException if a spell-check call fails
   */
  private void suggestItem(String[] args) throws IOException, SpellCheckException {
    if (args.length > 0) {
      suggestItemOnArgs(args[0]);
    } else {
      suggestItemOnConsole();
    }

  }

  /**
   * Logs the spell-check results for one term: up to {@value #MAX_SUGGESTIONS} plain lookup
   * suggestions, the first compound suggestion (if any) and the word-break segmentation.
   *
   * @param inputTerm the text to check
   * @throws SpellCheckException if a spell-check call fails
   */
  private void suggestItemOnArgs(String inputTerm) throws SpellCheckException {
    List<SuggestionItem> suggestions = symSpellCheck.lookup(inputTerm);
    List<SuggestionItem> compounds = symSpellCheck.lookupCompound(inputTerm);
    Composition composition = symSpellCheck.wordBreakSegmentation(inputTerm, 10, 2);

    suggestions.stream()
        .limit(MAX_SUGGESTIONS)
        .forEach(suggestion -> log.info("Lookup suggestion: {} {} {}",
            suggestion.getTerm(), suggestion.getDistance(), suggestion.getCount()));
    // Guard against an empty result instead of failing with IndexOutOfBoundsException.
    if (compounds.isEmpty()) {
      log.info("LookupCompound: no suggestion");
    } else {
      log.info("LookupCompound: {}", compounds.get(0).getTerm());
    }
    log.info("Composition: {}", composition.getCorrectedString());
  }

  /**
   * Interactive loop: prompts for a term, prints its suggestions and repeats until the user
   * enters {@code q} (case-insensitive) or standard input reaches end of stream.
   *
   * @throws IOException if reading from standard input fails
   * @throws SpellCheckException if a spell-check call fails
   */
  private void suggestItemOnConsole() throws IOException, SpellCheckException {
    // System.in is deliberately not closed; it belongs to the JVM, not to this method.
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(System.in));
    while (true) {
      log.info("Please enter the term to get the suggest Item");
      String inputTerm = reader.readLine();
      // readLine() returns null at end of input (Ctrl-D, closed pipe); treat it like "q".
      if (inputTerm == null || "q".equalsIgnoreCase(inputTerm)) {
        return;
      }
      suggestItemOnArgs(inputTerm);
    }
  }

  /**
   * Program entry point.
   *
   * @param args optional; when present, {@code args[0]} is checked once and the program exits
   * @throws IOException if a dictionary cannot be read or standard input fails
   * @throws SpellCheckException if initialisation or a spell-check call fails
   */
  public static void main(String[] args) throws IOException, SpellCheckException {
    SpellCheckerConsole spellCheckerConsole = new SpellCheckerConsole();
    spellCheckerConsole.init();
    spellCheckerConsole.suggestItem(args);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy