All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.languagetool.server.ConfidenceMapLoader Maven / Gradle / Ivy

The newest version!
/* LanguageTool, a natural language style checker
 * Copyright (C) 2024 Daniel Naber (http://www.danielnaber.de)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool.server;

import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.tools.ConfidenceKey;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static java.nio.charset.StandardCharsets.UTF_8;

class ConfidenceMapLoader {

  private static final Logger logger = LoggerFactory.getLogger(ConfidenceMapLoader.class);

  Map load(File filePattern) throws IOException {
    if (!filePattern.toString().contains("{lang}")) {
      throw new RuntimeException("The 'ruleIdToConfidenceFile' parameter must contain '{lang}' as a placeholder for the language code");
    }
    Map confMap = new HashMap<>();
    for (Language lang : Languages.get()) {
      int loadCount = 0;
      String fileName = filePattern.getAbsolutePath().replace("{lang}", lang.getShortCode());
      if (!Paths.get(fileName).toFile().exists()) {
        continue;
      }
      List lines = Files.readAllLines(Paths.get(fileName), UTF_8);
      for (String line : lines) {
        if (line.startsWith("#")) {
          continue;
        }
        String[] parts = line.split(",");
        if (parts.length >= 2) {   // there might be more columns for better debugging, but we don't use them here
          try {
            float confidence = Float.parseFloat(parts[1]);
            confMap.put(new ConfidenceKey(lang, parts[0]), confidence);
            loadCount++;
          } catch (NumberFormatException e) {
            throw new RuntimeException("Invalid confidence float value in " + fileName + ", expected 'RULE_ID,float_value[,...]': " + line);
          }
        } else {
          throw new RuntimeException("Invalid line in " + fileName + ", expected 'RULE_ID,float_value[,...]': " + line);
        }
      }
      logger.info("Loaded " + loadCount + " mappings for " + lang + " from confidence map for rules from " + fileName);
    }
    if (confMap.size() == 0) {
      throw new RuntimeException("No confidence values could be loaded for " + filePattern +
        " -- please check there are actually files that match this pattern");
    }
    return confMap;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy