Please wait. This can take a few minutes ...
Downloading a project requires many resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
org.languagetool.server.TextChecker Maven / Gradle / Ivy
/* LanguageTool, a natural language style checker
* Copyright (C) 2016 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.server;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.sun.net.httpserver.HttpExchange;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.jetbrains.annotations.NotNull;
import org.languagetool.*;
import org.languagetool.gui.Configuration;
import org.languagetool.language.LanguageIdentifier;
import org.languagetool.markup.AnnotatedText;
import org.languagetool.rules.CategoryId;
import org.languagetool.rules.RuleMatch;
import org.languagetool.tools.Tools;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.*;
import static org.languagetool.server.ServerTools.print;
/**
* @since 3.4
*/
abstract class TextChecker {
/**
 * Sets the response headers on the exchange. {@code checkText} calls this after the
 * check has finished and before the response body is built and sent.
 *
 * @param httpExchange the exchange of the current request
 */
protected abstract void setHeaders(HttpExchange httpExchange);
/**
 * Builds the response body that {@code checkText} sends to the client.
 *
 * @param text the checked text
 * @param lang the language information returned by {@link #getLanguage}
 * @param motherTongue the language given by the {@code motherTongue} parameter, or {@code null}
 * @param matches the matches found by the check (possibly partial, see {@code incompleteResultReason})
 * @param hiddenMatches matches obtained from the hidden matches server, empty if none
 * @param incompleteResultReason {@code null} for complete results, otherwise a message
 *        explaining why the results are incomplete
 * @return the response body as a string
 */
protected abstract String getResponse(AnnotatedText text, DetectedLanguage lang, Language motherTongue, List<RuleMatch> matches,
                                      List<RuleMatch> hiddenMatches, String incompleteResultReason);
/**
 * Extracts the preferred language variants from the request parameters.
 * The result is handed to {@link #getLanguage} by {@code checkText}.
 */
@NotNull
protected abstract List<String> getPreferredVariants(Map<String, String> parameters);
/**
 * Determines the language to use for the given text. {@code checkText} uses
 * {@code getGivenLanguage()} of the result for the check itself and
 * {@code getDetectedLanguage()} for logging.
 */
protected abstract DetectedLanguage getLanguage(String text, Map<String, String> parameters, List<String> preferredVariants);
/**
 * Tells whether the request asked for language auto-detection. {@code checkText} only
 * uses this to append {@code [auto]} to the language in its log line.
 */
protected abstract boolean getLanguageAutoDetect(Map<String, String> parameters);
/**
 * Extracts the IDs of the rules to enable from the request parameters.
 */
@NotNull
protected abstract List<String> getEnabledRuleIds(Map<String, String> parameters);
/**
 * Extracts the IDs of the rules to disable from the request parameters.
 */
@NotNull
protected abstract List<String> getDisabledRuleIds(Map<String, String> parameters);
protected static final int CONTEXT_SIZE = 40; // characters
/** Server configuration: limits, cache size, model directories, hidden matches server, etc. */
protected final HTTPServerConfig config;
/** Charset name used to encode the response body in {@code checkText}. */
private static final String ENCODING = "UTF-8";
private static final int CACHE_STATS_PRINT = 500; // print cache stats every n cache requests
/**
 * Number of checks so far per language code (as returned by
 * {@code Language.getShortCodeWithCountryAndVariant()}), updated by {@code checkText}.
 * NOTE(review): this is a plain HashMap; if checkText is invoked from several threads
 * at once, access needs to be synchronized - confirm against the callers.
 */
private final Map<String, Integer> languageCheckCounts = new HashMap<>();
/** Whether GUI-configured options may be applied, see {@code configureFromGUI}. */
private final boolean internalServer;
/** Only its size is read (for the log line in {@code checkText}); may be {@code null}. */
private final Queue<?> workQueue;
/** Source of the handle/request counters printed in log lines. */
private final RequestCounter reqCounter;
/** Language identifier used by {@code detectLanguageOfString}; fasttext is enabled in the constructor. */
private final LanguageIdentifier identifier;
/** Runs the actual checks so that {@code checkText} can wait with a timeout. */
private final ExecutorService executorService;
/** Result cache handed to every JLanguageTool instance; {@code null} if the configured cache size is not positive. */
private final ResultCache cache;
private final DatabaseLogger logger;
/** Server ID used in log entries; {@code null} if the logger is not logging. */
private final Long logServerId;
TextChecker(HTTPServerConfig config, boolean internalServer, Queue workQueue, RequestCounter reqCounter) {
this.config = config;
this.internalServer = internalServer;
this.workQueue = workQueue;
this.reqCounter = reqCounter;
this.identifier = new LanguageIdentifier();
this.identifier.enableFasttext(config.getFasttextBinary(), config.getFasttextModel());
this.executorService = Executors.newCachedThreadPool(new ThreadFactoryBuilder().setNameFormat("lt-textchecker-thread-%d").build());
this.cache = config.getCacheSize() > 0 ? new ResultCache(config.getCacheSize()) : null;
this.logger = DatabaseLogger.getInstance();
if (logger.isLogging()) {
this.logServerId = DatabaseAccess.getInstance().getOrCreateServerId();
} else {
this.logServerId = null;
}
}
/**
 * Shuts down the executor that runs the text checks by calling
 * {@link ExecutorService#shutdownNow()} on it.
 */
void shutdownNow() {
  this.executorService.shutdownNow();
}
void checkText(AnnotatedText aText, HttpExchange httpExchange, Map parameters, ErrorRequestLimiter errorRequestLimiter,
String remoteAddress) throws Exception {
checkParams(parameters);
long timeStart = System.currentTimeMillis();
UserLimits limits = ServerTools.getUserLimits(parameters, config);
// logging information
String agent = parameters.get("useragent") != null ? parameters.get("useragent") : "-";
Long agentId = null, userId = null;
if (logger.isLogging()) {
DatabaseAccess db = DatabaseAccess.getInstance();
agentId = db.getOrCreateClientId(parameters.get("useragent"));
userId = limits.getPremiumUid();
}
String referrer = httpExchange.getRequestHeaders().getFirst("Referer");
String userAgent = httpExchange.getRequestHeaders().getFirst("User-Agent");
if (aText.getPlainText().length() > limits.getMaxTextLength()) {
String msg = "limit: " + limits.getMaxTextLength() + ", size: " + aText.getPlainText().length();
logger.log(new DatabaseAccessLimitLogEntry("MaxCharacterSizeExceeded", logServerId, agentId, userId, msg, referrer, userAgent));
throw new TextTooLongException("Your text exceeds the limit of " + limits.getMaxTextLength() +
" characters (it's " + aText.getPlainText().length() + " characters). Please submit a shorter text.");
}
UserConfig userConfig = new UserConfig(
limits.getPremiumUid() != null ? getUserDictWords(limits.getPremiumUid()) : Collections.emptyList(),
new HashMap<>(), config.getMaxSpellingSuggestions());
//print("Check start: " + text.length() + " chars, " + langParam);
boolean autoDetectLanguage = getLanguageAutoDetect(parameters);
List preferredVariants = getPreferredVariants(parameters);
DetectedLanguage detLang = getLanguage(aText.getPlainText(), parameters, preferredVariants);
Language lang = detLang.getGivenLanguage();
Integer count = languageCheckCounts.get(lang.getShortCodeWithCountryAndVariant());
if (count == null) {
count = 1;
} else {
count++;
}
//print("Starting check: " + aText.getPlainText().length() + " chars, #" + count);
String motherTongueParam = parameters.get("motherTongue");
Language motherTongue = motherTongueParam != null ? Languages.getLanguageForShortCode(motherTongueParam) : null;
boolean useEnabledOnly = "yes".equals(parameters.get("enabledOnly")) || "true".equals(parameters.get("enabledOnly"));
List enabledRules = getEnabledRuleIds(parameters);
List disabledRules = getDisabledRuleIds(parameters);
List enabledCategories = getCategoryIds("enabledCategories", parameters);
List disabledCategories = getCategoryIds("disabledCategories", parameters);
if ((disabledRules.size() > 0 || disabledCategories.size() > 0) && useEnabledOnly) {
throw new IllegalArgumentException("You cannot specify disabled rules or categories using enabledOnly=true");
}
if (enabledRules.size() == 0 && enabledCategories.size() == 0 && useEnabledOnly) {
throw new IllegalArgumentException("You must specify enabled rules or categories when using enabledOnly=true");
}
boolean useQuerySettings = enabledRules.size() > 0 || disabledRules.size() > 0 ||
enabledCategories.size() > 0 || disabledCategories.size() > 0;
boolean allowIncompleteResults = "true".equals(parameters.get("allowIncompleteResults"));
boolean enableHiddenRules = "true".equals(parameters.get("enableHiddenRules"));
JLanguageTool.Mode mode = ServerTools.getMode(parameters);
QueryParams params = new QueryParams(enabledRules, disabledRules, enabledCategories, disabledCategories,
useEnabledOnly, useQuerySettings, allowIncompleteResults, enableHiddenRules, mode);
Long textSessionId = null;
try {
if (parameters.containsKey("textSessionId")) {
textSessionId = Long.valueOf(parameters.get("textSessionId"));
}
} catch(NumberFormatException ignored) {
}
int textSize = aText.getPlainText().length();
List ruleMatchesSoFar = Collections.synchronizedList(new ArrayList<>());
Future> future = executorService.submit(new Callable>() {
@Override
public List call() throws Exception {
// use to fake OOM in thread for testing:
/*if (Math.random() < 0.1) {
throw new OutOfMemoryError();
}*/
return getRuleMatches(aText, lang, motherTongue, params, userConfig, f -> ruleMatchesSoFar.add(f));
}
});
String incompleteResultReason = null;
List matches;
if (limits.getMaxCheckTimeMillis() < 0) {
matches = future.get();
} else {
try {
matches = future.get(limits.getMaxCheckTimeMillis(), TimeUnit.MILLISECONDS);
} catch (ExecutionException e) {
future.cancel(true);
if (ExceptionUtils.getRootCause(e) instanceof ErrorRateTooHighException) {
logger.log(new DatabaseCheckErrorLogEntry("ErrorRateTooHigh", logServerId, agentId, userId, lang, detLang.getDetectedLanguage(), textSize, "matches: " + ruleMatchesSoFar.size()));
}
if (params.allowIncompleteResults && ExceptionUtils.getRootCause(e) instanceof ErrorRateTooHighException) {
print(e.getMessage() + " - returning " + ruleMatchesSoFar.size() + " matches found so far. Detected language: " + detLang);
matches = new ArrayList<>(ruleMatchesSoFar); // threads might still be running, so make a copy
incompleteResultReason = "Results are incomplete: " + ExceptionUtils.getRootCause(e).getMessage();
} else if (e.getCause() != null && e.getCause() instanceof OutOfMemoryError) {
throw (OutOfMemoryError)e.getCause();
} else {
throw new RuntimeException(e.getMessage() + ", detected: " + detLang, e);
}
} catch (TimeoutException e) {
boolean cancelled = future.cancel(true);
Path loadFile = Paths.get("/proc/loadavg"); // works in Linux only(?)
String loadInfo = loadFile.toFile().exists() ? Files.readAllLines(loadFile).toString() : "(unknown)";
if (errorRequestLimiter != null) {
errorRequestLimiter.logAccess(remoteAddress);
}
String message = "Text checking took longer than allowed maximum of " + limits.getMaxCheckTimeMillis() +
" milliseconds (cancelled: " + cancelled +
", lang: " + lang.getShortCodeWithCountryAndVariant() +
", detected: " + detLang +
", #" + count +
", " + aText.getPlainText().length() + " characters of text" +
", h: " + reqCounter.getHandleCount() + ", r: " + reqCounter.getRequestCount() + ", system load: " + loadInfo + ")";
if (params.allowIncompleteResults) {
print(message + " - returning " + ruleMatchesSoFar.size() + " matches found so far");
matches = new ArrayList<>(ruleMatchesSoFar); // threads might still be running, so make a copy
incompleteResultReason = "Results are incomplete: text checking took longer than allowed maximum of " +
String.format(Locale.ENGLISH, "%.2f", limits.getMaxCheckTimeMillis()/1000.0) + " seconds";
} else {
logger.log(new DatabaseCheckErrorLogEntry("MaxCheckTimeExceeded",
logServerId, agentId, limits.getPremiumUid(), lang, detLang.getDetectedLanguage(), textSize, "load: "+ loadInfo));
throw new RuntimeException(message, e);
}
}
}
setHeaders(httpExchange);
List hiddenMatches = new ArrayList<>();
if (config.getHiddenMatchesServer() != null && params.enableHiddenRules && config.getHiddenMatchesLanguages().contains(lang)) {
ResultExtender resultExtender = new ResultExtender(config.getHiddenMatchesServer(), config.getHiddenMatchesServerTimeout());
try {
long start = System.currentTimeMillis();
List extensionMatches = resultExtender.getExtensionMatches(aText.getPlainText(), lang);
hiddenMatches = resultExtender.getFilteredExtensionMatches(matches, extensionMatches);
long end = System.currentTimeMillis();
print("Hidden matches: " + extensionMatches.size() + " -> " + hiddenMatches.size() + " in " + (end-start) + "ms");
} catch (Exception e) {
print("Warn: Failed to query hidden matches server at " + config.getHiddenMatchesServer() + ": " + e.getClass() + ": " + e.getMessage());
}
}
String response = getResponse(aText, detLang, motherTongue, matches, hiddenMatches, incompleteResultReason);
String messageSent = "sent";
String languageMessage = lang.getShortCodeWithCountryAndVariant();
try {
httpExchange.sendResponseHeaders(HttpURLConnection.HTTP_OK, response.getBytes(ENCODING).length);
httpExchange.getResponseBody().write(response.getBytes(ENCODING));
} catch (IOException exception) {
// the client is disconnected
messageSent = "notSent: " + exception.getMessage();
}
if (motherTongue != null) {
languageMessage += " (mother tongue: " + motherTongue.getShortCodeWithCountryAndVariant() + ")";
}
if (autoDetectLanguage) {
languageMessage += "[auto]";
}
languageCheckCounts.put(lang.getShortCodeWithCountryAndVariant(), count);
int computationTime = (int) (System.currentTimeMillis() - timeStart);
print("Check done: " + aText.getPlainText().length() + " chars, " + languageMessage + ", #" + count + ", " + referrer + ", "
+ matches.size() + " matches, "
+ computationTime + "ms, agent:" + agent
+ ", " + messageSent + ", q:" + (workQueue != null ? workQueue.size() : "?")
+ ", h:" + reqCounter.getHandleCount() + ", distinctH:" + reqCounter.getDistinctIps()
+ ", r:" + reqCounter.getRequestCount());
int matchCount = matches.size();
DatabaseCheckLogEntry logEntry = new DatabaseCheckLogEntry(userId, agentId, logServerId, textSize, matchCount,
lang, detLang.getDetectedLanguage(), computationTime, textSessionId);
Map ruleMatchCount = new HashMap<>();
for (RuleMatch match : matches) {
String ruleId = match.getRule().getId();
ruleMatchCount.put(ruleId, ruleMatchCount.getOrDefault(ruleId, 0) + 1);
}
for (Map.Entry ruleCount : ruleMatchCount.entrySet()) {
logEntry.addRuleMatch(new DatabaseRuleMatchLogEntry(ruleCount.getKey(), ruleCount.getValue()));
}
logger.log(logEntry);
}
/**
 * Fetches the user dictionary words of the given user via {@code DatabaseAccess}.
 *
 * @param userId ID of the user whose words are requested
 */
private List getUserDictWords(Long userId) {
  return DatabaseAccess.getInstance().getUserDictWords(userId);
}
/**
 * Verifies that the request carries something to check.
 *
 * @param parameters the request parameters
 * @throws IllegalArgumentException if neither {@code text} nor {@code data} is set
 */
protected void checkParams(Map parameters) {
  if (parameters.get("text") != null) {
    return;
  }
  if (parameters.get("data") != null) {
    return;
  }
  throw new IllegalArgumentException("Missing 'text' or 'data' parameter");
}
/**
 * Runs the check with a freshly configured JLanguageTool instance. Before that, prints and logs
 * the cache hit rate whenever the cache's request count is a positive multiple of
 * {@code CACHE_STATS_PRINT}.
 *
 * @param listener passed on to {@code JLanguageTool.check}
 * @return the result of {@code JLanguageTool.check}
 */
private List getRuleMatches(AnnotatedText aText, Language lang,
                            Language motherTongue, QueryParams params, UserConfig userConfig, RuleMatchListener listener) throws Exception {
  boolean cacheStatsDue = cache != null && cache.requestCount() > 0 && cache.requestCount() % CACHE_STATS_PRINT == 0;
  if (cacheStatsDue) {
    double cacheHitRate = cache.hitRate();
    print("Cache stats: " + String.format(Locale.ENGLISH, "%.2f", cacheHitRate * 100.0f) + "% hit rate");
    logger.log(new DatabaseCacheStatsLogEntry(logServerId, (float) cacheHitRate));
  }
  JLanguageTool languageTool = getLanguageToolInstance(lang, motherTongue, params, userConfig);
  return languageTool.check(aText, true, JLanguageTool.ParagraphHandling.NORMAL, listener, params.mode);
}
/**
 * Reads a comma-separated parameter and wraps each entry in a {@link CategoryId}.
 *
 * @param paramName name of the request parameter to read
 * @param parameters the request parameters
 * @return the category IDs in parameter order; empty if the parameter is not set
 */
@NotNull
private List<CategoryId> getCategoryIds(String paramName, Map<String, String> parameters) {
  List<String> stringIds = getCommaSeparatedStrings(paramName, parameters);
  List<CategoryId> ids = new ArrayList<>(stringIds.size());
  for (String stringId : stringIds) {
    ids.add(new CategoryId(stringId));
  }
  return ids;
}
/**
 * Splits the value of a request parameter at commas.
 * Entries are returned as-is: they are not trimmed, and empty entries between commas are kept.
 *
 * @param paramName name of the request parameter to read
 * @param parameters the request parameters
 * @return a new, modifiable list of the entries; empty if the parameter is not set
 */
@NotNull
protected List<String> getCommaSeparatedStrings(String paramName, Map<String, String> parameters) {
  String paramValue = parameters.get(paramName);
  if (paramValue == null) {
    return new ArrayList<>();
  }
  return new ArrayList<>(Arrays.asList(paramValue.split(",")));
}
/**
 * Detects the language of {@code text} and maps it to a language variant.
 * <p>
 * If detection yields nothing, {@code fallbackLanguage} (or {@code "en"} if that is {@code null})
 * is used. If preferred variants are given, every variant whose language part (the text before
 * the first dash) equals the current language's short code replaces the language. Without
 * preferred variants the language's default variant is used, if it has one.
 *
 * @param text the text whose language is to be detected
 * @param fallbackLanguage short code used when detection fails, may be {@code null}
 * @param preferredVariants variants in the form {@code en-GB}; must not be {@code null}
 * @return the detected (or fallback) language, mapped to a variant where possible
 * @throws IllegalArgumentException if a preferred variant contains no dash or is unknown
 */
Language detectLanguageOfString(String text, String fallbackLanguage, List<String> preferredVariants) {
  Language lang = identifier.detectLanguage(text);
  if (lang == null) {
    lang = Languages.getLanguageForShortCode(fallbackLanguage != null ? fallbackLanguage : "en");
  }
  if (preferredVariants.isEmpty()) {
    Language defaultVariant = lang.getDefaultLanguageVariant();
    return defaultVariant != null ? defaultVariant : lang;
  }
  for (String preferredVariant : preferredVariants) {
    int dashPos = preferredVariant.indexOf('-');
    if (dashPos < 0) {
      throw new IllegalArgumentException("Invalid format for 'preferredVariants', expected a dash as in 'en-GB': '" + preferredVariant + "'");
    }
    // substring instead of split("-")[0]: split() returns an empty array for input
    // consisting only of dashes, which made the array access fail
    String preferredVariantLang = preferredVariant.substring(0, dashPos);
    if (preferredVariantLang.equals(lang.getShortCode())) {
      lang = Languages.getLanguageForShortCode(preferredVariant);
      if (lang == null) {
        throw new IllegalArgumentException("Invalid 'preferredVariants', no such language/variant found: '" + preferredVariant + "'");
      }
    }
  }
  return lang;
}
/**
 * Builds a new JLanguageTool for one request: applies the configured error rate limit,
 * activates language model and word2vec rules if directories are configured, applies either
 * the rules config file or the GUI configuration, and finally the per-request rule selection.
 *
 * @param lang the language to be used
 * @param motherTongue the user's mother tongue or {@code null}
 * @param params per-request settings; the rule selection is applied only if {@code useQuerySettings} is set
 * @param userConfig user-specific configuration passed to the JLanguageTool constructor
 */
private JLanguageTool getLanguageToolInstance(Language lang, Language motherTongue, QueryParams params, UserConfig userConfig) throws Exception {
  JLanguageTool languageTool = new JLanguageTool(lang, motherTongue, cache, userConfig);
  languageTool.setMaxErrorsPerWordRate(config.getMaxErrorsPerWordRate());
  if (config.getLanguageModelDir() != null) {
    languageTool.activateLanguageModelRules(config.getLanguageModelDir());
  }
  if (config.getWord2VecModelDir() != null) {
    languageTool.activateWord2VecModelRules(config.getWord2VecModelDir());
  }
  // a rules config file takes precedence over the GUI configuration
  if (config.getRulesConfigFile() == null) {
    configureFromGUI(languageTool, lang);
  } else {
    configureFromRulesFile(languageTool, lang);
  }
  if (!params.useQuerySettings) {
    return languageTool;
  }
  Tools.selectRules(languageTool, new HashSet<>(params.disabledCategories), new HashSet<>(params.enabledCategories),
          new HashSet<>(params.disabledRules), new HashSet<>(params.enabledRules), params.useEnabledOnly);
  return languageTool;
}
/**
 * Configures {@code langTool} from the rules config file named in the server configuration.
 * The GUI configuration is not consulted here.
 *
 * @throws RuntimeException if no rules config file is configured
 */
private void configureFromRulesFile(JLanguageTool langTool, Language lang) throws IOException {
  print("Using options configured in " + config.getRulesConfigFile());
  if (config.getRulesConfigFile() == null) {
    throw new RuntimeException("config.getRulesConfigFile() is null");
  }
  // If we are explicitly configuring from rules, ignore the useGUIConfig flag
  org.languagetool.gui.Tools.configureFromRules(
          langTool,
          new Configuration(config.getRulesConfigFile().getCanonicalFile().getParentFile(),
                  config.getRulesConfigFile().getName(), lang));
}
/**
 * Applies the GUI configuration for {@code lang} to {@code langTool}, but only if this is an
 * internal server and that configuration has its "use GUI config" flag set. The configuration
 * object is created in any case.
 */
private void configureFromGUI(JLanguageTool langTool, Language lang) throws IOException {
  Configuration guiConfig = new Configuration(lang);
  boolean applyGuiOptions = internalServer && guiConfig.getUseGUIConfig();
  if (!applyGuiOptions) {
    return;
  }
  print("Using options configured in the GUI");
  org.languagetool.gui.Tools.configureFromRules(langTool, guiConfig);
}
private static class QueryParams {
final List enabledRules;
final List disabledRules;
final List enabledCategories;
final List disabledCategories;
final boolean useEnabledOnly;
final boolean useQuerySettings;
final boolean allowIncompleteResults;
final boolean enableHiddenRules;
final JLanguageTool.Mode mode;
QueryParams(List enabledRules, List disabledRules, List enabledCategories, List disabledCategories,
boolean useEnabledOnly, boolean useQuerySettings, boolean allowIncompleteResults, boolean enableHiddenRules, JLanguageTool.Mode mode) {
this.enabledRules = enabledRules;
this.disabledRules = disabledRules;
this.enabledCategories = enabledCategories;
this.disabledCategories = disabledCategories;
this.useEnabledOnly = useEnabledOnly;
this.useQuerySettings = useQuerySettings;
this.allowIncompleteResults = allowIncompleteResults;
this.enableHiddenRules = enableHiddenRules;
this.mode = Objects.requireNonNull(mode);
}
}
}