org.languagetool.MultiThreadedJLanguageTool Maven / Gradle / Ivy
Show all versions of languagetool-core Show documentation
/* LanguageTool, a natural language style checker
* Copyright (C) 2013 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import org.languagetool.markup.AnnotatedText;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
/**
* A variant of {@link JLanguageTool} that uses several threads for rule matching.
* Use this if you want text checking to be fast and do not care about the
* high load that this might cause. Call {@link #shutdown()} when you don't need
* the object anymore.
*
* Also see the javadoc of {@link JLanguageTool}.
*
* Thread-safety: this class is not thread-safe, see the remarks at {@link JLanguageTool}.
*/
public class MultiThreadedJLanguageTool extends JLanguageTool {
private final int threadPoolSize;
private final ExecutorService threadPool;
public MultiThreadedJLanguageTool(Language language) {
this(language, null);
}
/**
* @see #shutdown()
* @param threadPoolSize the number of concurrent threads
* @since 2.9
*/
public MultiThreadedJLanguageTool(Language language, int threadPoolSize) {
this(language, null, threadPoolSize);
}
/**
* @see #shutdown()
*/
public MultiThreadedJLanguageTool(Language language, Language motherTongue) {
this(language, motherTongue, getDefaultThreadCount());
}
/**
* @see #shutdown()
* @param threadPoolSize the number of concurrent threads
* @since 2.9
*/
public MultiThreadedJLanguageTool(Language language, Language motherTongue, int threadPoolSize) {
super(language, motherTongue);
this.threadPoolSize = threadPoolSize;
threadPool = Executors.newFixedThreadPool(getThreadPoolSize(), new DaemonThreadFactory());
}
/**
* Call this to shut down the internally used thread pool.
* @since 3.0
*/
public void shutdown() {
threadPool.shutdownNow();
}
/**
* Call this to shut down the internally used thread pool after all running tasks are finished.
* @since 3.1
*/
public void shutdownWhenDone() {
threadPool.shutdown();
}
private static int getDefaultThreadCount() {
String threadCountStr = System.getProperty("org.languagetool.thread_count_internal", "-1");
int threadPoolSize = Integer.parseInt(threadCountStr);
if (threadPoolSize == -1) {
threadPoolSize = Runtime.getRuntime().availableProcessors();
}
return threadPoolSize;
}
/**
* When no thread pool size is configured, the number of available processors is returned.
*/
protected int getThreadPoolSize() {
return threadPoolSize;
}
/**
* @return a fixed size executor with the given number of threads
*/
protected ExecutorService getExecutorService() {
return threadPool;
}
@Override
protected List analyzeSentences(List sentences) throws IOException {
List analyzedSentences = new ArrayList<>();
ExecutorService executorService = getExecutorService();
int j = 0;
List> callables = new ArrayList<>();
for (String sentence : sentences) {
AnalyzeSentenceCallable analyzeSentenceCallable =
++j < sentences.size()
? new AnalyzeSentenceCallable(sentence)
: new ParagraphEndAnalyzeSentenceCallable(sentence);
callables.add(analyzeSentenceCallable);
}
try {
List> futures = executorService.invokeAll(callables);
for (Future future : futures) {
AnalyzedSentence analyzedSentence = future.get();
rememberUnknownWords(analyzedSentence);
printSentenceInfo(analyzedSentence);
analyzedSentences.add(analyzedSentence);
}
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
return analyzedSentences;
}
@Override
protected List performCheck(List analyzedSentences, List sentences,
List allRules, ParagraphHandling paraMode,
AnnotatedText annotatedText, RuleMatchListener listener) throws IOException {
int charCount = 0;
int lineCount = 0;
int columnCount = 1;
List ruleMatches = new ArrayList<>();
ExecutorService executorService = getExecutorService();
try {
List>> callables =
createTextCheckCallables(paraMode, annotatedText, analyzedSentences, sentences, allRules, charCount, lineCount, columnCount, listener);
List>> futures = executorService.invokeAll(callables);
for (Future> future : futures) {
ruleMatches.addAll(future.get());
}
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
return ruleMatches;
}
private List>> createTextCheckCallables(ParagraphHandling paraMode,
AnnotatedText annotatedText, List analyzedSentences, List sentences,
List allRules, int charCount, int lineCount, int columnCount, RuleMatchListener listener) {
int threads = getThreadPoolSize();
int totalRules = allRules.size();
int chunkSize = totalRules / threads;
int firstItem = 0;
List>> callables = new ArrayList<>();
// split the rules - all rules are independent, so it makes more sense to split
// the rules than to split the text:
for (int i = 0; i < threads; i++) {
List subRules;
//TODO: make sure we don't split rules with same id so RuleGroupFilter still works
if (i == threads - 1) {
// make sure the last rules are not lost due to rounding issues:
subRules = allRules.subList(firstItem, totalRules);
} else {
subRules = allRules.subList(firstItem, firstItem + chunkSize);
}
callables.add(new TextCheckCallable(subRules, sentences, analyzedSentences, paraMode, annotatedText, charCount, lineCount, columnCount, listener));
firstItem = firstItem + chunkSize;
}
return callables;
}
private class AnalyzeSentenceCallable implements Callable {
private final String sentence;
private AnalyzeSentenceCallable(String sentence) {
this.sentence = sentence;
}
@Override
public AnalyzedSentence call() throws Exception {
return getAnalyzedSentence(sentence);
}
}
private final class ParagraphEndAnalyzeSentenceCallable extends AnalyzeSentenceCallable {
private ParagraphEndAnalyzeSentenceCallable(String sentence) {
super(sentence);
}
@Override
public AnalyzedSentence call() throws Exception {
AnalyzedSentence analyzedSentence = super.call();
AnalyzedTokenReadings[] anTokens = analyzedSentence.getTokens();
anTokens[anTokens.length - 1].setParagraphEnd();
analyzedSentence = new AnalyzedSentence(anTokens); ///TODO: why???
return analyzedSentence;
}
}
private static class DaemonThreadFactory implements ThreadFactory {
@Override
public Thread newThread(Runnable r) {
Thread thread = new Thread(r);
thread.setDaemon(true); // so we don't have to shut down executor explicitly
return thread;
}
}
}