All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.languagetool.MultiThreadedJLanguageTool Maven / Gradle / Ivy

Go to download

LanguageTool is Open Source proofreading software for English, French, German, Polish, Romanian, and more than 20 other languages. It finds many errors that a simple spell checker cannot detect, such as mixing up there/their, and it detects some grammar problems.

There is a newer version: 6.5
Show newest version
/* LanguageTool, a natural language style checker
 * Copyright (C) 2013 Daniel Naber (http://www.danielnaber.de)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
package org.languagetool;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;

import org.languagetool.markup.AnnotatedText;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;

/**
 * A variant of {@link JLanguageTool} that uses several threads for rule matching.
 * Use this if you want text checking to be fast and do not care about the 
 * high load that this might cause. Call {@link #shutdown()} when you don't need
 * the object anymore.
 * 
 * 

Also see the javadoc of {@link JLanguageTool}.

* *

Thread-safety: this class is not thread-safe, see the remarks at {@link JLanguageTool}. */ public class MultiThreadedJLanguageTool extends JLanguageTool { private final int threadPoolSize; private final ExecutorService threadPool; public MultiThreadedJLanguageTool(Language language) { this(language, null); } /** * @see #shutdown() * @param threadPoolSize the number of concurrent threads * @since 2.9 */ public MultiThreadedJLanguageTool(Language language, int threadPoolSize) { this(language, null, threadPoolSize); } /** * @see #shutdown() */ public MultiThreadedJLanguageTool(Language language, Language motherTongue) { this(language, motherTongue, getDefaultThreadCount()); } /** * @see #shutdown() * @param threadPoolSize the number of concurrent threads * @since 2.9 */ public MultiThreadedJLanguageTool(Language language, Language motherTongue, int threadPoolSize) { super(language, motherTongue); this.threadPoolSize = threadPoolSize; threadPool = Executors.newFixedThreadPool(getThreadPoolSize(), new DaemonThreadFactory()); } /** * Call this to shut down the internally used thread pool. * @since 3.0 */ public void shutdown() { threadPool.shutdownNow(); } /** * Call this to shut down the internally used thread pool after all running tasks are finished. * @since 3.1 */ public void shutdownWhenDone() { threadPool.shutdown(); } private static int getDefaultThreadCount() { String threadCountStr = System.getProperty("org.languagetool.thread_count_internal", "-1"); int threadPoolSize = Integer.parseInt(threadCountStr); if (threadPoolSize == -1) { threadPoolSize = Runtime.getRuntime().availableProcessors(); } return threadPoolSize; } /** * When no thread pool size is configured, the number of available processors is returned. 
*/ protected int getThreadPoolSize() { return threadPoolSize; } /** * @return a fixed size executor with the given number of threads */ protected ExecutorService getExecutorService() { return threadPool; } @Override protected List analyzeSentences(List sentences) throws IOException { List analyzedSentences = new ArrayList<>(); ExecutorService executorService = getExecutorService(); int j = 0; List> callables = new ArrayList<>(); for (String sentence : sentences) { AnalyzeSentenceCallable analyzeSentenceCallable = ++j < sentences.size() ? new AnalyzeSentenceCallable(sentence) : new ParagraphEndAnalyzeSentenceCallable(sentence); callables.add(analyzeSentenceCallable); } try { List> futures = executorService.invokeAll(callables); for (Future future : futures) { AnalyzedSentence analyzedSentence = future.get(); rememberUnknownWords(analyzedSentence); printSentenceInfo(analyzedSentence); analyzedSentences.add(analyzedSentence); } } catch (InterruptedException | ExecutionException e) { throw new RuntimeException(e); } return analyzedSentences; } @Override protected List performCheck(List analyzedSentences, List sentences, List allRules, ParagraphHandling paraMode, AnnotatedText annotatedText, RuleMatchListener listener) throws IOException { int charCount = 0; int lineCount = 0; int columnCount = 1; List ruleMatches = new ArrayList<>(); ExecutorService executorService = getExecutorService(); try { List>> callables = createTextCheckCallables(paraMode, annotatedText, analyzedSentences, sentences, allRules, charCount, lineCount, columnCount, listener); List>> futures = executorService.invokeAll(callables); for (Future> future : futures) { ruleMatches.addAll(future.get()); } } catch (InterruptedException | ExecutionException e) { throw new RuntimeException(e); } return ruleMatches; } private List>> createTextCheckCallables(ParagraphHandling paraMode, AnnotatedText annotatedText, List analyzedSentences, List sentences, List allRules, int charCount, int lineCount, int 
columnCount, RuleMatchListener listener) { int threads = getThreadPoolSize(); int totalRules = allRules.size(); int chunkSize = totalRules / threads; int firstItem = 0; List>> callables = new ArrayList<>(); // split the rules - all rules are independent, so it makes more sense to split // the rules than to split the text: for (int i = 0; i < threads; i++) { List subRules; //TODO: make sure we don't split rules with same id so RuleGroupFilter still works if (i == threads - 1) { // make sure the last rules are not lost due to rounding issues: subRules = allRules.subList(firstItem, totalRules); } else { subRules = allRules.subList(firstItem, firstItem + chunkSize); } callables.add(new TextCheckCallable(subRules, sentences, analyzedSentences, paraMode, annotatedText, charCount, lineCount, columnCount, listener)); firstItem = firstItem + chunkSize; } return callables; } private class AnalyzeSentenceCallable implements Callable { private final String sentence; private AnalyzeSentenceCallable(String sentence) { this.sentence = sentence; } @Override public AnalyzedSentence call() throws Exception { return getAnalyzedSentence(sentence); } } private final class ParagraphEndAnalyzeSentenceCallable extends AnalyzeSentenceCallable { private ParagraphEndAnalyzeSentenceCallable(String sentence) { super(sentence); } @Override public AnalyzedSentence call() throws Exception { AnalyzedSentence analyzedSentence = super.call(); AnalyzedTokenReadings[] anTokens = analyzedSentence.getTokens(); anTokens[anTokens.length - 1].setParagraphEnd(); analyzedSentence = new AnalyzedSentence(anTokens); ///TODO: why??? return analyzedSentence; } } private static class DaemonThreadFactory implements ThreadFactory { @Override public Thread newThread(Runnable r) { Thread thread = new Thread(r); thread.setDaemon(true); // so we don't have to shut down executor explicitly return thread; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy