org.opencastproject.dictionary.hunspell.DictionaryServiceImpl Maven / Gradle / Ivy
The newest version!
/*
* Licensed to The Apereo Foundation under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
*
* The Apereo Foundation licenses this file to you under the Educational
* Community License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of the License
* at:
*
* http://opensource.org/licenses/ecl2.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
*/
package org.opencastproject.dictionary.hunspell;
import static org.opencastproject.util.ReadinessIndicator.ARTIFACT;
import org.opencastproject.dictionary.api.DictionaryService;
import org.opencastproject.metadata.mpeg7.Textual;
import org.opencastproject.metadata.mpeg7.TextualImpl;
import org.opencastproject.util.ReadinessIndicator;
import org.apache.commons.lang3.StringUtils;
import org.osgi.framework.BundleContext;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.Dictionary;
import java.util.Hashtable;
import java.util.LinkedList;
/**
 * This dictionary service implementation passes the input text
 * to the hunspell spell checker and returns its results.
 *
 * <p>The hunspell binary and its command line options have built-in defaults
 * and can be overridden either through the setters or, on activation, through
 * the bundle context properties {@link #HUNSPELL_BINARY_CONFIG_KEY} and
 * {@link #HUNSPELL_COMMAND_CONFIG_KEY}.
 */
@Component(
    immediate = true,
    service = DictionaryService.class,
    property = {
        "service.description=Dictionary Service"
    }
)
public class DictionaryServiceImpl implements DictionaryService {

  /** The logging facility */
  private static final Logger logger =
      LoggerFactory.getLogger(DictionaryServiceImpl.class);

  /** Bundle context property that overrides the hunspell binary */
  public static final String HUNSPELL_BINARY_CONFIG_KEY =
      "org.opencastproject.dictionary.hunspell.binary";

  /** Bundle context property that overrides the hunspell command line options */
  public static final String HUNSPELL_COMMAND_CONFIG_KEY =
      "org.opencastproject.dictionary.hunspell.command";

  /* The hunspell binary to execute */
  private String binary = "hunspell";

  /* The regular command line options for filtering */
  private String command = " -i utf-8 -d de_DE,en_GB,en_US -G";

  /**
   * Set the hunspell binary to execute.
   *
   * @param b the binary name or path
   */
  public void setBinary(String b) {
    binary = b;
  }

  /**
   * @return the hunspell binary that will be executed
   */
  public String getBinary() {
    return binary;
  }

  /**
   * Set the command line options passed to hunspell.
   *
   * @param c the whitespace separated options
   */
  public void setCommand(String c) {
    command = c;
  }

  /**
   * @return the command line options passed to hunspell
   */
  public String getCommand() {
    return command;
  }

  /**
   * OSGi callback on component activation. Registers a
   * {@link ReadinessIndicator} for the "dictionary" artifact and applies the
   * binary / command overrides found in the bundle context, if any.
   *
   * @param ctx the bundle context
   */
  @Activate
  void activate(BundleContext ctx) throws UnsupportedEncodingException {
    Dictionary<String, String> properties = new Hashtable<>();
    properties.put(ARTIFACT, "dictionary");
    ctx.registerService(ReadinessIndicator.class.getName(),
        new ReadinessIndicator(), properties);

    /* Get hunspell binary from config file */
    String configuredBinary = ctx.getProperty(HUNSPELL_BINARY_CONFIG_KEY);
    if (configuredBinary != null) {
      logger.info("Setting hunspell binary to '{}'", configuredBinary);
      this.binary = configuredBinary;
    }

    /* Get hunspell command line options from config file */
    String configuredCommand = ctx.getProperty(HUNSPELL_COMMAND_CONFIG_KEY);
    if (configuredCommand != null) {
      logger.info("Setting hunspell command line options to '{}'", configuredCommand);
      this.command = configuredCommand;
    }
  }

  /**
   * Run hunspell with text as input.
   *
   * <p>The command line is built from the configured binary and options and
   * split on whitespace, so neither may contain arguments with embedded
   * spaces. The text is written to the process' standard input as UTF-8,
   * standard output is read as UTF-8 and returned line by line, and every
   * line of standard error is logged as a warning.
   *
   * <p>NOTE(review): the input is written completely before any output is
   * read, so a very large input could presumably block once the pipe buffers
   * between the two processes are full.
   *
   * @param text the text to pipe through hunspell
   * @return the lines hunspell printed on standard output
   * @throws IllegalStateException if hunspell exits with a non-zero code
   * @throws Throwable on any I/O problem or if the thread is interrupted
   *           while waiting for hunspell to finish
   **/
  public LinkedList<String> runHunspell(String text) throws Throwable {
    // trim() guards against an empty first argument caused by leading blanks
    String commandLine = binary + ' ' + command;
    String[] commandList = commandLine.trim().split("\\s+");
    LinkedList<String> words = new LinkedList<>();

    logger.info("Executing hunspell command '{}'", StringUtils.join(commandList, " "));
    Process p = new ProcessBuilder(commandList).start();
    try {
      /* Pipe text through hunspell for filtering; closing stdin signals EOF */
      try (OutputStream stdin = p.getOutputStream()) {
        stdin.write(text.getBytes("UTF-8"));
        stdin.flush();
      }

      /* Get output of hunspell */
      String line;
      try (InputStream stdout = p.getInputStream();
           BufferedReader reader = new BufferedReader(new InputStreamReader(stdout, "UTF-8"))) {
        while ((line = reader.readLine()) != null) {
          words.add(line);
        }
      }

      /* Get error messages */
      try (InputStream stderr = p.getErrorStream();
           BufferedReader reader = new BufferedReader(new InputStreamReader(stderr))) {
        while ((line = reader.readLine()) != null) {
          logger.warn(line);
        }
      }

      if (p.waitFor() != 0) {
        logger.error("Hunspell reported an error (Missing dictionaries?)");
        throw new IllegalStateException("Hunspell returned error code");
      }
      return words;
    } finally {
      // Has no effect once hunspell has exited; on any failure above it makes
      // sure the child process and its pipes do not outlive this call.
      p.destroy();
    }
  }

  /**
   * Filter the text according to the rules defined by the dictionary
   * implementation used. This implementation pipes the text through hunspell
   * (see {@link #runHunspell(String)}) and joins the returned lines with
   * single spaces.
   *
   * @param text the text to filter
   * @return filtered text, or <code>null</code> if the input is
   *         <code>null</code> or empty, if hunspell could not be executed
   *         or if nothing is left after filtering
   **/
  @Override
  public Textual cleanUpText(String text) {
    // Nothing to filter: skip spawning a process that cannot yield a result
    if (StringUtils.isEmpty(text)) {
      return null;
    }

    LinkedList<String> words;
    try {
      words = runHunspell(text);
    } catch (Throwable t) {
      if (t instanceof InterruptedException) {
        // Keep the interrupt visible to the caller instead of swallowing it
        Thread.currentThread().interrupt();
      }
      logger.error("Error executing hunspell", t);
      return null;
    }

    String result = StringUtils.join(words, " ");
    if ("".equals(result)) {
      return null;
    }
    return new TextualImpl(result);
  }
}