All downloads are free. The search and download functionality uses the official Maven repository.

org.opencastproject.dictionary.hunspell.DictionaryServiceImpl Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to The Apereo Foundation under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 *
 * The Apereo Foundation licenses this file to you under the Educational
 * Community License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License
 * at:
 *
 *   http://opensource.org/licenses/ecl2.txt
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations under
 * the License.
 *
 */

package org.opencastproject.dictionary.hunspell;

import static org.opencastproject.util.ReadinessIndicator.ARTIFACT;

import org.opencastproject.dictionary.api.DictionaryService;
import org.opencastproject.metadata.mpeg7.Textual;
import org.opencastproject.metadata.mpeg7.TextualImpl;
import org.opencastproject.util.ReadinessIndicator;

import org.apache.commons.lang3.StringUtils;
import org.osgi.framework.BundleContext;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.Dictionary;
import java.util.Hashtable;
import java.util.LinkedList;

/**
 * Dictionary service implementation that pipes the input text through the
 * external hunspell spell checker and returns whatever hunspell prints.
 *
 * <p>The executable and its command line options default to the values of the
 * {@code binary} and {@code command} fields. They can be overridden on
 * activation through the bundle context properties
 * {@link #HUNSPELL_BINARY_CONFIG_KEY} and {@link #HUNSPELL_COMMAND_CONFIG_KEY},
 * or programmatically through the setters.
 */
@Component(
    immediate = true,
    service = DictionaryService.class,
    property = {
        "service.description=Dictionary Service"
    }
)
public class DictionaryServiceImpl implements DictionaryService {

  /** The logging facility */
  private static final Logger logger =
      LoggerFactory.getLogger(DictionaryServiceImpl.class);

  /** Bundle context property that overrides the hunspell executable. */
  public static final String HUNSPELL_BINARY_CONFIG_KEY =
      "org.opencastproject.dictionary.hunspell.binary";

  /** Bundle context property that overrides the hunspell command line options. */
  public static final String HUNSPELL_COMMAND_CONFIG_KEY =
      "org.opencastproject.dictionary.hunspell.command";

  /* The hunspell binary to execute */
  private String binary = "hunspell";

  /*
   * The regular command line options for filtering. Per the hunspell manual:
   * -i sets the input encoding, -d selects the dictionaries and -G makes
   * hunspell print only the correct words or lines.
   */
  private String command = " -i utf-8 -d de_DE,en_GB,en_US -G";

  /**
   * Sets the hunspell executable to run.
   *
   * @param b name of or path to the hunspell binary
   */
  public void setBinary(String b) {
    binary = b;
  }

  /**
   * @return the hunspell executable that will be run
   */
  public String getBinary() {
    return binary;
  }

  /**
   * Sets the command line options passed to hunspell. The string is split on
   * whitespace, so individual options must not contain blanks.
   *
   * @param c whitespace separated command line options
   */
  public void setCommand(String c) {
    command = c;
  }

  /**
   * @return the whitespace separated command line options passed to hunspell
   */
  public String getCommand() {
    return command;
  }

  /**
   * OSGi callback on component activation. Registers a
   * {@link ReadinessIndicator} for the "dictionary" artifact and picks up the
   * optional binary / command line overrides from the bundle context.
   *
   * @param  ctx  the bundle context
   */
  @Activate
  void activate(BundleContext ctx) throws UnsupportedEncodingException {
    Dictionary<String, String> properties = new Hashtable<>();
    properties.put(ARTIFACT, "dictionary");
    ctx.registerService(ReadinessIndicator.class.getName(),
        new ReadinessIndicator(), properties);

    /* Get hunspell binary from config file */
    String configuredBinary = ctx.getProperty(HUNSPELL_BINARY_CONFIG_KEY);
    if (configuredBinary != null) {
      logger.info("Setting hunspell binary to '{}'", configuredBinary);
      this.binary = configuredBinary;
    }

    /* Get hunspell command line options from config file */
    String configuredCommand = ctx.getProperty(HUNSPELL_COMMAND_CONFIG_KEY);
    if (configuredCommand != null) {
      logger.info("Setting hunspell command line options to '{}'", configuredCommand);
      this.command = configuredCommand;
    }
  }


  /**
   * Run hunspell with text as input.
   *
   * <p>The text is written to the standard input of the hunspell process as
   * UTF-8 and the standard output is read back as UTF-8, one list entry per
   * output line. Everything hunspell prints on standard error is logged as a
   * warning. The three pipes are served concurrently so that neither side can
   * block on a full pipe buffer.
   *
   * @param text the text to pass to hunspell
   * @return the lines hunspell printed on standard output
   * @throws IllegalStateException if hunspell exits with a non-zero exit code
   * @throws Throwable on any other failure, e.g. an {@link IOException} when
   *         the process cannot be started or talked to, or an
   *         {@link InterruptedException} when the calling thread is
   *         interrupted while waiting for hunspell
   **/
  public LinkedList<String> runHunspell(String text) throws Throwable {

    // Encode first: a null text must fail before a process has been spawned
    final byte[] input = text.getBytes(StandardCharsets.UTF_8);

    // trim() avoids an empty first argument if the binary setting starts with whitespace
    String commandLine = binary + ' ' + command;
    String[] commandList = commandLine.trim().split("\\s+");

    logger.info("Executing hunspell command '{}'", StringUtils.join(commandList, " "));
    final Process p = new ProcessBuilder(commandList).start();

    final LinkedList<String> words = new LinkedList<>();
    // Written by the feeder thread only, read after join() (which orders the accesses)
    final IOException[] writeFailure = new IOException[1];

    /* Pipe text through hunspell for filtering */
    Thread feeder = new Thread(() -> {
      try (OutputStream stdin = p.getOutputStream()) {
        stdin.write(input);
        stdin.flush();
      } catch (IOException e) {
        writeFailure[0] = e;
      }
    }, "hunspell-stdin");
    feeder.setDaemon(true);

    /* Get error messages */
    Thread errorLogger = new Thread(() -> logErrorOutput(p.getErrorStream()), "hunspell-stderr");
    errorLogger.setDaemon(true);

    try {
      feeder.start();
      errorLogger.start();

      /* Get output of hunspell */
      try (BufferedReader stdout = new BufferedReader(
          new InputStreamReader(p.getInputStream(), StandardCharsets.UTF_8))) {
        String line;
        while ((line = stdout.readLine()) != null) {
          words.add(line);
        }
      }

      feeder.join();
      errorLogger.join();

      if (p.waitFor() != 0) {
        logger.error("Hunspell reported an error (Missing dictionaries?)");
        IllegalStateException failure = new IllegalStateException("Hunspell returned error code");
        if (writeFailure[0] != null) {
          failure.addSuppressed(writeFailure[0]);
        }
        throw failure;
      }
      if (writeFailure[0] != null) {
        throw writeFailure[0];
      }

      return words;
    } finally {
      // No-op after a regular exit; on failure or interruption this kills the
      // process, which also closes its pipes and lets the helper threads finish.
      p.destroy();
    }
  }


  /**
   * Logs every line of the given hunspell error stream as a warning and closes
   * the stream afterwards.
   *
   * @param stderr the standard error stream of the hunspell process
   */
  private static void logErrorOutput(InputStream stderr) {
    // Decoded with the platform default charset, exactly as before
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stderr))) {
      String line;
      while ((line = reader.readLine()) != null) {
        logger.warn(line);
      }
    } catch (IOException e) {
      logger.debug("Unable to read hunspell error output", e);
    }
  }


  /**
   * Filter the text according to the rules defined by the dictionary
   * implementation used. This implementation sends the text through hunspell
   * (see {@link #runHunspell(String)}) and joins the returned lines with
   * single spaces.
   *
   * @param text the text to filter
   * @return filtered text, or {@code null} if hunspell could not be executed
   *         or did not return any text
   **/
  @Override
  public Textual cleanUpText(String text) {

    LinkedList<String> words;

    try {
      words = runHunspell(text);
    } catch (Throwable t) {
      if (t instanceof InterruptedException) {
        // Keep the interruption visible to the caller's thread
        Thread.currentThread().interrupt();
      }
      logger.error("Error executing hunspell", t);
      return null;
    }

    String result = StringUtils.join(words, " ");
    if ("".equals(result)) {
      return null;
    }
    return new TextualImpl(result);
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy