All Downloads are FREE. Search and download functionalities are using the official Maven repository.

opennlp.tools.cmdline.CLI Maven / Gradle / Ivy

There is a newer version: 2.5.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package opennlp.tools.cmdline;

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import opennlp.tools.cmdline.chunker.ChunkerConverterTool;
import opennlp.tools.cmdline.chunker.ChunkerCrossValidatorTool;
import opennlp.tools.cmdline.chunker.ChunkerEvaluatorTool;
import opennlp.tools.cmdline.chunker.ChunkerMETool;
import opennlp.tools.cmdline.chunker.ChunkerTrainerTool;
import opennlp.tools.cmdline.dictionary.DictionaryBuilderTool;
import opennlp.tools.cmdline.doccat.DoccatConverterTool;
import opennlp.tools.cmdline.doccat.DoccatCrossValidatorTool;
import opennlp.tools.cmdline.doccat.DoccatEvaluatorTool;
import opennlp.tools.cmdline.doccat.DoccatTool;
import opennlp.tools.cmdline.doccat.DoccatTrainerTool;
import opennlp.tools.cmdline.entitylinker.EntityLinkerTool;
import opennlp.tools.cmdline.langdetect.LanguageDetectorConverterTool;
import opennlp.tools.cmdline.langdetect.LanguageDetectorCrossValidatorTool;
import opennlp.tools.cmdline.langdetect.LanguageDetectorEvaluatorTool;
import opennlp.tools.cmdline.langdetect.LanguageDetectorTool;
import opennlp.tools.cmdline.langdetect.LanguageDetectorTrainerTool;
import opennlp.tools.cmdline.languagemodel.NGramLanguageModelTool;
import opennlp.tools.cmdline.lemmatizer.LemmatizerEvaluatorTool;
import opennlp.tools.cmdline.lemmatizer.LemmatizerMETool;
import opennlp.tools.cmdline.lemmatizer.LemmatizerTrainerTool;
import opennlp.tools.cmdline.namefind.CensusDictionaryCreatorTool;
import opennlp.tools.cmdline.namefind.TokenNameFinderConverterTool;
import opennlp.tools.cmdline.namefind.TokenNameFinderCrossValidatorTool;
import opennlp.tools.cmdline.namefind.TokenNameFinderEvaluatorTool;
import opennlp.tools.cmdline.namefind.TokenNameFinderTool;
import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
import opennlp.tools.cmdline.parser.BuildModelUpdaterTool;
import opennlp.tools.cmdline.parser.CheckModelUpdaterTool;
import opennlp.tools.cmdline.parser.ParserConverterTool;
import opennlp.tools.cmdline.parser.ParserEvaluatorTool;
import opennlp.tools.cmdline.parser.ParserTool;
import opennlp.tools.cmdline.parser.ParserTrainerTool;
import opennlp.tools.cmdline.parser.TaggerModelReplacerTool;
import opennlp.tools.cmdline.postag.POSTaggerConverterTool;
import opennlp.tools.cmdline.postag.POSTaggerCrossValidatorTool;
import opennlp.tools.cmdline.postag.POSTaggerEvaluatorTool;
import opennlp.tools.cmdline.postag.POSTaggerTrainerTool;
import opennlp.tools.cmdline.sentdetect.SentenceDetectorConverterTool;
import opennlp.tools.cmdline.sentdetect.SentenceDetectorCrossValidatorTool;
import opennlp.tools.cmdline.sentdetect.SentenceDetectorEvaluatorTool;
import opennlp.tools.cmdline.sentdetect.SentenceDetectorTool;
import opennlp.tools.cmdline.sentdetect.SentenceDetectorTrainerTool;
import opennlp.tools.cmdline.tokenizer.DictionaryDetokenizerTool;
import opennlp.tools.cmdline.tokenizer.SimpleTokenizerTool;
import opennlp.tools.cmdline.tokenizer.TokenizerConverterTool;
import opennlp.tools.cmdline.tokenizer.TokenizerCrossValidatorTool;
import opennlp.tools.cmdline.tokenizer.TokenizerMEEvaluatorTool;
import opennlp.tools.cmdline.tokenizer.TokenizerMETool;
import opennlp.tools.cmdline.tokenizer.TokenizerTrainerTool;
import opennlp.tools.util.Version;

/**
 * Command line entry point which dispatches to one of the registered
 * {@link CmdLineTool} instances.
 * <p>
 * The first program argument selects the tool by name. It may carry a format
 * suffix separated by a dot ({@code TOOL.format}); the remaining arguments are
 * handed to the selected tool.
 */
public final class CLI {

  private static final Logger logger = LoggerFactory.getLogger(CLI.class);

  /** Name of the command as it is printed in the usage message. */
  public static final String CMD = "opennlp";

  /** Number of spaces between the longest tool name and its description. */
  private static final int DESCRIPTION_GAP = 4;

  /** Read only lookup from tool name to tool instance, in registration order. */
  private static final Map<String, CmdLineTool> toolLookupMap;

  static {
    final List<CmdLineTool> tools = new LinkedList<>();

    // Document Categorizer
    tools.add(new DoccatTool());
    tools.add(new DoccatTrainerTool());
    tools.add(new DoccatEvaluatorTool());
    tools.add(new DoccatCrossValidatorTool());
    tools.add(new DoccatConverterTool());

    // Language Detector
    tools.add(new LanguageDetectorTool());
    tools.add(new LanguageDetectorTrainerTool());
    tools.add(new LanguageDetectorConverterTool());
    tools.add(new LanguageDetectorCrossValidatorTool());
    tools.add(new LanguageDetectorEvaluatorTool());

    // Dictionary Builder
    tools.add(new DictionaryBuilderTool());

    // Tokenizer
    tools.add(new SimpleTokenizerTool());
    tools.add(new TokenizerMETool());
    tools.add(new TokenizerTrainerTool());
    tools.add(new TokenizerMEEvaluatorTool());
    tools.add(new TokenizerCrossValidatorTool());
    tools.add(new TokenizerConverterTool());
    tools.add(new DictionaryDetokenizerTool());

    // Sentence detector
    tools.add(new SentenceDetectorTool());
    tools.add(new SentenceDetectorTrainerTool());
    tools.add(new SentenceDetectorEvaluatorTool());
    tools.add(new SentenceDetectorCrossValidatorTool());
    tools.add(new SentenceDetectorConverterTool());

    // Name Finder
    tools.add(new TokenNameFinderTool());
    tools.add(new TokenNameFinderTrainerTool());
    tools.add(new TokenNameFinderEvaluatorTool());
    tools.add(new TokenNameFinderCrossValidatorTool());
    tools.add(new TokenNameFinderConverterTool());
    tools.add(new CensusDictionaryCreatorTool());

    // POS Tagger
    tools.add(new opennlp.tools.cmdline.postag.POSTaggerTool());
    tools.add(new POSTaggerTrainerTool());
    tools.add(new POSTaggerEvaluatorTool());
    tools.add(new POSTaggerCrossValidatorTool());
    tools.add(new POSTaggerConverterTool());

    // Lemmatizer
    tools.add(new LemmatizerMETool());
    tools.add(new LemmatizerTrainerTool());
    tools.add(new LemmatizerEvaluatorTool());

    // Chunker
    tools.add(new ChunkerMETool());
    tools.add(new ChunkerTrainerTool());
    tools.add(new ChunkerEvaluatorTool());
    tools.add(new ChunkerCrossValidatorTool());
    tools.add(new ChunkerConverterTool());

    // Parser
    tools.add(new ParserTool());
    tools.add(new ParserTrainerTool()); // trains everything
    tools.add(new ParserEvaluatorTool());
    tools.add(new ParserConverterTool());
    tools.add(new BuildModelUpdaterTool()); // re-trains build model
    tools.add(new CheckModelUpdaterTool()); // re-trains check model
    tools.add(new TaggerModelReplacerTool());

    // Entity Linker
    tools.add(new EntityLinkerTool());

    // Language Model
    tools.add(new NGramLanguageModelTool());

    // A LinkedHashMap keeps the registration order, which is the order in
    // which the tools are listed by usage().
    final Map<String, CmdLineTool> lookup = new LinkedHashMap<>();
    for (CmdLineTool tool : tools) {
      lookup.put(tool.getName(), tool);
    }

    toolLookupMap = Collections.unmodifiableMap(lookup);
  }

  /**
   * @return a set which contains all tool names
   */
  public static Set<String> getToolNames() {
    return toolLookupMap.keySet();
  }

  /**
   * @return a read only map with tool names and instances
   */
  public static Map<String, CmdLineTool> getToolLookupMap() {
    return toolLookupMap;
  }

  /**
   * Logs the version, the general usage line and a table of all registered
   * tools with their short descriptions.
   */
  private static void usage() {
    logger.info("OpenNLP {}.", Version.currentVersion());
    logger.info("Usage: {} TOOL", CMD);

    // The description column starts after the longest tool name plus a fixed gap.
    int longestName = 0;
    for (String toolName : toolLookupMap.keySet()) {
      longestName = Math.max(longestName, toolName.length());
    }
    final int columnWidth = longestName + DESCRIPTION_GAP;

    final StringBuilder sb = new StringBuilder("where TOOL is one of: \n\n");
    for (CmdLineTool tool : toolLookupMap.values()) {
      final String name = tool.getName();

      sb.append("  ").append(name);
      // columnWidth exceeds every name length, so the padding is never negative
      sb.append(" ".repeat(columnWidth - name.length()));
      sb.append(tool.getShortDescription()).append("\n");
    }
    logger.info(sb.toString());

    logger.info("All tools print help when invoked with help parameter");
    logger.info("Example: {} SimpleTokenizer help", CMD);
  }

  /**
   * Runs the tool named by the first argument with the remaining arguments.
   * <p>
   * Without any argument the usage message is logged and the JVM exits with
   * status 0. The help of the selected tool is logged, followed by an exit with
   * status 0, when {@code help} is the first tool argument or when no tool
   * arguments are given and the tool reports that it has parameters. A
   * {@link TerminateToolException} is logged and its code is used as the exit
   * status. After a regular run the execution time is logged.
   *
   * @param args the tool name, optionally suffixed with {@code .format},
   *             followed by the arguments for that tool
   */
  public static void main(String[] args) {

    if (args.length == 0) {
      usage();
      System.exit(0);
    }

    final long startTime = System.currentTimeMillis();

    // everything after the tool name belongs to the tool
    String[] toolArguments = new String[args.length - 1];
    System.arraycopy(args, 1, toolArguments, 0, toolArguments.length);

    String toolName = args[0];

    // split an optional format suffix off the tool name ("TOOL.format")
    String formatName = StreamFactoryRegistry.DEFAULT_FORMAT;
    final int idx = toolName.indexOf('.');
    if (-1 < idx) {
      formatName = toolName.substring(idx + 1);
      toolName = toolName.substring(0, idx);
    }
    final CmdLineTool tool = toolLookupMap.get(toolName);

    try {
      if (null == tool) {
        throw new TerminateToolException(1, "Tool " + toolName + " is not found.");
      }

      final boolean missingArguments = (0 == toolArguments.length) && tool.hasParams();
      final boolean helpRequested =
          (0 < toolArguments.length) && "help".equals(toolArguments[0]);

      if (missingArguments || helpRequested) {
        if (tool instanceof TypedCmdLineTool) {
          logger.info(((TypedCmdLineTool) tool).getHelp(formatName));
        } else if (tool instanceof BasicCmdLineTool) {
          logger.info(tool.getHelp());
        }

        System.exit(0);
      }

      if (tool instanceof TypedCmdLineTool) {
        ((TypedCmdLineTool) tool).run(formatName, toolArguments);
      } else if (tool instanceof BasicCmdLineTool) {
        // a format suffix is only accepted for typed tools
        if (-1 == idx) {
          ((BasicCmdLineTool) tool).run(toolArguments);
        } else {
          throw new TerminateToolException(1, "Tool " + toolName + " does not support formats.");
        }
      } else {
        throw new TerminateToolException(1, "Tool " + toolName + " is not supported.");
      }
    }
    catch (TerminateToolException e) {
      logger.error(e.getLocalizedMessage(), e);
      System.exit(e.getCode());
    }

    final long endTime = System.currentTimeMillis();
    logger.info(String.format("Execution time: %.3f seconds\n", (endTime - startTime) / 1000.0));
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy