All downloads are free. The search and download functionality uses the official Maven repository.

opennlp.tools.sentdetect.lang.Factory Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.sentdetect.lang;

import java.util.Collections;
import java.util.Set;

import opennlp.tools.sentdetect.DefaultEndOfSentenceScanner;
import opennlp.tools.sentdetect.DefaultSDContextGenerator;
import opennlp.tools.sentdetect.EndOfSentenceScanner;
import opennlp.tools.sentdetect.SDContextGenerator;
import opennlp.tools.sentdetect.lang.th.SentenceContextGenerator;

/**
 * Creates the language-specific building blocks used for sentence detection:
 * {@link EndOfSentenceScanner} and {@link SDContextGenerator} instances, plus the
 * end-of-sentence (EOS) character sets they are configured with.
 *
 * <p>Language codes are compared with {@link String#equals(Object)}, so matching is
 * case-sensitive; a {@code null} or unrecognized code falls back to the defaults.
 */
public class Factory {

  /** EOS characters used for the language codes {@code "pt"} and {@code "por"}. */
  public static final char[] ptEosCharacters = new char[] { '.', '?', '!', ';',
      ':', '(', ')', '«', '»', '\'', '"' };

  /** EOS characters used when a language code has no dedicated character set. */
  public static final char[] defaultEosCharacters = new char[] { '.', '!', '?' };

  /** EOS characters used for the language codes {@code "th"} and {@code "tha"}. */
  public static final char[] thEosCharacters = new char[] { ' ','\n' };

  /** EOS characters used for the language codes {@code "jp"} and {@code "jpn"}. */
  public static final char[] jpEosCharacters = new char[] {'。', '!', '?'};

  /**
   * Creates a scanner configured with the EOS characters of the given language.
   *
   * @param languageCode the language code, resolved via {@link #getEOSCharacters(String)}
   * @return a new {@link DefaultEndOfSentenceScanner}
   */
  public EndOfSentenceScanner createEndOfSentenceScanner(String languageCode) {
    return new DefaultEndOfSentenceScanner(getEOSCharacters(languageCode));
  }

  /**
   * Creates a scanner configured with caller-supplied EOS characters.
   *
   * @param customEOSCharacters the EOS characters handed to the scanner as-is
   * @return a new {@link DefaultEndOfSentenceScanner}
   */
  public EndOfSentenceScanner createEndOfSentenceScanner(
      char[] customEOSCharacters) {
    return new DefaultEndOfSentenceScanner(customEOSCharacters);
  }

  /**
   * Creates a context generator for the given language.
   *
   * <ul>
   *   <li>{@code "th"} / {@code "tha"}: the Thai-specific
   *       {@link SentenceContextGenerator}; {@code abbreviations} is not used.</li>
   *   <li>{@code "pt"} / {@code "por"}: a {@link DefaultSDContextGenerator} with
   *       {@link #ptEosCharacters}.</li>
   *   <li>anything else: a {@link DefaultSDContextGenerator} with
   *       {@link #defaultEosCharacters}.</li>
   * </ul>
   *
   * <p>NOTE(review): unlike {@link #getEOSCharacters(String)}, {@code "jp"} /
   * {@code "jpn"} are not special-cased here and therefore get
   * {@link #defaultEosCharacters}. Confirm whether that is intentional before
   * changing it, since it alters the characters the generator is constructed with.
   *
   * @param languageCode the language code
   * @param abbreviations the abbreviations passed to the default context generator
   * @return a new context generator
   */
  public SDContextGenerator createSentenceContextGenerator(String languageCode,
      Set<String> abbreviations) {

    if ("th".equals(languageCode) || "tha".equals(languageCode)) {
      return new SentenceContextGenerator();
    } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
      return new DefaultSDContextGenerator(abbreviations, ptEosCharacters);
    }

    return new DefaultSDContextGenerator(abbreviations, defaultEosCharacters);
  }

  /**
   * Creates a default context generator with caller-supplied EOS characters.
   *
   * @param abbreviations the abbreviations passed to the context generator
   * @param customEOSCharacters the EOS characters passed to the context generator
   * @return a new {@link DefaultSDContextGenerator}
   */
  public SDContextGenerator createSentenceContextGenerator(
      Set<String> abbreviations, char[] customEOSCharacters) {
    return new DefaultSDContextGenerator(abbreviations, customEOSCharacters);
  }

  /**
   * Creates a context generator for the given language with an empty abbreviation set.
   *
   * @param languageCode the language code
   * @return the result of {@link #createSentenceContextGenerator(String, Set)} called
   *     with an empty set
   */
  public SDContextGenerator createSentenceContextGenerator(String languageCode) {
    // Explicit type witness keeps the call typed without relying on target-type inference.
    return createSentenceContextGenerator(languageCode, Collections.<String>emptySet());
  }

  /**
   * Returns the EOS characters for the given language code.
   *
   * <p>The returned array is one of the shared public constants of this class, not a
   * copy, so modifying it modifies that constant.
   *
   * @param languageCode the language code
   * @return {@link #thEosCharacters} for {@code "th"}/{@code "tha"},
   *     {@link #ptEosCharacters} for {@code "pt"}/{@code "por"},
   *     {@link #jpEosCharacters} for {@code "jp"}/{@code "jpn"},
   *     otherwise {@link #defaultEosCharacters}
   */
  public char[] getEOSCharacters(String languageCode) {
    if ("th".equals(languageCode) || "tha".equals(languageCode)) {
      return thEosCharacters;
    } else if ("pt".equals(languageCode) || "por".equals(languageCode)) {
      return ptEosCharacters;
    } else if ("jp".equals(languageCode) || "jpn".equals(languageCode)) {
      return jpEosCharacters;
    }

    return defaultEosCharacters;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy