opennlp.tools.doccat.DocumentCategorizer Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.doccat;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
/**
* Interface for classes which categorize documents.
*/
public interface DocumentCategorizer {
/**
* Categorize the given text provided as tokens along with
* the provided extra information
*
* @param text the tokens of text to categorize
* @param extraInformation extra information
* @return per category probabilities
*/
double[] categorize(String[] text, Map extraInformation);
/**
* Categorizes the given text, provided in separate tokens.
* @param text the tokens of text to categorize
* @return per category probabilities
*/
double[] categorize(String[] text);
/**
* get the best category from previously generated outcome probabilities
*
* @param outcome a vector of outcome probabilities
* @return the best category String
*/
String getBestCategory(double[] outcome);
/**
* get the index of a certain category
*
* @param category the category
* @return an index
*/
int getIndex(String category);
/**
* get the category at a given index
*
* @param index the index
* @return a category
*/
String getCategory(int index);
/**
* get the number of categories
*
* @return the no. of categories
*/
int getNumberOfCategories();
/**
* get the name of the category associated with the given probabilties
*
* @param results the probabilities of each category
* @return the name of the outcome
*/
String getAllResults(double[] results);
/**
* Returns a map in which the key is the category name and the value is the score
*
* @param text the input text to classify
* @return a map with the score as a key. The value is a Set of categories with the score.
*/
Map scoreMap(String[] text);
/**
* Get a map of the scores sorted in ascending aorder together with their associated categories.
* Many categories can have the same score, hence the Set as value
*
* @param text the input text to classify
* @return a map with the score as a key. The value is a Set of categories with the score.
*/
SortedMap> sortedScoreMap(String[] text);
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy