All downloads are free. The search and download functionality uses the official Maven repository.

com.hpe.caf.languagedetection.cld2.Cld2Result Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015-2024 Open Text.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hpe.caf.languagedetection.cld2;

import java.util.ArrayList;

/**
 * Main result class of the Cld2 implementation of the language detector.
 *
 * <p>
 * An instance carries the settings handed to the detector (plain-text switch, flags and the three hints) together with the arrays and
 * values that describe the detection outcome (top three languages, their confidences, text byte count, reliability, language codes and
 * language names).
 *
 * <p>
 * All array accessors hand out and store the very array instance they are given; no copies are made. NOTE(review): presumably this is
 * so the detector can fill the arrays in place - confirm against the caller before introducing defensive copies.
 */
public class Cld2Result
{
    /**
     * plain text will be true for CAF
     */
    private boolean isPlainText;

    /**
     * most appropriate flag could be kCLDFlagBestEffort = 0x4000, instead of UNKNOWN_LANGUAGE it will return a best answer, useful for
     * short text.
     *
     * <pre>{@code
     * // Public use flags, debug output controls
     * static const int kCLDFlagScoreAsQuads = 0x0100;  // Force Greek, etc. => quads
     * static const int kCLDFlagHtml =         0x0200;  // Debug HTML => stderr
     * static const int kCLDFlagCr =           0x0400;  // New line per chunk if HTML
     * static const int kCLDFlagVerbose =      0x0800;  // More debug HTML => stderr
     * static const int kCLDFlagQuiet =        0x1000;  // Less debug HTML => stderr
     * static const int kCLDFlagEcho =         0x2000;  // Echo input => stderr
     * static const int kCLDFlagBestEffort =   0x4000;  // Give best-effort answer, even on short text
     *
     * Flag meanings:
     * kCLDFlagScoreAsQuads
     *   Normally, several languages are detected solely by their Unicode script.
     *   Combined with appropriate lookup tables, this flag forces them instead
     *   to be detected via quadgrams. This can be a useful refinement when looking
     *   for meaningful text in these languages, instead of just character sets.
     *   The default tables do not support this use.
     * kCLDFlagHtml
     *   For each detection call, write an HTML file to stderr, showing the text
     *   chunks and their detected languages.
     * kCLDFlagCr
     *   In that HTML file, force a new line for each chunk.
     * kCLDFlagVerbose
     *   In that HTML file, show every lookup entry.
     * kCLDFlagQuiet
     *   In that HTML file, suppress most of the output detail.
     * kCLDFlagEcho
     *   Echo every input buffer to stderr.
     * kCLDFlagBestEffort
     *   Give best-effort answer, instead of UNKNOWN_LANGUAGE. May be useful for
     *   short text if the caller prefers an approximate answer over none.
     * }</pre>
     */
    private int flags;

    /**
     * array for the top 3 languages output by the detector
     */
    private int[] language3;

    /**
     * array for the top 3 languages' percent confidences of being correct
     */
    private int[] percent3;

    /**
     * output number of non-tag/letters-only text found
     */
    private int[] textBytes;

    /**
     * is_reliable set true if the returned Language is some amount more probable than the second-best Language. Calculation is a complex
     * function of the length of the text and the different-script runs of text.
     */
    private boolean[] isReliable;

    /**
     * detector hint, such as "en" "en,it", ENGLISH
     */
    private String tld_hint;

    /**
     * encoding hint, is from an encoding detector applied to an input
     */
    private int encoding_hint;

    /**
     * integer values from Cld2Language. For CAF we will pass in tld_hint
     */
    private int language_hint;

    /**
     * Array containing the ISO 639-1 codes for the top three languages detected
     */
    private String[] languageCodes;

    /**
     * array containing the full names for the top three languages detected e.g. "English" "French"
     */
    private String[] languageNames;

    /**
     * validity marker for this result; initialised to true by the constructor and otherwise only changed through
     * {@link #setValid(boolean)}
     */
    private boolean valid;

    /**
     * constructor setting up default values.
     *
     * <pre>
     * plaintext will be true for CAF
     * flags default set to 0
     * if there are no hints:
     * tld_hint is required to be null,
     * encoding_hint is required to be the integer value of UNKNOWN_ENCODING
     * language_hint is required to be the integer value of UNKNOWN_LANGUAGE
     * </pre>
     */
    public Cld2Result()
    {
        this.isPlainText = true;
        this.flags = 0;

        // Every language slot starts out as UNKNOWN_LANGUAGE.
        language3 = new int[]{Cld2Language.UNKNOWN_LANGUAGE, Cld2Language.UNKNOWN_LANGUAGE, Cld2Language.UNKNOWN_LANGUAGE};
        percent3 = new int[3];

        // Single-element arrays for the text byte count and the reliability indicator.
        textBytes = new int[1];
        isReliable = new boolean[1];

        // "No hint" defaults.
        tld_hint = null;
        encoding_hint = Cld2Encoding.UNKNOWN_ENCODING.getValue();
        language_hint = Cld2Language.UNKNOWN_LANGUAGE;

        languageCodes = new String[3];
        languageNames = new String[3];
        valid = true;
    }

    /**
     * @return the stored language code array (the internal array itself, not a copy)
     */
    public String[] getLanguageCodes()
    {
        return languageCodes;
    }

    /**
     * @return the stored language name array (the internal array itself, not a copy)
     */
    public String[] getLanguageNames()
    {
        return languageNames;
    }

    /**
     * @return the percent confidence array; this is the same array that {@link #getPercent3()} returns
     */
    public int[] getConfidences()
    {
        return percent3;
    }

    /**
     * convert an integer array to an array list of strings (for percent3 and language3)
     *
     * @param iArray the values to convert; must not be null
     * @return a new list holding the decimal string form of each element, in the same order
     */
    private ArrayList<String> convertToStringArrayList(int[] iArray)
    {
        // Typed list instead of a raw ArrayList; Integer.toString avoids boxing each element.
        ArrayList<String> strings = new ArrayList<String>(iArray.length);
        for (int value : iArray) {
            strings.add(Integer.toString(value));
        }
        return strings;
    }

    /**
     * @return the plain-text setting
     */
    public boolean isPlainText()
    {
        return isPlainText;
    }

    /**
     * @param plainText the plain-text setting to store
     */
    public void setIsPlainText(boolean plainText)
    {
        isPlainText = plainText;
    }

    /**
     * @return the detector flags value
     */
    public int getFlags()
    {
        return flags;
    }

    /**
     * @param flags the detector flags value to store
     */
    public void setFlags(int flags)
    {
        this.flags = flags;
    }

    /**
     * @return the top three languages array (the internal array itself, not a copy)
     */
    public int[] getLanguage3()
    {
        return language3;
    }

    /**
     * @param language3 the array to store as the top three languages; kept by reference
     */
    public void setLanguage3(int[] language3)
    {
        this.language3 = language3;
    }

    /**
     * @return the percent confidence array (the internal array itself, not a copy)
     */
    public int[] getPercent3()
    {
        return percent3;
    }

    /**
     * @param percent3 the array to store as the percent confidences; kept by reference
     */
    public void setPercent3(int[] percent3)
    {
        this.percent3 = percent3;
    }

    /**
     * @return the text bytes array (the internal array itself, not a copy)
     */
    public int[] getTextBytes()
    {
        return textBytes;
    }

    /**
     * @param textBytes the array to store as the text bytes output; kept by reference
     */
    public void setTextBytes(int[] textBytes)
    {
        this.textBytes = textBytes;
    }

    /**
     * @return the reliability array (the internal array itself, not a copy)
     */
    public boolean[] isReliable()
    {
        return isReliable;
    }

    /**
     * @param isReliable the array to store as the reliability output; kept by reference
     */
    public void setIsReliable(boolean[] isReliable)
    {
        this.isReliable = isReliable;
    }

    /**
     * @return the tld hint, or null when none has been set
     */
    public String getTld_hint()
    {
        return tld_hint;
    }

    /**
     * @param tld_hint the tld hint to store
     */
    public void setTld_hint(String tld_hint)
    {
        this.tld_hint = tld_hint;
    }

    /**
     * @return the encoding hint value
     */
    public int getEncoding_hint()
    {
        return encoding_hint;
    }

    /**
     * @param encoding_hint the encoding hint value to store
     */
    public void setEncoding_hint(int encoding_hint)
    {
        this.encoding_hint = encoding_hint;
    }

    /**
     * @return the language hint value
     */
    public int getLanguage_hint()
    {
        return language_hint;
    }

    /**
     * @param language_hint the language hint value to store
     */
    public void setLanguage_hint(int language_hint)
    {
        this.language_hint = language_hint;
    }

    /**
     * @param languageCodes the array to store as the language codes; kept by reference
     */
    public void setLanguageCodes(String[] languageCodes)
    {
        this.languageCodes = languageCodes;
    }

    /**
     * @param languageNames the array to store as the language names; kept by reference
     */
    public void setLanguageNames(String[] languageNames)
    {
        this.languageNames = languageNames;
    }

    /**
     * @return the validity marker
     */
    public boolean isValid()
    {
        return valid;
    }

    /**
     * @param valid the validity marker to store
     */
    public void setValid(boolean valid)
    {
        this.valid = valid;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy