All downloads are free. Search and download functionality uses the official Maven repository.

com.hpe.caf.languagedetection.cld2.Cld2Wrapper Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015-2024 Open Text.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hpe.caf.languagedetection.cld2;

import com.hpe.caf.languagedetection.LanguageDetectorException;
import com.hpe.caf.languagedetection.LanguageDetectorSettings;
import com.sun.jna.Native;
import com.sun.jna.Platform;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Wrapper around the native CLD2 language detection library, accessed through JNA.
 *
 * <p>
 * Constructing an instance loads the platform-specific {@code libcld2} binary and binds it to the {@link Cld2Library} interface; the
 * instance then exposes {@link #detectLanguageSummaryWithHints(byte[], LanguageDetectorSettings)} as the single entry point.
 */
public class Cld2Wrapper
{
    private static final Logger LOG = LoggerFactory.getLogger(Cld2Wrapper.class);

    /**
     * Name of both the system property and the environment variable that may hold the directory containing the CLD2 binaries.
     */
    private static final String CLD2_LOCATION_KEY = "cld2.location";

    /**
     * System property that is populated with the CLD2 location before the native library is loaded.
     */
    private static final String JNA_LIBRARY_PATH_KEY = "jna.library.path";

    /**
     * JNA interface access class; bound once in the constructor and never reassigned.
     */
    private final Cld2Library cld2Library;

    /**
     * Loads the libcld2 library through JNA and keeps the resulting {@link Cld2Library} proxy as the access point.
     *
     * <p>
     * The library location is read from the {@code cld2.location} system property, falling back to the environment variable of the
     * same name. When a location is found it is copied into the {@code jna.library.path} system property. When neither is defined
     * {@code jna.library.path} is left untouched, because {@link System#setProperty(String, String)} rejects a {@code null} value
     * with a {@link NullPointerException}.
     */
    public Cld2Wrapper()
    {
        final String location = System.getProperty(CLD2_LOCATION_KEY, System.getenv(CLD2_LOCATION_KEY));
        if (location != null) {
            System.setProperty(JNA_LIBRARY_PATH_KEY, location);
        }

        LOG.debug("Library location: {}", System.getProperty(JNA_LIBRARY_PATH_KEY));

        // The binary name is relative: a per-platform sub-directory followed by the library file name.
        final String libraryName = Platform.isWindows() ? "win64/libcld2.dll" : "linux/libcld2.so";
        cld2Library = Native.load(libraryName, Cld2Library.class);
    }

    /**
     * Calls into the native CLD2 library through JNA, passing the text and the configured hints, to carry out language detection.
     *
     * <p>
     * The result is flagged as invalid when the native call returns {@code Cld2Language.UNKNOWN_LANGUAGE} and the first element of
     * the reliability flag array is {@code false}. Language codes and names are resolved for every entry of the detected language
     * array regardless of that flag.
     *
     * @param inputBytes - bytes of text data utf-8
     * @param settings - settings object with hints
     * @return Cld2Result - Contains the languages and is handled by the Cld2 class to produce a LanguageDetectorResult
     * @throws LanguageDetectorException - the native call, or the code/name lookup that follows it, failed; the original failure is
     * attached as the cause
     */
    public Cld2Result detectLanguageSummaryWithHints(byte[] inputBytes, LanguageDetectorSettings settings) throws LanguageDetectorException
    {
        Cld2Result cld2Result = new Cld2Result();

        // Multiple hints are handed to the native side as one comma-separated string.
        if (!settings.getHints().isEmpty()) {
            cld2Result.setTld_hint(String.join(",", settings.getHints()));
        }

        cld2Result.setEncoding_hint(Cld2Encoding.getValueFromString(settings.getEncodingHint()));

        try {
            // The arrays obtained from cld2Result are passed to the native function, which fills them in.
            int result = cld2Library.DetectLanguageSummaryWithHints(inputBytes, inputBytes.length, true, cld2Result.getTld_hint(),
                                                                    cld2Result.getEncoding_hint(), cld2Result.getLanguage_hint(),
                                                                    cld2Result.getLanguage3(), cld2Result.getPercent3(),
                                                                    cld2Result.getTextBytes(), cld2Result.isReliable());

            if (result == Cld2Language.UNKNOWN_LANGUAGE && !cld2Result.isReliable()[0]) {
                cld2Result.setValid(false);
            }

            cld2Result.setLanguageCodes(getLanguageCodes(cld2Result.getLanguage3()));
            cld2Result.setLanguageNames(getLanguageNames(cld2Result.getLanguage3()));
            return cld2Result;
        } catch (Throwable e) {
            // Throwable rather than Exception: failures surfacing from the native binding can be Errors
            // (such as UnsatisfiedLinkError), and callers expect every failure as a LanguageDetectorException.
            throw new LanguageDetectorException("Language detection failed.\n", e);
        }
    }

    /**
     * Resolves each CLD2 language enum value to its language code through JNA. The native function is bound by its mangled C++
     * symbol, which corresponds to {@code CLD2::LanguageCode(CLD2::Language)}.
     *
     * @param lang3 integer language enum values as filled in by the detection call
     * @return String[] containing one code per entry of {@code lang3}, in the same order
     */
    private String[] getLanguageCodes(int[] lang3)
    {
        String[] codes = new String[lang3.length];
        for (int i = 0; i < lang3.length; i++) {
            codes[i] = cld2Library._ZN4CLD212LanguageCodeENS_8LanguageE(lang3[i]);
        }
        return codes;
    }

    /**
     * Resolves each CLD2 language enum value to its language name through JNA. The native function is bound by its mangled C++
     * symbol, which corresponds to {@code CLD2::LanguageName(CLD2::Language)}.
     *
     * @param lang3 integer language enum values as filled in by the detection call
     * @return String[] containing one language name per entry of {@code lang3}, in the same order
     */
    private String[] getLanguageNames(int[] lang3)
    {
        String[] names = new String[lang3.length];
        for (int i = 0; i < lang3.length; i++) {
            names[i] = cld2Library._ZN4CLD212LanguageNameENS_8LanguageE(lang3[i]);
        }
        return names;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy