All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.tika.language.translate.YandexTranslator Maven / Gradle / Ivy

Go to download

This is the translate Apache Tika™ toolkit. Translator implementations may depend on web services.

There is a newer version: 1.0.18
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tika.language.translate;

import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Properties;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.tika.exception.TikaException;
import org.apache.tika.language.translate.Translator;

import static java.nio.charset.StandardCharsets.UTF_8;

/**
 * An implementation of a REST client for the YANDEX Translate API.
 * You can sign up for free access online on the API Key form
 * and set your Application's User Key in the translator.yandex.properties file.
 */
public class YandexTranslator implements Translator {
    
    /**
     * Yandex Translate API service end-point URL
     */
    private static final String YANDEX_TRANSLATE_URL_BASE = "https://translate.yandex.net/api/v1.5/tr.json/translate";

    /**
     * Default USer-Key, a real User-Key must be provided before the Lingo24 can successfully request translations
     */
    private static final String DEFAULT_KEY = "dummy-key";

    /**
     * Identifies the client of the request, used for authentication 
     */
    private String apiKey;
    
    /**
     * The Yandex Translate API can handle text in plain and/or html format, the default
     * format is plain
     */
    private String format = "plain";

    public YandexTranslator() {
        Properties config = new Properties();
        try {
            config.load(YandexTranslator.class
                    .getResourceAsStream(
                            "translator.yandex.properties"));
            this.apiKey = config.getProperty("translator.api-key");
            this.format = config.getProperty("translator.text.format");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override
    public String translate(String text, String sourceLanguage,
            String targetLanguage) throws TikaException, IOException {
        if (!this.isAvailable()) {
            return text;
        }
        
        WebClient client = WebClient.create(YANDEX_TRANSLATE_URL_BASE);
        
        String langCode;
        
        if (sourceLanguage == null) {
            //Translate Service will identify source language
            langCode = targetLanguage;
        } else {
            //Source language is well known
            langCode = sourceLanguage + '-' + targetLanguage;
        }

        //TODO Add support for text over 10k characters
        Response response = client.accept(MediaType.APPLICATION_JSON)
                .query("key", this.apiKey).query("lang", langCode)
                .query("text", text).get();
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                (InputStream) response.getEntity(), UTF_8));
        String line = null;
        StringBuffer responseText = new StringBuffer();
        while ((line = reader.readLine()) != null) {
            responseText.append(line);
        }

        try {
            ObjectMapper mapper = new ObjectMapper();
            JsonNode jsonResp = mapper.readTree(responseText.toString());
            
            if (!jsonResp.findValuesAsText("code").isEmpty()) {
                String code = jsonResp.findValuesAsText("code").get(0);
                if (code.equals("200")) {
                    return jsonResp.findValue("text").get(0).asText();
                } else {
                    throw new TikaException(jsonResp.findValue("message").get(0).asText());
                }
            } else {
                throw new TikaException("Return message not recognized: " + responseText.toString().substring(0, Math.min(responseText.length(), 100)));
            }
        } catch (JsonParseException e) {
            throw new TikaException("Error requesting translation from '" + sourceLanguage + "' to '" + targetLanguage + "', JSON response from Lingo24 is not well formatted: " + responseText.toString());
        }
    }


    /**
     * Get the API Key in use for client authentication
     * @return API Key
     */
    public String getApiKey() {
        return apiKey;
    }

    /**
     * Set the API Key for client authentication
     * @param apiKey API Key
     */
    public void setApiKey(String apiKey) {
        this.apiKey = apiKey;
    }

    /**
     * Retrieve the current text format setting.
     * The Yandex Translate API can handle text in plain and/or html format, the default
     * format is plain
     * @return
     */
    public String getFormat() {
        return format;
    }

    /**
     * Set the text format to use (plain/html)
     * @param format Text format setting, either plain or html
     */
    public void setFormat(String format) {
        this.format = format;
    }

    @Override
    public String translate(String text, String targetLanguage)
            throws TikaException, IOException {
        return this.translate(text, null, targetLanguage);
    }

    @Override
    public boolean isAvailable() {
        return this.apiKey!=null && !this.apiKey.equals(DEFAULT_KEY);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy