All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.tika.language.translate.CachedTranslator Maven / Gradle / Ivy

Go to download

This is the translate Apache Tika™ toolkit. Translator implementations may depend on web services.

There is a newer version: 1.0.18
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tika.language.translate;

import java.io.IOException;
import java.util.HashMap;

import org.apache.tika.exception.TikaException;
import org.apache.tika.language.detect.LanguageResult;

import com.fasterxml.jackson.databind.util.LRUMap;

/**
 * CachedTranslator. Saves a map of previous translations in order to prevent repetitive translation requests.
 */
public class CachedTranslator extends AbstractTranslator {
    private static final int INITIAL_ENTRIES = 100;
    private static final int MAX_ENTRIES = 1000;
    private Translator translator;
    // The cache is a map from sourceLang:targetLang to an LRUMap of previously translated pairs.
    // Old entries are removed from the cache when it reaches its limit.
    // For example, {en:fr -> {hello -> salut}}.
    private HashMap> cache;
    
    /**
     * Create a new CachedTranslator (must set the {@link Translator} with {@link #setTranslator(Translator)} before use!)
     */
    public CachedTranslator(){
    	this(null);
    }

    /**
     * Create a new CachedTranslator.
     *
     * @param translator The translator that should be used for the underlying translation service. The properties
     *                   for that service must be set properly!
     */
    public CachedTranslator(Translator translator) {
        this.translator = translator;
        this.cache = new HashMap<>();
    }

    /**
	 * @return the translator
	 */
	public Translator getTranslator() {
		return translator;
	}

	/**
	 * @param translator the translator to set
	 */
	public void setTranslator(Translator translator) {
		this.translator = translator;
	}

	@Override
    public String translate(String text, String sourceLanguage, String targetLanguage) throws TikaException, IOException {
        if (translator == null) {
            return text;
        }
        LRUMap translationCache = getTranslationCache(sourceLanguage, targetLanguage);
        String translatedText = translationCache.get(text);
        if (translatedText == null) {
            translatedText = translator.translate(text, sourceLanguage, targetLanguage);
            translationCache.put(text, translatedText);
        }
        return translatedText;
    }

    @Override
    public String translate(String text, String targetLanguage) throws TikaException, IOException {
        LanguageResult language = detectLanguage(text);
        String sourceLanguage = language.getLanguage();
        return translate(text, sourceLanguage, targetLanguage);
    }

    @Override
    public boolean isAvailable() {
        return translator != null && translator.isAvailable();
    }

    /**
     * Get the number of different source/target translation pairs this CachedTranslator
     * currently has in its cache.
     *
     * @return Number of translation source/target pairs in this CachedTranslator's cache.
     * @since Tika 1.6
     */
    public int getNumTranslationPairs() {
        return cache.size();
    }

    /**
     * Get the number of different translations from the source language to the target language
     * this CachedTranslator has in its cache.
     *
     * @param sourceLanguage The source language of translation.
     * @param targetLanguage The target language of translation.
     * @return The number of translations between source and target.
     * @since Tika 1.6
     */
    public int getNumTranslationsFor(String sourceLanguage, String targetLanguage) {
        LRUMap translationCache = cache.get(buildCacheKeyString(sourceLanguage, targetLanguage));
        if (translationCache == null) {
            return 0;
        } else {
            return translationCache.size();
        }
    }

    /**
     * Check whether this CachedTranslator's cache contains a translation of the text from the
     * source language to the target language.
     *
     * @param text What string to check for.
     * @param sourceLanguage The source language of translation.
     * @param targetLanguage The target language of translation.
     * @return true if the cache contains a translation of the text, false otherwise.
     */
    public boolean contains(String text, String sourceLanguage, String targetLanguage) {
        LRUMap translationCache = getTranslationCache(sourceLanguage, targetLanguage);
        return translationCache.get(text) != null;
    }

    /**
     * Check whether this CachedTranslator's cache contains a translation of the text to the target language,
     * attempting to auto-detect the source language.
     *
     * @param text What string to check for.
     * @param targetLanguage The target language of translation.
     * @return true if the cache contains a translation of the text, false otherwise.
     */
    public boolean contains(String text, String targetLanguage) {
		try {
			LanguageResult language = detectLanguage(text);
	        String sourceLanguage = language.getLanguage();
	        return contains(text, sourceLanguage, targetLanguage);
		} catch (IOException e) {
			// TODO what to do if we get an error?
			return false;
		}
    }

    /**
     * Build the String to be used as the key into this CachedTranslator's cache.
     *
     * @param sourceLanguage The source language of translation.
     * @param targetLanguage The target language of translation.
     * @return The string to be used as the key into this CachedTranslator's cache.
     */
    private String buildCacheKeyString(String sourceLanguage, String targetLanguage) {
        return sourceLanguage + ":" + targetLanguage;
    }

    /**
     * Get the cache of translations from the given source language to target language.
     *
     * @param sourceLanguage The source language of translation.
     * @param targetLanguage The target language of translation.
     * @return The LRUMap representing the translation cache.
     */
    private LRUMap getTranslationCache(String sourceLanguage, String targetLanguage) {
        LRUMap translationCache = cache.get(buildCacheKeyString(sourceLanguage, targetLanguage));
        if (translationCache == null) {
            translationCache = new LRUMap<>(INITIAL_ENTRIES, MAX_ENTRIES);
            cache.put(buildCacheKeyString(sourceLanguage, targetLanguage), translationCache);
        }
        return translationCache;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy