All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.tika.language.translate.MosesTranslator Maven / Gradle / Ivy

Go to download

This is the translate module of the Apache Tika™ toolkit. Translator implementations may depend on web services.

There is a newer version: 1.0.18
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tika.language.translate;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.Properties;

import org.apache.tika.exception.TikaException;

/**
 * Translator that uses the Moses decoder for translation.
 * Users must install the Moses system before using this Translator.
 *
 * <p>The text to translate is written to a fixed-name temporary file in the JVM's
 * working directory, an external {@code java -jar} process is run on it, and the
 * result is read back from a sibling {@code .translated} file. Because the file
 * names are fixed, concurrent calls to {@link #translate(String, String, String)}
 * in the same working directory will interfere with each other.
 *
 * @see <a href="http://www.statmt.org/moses/">Moses statistical machine translation system</a>
 */
public class MosesTranslator extends ExternalTranslator {

    /** Placeholder marking a path as "not configured"; see {@link #isAvailable()}. */
    private static final String DEFAULT_PATH = "dummy-path";
    /** Name of the input file; the output is expected at this name plus ".translated". */
    private static final String TMP_FILE_NAME = "tika.moses.translation.tmp";
    private static final String CONFIG_RESOURCE = "translator.moses.properties";

    private final String smtPath;
    private final String scriptPath;

    /**
     * Default constructor that reads the smt jar and script paths from the
     * translator.moses.properties file. A property that is absent from the file
     * leaves the translator unavailable rather than failing.
     *
     * @throws java.lang.AssertionError When the properties file is missing or unreadable.
     */
    public MosesTranslator() {
        this(loadConfiguration());
    }

    /**
     * Create a Moses Translator from already-loaded configuration.
     *
     * @param config Properties holding translator.smt_path and translator.script_path.
     */
    private MosesTranslator(Properties config) {
        this(config.getProperty("translator.smt_path"),
                config.getProperty("translator.script_path"));
    }

    /**
     * Create a Moses Translator with the specified smt jar and script paths.
     * A {@code null} path is treated as "not configured", which makes
     * {@link #isAvailable()} return {@code false}.
     *
     * @param smtPath Full path to the jar to run.
     * @param scriptPath Full path to the script to pass to the smt jar.
     */
    public MosesTranslator(String smtPath, String scriptPath) {
        this.smtPath = smtPath != null ? smtPath : DEFAULT_PATH;
        this.scriptPath = scriptPath != null ? scriptPath : DEFAULT_PATH;
        // Kept from the original implementation: echoes the command that will be run.
        System.out.println(buildCommand(this.smtPath, this.scriptPath));
    }

    /**
     * Translate the given text by running the configured smt jar on it.
     *
     * <p>If the translator is not available, or the inherited {@code checkCommand}
     * rejects the check command, the input text is returned unchanged. Multiple
     * output lines are joined with {@code '\n'}.
     *
     * @param text Text to translate.
     * @param sourceLanguage Not used by this implementation.
     * @param targetLanguage Not used by this implementation.
     * @return The translated text, or {@code text} itself when translation cannot be attempted.
     * @throws TikaException When the thread is interrupted while waiting for the external process.
     * @throws IOException When the temporary files cannot be written, read or deleted,
     *                     or the external process cannot be started.
     */
    @Override
    public String translate(String text, String sourceLanguage, String targetLanguage) throws TikaException, IOException {
        // NOTE(review): exit code 1 is passed as the accepted code of the check command;
        // presumably that is what "java -jar <smt jar>" returns without arguments - confirm.
        if (!isAvailable() || !checkCommand(buildCheckCommand(smtPath), 1)) {
            return text;
        }

        File tmpFile = new File(TMP_FILE_NAME);
        File tmpTranslatedFile = new File(TMP_FILE_NAME + ".translated");

        String translation;
        boolean completed = false;
        try {
            writeInput(tmpFile, text);
            runDecoder();
            translation = readTranslation(tmpTranslatedFile);
            completed = true;
        } finally {
            // Non-short-circuit '&' so that both deletions are always attempted.
            boolean deleted = tmpFile.delete() & tmpTranslatedFile.delete();
            // Only report cleanup problems when nothing else failed, so the
            // original exception is never masked.
            if (completed && !deleted) {
                throw new IOException("Failed to delete temporary files.");
            }
        }
        return translation;
    }

    @Override
    public boolean isAvailable() {
        return !smtPath.equals(DEFAULT_PATH) && !scriptPath.equals(DEFAULT_PATH);
    }

    /**
     * Load translator.moses.properties from the classpath, relative to this class.
     *
     * @return The loaded properties.
     * @throws java.lang.AssertionError When the resource is missing or unreadable.
     */
    private static Properties loadConfiguration() {
        InputStream stream = MosesTranslator.class.getResourceAsStream(CONFIG_RESOURCE);
        if (stream == null) {
            // getResourceAsStream signals a missing resource with null, not an exception.
            throw new AssertionError("Failed to read translator.moses.properties.");
        }
        Properties config = new Properties();
        try {
            try {
                config.load(stream);
            } finally {
                stream.close();
            }
        } catch (IOException e) {
            AssertionError failure = new AssertionError("Failed to read translator.moses.properties.");
            failure.initCause(e);
            throw failure;
        }
        return config;
    }

    /**
     * Write the text followed by a newline to the given file using the platform charset.
     *
     * @param target File to (over)write.
     * @param text Text to translate.
     * @throws IOException When the file cannot be written.
     */
    private void writeInput(File target, String text) throws IOException {
        OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(target), Charset.defaultCharset());
        try {
            out.append(text).append('\n');
        } finally {
            out.close();
        }
    }

    /**
     * Run the smt jar and block until it has finished, so that the translated
     * file is complete before it is read.
     *
     * @throws IOException When the process cannot be started or its output cannot be read.
     * @throws TikaException When the calling thread is interrupted while waiting.
     */
    private void runDecoder() throws IOException, TikaException {
        // Explicit argument list: paths containing whitespace are passed through intact.
        ProcessBuilder builder = new ProcessBuilder(buildCommandArguments(smtPath, scriptPath));
        builder.directory(buildWorkingDirectory(scriptPath));
        // The original call passed an empty environment array; keep the child environment empty.
        builder.environment().clear();
        // Merge stderr into stdout so a single loop can drain everything the child prints.
        builder.redirectErrorStream(true);

        Process process = builder.start();
        try {
            // Nothing is sent on stdin; closing it gives the child an immediate EOF.
            process.getOutputStream().close();
            // Drain the output so the child cannot block on a full pipe buffer.
            InputStream output = process.getInputStream();
            byte[] buffer = new byte[4096];
            while (output.read(buffer) != -1) {
                // discard
            }
            process.waitFor();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new TikaException("Interrupted while waiting for the Moses decoder to finish.", e);
        } finally {
            process.destroy();
        }
    }

    /**
     * Read the whole translated file using the platform charset.
     *
     * @param source File produced by the external process.
     * @return File content with lines joined by {@code '\n'} and no trailing newline.
     * @throws IOException When the file is missing or cannot be read.
     */
    private String readTranslation(File source) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(source),
                Charset.defaultCharset()
        ));
        try {
            StringBuilder translation = new StringBuilder();
            boolean first = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!first) {
                    translation.append('\n');
                }
                translation.append(line);
                first = false;
            }
            return translation.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Build the argument list of the command to be executed.
     * @param smtPath Full path to the jar to run.
     * @param scriptPath Full path to the script to pass to the smt jar.
     * @return Program name followed by its arguments.
     */
    private String[] buildCommandArguments(String smtPath, String scriptPath) {
        return new String[] {
                "java", "-jar", smtPath,
                "-c", "NONE",
                scriptPath,
                System.getProperty("user.dir") + "/" + TMP_FILE_NAME
        };
    }

    /**
     * Build the command String to be executed, for display purposes.
     * @param smtPath Full path to the jar to run.
     * @param scriptPath Full path to the script to pass to the smt jar.
     * @return The command arguments joined by single spaces.
     */
    private String buildCommand(String smtPath, String scriptPath) {
        StringBuilder command = new StringBuilder();
        for (String argument : buildCommandArguments(smtPath, scriptPath)) {
            if (command.length() > 0) {
                command.append(' ');
            }
            command.append(argument);
        }
        return command.toString();
    }

    /**
     * Build the command String to check if we can execute the smt jar.
     * @param smtPath Full path to the jar to run.
     * @return String to run on the command line.
     */
    private String buildCheckCommand(String smtPath) {
        return "java -jar " + smtPath;
    }

    /**
     * Build the File that represents the desired working directory. In this case,
     * the directory the script is in.
     * @param scriptPath Full path to the script passed to the smt jar.
     * @return File of the directory with the script in it, or {@code null} when the
     *         path has no parent, in which case the process runs in the current
     *         working directory.
     */
    private File buildWorkingDirectory(String scriptPath) {
        return new File(scriptPath).getParentFile();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy