org.apache.tika.language.translate.MosesTranslator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tika-translate Show documentation
Show all versions of tika-translate Show documentation
This is the translate Apache Tika™ toolkit. Translator implementations may depend on web services.
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.language.translate;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.Properties;
import org.apache.tika.exception.TikaException;
/**
* Translator that uses the Moses decoder for translation.
* Users must install the Moses system before using this Translator. @link http://www.statmt.org/moses/.
*/
public class MosesTranslator extends ExternalTranslator {
private static final String DEFAULT_PATH = "dummy-path";
private static final String TMP_FILE_NAME = "tika.moses.translation.tmp";
private String smtPath = DEFAULT_PATH;
private String scriptPath = DEFAULT_PATH;
/**
* Default constructor that attempts to read the smt jar and script paths from the
* translator.moses.properties file.
*
* @throws java.lang.AssertionError When the properties file is unreadable.
*/
public MosesTranslator() {
Properties config = new Properties();
try {
config.load(MosesTranslator.class
.getResourceAsStream("translator.moses.properties"));
new MosesTranslator(
config.getProperty("translator.smt_path"),
config.getProperty("translator.script_path"));
} catch (IOException e) {
throw new AssertionError("Failed to read translator.moses.properties.");
}
}
/**
* Create a Moses Translator with the specified smt jar and script paths.
*
* @param smtPath Full path to the jar to run.
* @param scriptPath Full path to the script to pass to the smt jar.
*/
public MosesTranslator(String smtPath, String scriptPath) {
this.smtPath = smtPath;
this.scriptPath = scriptPath;
System.out.println(buildCommand(smtPath, scriptPath));
}
@Override
public String translate(String text, String sourceLanguage, String targetLanguage) throws TikaException, IOException {
if (!isAvailable() || !checkCommand(buildCheckCommand(smtPath), 1)) return text;
File tmpFile = new File(TMP_FILE_NAME);
@SuppressWarnings("resource")
OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(tmpFile), Charset.defaultCharset());
out.append(text).append('\n').close();
Runtime.getRuntime().exec(buildCommand(smtPath, scriptPath), new String[]{}, buildWorkingDirectory(scriptPath));
File tmpTranslatedFile = new File(TMP_FILE_NAME + ".translated");
StringBuilder stringBuilder = new StringBuilder();
@SuppressWarnings("resource")
BufferedReader reader = new BufferedReader(new InputStreamReader(
new FileInputStream(tmpTranslatedFile),
Charset.defaultCharset()
));
String line;
while ((line = reader.readLine()) != null) stringBuilder.append(line);
if (!tmpFile.delete() || !tmpTranslatedFile.delete()){
throw new IOException("Failed to delete temporary files.");
}
return stringBuilder.toString();
}
@Override
public boolean isAvailable() {
return !smtPath.equals(DEFAULT_PATH) && !scriptPath.equals(DEFAULT_PATH);
}
/**
* Build the command String to be executed.
* @param smtPath Full path to the jar to run.
* @param scriptPath Full path to the script to pass to the smt jar.
* @return String to run on the command line.
*/
private String buildCommand(String smtPath, String scriptPath) {
return "java -jar " + smtPath +
" -c NONE " +
scriptPath + " " +
System.getProperty("user.dir") + "/" + TMP_FILE_NAME;
}
/**
* Build the command String to check if we can execute the smt jar.
* @param smtPath Full path to the jar to run.
* @return String to run on the command line.
*/
private String buildCheckCommand(String smtPath) {
return "java -jar " + smtPath;
}
/**
* Build the File that represents the desired working directory. In this case,
* the directory the script is in.
* @param scriptPath Full path to the script passed to the smt jar.
* @return File of the directory with the script in it.
*/
private File buildWorkingDirectory(String scriptPath) {
return new File(scriptPath.substring(0, scriptPath.lastIndexOf("/") + 1));
}
}