net.sf.okapi.connectors.google.GoogleMTv2Connector Maven / Gradle / Ivy
/*===========================================================================
Copyright (C) 2011-2017 by the Okapi Framework contributors
-----------------------------------------------------------------------------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
===========================================================================*/
package net.sf.okapi.connectors.google;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiException;
import net.sf.okapi.common.query.QueryResult;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.lib.translation.BaseConnector;
import net.sf.okapi.lib.translation.QueryUtil;
public class GoogleMTv2Connector extends BaseConnector {
private static final String BASE_URL = "https://translation.googleapis.com/language/translate/v2";
private final Logger LOG = LoggerFactory.getLogger(getClass());
private GoogleMTv2Parameters params;
private QueryUtil util;
private GoogleMTAPI api;
private int failureCount;
public GoogleMTv2Connector () {
params = new GoogleMTv2Parameters();
util = new QueryUtil();
api = new GoogleMTAPIImpl(BASE_URL, params);
}
public GoogleMTv2Connector(GoogleMTAPI api) {
params = new GoogleMTv2Parameters();
util = new QueryUtil();
this.api = api;
}
@Override
public void setParameters (IParameters params) {
this.params = (GoogleMTv2Parameters)params;
}
@Override
public GoogleMTv2Parameters getParameters () {
return params;
}
@Override
public void close () {
// Nothing to do
}
@Override
public String getName () {
return "Google-MTv2";
}
@Override
public String getSettingsDisplay () {
return "Server: " + BASE_URL;
}
@Override
public void open () {
failureCount = 0;
}
@Override
public int query (String plainText) {
return _query(plainText, plainText, new TextQueryResultBuilder(params, getName(), getWeight()));
}
@Override
public int query (TextFragment frag) {
return _query(util.toCodedHTML(frag), frag, new FragmentQueryResultBuilder(params, getName(), getWeight()));
}
private void retryInterval(int retryCount, String operation) {
LOG.info("{} - retry {} (waiting {} ms)", operation, retryCount, params.getRetryIntervalMs());
try {
Thread.sleep(params.getRetryIntervalMs());
} catch (InterruptedException e) {
throw new OkapiException("Interrupted while trying to contact Google MT");
}
}
protected int _query(String queryText, T originalText, QueryResultBuilder qrBuilder) {
current = -1;
if (queryText.isEmpty()) return 0;
// Check that we have some Key available
if ( Util.isEmpty(params.getApiKey()) ) {
throw new OkapiException("You must have a Google API Key to use this connector.");
}
List queryResults = new ArrayList<>();
GoogleQueryBuilder qb = new GoogleQueryBuilder<>(BASE_URL, getParameters(), srcCode, trgCode);
qb.addQuery(queryText, originalText);
List responses = executeQuery(qb, qrBuilder);
if (responses != null) {
queryResults.addAll(qrBuilder.convertResponses(responses, originalText));
}
else {
// Underlying call failed for some reason, probably a timeout
LOG.error("Received no results for query {}", qb.getQuery());
// Return the source text as a dummy translation so that we can maintain the correct indexing
queryResults.add(qrBuilder.createDummyResponse(originalText));
}
if (queryResults.size() > 0) {
current = 0;
result = queryResults.iterator().next();
return 1;
}
throw new OkapiException("Could not retrieve results from Google after " +
params.getRetryCount() + " attempts.");
}
@Override
public List> batchQueryText(List plainTexts) {
return _batchQuery(plainTexts, plainTexts, new TextQueryResultBuilder(params, getName(), getWeight()));
}
@Override
public List> batchQuery (List fragments) {
return _batchQuery(util.toCodedHTML(fragments), fragments,
new FragmentQueryResultBuilder(params, getName(), getWeight()));
}
protected List> _batchQuery(List texts, List originalTexts,
QueryResultBuilder qrBuilder) {
// Check that we have some Key available
if ( Util.isEmpty(params.getApiKey()) ) {
throw new OkapiException("You must have a Google API Key to use this connector.");
}
GoogleQueryBuilder qb = new GoogleQueryBuilder<>(BASE_URL, params, srcCode, trgCode);
current = -1;
List> queryResults = new ArrayList<>(texts.size());
for (int i = 0; i < texts.size(); i++) {
String sourceText = texts.get(i);
T originalText = originalTexts.get(i);
if (qb.hasCapacity(sourceText)) {
qb.addQuery(sourceText, originalText);
}
else {
queryResults.addAll(flushQuery(qb, qrBuilder));
if (qb.hasCapacity(sourceText)) {
qb.addQuery(sourceText, originalText);
}
else {
// If we still don't have capacity, it's an oversized segment that needs to be POSTed by
// itself.
TranslationResponse response = executeSingleSegmentQuery(qb, sourceText);
if (response != null) {
queryResults.add(qrBuilder.convertResponses(Collections.singletonList(response), originalText));
}
else {
// Underlying call failed for some reason, probably a timeout
LOG.error("Received no results for query {}", qb.getQuery());
// Return the source text as a dummy translation so that we can maintain the correct indexing
queryResults.add(Collections.singletonList(qrBuilder.createDummyResponse(originalText)));
}
}
}
}
queryResults.addAll(flushQuery(qb, qrBuilder));
return queryResults;
}
protected List> flushQuery(GoogleQueryBuilder qb, QueryResultBuilder qrBuilder) {
List> queryResults = new ArrayList<>();
if (qb.getSourceCount() > 0) {
LOG.debug("Flushing batch query of length {}, '{}'", qb.getQuery().length(), qb.getQuery());
List batchResponses = executeQuery(qb, qrBuilder);
if (batchResponses != null) {
for (int j = 0; j < batchResponses.size(); j++) {
queryResults.add(qrBuilder.convertResponses(
Collections.singletonList(batchResponses.get(j)), qb.getSources().get(j)));
}
}
else {
// Underlying call failed for some reason, probably a timeout
LOG.error("Received no results for query {}", qb.getQuery());
// Return the source text as a dummy translation so that we can maintain the correct indexing
for (T source : qb.getSources()) {
queryResults.add(Collections.singletonList(qrBuilder.createDummyResponse(source)));
}
}
qb.reset();
}
return queryResults;
}
protected TranslationResponse executeSingleSegmentQuery(GoogleQueryBuilder qb, String sourceText) {
try {
for (int tries = 0; tries < params.getRetryCount(); tries++) {
try {
return api.translateSingleSegment(qb, sourceText);
}
catch (GoogleMTErrorException e) {
LOG.error("Error {} - {} for query {}", e.getCode(), e.getMessage(), e.getQuery());
}
retryInterval(tries + 1, "_batchQuery");
}
}
catch ( Throwable e) {
throw new OkapiException("Error querying the MT server: " + e.getMessage(), e);
}
// All retries have failed
if (( params.getFailuresBeforeAbort() > -1 ) && ( ++failureCount > params.getFailuresBeforeAbort() )) {
throw new OkapiException("Too many retry failures while querying the MT server.");
}
return null;
}
protected List executeQuery(GoogleQueryBuilder qb, QueryResultBuilder qrBuilder) {
try {
for (int tries = 0; tries < params.getRetryCount(); tries++) {
try {
return api.translate(qb);
}
catch (GoogleMTErrorException e) {
LOG.error("Error {} - {} for query {}", e.getCode(), e.getMessage(), e.getQuery());
}
retryInterval(tries + 1, "_batchQuery");
}
}
catch ( Throwable e) {
throw new OkapiException("Error querying the MT server: " + e.getMessage(), e);
}
// All retries have failed
if (( params.getFailuresBeforeAbort() > -1 ) && ( ++failureCount > params.getFailuresBeforeAbort() )) {
throw new OkapiException("Too many retry failures while querying the MT server.");
}
return null;
}
public List getSupportedLanguages() {
try {
for (int tries = 0; tries < params.getRetryCount(); tries++) {
List codes = api.getLanguages();
if (codes != null) {
List locales = new ArrayList<>();
for (String code : codes) {
locales.add(convertGoogleLanguageCode(code));
}
return locales;
}
retryInterval(tries + 1, "getSupportedLanguages");
}
}
catch ( Throwable e) {
throw new OkapiException("Error querying the MT server: " + e.getMessage(), e);
}
throw new OkapiException("Could not retrieve language list from Google after " +
params.getRetryCount() + " attempts.");
}
protected LocaleId convertGoogleLanguageCode(String lang) {
return LocaleId.fromBCP47(lang);
}
@Override
public void leverage (ITextUnit tu) {
leverageUsingBatchQuery(tu);
}
@Override
public void batchLeverage (List tuList) {
batchLeverageUsingBatchQuery(tuList);
}
@Override
protected String toInternalCode (LocaleId locale) {
String code = locale.toBCP47();
String codelc = code.toLowerCase();
if ( codelc.startsWith("sr-latn") ) {
code = "sr-Latn";
}
else if ( codelc.startsWith("sr-cyrl") ) {
code = "sr";
}
else {
switch ( codelc ) {
case "zh-hans": code = "zh-CN"; break;
case "zh-hant": code = "zh-TW"; break;
}
if ( !code.startsWith("zh") && ( code.length() > 3 )) {
int p = code.indexOf('-');
if ( p > -1 ) {
code = code.substring(0, p);
}
}
}
return code;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy