com.basistech.rosette.api.HttpRosetteAPI Maven / Gradle / Ivy
Show all versions of rosette-api Show documentation
/*
* Copyright 2024 Basis Technology Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.basistech.rosette.api;
import com.basistech.rosette.RosetteRuntimeException;
import com.basistech.rosette.api.common.AbstractRosetteAPI;
import com.basistech.rosette.apimodel.AdmRequest;
import com.basistech.rosette.apimodel.AdmResponse;
import com.basistech.rosette.apimodel.DocumentRequest;
import com.basistech.rosette.apimodel.ErrorResponse;
import com.basistech.rosette.apimodel.InfoResponse;
import com.basistech.rosette.apimodel.PingResponse;
import com.basistech.rosette.apimodel.Request;
import com.basistech.rosette.apimodel.Response;
import com.basistech.rosette.apimodel.SupportedLanguagePairsResponse;
import com.basistech.rosette.apimodel.SupportedLanguagesResponse;
import com.basistech.rosette.apimodel.jackson.ApiModelMixinModule;
import com.basistech.rosette.apimodel.jackson.DocumentRequestMixin;
import com.basistech.rosette.dm.AnnotatedText;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.AbstractHttpEntity;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.FormBodyPartBuilder;
import org.apache.http.entity.mime.HttpMultipartMode;
import org.apache.http.entity.mime.MIME;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.entity.mime.content.AbstractContentBody;
import org.apache.http.entity.mime.content.ByteArrayBody;
import org.apache.http.entity.mime.content.InputStreamBody;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicHeader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import static java.net.HttpURLConnection.HTTP_OK;
/**
* Access to the Analytics API via HTTP.
*/
public class HttpRosetteAPI extends AbstractRosetteAPI {
/** Base URL of the hosted Analytics API; used when the caller supplies no alternate URL. */
public static final String DEFAULT_URL_BASE = "https://analytics.babelstreet.com/rest/v1";
/** Service name; forms the leading part of {@link #USER_AGENT_STR}. */
public static final String SERVICE_NAME = "Babel-Street-Analytics-API";
/** Version of this binding as reported by {@code getVersion()}. */
public static final String BINDING_VERSION = getVersion();
/** User-Agent value: service name, binding version and the {@code java.version} system property. */
public static final String USER_AGENT_STR = SERVICE_NAME + "-Java/" + BINDING_VERSION + "/"
    + System.getProperty("java.version");
private static final Logger LOG = LoggerFactory.getLogger(HttpRosetteAPI.class);
private static final String IO_EXCEPTION_MESSAGE = "IO Exception communicating with the Babel Street Analytics API";
/** Matches one or more slashes at the end of a string; used to normalize a caller-supplied base URL. */
private static final Pattern TRAILING_SLASHES = Pattern.compile("/+$");
/** Base URL that every endpoint path is appended to. */
private String urlBase = DEFAULT_URL_BASE;
/** Number of attempts made for each POST request (the retry loop runs this many times). */
private int failureRetries = 1;
private ObjectMapper mapper;
private CloseableHttpClient httpClient;
/** Headers added to each outgoing request; element type is declared so the list can be iterated as headers. */
private List<Header> additionalHeaders;
/** Connection pool size (total and per route) of the internally created HTTP client. */
private int connectionConcurrency = 2;
/**
 * Set to {@code false} when the caller supplies its own HTTP client.
 * NOTE(review): presumably consulted by a close method that is outside this view - confirm.
 */
private boolean closeClientOnClose = true;
/**
 * Private no-argument constructor. It performs no initialization, so every field
 * keeps its declared default.
 */
private HttpRosetteAPI() {
// Intentionally empty: instances are meant to be created through the builder.
}
/**
 * Constructs an Analytics API instance using the builder syntax.
 *
 * @param key Analytics API key. This may be null for use with an on-premise deployment
 *            of the Analytics API.
 * @param urlToCall Alternate Analytics API URL; surrounding whitespace and trailing slashes
 *                  are removed. {@code null} uses the default, public, URL.
 * @param failureRetries Number of attempts made for each POST request before giving up.
 *                       {@code null} or a value below 1 keeps the default value: 1.
 * @param httpClient HTTP client to use for all requests. {@code null} makes this instance
 *                   create its own pooled client; a supplied client is flagged as not owned
 *                   by this instance ({@code closeClientOnClose} is set to {@code false}).
 * @param additionalHeaders Extra headers to send with requests, after the standard ones;
 *                          may be {@code null}.
 * @param connectionConcurrency Number of concurrent connections. Pass this if have subscribed
 *                              to a plan that supports enhanced concurrency, or if you are using
 *                              an on-premise deployment of the Analytics API. {@code null} uses the
 *                              default value: 2. Only the internally created client is sized
 *                              with this value.
 * @param onlyAcceptKnownFields Whether JSON deserialization fails on properties that are not
 *                              known to the response model.
 * @throws HttpRosetteAPIException Problem with the API request
 */
HttpRosetteAPI(String key, String urlToCall, Integer failureRetries,
        CloseableHttpClient httpClient, List<Header> additionalHeaders,
        Integer connectionConcurrency, boolean onlyAcceptKnownFields) throws HttpRosetteAPIException {
    if (urlToCall != null) {
        // Normalize so that "base + endpoint" never produces a double slash.
        urlBase = TRAILING_SLASHES.matcher(urlToCall.trim()).replaceAll("");
    }
    if (failureRetries != null && failureRetries >= 1) {
        this.failureRetries = failureRetries;
    }
    if (connectionConcurrency != null) {
        this.connectionConcurrency = connectionConcurrency;
    }
    mapper = ApiModelMixinModule.setupObjectMapper(new ObjectMapper());
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, onlyAcceptKnownFields);
    if (httpClient == null) {
        // No client supplied: build a pooled one that carries the headers as defaults.
        initClient(key, additionalHeaders);
    } else {
        // Caller-owned client: headers are attached per request instead.
        this.httpClient = httpClient;
        initHeaders(key, additionalHeaders);
        closeClientOnClose = false;
    }
}
/**
 * Returns the version of the binding.
 *
 * <p>The value is read from the {@code version} key of the {@code version.properties}
 * classpath resource. If the resource is missing, cannot be read, or has no such key,
 * {@code "undefined"} is returned.
 *
 * @return version of the binding, or {@code "undefined"} when it cannot be determined
 */
private static String getVersion() {
    Properties properties = new Properties();
    try (InputStream ins = HttpRosetteAPI.class.getClassLoader().getResourceAsStream("version.properties")) {
        // getResourceAsStream returns null for a missing resource, and Properties.load(null)
        // would throw a NullPointerException - fatal here because this runs from a static initializer.
        if (ins != null) {
            properties.load(ins);
        }
    } catch (IOException ignored) {
        // should not happen; fall through to the "undefined" default
    }
    return properties.getProperty("version", "undefined");
}
/**
 * Drains an input stream into a byte array.
 *
 * <p>The stream is read in 4096-byte chunks until end of stream is reported. The stream
 * is not closed by this method.
 *
 * @param is stream to read to exhaustion
 * @return every byte read from the stream, in order
 * @throws IOException if reading from the stream fails
 */
private static byte[] getBytes(InputStream is) throws IOException {
    final byte[] chunk = new byte[4096];
    final ByteArrayOutputStream collected = new ByteArrayOutputStream();
    for (int count = is.read(chunk); count != -1; count = is.read(chunk)) {
        collected.write(chunk, 0, count);
    }
    collected.flush();
    return collected.toByteArray();
}
/**
 * Creates the internally owned HTTP client.
 *
 * <p>The client uses a pooling connection manager whose total and per-route limits are both
 * {@code connectionConcurrency}. The standard and caller-supplied headers are installed as
 * the client's default headers.
 *
 * @param key Analytics API key; may be {@code null}
 * @param additionalHeaders extra headers supplied by the caller; may be {@code null}
 */
@SuppressWarnings("java:HttpClient_must_be_closed") // This library requires keeping the connection open.
private void initClient(String key, List<Header> additionalHeaders) {
    PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager();
    connectionManager.setMaxTotal(connectionConcurrency);
    connectionManager.setDefaultMaxPerRoute(connectionConcurrency);

    // Populates this.additionalHeaders with the full header set.
    initHeaders(key, additionalHeaders);
    httpClient = HttpClients.custom()
        .setConnectionManager(connectionManager)
        .setDefaultHeaders(this.additionalHeaders)
        .build();
    // The headers now live in the client as defaults, so the per-request list is emptied;
    // the request methods iterate this list and would otherwise add every header again.
    this.additionalHeaders = new ArrayList<>();
}
/**
 * Rebuilds {@code this.additionalHeaders} from scratch.
 *
 * <p>The list always starts with the User-Agent and {@code Accept-Encoding: gzip} headers.
 * The API-key header and the binding identification headers are added only when a key is
 * given. Caller-supplied headers are appended last.
 *
 * @param key Analytics API key; {@code null} omits the key and binding headers
 * @param additionalHeaders extra headers supplied by the caller; may be {@code null}
 */
private void initHeaders(String key, List<Header> additionalHeaders) {
    List<Header> headers = new ArrayList<>();
    headers.add(new BasicHeader(HttpHeaders.USER_AGENT, USER_AGENT_STR));
    headers.add(new BasicHeader(HttpHeaders.ACCEPT_ENCODING, "gzip"));
    if (key != null) {
        headers.add(new BasicHeader("X-BabelStreetAPI-Key", key));
        headers.add(new BasicHeader("X-BabelStreetAPI-Binding", "java"));
        headers.add(new BasicHeader("X-BabelStreetAPI-Binding-Version", BINDING_VERSION));
        // TODO: Remove in a future release.
        headers.add(new BasicHeader("X-RosetteAPI-Binding", "java"));
        headers.add(new BasicHeader("X-RosetteAPI-Binding-Version", BINDING_VERSION));
    }
    if (additionalHeaders != null) {
        headers.addAll(additionalHeaders);
    }
    this.additionalHeaders = headers;
}
/**
 * Reports the configured failure-retry count.
 *
 * @return the value of the failure-retries setting
 */
public int getFailureRetries() {
    return this.failureRetries;
}
/**
 * Queries the info endpoint of the Analytics API, which reports name, version,
 * build number and build time.
 *
 * @return the deserialized {@link InfoResponse}
 * @throws HttpRosetteAPIException Analytics specific exception
 * @throws IOException General IO exception
 */
public InfoResponse info() throws IOException, HttpRosetteAPIException {
    final String infoUrl = urlBase + INFO_SERVICE_PATH;
    return sendGetRequest(infoUrl, InfoResponse.class);
}
/**
 * Queries the ping endpoint of the Analytics API to check that the service is available.
 *
 * @return the deserialized {@link PingResponse}
 * @throws HttpRosetteAPIException Analytics specific exception
 * @throws IOException General IO exception
 */
public PingResponse ping() throws IOException, HttpRosetteAPIException {
    final String pingUrl = urlBase + PING_SERVICE_PATH;
    return sendGetRequest(pingUrl, PingResponse.class);
}
/**
 * Fetches the language and script codes supported by an Analytics API endpoint.
 *
 * <p>Only document endpoints and the name-deduplication endpoint are queried; any other
 * endpoint yields {@code null} without contacting the service.
 *
 * @param endpoint Analytics API endpoint.
 * @return SupportedLanguagesResponse, or {@code null} when the endpoint is not one of the
 *         endpoints listed above
 * @throws HttpRosetteAPIException for an error returned from the Analytics API.
 */
@Override
public SupportedLanguagesResponse getSupportedLanguages(String endpoint) throws HttpRosetteAPIException {
    final boolean queryable = DOC_ENDPOINTS.contains(endpoint)
        || NAME_DEDUPLICATION_SERVICE_PATH.equals(endpoint);
    if (!queryable) {
        return null;
    }
    final String url = urlBase + endpoint + SUPPORTED_LANGUAGES_SUBPATH;
    return sendGetRequest(url, SupportedLanguagesResponse.class);
}
/**
 * Fetches the language, script code and transliteration scheme pairs supported by an
 * Analytics API endpoint.
 *
 * <p>Only names endpoints other than name-deduplication are queried; any other endpoint
 * yields {@code null} without contacting the service.
 *
 * @param endpoint Analytics API endpoint.
 * @return SupportedLanguagePairsResponse, or {@code null} when the endpoint is not a names
 *         endpoint or is the name-deduplication endpoint
 * @throws HttpRosetteAPIException for an error returned from the Analytics API.
 */
@Override
public SupportedLanguagePairsResponse getSupportedLanguagePairs(String endpoint) throws HttpRosetteAPIException {
    if (!NAMES_ENDPOINTS.contains(endpoint) || NAME_DEDUPLICATION_SERVICE_PATH.equals(endpoint)) {
        return null;
    }
    final String url = urlBase + endpoint + SUPPORTED_LANGUAGES_SUBPATH;
    return sendGetRequest(url, SupportedLanguagePairsResponse.class);
}
/**
 * Posts a request to an endpoint and deserializes the reply into the given response class.
 *
 * @param endpoint which endpoint.
 * @param request the data for the request.
 * @param responseClass the Java {@link Class} object for the response object.
 * @param <RequestType> the type of the request object.
 * @param <ResponseType> the type of the response object.
 * @return the response.
 * @throws HttpRosetteAPIException for an error returned from the Analytics API.
 * @throws RosetteRuntimeException for other errors, such as communications problems with HTTP.
 */
@Override
public <RequestType extends Request, ResponseType extends Response> ResponseType perform(String endpoint,
        RequestType request, Class<ResponseType> responseClass) throws HttpRosetteAPIException {
    try {
        return sendPostRequest(request, urlBase + endpoint, responseClass);
    } catch (IOException e) {
        // Surface transport problems as the unchecked runtime exception, keeping the cause.
        throw new RosetteRuntimeException(IO_EXCEPTION_MESSAGE, e);
    } catch (URISyntaxException e) {
        throw new RosetteRuntimeException("Invalid URI", e);
    }
}
/**
 * Posts a request to an endpoint and deserializes the reply as an
 * {@link com.basistech.rosette.dm.AnnotatedText}.
 *
 * @param endpoint which endpoint.
 * @param request the data for the request.
 * @param <RequestType> the type of the request object.
 * @return the response, {@link com.basistech.rosette.dm.AnnotatedText}.
 * @throws HttpRosetteAPIException for an error returned from the Analytics API.
 * @throws RosetteRuntimeException for other errors, such as communications problems with HTTP.
 */
@Override
public <RequestType extends Request> AnnotatedText perform(String endpoint, RequestType request)
        throws HttpRosetteAPIException {
    try {
        return sendPostRequest(request, urlBase + endpoint, AnnotatedText.class);
    } catch (IOException e) {
        // Surface transport problems as the unchecked runtime exception, keeping the cause.
        throw new RosetteRuntimeException(IO_EXCEPTION_MESSAGE, e);
    } catch (URISyntaxException e) {
        throw new RosetteRuntimeException("Invalid URI", e);
    }
}
/**
 * This method always throws UnsupportedOperationException.
 *
 * @param endpoint which endpoint (ignored).
 * @param request the data for the request (ignored).
 * @param responseClass the Java {@link Class} object for the response object (ignored).
 * @param <RequestType> the type of the request object.
 * @param <ResponseType> the type of the response object.
 * @return never returns normally.
 * @throws UnsupportedOperationException always.
 */
@Override
public <RequestType extends Request, ResponseType extends Response> Future<ResponseType>
    performAsync(String endpoint, RequestType request, Class<ResponseType> responseClass)
        throws HttpRosetteAPIException {
    throw new UnsupportedOperationException("Asynchronous operations are not yet supported");
}
/**
 * Sends a GET request to Analytics API.
 *
 * <p>Every header in {@code additionalHeaders} is added to the request. The reply is
 * converted by {@code getResponse}, and the HTTP response headers are then copied into the
 * result's extended information.
 *
 * @param urlStr Analytics API end point.
 * @param clazz Response class
 * @param <T> the response type; must be a {@link Response} so that extended information
 *            can be attached to it
 * @return Response
 * @throws HttpRosetteAPIException for an error reported while converting the reply
 * @throws RosetteRuntimeException if an {@link IOException} occurs while communicating
 */
private <T extends Response> T sendGetRequest(String urlStr, Class<T> clazz) throws HttpRosetteAPIException {
    HttpGet get = new HttpGet(urlStr);
    for (Header header : additionalHeaders) {
        get.addHeader(header);
    }
    // try-with-resources releases the connection back to the pool once the body is consumed.
    try (CloseableHttpResponse httpResponse = httpClient.execute(get)) {
        T resp = getResponse(httpResponse, clazz);
        responseHeadersToExtendedInformation(resp, httpResponse);
        return resp;
    } catch (IOException e) {
        throw new RosetteRuntimeException(IO_EXCEPTION_MESSAGE, e);
    }
}
/**
 * Sends a POST request to Analytics API.
 *
 * <p>The request body is built once. A {@link DocumentRequest} whose raw content is a
 * {@code String} is serialized with the content view; a {@link DocumentRequest} with any other
 * non-null raw content, and every {@link AdmRequest}, is sent as a multipart request.
 * Everything else is sent as a plain request. When the response class is
 * {@link AnnotatedText} (or a subtype) or {@link AdmResponse}, the query parameter
 * {@code output=rosette} is added.
 *
 * <p>The request is executed up to {@code failureRetries} times. An
 * {@link HttpRosetteAPIException} with a status below 500 is rethrown immediately; one with a
 * status of 500 or above triggers the next attempt and is rethrown after the last attempt.
 *
 * @param request the request object to serialize
 * @param urlStr Analytics API end point.
 * @param clazz Response class
 * @param <T> the response type
 * @return Response
 * @throws IOException if communicating with the service fails
 * @throws URISyntaxException if {@code urlStr} cannot be parsed as a URI
 */
private <T> T sendPostRequest(Object request, String urlStr, Class<T> clazz)
        throws IOException, URISyntaxException {
    ObjectWriter writer = mapper.writer().without(JsonGenerator.Feature.AUTO_CLOSE_TARGET);
    boolean notPlainText = false;
    if (request instanceof DocumentRequest) {
        Object rawContent = ((DocumentRequest) request).getRawContent();
        if (rawContent instanceof String) {
            writer = writer.withView(DocumentRequestMixin.Views.Content.class);
        } else if (rawContent != null) {
            notPlainText = true;
        }
    } else if (request instanceof AdmRequest) {
        notPlainText = true;
    }

    URIBuilder uriBuilder = new URIBuilder(urlStr);
    if (AnnotatedText.class.isAssignableFrom(clazz) || AdmResponse.class.equals(clazz)) {
        //TODO: change output=rosette to Accept: model/vnd.rosette.annotated-data-model header
        uriBuilder.addParameter("output", "rosette");
    }

    HttpPost post = new HttpPost(uriBuilder.build());
    for (Header header : additionalHeaders) {
        post.addHeader(header);
    }
    //TODO: add compression!
    if (notPlainText) {
        setupMultipartRequest((Request) request, writer, post);
    } else {
        setupPlainRequest(request, writer, post);
    }

    // TODO: Remove the X-RosetteAPI-* name in a future release
    final String[] requestIdHeaderNames = {
        "X-RosetteAPI-DocumentRequest-Id",
        "X-BabelStreetAPI-DocumentRequest-Id",
    };
    HttpRosetteAPIException lastException = null;
    for (int attempt = 0; attempt < this.failureRetries; attempt++) {
        try (CloseableHttpResponse response = httpClient.execute(post)) {
            T resp = getResponse(response, clazz);
            for (String headerName : requestIdHeaderNames) {
                Header idHeader = response.getFirstHeader(headerName);
                if (idHeader != null && idHeader.getValue() != null) {
                    LOG.debug("DocumentRequest ID {}", idHeader.getValue());
                }
            }
            if (resp instanceof Response) {
                responseHeadersToExtendedInformation((Response) resp, response);
            }
            return resp;
        } catch (HttpRosetteAPIException e) {
            // only 5xx errors are worthy retrying, others throw right away
            if (e.getHttpStatusCode() < 500) {
                throw e;
            }
            lastException = e;
        }
    }
    // failureRetries is always >= 1, so reaching this point means at least one attempt
    // failed with a 5xx error and lastException is set.
    throw lastException;
}
@SuppressWarnings("unchecked")
private void responseHeadersToExtendedInformation(T resp, HttpResponse response) {
for (Header header : response.getAllHeaders()) {
if (resp.getExtendedInformation() != null
&& resp.getExtendedInformation().containsKey(header.getName())) {
Set