All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ontotext.s4.service.S4ServiceClient Maven / Gradle / Ivy

/*
 * S4 Java client library
 * Copyright (c) 2014, Ontotext AD, All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3.0 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.
 */
package com.ontotext.s4.service;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Properties;

import org.apache.commons.io.IOUtils;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonNode;
import com.ontotext.s4.catalog.ServiceDescriptor;
import com.ontotext.s4.catalog.ServicesCatalog;
import com.ontotext.s4.client.HttpClient;
import com.ontotext.s4.client.HttpClientException;
import com.ontotext.s4.common.Parameters;

/**
 * Main entry point for the S4 online API to send individual
 * documents for annotation by a particular pipeline and receive the
 * results immediately.
 * <p>
 * Every request is sent as a {@code POST} through the wrapped
 * {@link HttpClient}. Failures reported by that client as
 * {@link HttpClientException} are rethrown as
 * {@link S4ServiceClientException}, using the {@code "message"} field of
 * the error response when one is present.
 */
public class S4ServiceClient {

	/** HTTP client bound to the endpoint and credentials given at construction. */
	private final HttpClient client;

	/**
	 * Construct a S4ServiceClient accessing a specific processing
	 * pipeline on the s4.ontotext.com platform using the given
	 * credentials.
	 * 
	 * @param item the {@link ServiceDescriptor} which
	 *          represents the processing pipeline which will be used
	 * @param apiKeyId API key ID for authentication
	 * @param apiPassword corresponding password
	 * @throws IllegalArgumentException if the descriptor's online URL is
	 *           not a well-formed URL
	 */
	public S4ServiceClient(ServiceDescriptor item, String apiKeyId, String apiPassword) {
		URL endpoint;
		try {
			endpoint = new URL(item.onlineUrl);
		} catch(MalformedURLException e) {
			throw new IllegalArgumentException(
					"Invalid ServiceDescriptor specified. No API endpoint specified.", e);
		}
		this.client = new HttpClient(endpoint, apiKeyId, apiPassword);
	}

	/**
	 * Construct a S4ServiceClient accessing a specific processing
	 * pipeline on the s4.ontotext.com platform using the given
	 * credentials.
	 * 
	 * @param endpoint the URL of the pipeline which will be used for
	 *          processing
	 * @param apiKeyId API key ID for authentication
	 * @param apiPassword corresponding password
	 */
	public S4ServiceClient(URL endpoint, String apiKeyId, String apiPassword) {
		this.client = new HttpClient(endpoint, apiKeyId, apiPassword);
	}

	/**
	 * Annotates a single document with the specified MIME type. Returns
	 * an object which allows for convenient access to the annotations in
	 * the annotated document. The request is sent with GZIP response
	 * compression allowed.
	 * 
	 * @param documentText the document content to annotate
	 * @param documentMimeType the MIME type of the document which will be
	 *          annotated
	 * @return an {@link AnnotatedDocument} containing the original
	 *         content as well as the annotations produced
	 * @throws S4ServiceClientException if the service request fails
	 */
	public AnnotatedDocument annotateDocument(String documentText,
			SupportedMimeType documentMimeType) throws S4ServiceClientException {
		ServiceRequest rq =
				new ServiceRequest(documentText, documentMimeType, null);
		return processRequest(rq, true);
	}

	/**
	 * Annotates the contents of a single file with the specified MIME
	 * type. Returns an object which allows for convenient access to the
	 * annotations in the annotated document.
	 * 
	 * @param documentContent the file whose contents will be annotated
	 * @param documentEncoding the encoding of the document file
	 * @param documentMimeType the MIME type of the document to be
	 *          annotated
	 * @return an {@link AnnotatedDocument} containing the original
	 *         content as well as the annotations produced
	 * @throws IOException if the file is not readable or reading it fails
	 * @throws S4ServiceClientException if the service request fails
	 */
	public AnnotatedDocument annotateFileContents(File documentContent,
			Charset documentEncoding, SupportedMimeType documentMimeType)
					throws IOException, S4ServiceClientException {

		String content = readFileContents(documentContent, documentEncoding);
		return annotateDocument(content, documentMimeType);
	}

	/**
	 * Annotates a single document publicly available under a given URL.
	 * Returns an object which allows for convenient access to the
	 * annotations in the annotated document. The request is sent with
	 * GZIP response compression allowed.
	 * 
	 * @param documentUrl the publicly accessible URL from where the
	 *          document will be downloaded
	 * @param documentMimeType the MIME type of the document which will be
	 *          annotated
	 * @return an {@link AnnotatedDocument} which allows for convenient
	 *         programmatic access to the annotated document
	 * @throws S4ServiceClientException if the service request fails
	 */
	public AnnotatedDocument annotateDocumentFromUrl(URL documentUrl,
			SupportedMimeType documentMimeType) throws S4ServiceClientException {
		ServiceRequest rq =
				new ServiceRequest(documentUrl, documentMimeType, null);
		return processRequest(rq, true);
	}

	/**
	 * Annotates a single document and returns an {@link InputStream} from
	 * which the contents of the serialized annotated document can be read.
	 * 
	 * @param documentText the contents of the document which will be
	 *          annotated
	 * @param documentMimeType the MIME type of the file which will be
	 *          annotated
	 * @param serializationFormat the format which will be used for
	 *          serialization of the annotated document
	 * @return an {@link InputStream} from which the serialization of the
	 *         annotated document can be read
	 * @throws S4ServiceClientException if the service request fails
	 */
	public InputStream annotateDocumentAsStream(String documentText,
			SupportedMimeType documentMimeType, ResponseFormat serializationFormat)
					throws S4ServiceClientException {

		ServiceRequest rq =
				new ServiceRequest(documentText, documentMimeType, null);
		return requestStream(rq, serializationFormat, false);
	}

	/**
	 * Annotates the contents of a single file returning an
	 * {@link InputStream} from which the annotated content can be read.
	 * 
	 * @param documentContent the file which will be annotated
	 * @param documentEncoding the encoding of the file which will be
	 *          annotated
	 * @param documentMimeType the MIME type of the file which will be
	 *          annotated
	 * @param serializationFormat the serialization format used for the
	 *          annotated content
	 * @return an {@link InputStream} from which the serialization of the
	 *         annotated document can be read
	 * @throws IOException if there are problems reading the contents of
	 *           the file
	 * @throws S4ServiceClientException if the service request fails
	 */
	public InputStream annotateFileContentsAsStream(File documentContent,
			Charset documentEncoding, SupportedMimeType documentMimeType,
			ResponseFormat serializationFormat) throws IOException,
			S4ServiceClientException {

		String content = readFileContents(documentContent, documentEncoding);
		return annotateDocumentAsStream(content, documentMimeType,
				serializationFormat);
	}

	/**
	 * Annotates a single document publicly available under a given URL.
	 * Returns the annotated document serialized into the specified format.
	 * 
	 * @param documentUrl the publicly accessible URL from where the
	 *          document will be downloaded
	 * @param documentMimeType the MIME type of the document which will be
	 *          annotated
	 * @param serializationFormat the serialization format of the output
	 * @return an {@link InputStream} from where the serialized output can
	 *         be read
	 * @throws S4ServiceClientException if the service request fails
	 */
	public InputStream annotateDocumentFromUrlAsStream(URL documentUrl,
			SupportedMimeType documentMimeType, ResponseFormat serializationFormat)
					throws S4ServiceClientException {

		ServiceRequest rq =
				new ServiceRequest(documentUrl, documentMimeType, null);
		return requestStream(rq, serializationFormat, false);
	}

	/**
	 * This low level method allows the user to explicitly specify all the
	 * parameters sent to the service. This is done by constructing the
	 * appropriate ServiceRequest object. Returns the contents of
	 * the annotated document.
	 * 
	 * @param rq the request which will be sent
	 * @param serializationFormat the format in which to output the
	 *          annotated document
	 * @param requestCompression whether to allow GZIP compression for
	 *          large documents
	 * @return an {@link InputStream} for the serialization of the annotated
	 *         document in the specified format
	 * @throws S4ServiceClientException if the service request fails
	 */
	public InputStream processRequestForStream(ServiceRequest rq,
			ResponseFormat serializationFormat, boolean requestCompression)
					throws S4ServiceClientException {

		return requestStream(rq, serializationFormat, requestCompression);
	}

	/**
	 * This low level method allows the user to specify every parameter
	 * explicitly by setting the properties of the ServiceRequest
	 * object. Returns an object which wraps the annotated document.
	 * 
	 * @param rq the request which will be sent to the service
	 * @param requestCompression whether to allow GZIP compression for
	 *          large documents
	 * @return an {@link AnnotatedDocument} containing the original
	 *         content as well as the annotations produced
	 * @throws S4ServiceClientException if the service request fails
	 */
	public AnnotatedDocument processRequest(ServiceRequest rq,
			boolean requestCompression) throws S4ServiceClientException {
		try {
			// The explicit type argument is required: Jackson rejects a
			// TypeReference that was created without actual type information.
			if(requestCompression) {
				return client.request("", "POST",
						new TypeReference<AnnotatedDocument>() {
				}, rq, "Accept", ResponseFormat.JSON.acceptHeader,
				"Accept-Encoding", "gzip");
			} else {
				return client.request("", "POST",
						new TypeReference<AnnotatedDocument>() {
				}, rq, "Accept", ResponseFormat.JSON.acceptHeader);
			}
		} catch(HttpClientException e) {
			throw toServiceException(e);
		}
	}

	/**
	 * Sends the request and returns the raw response stream. Shared by all
	 * the public stream-returning methods.
	 * 
	 * @param rq the request which will be sent
	 * @param serializationFormat the format whose accept header is sent
	 * @param requestCompression whether to add the
	 *          {@code Accept-Encoding: gzip} header
	 * @return the response stream handed back by the HTTP client
	 * @throws S4ServiceClientException if the HTTP client reports a failure
	 */
	private InputStream requestStream(ServiceRequest rq,
			ResponseFormat serializationFormat, boolean requestCompression)
					throws S4ServiceClientException {
		try {
			if(requestCompression) {
				return client.requestForStream("", "POST", rq, "Accept",
						serializationFormat.acceptHeader, "Accept-Encoding", "gzip");
			} else {
				return client.requestForStream("", "POST", rq, "Accept",
						serializationFormat.acceptHeader);
			}
		} catch(HttpClientException e) {
			throw toServiceException(e);
		}
	}

	/**
	 * Translates a low level {@link HttpClientException} into the exception
	 * type exposed by this class. The text of the {@code "message"} field of
	 * the error response is preferred; when there is no response or no such
	 * field, the message of the original exception is used instead. The
	 * original exception is always kept as the cause.
	 * 
	 * @param e the exception raised by the HTTP client
	 * @return the exception to be thrown to the caller
	 */
	private static S4ServiceClientException toServiceException(HttpClientException e) {
		JsonNode response = e.getResponse();
		JsonNode msg = (response == null) ? null : response.get("message");
		return new S4ServiceClientException(msg == null ? e.getMessage() : msg.asText(), e);
	}

	/**
	 * Reads a whole file into memory and decodes it with the given charset.
	 * 
	 * @param documentContent the file to read
	 * @param documentEncoding the charset used to decode the file's bytes
	 * @return the decoded contents of the file
	 * @throws IOException if the file is not readable or reading it fails
	 */
	private static String readFileContents(File documentContent,
			Charset documentEncoding) throws IOException {
		Path documentPath = documentContent.toPath();
		if(!Files.isReadable(documentPath)) {
			throw new IOException("File " + documentPath.toString()
					+ " is not readable.");
		}
		ByteBuffer buff = ByteBuffer.wrap(Files.readAllBytes(documentPath));
		return documentEncoding.decode(buff).toString();
	}

	/**
	 * Command line entry point. Annotates either a local file (decoded as
	 * UTF-8) or a remote URL with the requested service and writes the
	 * JSON response to the output file. When both a file and a URL are
	 * given, the file is used. Any invalid or missing argument prints the
	 * usage text and terminates the JVM with exit code 1.
	 * 
	 * @param args the command line parameters; see the usage text printed
	 *          by this program for the recognised names
	 */
	public static void main(String... args) {
		if (args == null 
				|| args.length == 0) {
			printUsageAndTerminate(null);
		}
		Parameters params = new Parameters(args);
		String serviceID = params.getValue("service");
		if (serviceID == null) {
			printUsageAndTerminate("No service name provided");

		}
		ServiceDescriptor service = null;
		try {
			service = ServicesCatalog.getItem(serviceID);
		}
		catch(UnsupportedOperationException uoe) {
			printUsageAndTerminate("Unsupported service '" + serviceID + '\'');
		}
		SupportedMimeType mimetype = SupportedMimeType.PLAINTEXT;
		if (params.getValue("dtype") != null) {
			try {
				// NOTE(review): valueOf presumably matches the constant name of
				// SupportedMimeType, while the usage text lists MIME strings -
				// confirm which form callers are expected to pass.
				mimetype = SupportedMimeType.valueOf(params.getValue("dtype"));
			}
			catch(IllegalArgumentException iae) {
				printUsageAndTerminate("Unsupported document type (dtype) : " + params.getValue("dtype"));
			}
		}
		String inFile = params.getValue("file");
		String url = params.getValue("url");
		String outFile = params.getValue("out", "result.txt");

		if (inFile != null) {
			if (false == new File(inFile).exists()) {
				printUsageAndTerminate("Input file is not found : " + inFile);
			}
		}
		else {
			if (url == null) {
				printUsageAndTerminate("Neither input file, nor remote URL provided");
			}
		}

		Properties creds = readCredentials(params);
		if (false == creds.containsKey("apikey") 
				|| false == creds.containsKey("secret")) {
			printUsageAndTerminate("No credentials details found");
		}

		S4ServiceClient client = new S4ServiceClient(service, creds.getProperty("apikey"), creds.getProperty("secret"));

		// try-with-resources closes both streams even when the copy fails.
		// The output file is only opened after the service has responded.
		try (InputStream resultData = (inFile != null) ?
					client.annotateFileContentsAsStream(new File(inFile), Charset.forName("UTF-8"), mimetype, ResponseFormat.JSON)
					: client.annotateDocumentFromUrlAsStream(new URL(url), mimetype, ResponseFormat.JSON);
				FileOutputStream outStream = new FileOutputStream(outFile)) {
			IOUtils.copy(resultData, outStream);
		}
		catch(IOException ioe) {
			System.out.println(ioe.getMessage());
			System.exit(1);
		}

	}

	/**
	 * Prints an optional error message followed by the usage text to
	 * standard output and terminates the JVM with exit code 1. This method
	 * never returns normally.
	 * 
	 * @param error the error to print before the usage text, or
	 *          {@code null} to print the usage text only
	 */
	private static void printUsageAndTerminate(String error) {
		if (error != null) {
			System.out.println(error);
		}
		System.out.println("Usage: S4ClientService parameter1=value1 parameter2=value2 ...");
		System.out.println("Parameters:");
		System.out.println("  service - the service id to be used (one of:'TwitIE', 'SBT' and 'news')");
		System.out.println("  file    - input file path");
		System.out.println("  url     - input document URL");
		System.out.println("  dtype   - the MIME type of the document (one of:'text/plain', 'text/html', 'application/xml', 'text/xml', 'text/x-pubmed', 'text/x-pubmed', 'text/x-cochrane', 'text/x-mediawiki', 'text/x-json-twitter')");
		System.out.println("  out     - result file name. Defaults to 'result.txt'");
		System.out.println("  apikey  - the api key if credentials file is not used");
		System.out.println("  secret  - the api secret if credentials file is not used");
		System.out.println("  creds   - credentails file path (if apikey and secret parameters are not used)");
		System.exit(1);
	}

	/**
	 * Collects the API credentials for the command line client.
	 * <p>
	 * If the {@code apikey} parameter is present, it is used together with
	 * the mandatory {@code secret} parameter. Otherwise the credentials are
	 * loaded from a properties file named by the {@code creds} parameter
	 * (default {@code s4credentials.properties}). That file is looked up on
	 * the file system first and, if it does not exist there, as a resource
	 * of the context class loader.
	 * 
	 * @param params the parsed command line parameters
	 * @return the loaded properties; empty when no credentials source was
	 *         found
	 */
	private static Properties readCredentials(Parameters params) {
		Properties props = new Properties();

		if (params.getValue("apikey") != null) {
			if (params.getValue("secret") == null) {
				printUsageAndTerminate("API key secret not provided");
			}
			props.setProperty("apikey", params.getValue("apikey"));
			props.setProperty("secret", params.getValue("secret"));
			return props;
		}
		String credsFile = "s4credentials.properties";
		if (params.getValue("creds") != null) {
			credsFile = params.getValue("creds");
		}
		if (new File(credsFile).exists()) {
			// Close the file stream once the properties are loaded.
			try (InputStream fileStr = new FileInputStream(credsFile)) {
				props.load(fileStr);
			}
			catch(IOException ex) {
				printUsageAndTerminate("Error reading credentials file: " + ex.getMessage());
			}
		}
		else {
			// A null resource is permitted by try-with-resources; it is
			// simply not closed.
			try (InputStream inStr = Thread.currentThread().getContextClassLoader()
					.getResourceAsStream(credsFile)) {
				if (inStr != null) {
					props.load(inStr);
				}
			}
			catch(IOException ioe) {
				printUsageAndTerminate("Error reading credentials file: " + ioe.getMessage());
			}
		}
		return props;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy