// JAR search and dependency download from the Maven repository
// org.bitbucket.kienerj.cir.CIRService (Maven / Gradle / Ivy)
// Artifact: cir-java
/*
* Copyright (C) 2013 Joos Kiener
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.bitbucket.kienerj.cir;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Locale;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.bitbucket.kienerj.cir.responsehandler.DoubleResponseHandler;
import org.bitbucket.kienerj.cir.responsehandler.ListResponseHandler;
import org.slf4j.ext.XLogger;
import org.slf4j.ext.XLoggerFactory;
/**
 * General low-level class for getting data from the Chemical Identifier
 * Resolver (CIR).
 *
 * <p>Note that CIR is mainly a large database and hence certain conversions
 * only work for data that is in the database. This includes, for example, the
 * conversion from SMILES to IUPAC name, i.e. there is no chemical intelligence
 * for generating an IUPAC name.</p>
 *
 * <p>The class has some convenience methods for the most common conversion
 * calls. Other calls must use one of the {@code getData()} methods and supply
 * the desired {@code ResponseHandler}. Example:</p>
 *
 * <pre>
 * String identifier = "c1ccccc1";
 * List&lt;String&gt; chemspiderIds = cirService.getData(new ListResponseHandler(),
 *         identifier, CIRRepresentation.CHEMSPIDER_ID);
 * </pre>
 *
 * <p>Every failure while building or executing a request is reported as a
 * {@link CIRException} wrapping the original cause. This includes the
 * {@code HttpResponseException} that response handlers such as
 * {@code BasicResponseHandler} raise for HTTP error codes (4xx, 5xx); a
 * 404 Not Found therefore results in an exception and it is up to the caller
 * to deal with it.</p>
 *
 * @author Joos Kiener
 */
public class CIRService {

    private static final XLogger logger = XLoggerFactory.getXLogger("CIRService");
    private static final int MAX_CONNECTIONS = 100;
    private static final String CIR_URL = "http://cactus.nci.nih.gov/chemical/structure/";
    // Parsed once so that every request URI can be assembled from components
    // instead of re-parsing a concatenated string.
    private static final URI CIR_BASE_URI = URI.create(CIR_URL);
    private static final String PROXY_EMPTY_STRING_MESSAGE = "proxy can not be an empty String.";
    private static final String SCHEME_EMPTY_STRING_MESSAGE = "scheme can not be an empty String.";
    private static final String USER_EMPTY_STRING_MESSAGE = "username can not be an empty String.";
    private final DefaultHttpClient httpClient;

    /**
     * Creates a service that connects to CIR directly, without a proxy.
     */
    public CIRService() {
        this.httpClient = createPooledClient();
    }

    /**
     * Creates a service that sends all requests through the given proxy.
     *
     * @param proxy host name of the proxy server, neither null nor empty
     * @param port port of the proxy server
     * @param scheme scheme used to talk to the proxy (for example "http"),
     * neither null nor empty
     * @throws NullPointerException if proxy or scheme is null
     * @throws IllegalArgumentException if proxy or scheme is empty
     */
    public CIRService(String proxy, int port, String scheme) {
        Preconditions.checkNotNull(proxy);
        Preconditions.checkNotNull(scheme);
        Preconditions.checkArgument(!proxy.isEmpty(), PROXY_EMPTY_STRING_MESSAGE);
        Preconditions.checkArgument(!scheme.isEmpty(), SCHEME_EMPTY_STRING_MESSAGE);

        // The pooled connection manager must actually be handed to the client.
        this.httpClient = createPooledClient();
        routeThroughProxy(httpClient, proxy, port, scheme);
    }

    /**
     * Creates a service that sends all requests through the given proxy and
     * authenticates against that proxy with the supplied credentials.
     *
     * @param proxy host name of the proxy server, neither null nor empty
     * @param port port of the proxy server
     * @param scheme scheme used to talk to the proxy (for example "http"),
     * neither null nor empty
     * @param username user name for the proxy, neither null nor empty
     * @param password password for the proxy, not null
     * @throws NullPointerException if any String argument is null
     * @throws IllegalArgumentException if proxy, scheme or username is empty
     */
    public CIRService(String proxy, int port, String scheme, String username,
            String password) {
        Preconditions.checkNotNull(proxy);
        Preconditions.checkNotNull(scheme);
        Preconditions.checkNotNull(username);
        Preconditions.checkNotNull(password);
        Preconditions.checkArgument(!proxy.isEmpty(), PROXY_EMPTY_STRING_MESSAGE);
        Preconditions.checkArgument(!scheme.isEmpty(), SCHEME_EMPTY_STRING_MESSAGE);
        // A non-empty user name is required (the check must not be inverted).
        Preconditions.checkArgument(!username.isEmpty(), USER_EMPTY_STRING_MESSAGE);

        this.httpClient = createPooledClient();
        // Credentials are scoped to the proxy host and port only.
        httpClient.getCredentialsProvider().setCredentials(
                new AuthScope(proxy, port),
                new UsernamePasswordCredentials(username, password));
        routeThroughProxy(httpClient, proxy, port, scheme);
    }

    /**
     * Creates an HTTP client backed by a pooling connection manager that
     * allows at most {@code MAX_CONNECTIONS} connections in total.
     */
    private static DefaultHttpClient createPooledClient() {
        PoolingClientConnectionManager connectionManager = new PoolingClientConnectionManager();
        connectionManager.setMaxTotal(MAX_CONNECTIONS);
        // NOTE(review): only the total limit is raised; the per-route limit is
        // left at the connection manager's default.
        return new DefaultHttpClient(connectionManager);
    }

    /**
     * Configures the given client to route every request through the proxy.
     */
    private static void routeThroughProxy(DefaultHttpClient client, String proxy,
            int port, String scheme) {
        HttpHost proxyServer = new HttpHost(proxy, port, scheme);
        client.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxyServer);
    }

    /**
     * Shuts down the connection manager of the underlying HTTP client and
     * thereby releases all pooled connections. The service must not be used
     * after this method has been called.
     */
    public void shutdown() {
        httpClient.getConnectionManager().shutdown();
    }

    /**
     * Tests whether data can be retrieved from CIR by requesting the IUPAC
     * name for the SMILES {@code c1ccccc1} (benzene). This performs a real
     * HTTP request.
     *
     * @return true if the request succeeded, false if it failed with a
     * {@link CIRException}
     */
    public boolean testConnection() {
        try {
            getIupacName("c1ccccc1");
            return true;
        } catch (CIRException ex) {
            return false;
        }
    }

    /**
     * Gets the IUPAC name for the given identifier. Identifier can be a
     * smiles, sdf or trivial name.
     *
     * @param identifier the identifier for which to get the data
     * @return the IUPAC name of the given identifier
     * @throws CIRException if the request fails
     */
    public String getIupacName(String identifier) {
        logger.entry(identifier);
        String iupacName = getData(identifier, CIRRepresentation.IUPAC_NAME);
        logger.exit(iupacName);
        return iupacName;
    }

    /**
     * Gets the SMILES for the given identifier. Identifier can be a smiles,
     * name or registration number.
     *
     * @param identifier the identifier for which to get the data
     * @return the smiles of the given identifier
     * @throws CIRException if the request fails
     */
    public String getSmiles(String identifier) {
        logger.entry(identifier);
        String smiles = getData(identifier, CIRRepresentation.SMILES);
        logger.exit(smiles);
        return smiles;
    }

    /**
     * Gets the standard InChI for the given identifier. Identifier can be a
     * smiles, a name or registration number.
     *
     * @param identifier the identifier for which to get the data
     * @return the standard InChI of the given identifier
     * @throws CIRException if the request fails
     */
    public String getStandardInchi(String identifier) {
        logger.entry(identifier);
        String inchi = getData(identifier, CIRRepresentation.STDINCHI);
        logger.exit(inchi);
        return inchi;
    }

    /**
     * Gets the standard InChIKey for the given identifier. Identifier can be
     * a smiles, a name or registration number.
     *
     * @param identifier the identifier for which to get the data
     * @return the standard InChIKey of the given identifier
     * @throws CIRException if the request fails
     */
    public String getStandardInchikey(String identifier) {
        logger.entry(identifier);
        String inchikey = getData(identifier, CIRRepresentation.STDINCHIKEY);
        logger.exit(inchikey);
        return inchikey;
    }

    /**
     * Gets the sd-file for the given identifier. Identifier can be a smiles,
     * a name or registration number.
     *
     * @param identifier the identifier for which to get the data
     * @return the sd-file of the given identifier
     * @throws CIRException if the request fails
     */
    public String getSdf(String identifier) {
        logger.entry(identifier);
        String sdf = getData(identifier, CIRRepresentation.SDF);
        logger.exit(sdf);
        return sdf;
    }

    /**
     * Gets a list of names for the given identifier. The list contains
     * trivial names, systematic names, registry numbers and original
     * structure provider IDs.
     *
     * @param identifier the identifier for which to get the data
     * @return the list of names for the given identifier
     * @throws CIRException if the request fails
     */
    public List<String> getNames(String identifier) {
        logger.entry(identifier);
        List<String> names = getData(new ListResponseHandler(), identifier,
                CIRRepresentation.NAMES);
        logger.exit(names);
        return names;
    }

    /**
     * Gets a list of CAS numbers for the given identifier. The list contains
     * all found CAS numbers or is empty in case none are found.
     *
     * @param identifier the identifier for which to get the data
     * @return the list of CAS numbers for the given identifier
     * @throws CIRException if the request fails
     */
    public List<String> getCasNumbers(String identifier) {
        logger.entry(identifier);
        List<String> casNumbers = getData(new ListResponseHandler(), identifier,
                CIRRepresentation.CAS);
        logger.exit(casNumbers);
        return casNumbers;
    }

    /**
     * Gets the molecular weight for the given identifier. Identifier can be a
     * smiles, a name or registration number.
     *
     * <p>The spelling of the method name is kept as is for backward
     * compatibility.</p>
     *
     * @param identifier the identifier for which to get the data
     * @return the molecular weight of the given identifier
     * @throws CIRException if the request fails
     */
    public double getMolecularWeigth(String identifier) {
        logger.entry(identifier);
        double mw = getData(new DoubleResponseHandler(), identifier, CIRRepresentation.MW);
        logger.exit(mw);
        return mw;
    }

    /**
     * Gets data from CIR as String.
     *
     * <p>Use this method for any representation that returns text/plain and
     * whose response is one single value, like smiles or sd-file.</p>
     *
     * <p>Other representations, like names, can return multiple values where
     * each line in the String is one value, or binary data for images. For
     * these use
     * {@link #getData(ResponseHandler, String, CIRRepresentation) getData}
     * with a suitable handler such as {@code ListResponseHandler}.</p>
     *
     * <p>The {@code identifier} is the current representation, like smiles or
     * sdf, and {@code representation} is the requested output format.</p>
     *
     * @param identifier the object to get the representation for
     * @param representation how to represent the requested identifier
     * @return the requested representation as one String
     * @throws CIRException if the request fails
     */
    public String getData(String identifier,
            CIRRepresentation representation) {
        return getData(new BasicResponseHandler(), identifier, representation, null);
    }

    /**
     * Gets data from CIR and returns it in the format defined by
     * {@code responseHandler}. No query string is sent.
     *
     * @param <T> the type produced by the response handler
     * @param responseHandler converts the HTTP response into the result
     * @param identifier the identifier to get the data for
     * @param representation the type of data to get
     * @return the requested data as produced by {@code responseHandler}
     * @throws CIRException if the request fails
     */
    public <T> T getData(ResponseHandler<? extends T> responseHandler, String identifier,
            CIRRepresentation representation) {
        return this.getData(responseHandler, identifier, representation, null);
    }

    /**
     * Gets data from CIR and returns it in the format defined by
     * {@code responseHandler}.
     *
     * <p>For {@code CIRRepresentation.NAMES} use {@code ListResponseHandler},
     * which returns a List where each element is a name; for
     * {@code CIRRepresentation.IMAGE} use {@code ImageResponseHandler} or
     * {@code OutputStreamResponseHandler}. If you want a different return
     * type, create your own implementation.</p>
     *
     * <p>The {@code identifier} is the current representation, like smiles or
     * sdf, and {@code representation} is the requested output format.
     * {@code queryString} is used as the URI query string and can contain
     * additional options; see the documentation of CIR. Example:
     * {@code http://cactus.nci.nih.gov/chemical/structure/aspirin/image?format=png&width=500&height=500}
     * gets a 500x500 image as png (default is gif).</p>
     *
     * @param <T> the type produced by the response handler
     * @param responseHandler converts the HTTP response into the result
     * @param identifier the identifier to get the data for
     * @param representation the type of data to get
     * @param queryString additional options appended as URI query string, or
     * null for none
     * @return the requested data as produced by {@code responseHandler}
     * @throws NullPointerException if responseHandler, identifier or
     * representation is null
     * @throws CIRException if the URI cannot be built or the request fails
     * with an IOException (which includes HTTP error responses reported by
     * the handler)
     */
    public <T> T getData(ResponseHandler<? extends T> responseHandler, String identifier,
            CIRRepresentation representation, String queryString) {
        logger.entry(responseHandler, identifier, representation, queryString);
        Preconditions.checkNotNull(responseHandler);
        Preconditions.checkNotNull(identifier);
        Preconditions.checkNotNull(representation);
        try {
            logger.debug("Building URI");
            URI uri = buildUri(identifier, representation, queryString);
            HttpGet httpGet = new HttpGet(uri);
            logger.debug("Executing HTTP Request");
            T response = httpClient.execute(httpGet, responseHandler);
            logger.debug("Returning Response");
            logger.exit(response);
            return response;
        } catch (URISyntaxException | IOException ex) {
            throw new CIRException(ex);
        }
    }

    /**
     * Builds the request URI
     * {@code <CIR_URL><identifier>/<representation>[?<queryString>]}.
     *
     * <p>The URI is assembled from its components with the multi-argument
     * {@code URI} constructor, which quotes illegal characters per component.
     * The identifier is deliberately not concatenated into a URL string that
     * is parsed afterwards: characters such as {@code '#'} (the SMILES triple
     * bond, e.g. {@code C#N}) or {@code '?'} would then be interpreted as
     * fragment or query delimiters and cut the request path short. Note that
     * URLEncoder is not suitable here as it encodes form data, not URLs.</p>
     *
     * @param identifier the identifier placed into the path
     * @param representation the requested output format; its lower-cased name
     * forms the last path segment
     * @param queryString the query string or null for none
     * @return the URI to request
     * @throws URISyntaxException if the components do not form a valid URI
     */
    private URI buildUri(String identifier, CIRRepresentation representation, String queryString)
            throws URISyntaxException {
        logger.entry(identifier, representation, queryString);
        // Locale.ROOT keeps the path segment stable regardless of the JVM's
        // default locale (e.g. Turkish dotless i).
        String representationSegment = representation.toString().toLowerCase(Locale.ROOT);
        String path = CIR_BASE_URI.getPath() + identifier + "/" + representationSegment;
        URI uri = new URI(CIR_BASE_URI.getScheme(), CIR_BASE_URI.getUserInfo(),
                CIR_BASE_URI.getHost(), CIR_BASE_URI.getPort(), path, queryString, null);
        logger.exit(uri);
        return uri;
    }
}