All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.bitbucket.kienerj.cir.CIRService Maven / Gradle / Ivy

The newest version!
/*
 *  Copyright (C) 2013 Joos Kiener 
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.

 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.bitbucket.kienerj.cir;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Locale;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.bitbucket.kienerj.cir.responsehandler.DoubleResponseHandler;
import org.bitbucket.kienerj.cir.responsehandler.ListResponseHandler;
import org.slf4j.ext.XLogger;
import org.slf4j.ext.XLoggerFactory;

/**
 * 

General low level Class for getting data from Chemical Identifier Resolver * (CIR).

* *

Note that CIR is mainly a large database and hence certain conversions * only work for data that is in the database. This includes the conversion from * as example smiles to iupac name, eg. there is no chemical intelligence for * getting iupac name.

* *

The Class has some convenience methods for the most common conversion * calls. Other calls must use the getData() method and supply the desired * ResponseHandler. Example: *

 *
 * 
 * String identifier = "c1ccccc1";
 * List chemspiderIds = cirService.getData(new ListResponseHandler(),
 *      identifier, CIRRepresentation.CHEMSPIDER_ID);
 * 
 *
 * 

* *

All methods throw an HttpResponseException in case of http error code * (4xx, 5xx) and hence if a request returns a 404 Not Found, an exception is * thrown. It is hence up to the caller to deal with such issues.

* * @author Joos Kiener */ public class CIRService { private static final XLogger logger = XLoggerFactory.getXLogger("CIRService"); private static final int MAX_CONNECTIONS = 100; private static final String CIR_URL = "http://cactus.nci.nih.gov/chemical/structure/"; private static final String PROXY_EMPTY_STRING_MESSAGE = "proxy can not be an empty String."; private static final String SCHEME_EMPTY_STRING_MESSAGE = "scheme can not be an empty String."; private static final String USER_EMPTY_STRING_MESSAGE = "username can not be an empty String."; private final DefaultHttpClient httpClient; public CIRService() { PoolingClientConnectionManager cm = new PoolingClientConnectionManager(); cm.setMaxTotal(MAX_CONNECTIONS); this.httpClient = new DefaultHttpClient(cm); } public CIRService(String proxy, int port, String scheme) { Preconditions.checkNotNull(proxy); Preconditions.checkNotNull(scheme); Preconditions.checkArgument(!proxy.isEmpty(), PROXY_EMPTY_STRING_MESSAGE); Preconditions.checkArgument(!scheme.isEmpty(), SCHEME_EMPTY_STRING_MESSAGE); PoolingClientConnectionManager cm = new PoolingClientConnectionManager(); cm.setMaxTotal(MAX_CONNECTIONS); this.httpClient = new DefaultHttpClient(); HttpHost proxyServer = new HttpHost(proxy, port, scheme); httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxyServer); } public CIRService(String proxy, int port, String scheme, String username, String password) { Preconditions.checkNotNull(proxy); Preconditions.checkNotNull(scheme); Preconditions.checkNotNull(username); Preconditions.checkNotNull(password); Preconditions.checkArgument(!proxy.isEmpty(), PROXY_EMPTY_STRING_MESSAGE); Preconditions.checkArgument(!scheme.isEmpty(), SCHEME_EMPTY_STRING_MESSAGE); Preconditions.checkArgument(username.isEmpty(), USER_EMPTY_STRING_MESSAGE); PoolingClientConnectionManager cm = new PoolingClientConnectionManager(); cm.setMaxTotal(MAX_CONNECTIONS); this.httpClient = new DefaultHttpClient(cm); //sets credentials for proxy if 
(username != null && password != null) { httpClient.getCredentialsProvider().setCredentials( new AuthScope(proxy, port), new UsernamePasswordCredentials(username, password)); } if (proxy != null && scheme != null) { HttpHost proxyServer = new HttpHost(proxy, port, scheme); httpClient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY, proxyServer); } } public boolean testConnection() { try { getIupacName("c1ccccc1"); return true; } catch (CIRException ex) { return false; } } /** *

Gets the IUPAC Name for the given identifier.

Identifier can * be a smiles, sdf or trivial name.

* * @param identifier the identifier for which to get the data * @return the IUPAC name of the given identifier */ public String getIupacName(String identifier) { logger.entry(identifier); String iuapcName = getData(identifier, CIRRepresentation.IUPAC_NAME); logger.exit(iuapcName); return iuapcName; } /** *

Gets the SMILES for the given identifier.

Identifier can be * an smiles, name or registration number

* * @param identifier the identifier for which to get the data * @return the smiles of the given identifier */ public String getSmiles(String identifier) { logger.entry(identifier); String smiles = getData(identifier, CIRRepresentation.SMILES); logger.exit(smiles); return smiles; } /** *

Gets the standard InChi for the given identifier.

Identifier * can be a smiles, a name or registration number

* * @param identifier the identifier for which to get the data * @return the standard InChi of the given identifier */ public String getStandardInchi(String identifier) { logger.entry(identifier); String inchi = getData(identifier, CIRRepresentation.STDINCHI); logger.exit(inchi); return inchi; } /** *

Gets the standard InChi for the given identifier.

Identifier * can be a smiles, a name or registration number

* * @param identifier the identifier for which to get the data * @return the standard InChikey of the given identifier */ public String getStandardInchikey(String identifier) { logger.entry(identifier); String inchikey = getData(identifier, CIRRepresentation.STDINCHIKEY); logger.exit(inchikey); return inchikey; } /** *

Gets the standard InChi for the given identifier.

Identifier * can be a smiles, a name or registration number

* * @param identifier the identifier for which to get the data * @return the sd-file of the given identifier */ public String getSdf(String identifier) { logger.entry(identifier); String sdf = getData(identifier, CIRRepresentation.SDF); logger.exit(sdf); return sdf; } /** *

Gets a List of casNumbers for the given identifier. The List contains * trivial casNumbers, systematic casNumbers, registry numbers and original * structure provider IDs.

* * * @param identifier the identifier for which to get the data * @return the list of names for the given identifier */ public List getNames(String identifier) { logger.entry(identifier); List names = getData(new ListResponseHandler(), identifier, CIRRepresentation.NAMES); logger.exit(names); return names; } /** *

Gets a list of casNumbers for the given identifier. The list contains * all found cas numbers or is empty in case none are found.

* * @param identifier the identifier for which to get the data * @return the list of cas numbers for the given identifier */ public List getCasNumbers(String identifier) { logger.entry(identifier); List casNumbers = getData(new ListResponseHandler(), identifier, CIRRepresentation.CAS); logger.exit(casNumbers); return casNumbers; } /** *

Gets the molecular weight for the given identifier.

* Identifier can be a smiles, a name or registration number

* * @param identifier the identifier for which to get the data * @return the molecular weight of the given identifier */ public double getMolecularWeigth(String identifier) { logger.entry(identifier); double mw = getData(new DoubleResponseHandler(), identifier, CIRRepresentation.MW); logger.exit(mw); return mw; } /** *

Get data from CIR as String.

* *

Use this method for any representations that return text/plain and the * response is one single value like for smiles or sd-file.

* *

Other representations like casNumbers can return multiple values * where each line in the String is one value or binary data for images. For * these use method * {@link #getData(ResponseHandler, String, CIRRepresentation) getData} with * ListResponseHandler.

* *

The * identifier is the current representation like smiles or sdf * and * representation is the request output format.

* * @param identifier the object to get the representation for * @param representation how to represent the requested identifier * @return the requested representation as one String */ public String getData(String identifier, CIRRepresentation representation) { return getData(new BasicResponseHandler(), identifier, representation, null); } /** *

Get data from CIR and returns it in a format as defined by * responseHandler.

* * @param identifier the identifier to get the data for * @param representation the the of data to get * @return requested data as InputStream */ public T getData(ResponseHandler responseHandler, String identifier, CIRRepresentation representation) { return this.getData(responseHandler, identifier, representation, null); } /** *

Get data from CIR and returns it in a format as defined by * responseHandler.

* *

For * CIRRepresentation.NAMES use * ListResponseHandler which returns a List where each * element is a name, for * CIRRepresentation.IMAGE use * ImageResponseHandler or * OutputStreamResponseHandler. If you want a different return * type create your own implementation.

* *

The * identifier is the current representation like smiles or sdf * and * representation is the request output format. * queryString will be used as uri query string and can contain * additional options. See documentation of CIR. Example:
* http://cactus.nci.nih.gov/chemical/structure/aspirin/image?format=png&width=500&height=500"
* which gets an 500x500 image as png (default is gif).

* * @param identifier the identifier to get the data for * @param representation the the of data to get * @return requested data as InputStream */ public T getData(ResponseHandler responseHandler, String identifier, CIRRepresentation representation, String queryString) { logger.entry(responseHandler, identifier, representation, queryString); Preconditions.checkNotNull(responseHandler); Preconditions.checkNotNull(identifier); Preconditions.checkNotNull(representation); try { logger.debug("Building URI"); URI uri = buildUri(identifier, representation, queryString); HttpGet httpGet = new HttpGet(uri); logger.debug("Executing HTTP Request"); T response = httpClient.execute(httpGet, responseHandler); logger.debug("Returning Response"); logger.exit(response); return response; } catch (URISyntaxException | IOException ex) { throw new CIRException(ex); } } private URI buildUri(String identifier, CIRRepresentation representation, String queryString) throws URISyntaxException, MalformedURLException, UnsupportedEncodingException { // see http://stackoverflow.com/questions/724043/http-url-address-encoding-in-java // properly url encode any invalid chars in smiles // Note URLEncoder encodes a query string and not a URL!!! logger.entry(identifier, representation, queryString); URL url; if (queryString == null) { url = new URL(CIR_URL + identifier + "/" + representation.toString().toLowerCase()); } else { url = new URL(CIR_URL + identifier + "/" + representation.toString().toLowerCase() + "?" + queryString); } URI uri = new URI("http", url.getUserInfo(), url.getHost(), url.getPort(), url.getPath(), url.getQuery(), url.getRef()); logger.exit(uri); return uri; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy