All downloads are free. The search and download functionality uses the official Maven repository.

net.bioclipse.managers.PubChemManager Maven / Gradle / Ivy

There is a newer version: 1.0.3
Show newest version
/* Copyright (c) 2006-2020  Egon Willighagen 
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contact: http://www.bioclipse.net/
 */
package net.bioclipse.managers;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.PatternSyntaxException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClientBuilder;
import org.eclipse.core.runtime.CoreException;
import org.openscience.cdk.io.formats.IChemFormat;

import io.github.egonw.bacting.IBactingManager;
import net.bioclipse.cdk.domain.ICDKMolecule;
import net.bioclipse.core.business.BioclipseException;
import net.bioclipse.core.domain.IMolecule;
import net.bioclipse.rdf.business.IRDFStore;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Nodes;
import nu.xom.ParsingException;
import nu.xom.ValidityException;

/**
 * Bioclipse manager that provides functionality to interact with the
 * PubChem database.
 */
public class PubChemManager implements IBactingManager {

    /** Root of the NCBI Entrez E-utilities web services (used for compound searches). */
    private static final String EUTILS_URL_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";

    /** Root of the PubChem web site; compound summaries are fetched relative to it. */
    private static final String PUBCHEM_URL_BASE = "https://pubchem.ncbi.nlm.nih.gov/";

    /** Prefix of PubChem RDF compound resources; a "CID&lt;number&gt;" suffix is appended. */
    private static final String PUBCHEMRDF_URL_BASE = "https://rdf.ncbi.nlm.nih.gov/pubchem/compound/";

    /** Value sent as the {@code tool} request parameter to E-utilities. */
    private static final String TOOL = "bioclipse.net";

    /** Workspace location this manager and its helper managers were created for. */
    private String workspaceRoot;

    /** Reads molecules from downloaded content. */
    private CDKManager cdk;

    /** Imports downloaded RDF into a store. */
    private RDFManager rdf;

    /** Writes downloaded content to files. */
    private UIManager ui;

	/**
     * Builds a PubChem manager along with the CDK, RDF and UI managers it
     * relies on; all of them are created for the same workspace location.
     *
     * @param workspaceRoot location of the workspace, e.g. "."
     */
    public PubChemManager(String workspaceRoot) {
        this.workspaceRoot = workspaceRoot;

        // Every helper manager is created for the very same workspace.
        this.cdk = new CDKManager(workspaceRoot);
        this.rdf = new RDFManager(workspaceRoot);
        this.ui = new UIManager(workspaceRoot);
    }

    private String replaceSpaces(String molecule2) {
        StringBuffer result = new StringBuffer();
        for (int i=0; i search(String query)
            throws IOException, BioclipseException, CoreException {
        int max = 50;

        List results = new ArrayList();

        String db = "pccompound";
        query = replaceSpaces(query);

        String esearch = EUTILS_URL_BASE + "/esearch.fcgi?" +
            "db=" + db + "&retmax=" + max + "&usehistory=y&tool=" + TOOL + "&term=" + query;

        URL queryURL = new URL(esearch);
        URLConnection connection = queryURL.openConnection();

        Builder parser = new Builder();
        Document doc;
        try {
            doc = parser.build(connection.getInputStream());
            Nodes countNodes = doc.query("/eSearchResult/Count");
            if (countNodes.size() > 0) {
                // System.out.println(countNodes.get(0).toString());
            } else {
                return results;
            }

            Nodes cidNodes = doc.query("/eSearchResult/IdList/Id");

            for (int cidCount=0; cidCount doi() {
		return Collections.emptyList();
	}

    /**
     * Downloads the PubChem record of one compound and parses it into a molecule.
     *
     * @param cid PubChem compound identifier (CID)
     * @return the molecule read from the downloaded "PubChemCompoundXMLFormat" content
     * @throws IOException        if the download fails
     * @throws BioclipseException if nothing was downloaded, or if the download itself
     *                            reports a problem
     * @throws CoreException      declared by the signature; not raised directly by this
     *                            method's own code
     */
    public IMolecule download(Integer cid)
        throws IOException, BioclipseException, CoreException {
        String molstring = downloadAsString(cid);
        if (molstring == null || molstring.isEmpty()) {
            // Keep the identifier readable: separate it from the message text.
            throw new BioclipseException("Could not read molecule from CID " + cid);
        }
        IChemFormat format = cdk.getFormat("PubChemCompoundXMLFormat");
        // getBytes() deliberately uses the platform charset: the downloaded text was
        // decoded with that same charset, so this restores the bytes that were received.
        return cdk.loadMolecule(new ByteArrayInputStream(molstring.getBytes()), format);
    }

    /**
     * Fetches the PubChem summary of a compound using the "DisplayXML" display option.
     *
     * @param cid PubChem compound identifier (CID)
     * @return the downloaded content as text
     * @throws IOException        if the download fails
     * @throws BioclipseException if the download reports a problem
     * @throws CoreException      declared by the signature
     */
    public String downloadAsString(Integer cid)
        throws IOException, BioclipseException, CoreException {
        final String displayOption = "DisplayXML";
        return downloadAsString(cid, displayOption);
    }

    /**
     * Fetches the PubChem summary page of a compound for a given display option.
     *
     * @param cid  PubChem compound identifier (CID)
     * @param type value of the {@code disopt} query parameter, e.g. "DisplayXML"
     * @return the downloaded content as text
     * @throws IOException        if the download fails
     * @throws BioclipseException if the download reports a problem
     * @throws CoreException      declared by the signature
     */
    private String downloadAsString(Integer cid, String type)
            throws IOException, BioclipseException, CoreException {
        StringBuilder summaryUrl = new StringBuilder(PUBCHEM_URL_BASE);
        summaryUrl.append("summary/summary.cgi?cid=").append(cid);
        summaryUrl.append("&disopt=").append(type);
        // No specific media type is requested for summary downloads.
        return downloadAsString(summaryUrl.toString(), null);
    }

    /**
     * Performs an HTTP GET request and returns the response body as text.
     *
     * @param url     address to download from
     * @param accepts media type sent in both the {@code Accept} and {@code Content-Type}
     *                request headers, or {@code null} to send neither
     * @return the response body decoded with the platform default charset, or an
     *         empty string if the response carried no body
     * @throws IOException        if the request fails or the client cannot be closed
     * @throws BioclipseException if the server answers with anything other than HTTP 200
     * @throws CoreException      declared by the signature; not raised by this method's
     *                            own code
     */
    private String downloadAsString(String url, String accepts)
            throws IOException, BioclipseException, CoreException {
        HttpClient client = HttpClientBuilder.create()
            .useSystemProperties()
            .disableAutomaticRetries()
            .setDefaultRequestConfig(RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build())
            .build();
        // The builder hands back a closeable client. Closing it releases its connections
        // on every exit path, including the non-200 path where the body is never read.
        try (java.io.Closeable clientResources =
                 (client instanceof java.io.Closeable) ? (java.io.Closeable) client : null) {
            HttpGet method = new HttpGet(url);
            if (accepts != null) {
                method.addHeader("Accept", accepts);
                method.addHeader("Content-Type", accepts);
            }
            HttpResponse response = client.execute(method);
            StatusLine statusLine = response.getStatusLine();
            int statusCode = statusLine.getStatusCode();
            if (statusCode != 200) {
                throw new BioclipseException(
                    "Expected HTTP 200, but got a " + statusCode + ": " + statusLine.getReasonPhrase()
                );
            }

            HttpEntity responseEntity = response.getEntity();
            if (responseEntity == null) {
                // A response without a body yields the same empty default as before.
                return "";
            }
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            responseEntity.writeTo(buffer);
            // NOTE(review): decoding uses the platform charset; download(Integer) re-encodes
            // with the same charset, so change both together if this is ever made explicit.
            return new String(buffer.toByteArray());
        } catch (PatternSyntaxException exception) {
            throw new BioclipseException("Invalid Pattern.", exception);
        } catch (MalformedURLException exception) {
            throw new BioclipseException("Invalid URL.", exception);
        }
    }

    /**
     * Downloads the RDF/XML description of a compound from PubChem RDF and
     * imports it into the given store.
     *
     * @param cid   PubChem compound identifier (CID)
     * @param store store that receives the downloaded triples
     * @return the same {@code store} instance that was passed in
     * @throws IOException        if the download fails
     * @throws BioclipseException if the download reports a problem
     * @throws CoreException      declared by the signature
     */
    public IRDFStore downloadRDF(Integer cid, IRDFStore store)
        throws IOException, BioclipseException, CoreException {
        String rdfXml = downloadAsString(
            PUBCHEMRDF_URL_BASE + "CID" + cid,
            "application/rdf+xml"
        );
        rdf.importFromString(store, rdfXml, "RDF/XML");
        return store;
    }

    /**
     * Downloads the 3D SD file of one compound and parses its first record
     * into a molecule.
     *
     * @param cid PubChem compound identifier (CID)
     * @return the molecule read from the downloaded content
     * @throws IOException        if the download fails
     * @throws BioclipseException if nothing was downloaded, or if the download itself
     *                            reports a problem
     * @throws CoreException      declared by the signature; not raised directly by this
     *                            method's own code
     */
    public IMolecule download3d(Integer cid)
        throws IOException, BioclipseException, CoreException {
        String molstring = download3dAsString(cid);
        if (molstring == null || molstring.isEmpty()) {
            throw new BioclipseException("Could not read 3D molecule from CID " + cid);
        }

        // Convert the returned SD file into a MDL molfile by stripping the
        // $$$$ record delimiter and everything after it. Without a delimiter the
        // content is handed over unchanged instead of failing on substring(0, -1).
        int recordEnd = molstring.indexOf("$$$$");
        if (recordEnd >= 0) {
            molstring = molstring.substring(0, recordEnd);
        }

        return cdk.fromString(molstring);
    }

    /**
     * Fetches the PubChem summary of a compound using the "3DDisplaySDF" display option.
     *
     * @param cid PubChem compound identifier (CID)
     * @return the downloaded content as text
     * @throws IOException        if the download fails
     * @throws BioclipseException if the download reports a problem
     * @throws CoreException      declared by the signature
     */
    public String download3dAsString(Integer cid)
        throws IOException, BioclipseException, CoreException {
        final String displayOption = "3DDisplaySDF";
        return downloadAsString(cid, displayOption);
    }

    /**
     * Downloads several compounds, one request per identifier, in the order given.
     * The first failing download aborts the whole call, because its exception is
     * propagated unchanged.
     *
     * @param cids PubChem compound identifiers (CIDs) to download
     * @return the downloaded molecules, in the same order as {@code cids}
     * @throws IOException        if a download fails
     * @throws BioclipseException if a compound could not be read
     * @throws CoreException      declared by the signature
     */
    public List<IMolecule> download(List<Integer> cids)
            throws IOException, BioclipseException, CoreException {
        List<IMolecule> results = new ArrayList<IMolecule>();
        for (Integer cid : cids) {
            results.add(download(cid));
        }
        return results;
    }

    /**
     * Downloads the 3D structures of several compounds, one request per identifier,
     * in the order given. The first failing download aborts the whole call, because
     * its exception is propagated unchanged.
     *
     * @param cids PubChem compound identifiers (CIDs) to download
     * @return the downloaded molecules, in the same order as {@code cids}
     * @throws IOException        if a download fails
     * @throws BioclipseException if a compound could not be read
     * @throws CoreException      declared by the signature
     */
    public List<IMolecule> download3d(List<Integer> cids)
            throws IOException, BioclipseException, CoreException {
        List<IMolecule> results = new ArrayList<IMolecule>();
        for (Integer cid : cids) {
            results.add(download3d(cid));
        }
        return results;
    }

    /**
     * Downloads a compound with the "DisplayXML" display option and stores it in a file.
     *
     * @param cid    PubChem compound identifier (CID)
     * @param target file to write the downloaded content to; must not be {@code null}
     * @return {@code target}
     * @throws IOException        if the download fails
     * @throws BioclipseException if {@code target} is {@code null} or the download
     *                            reports a problem
     * @throws CoreException      declared by the signature
     */
    public String loadCompound(int cid, String target)
            throws IOException, BioclipseException, CoreException {
        final String displayOption = "DisplayXML";
        return loadCompoundAny(cid, target, displayOption);
    }

    /**
     * Downloads a compound with the "3DDisplaySDF" display option and stores it in a file.
     *
     * @param cid    PubChem compound identifier (CID)
     * @param target file to write the downloaded content to; must not be {@code null}
     * @return {@code target}
     * @throws IOException        if the download fails
     * @throws BioclipseException if {@code target} is {@code null} or the download
     *                            reports a problem
     * @throws CoreException      declared by the signature
     */
    public String loadCompound3d(int cid, String target)
        throws IOException, BioclipseException, CoreException {
        final String displayOption = "3DDisplaySDF";
        return loadCompoundAny(cid, target, displayOption);
    }

    /**
     * Downloads a compound for the given display option and writes the content
     * to {@code target} through the UI manager.
     *
     * @param cid    PubChem compound identifier (CID)
     * @param target file to write to; must not be {@code null}
     * @param type   display option passed on to the download, e.g. "DisplayXML"
     * @return {@code target}
     * @throws IOException        if the download fails
     * @throws BioclipseException if {@code target} is {@code null} or the download
     *                            reports a problem
     * @throws CoreException      declared by the signature
     */
    private String loadCompoundAny(int cid, String target, String type)
            throws IOException, BioclipseException, CoreException {
        if (target == null) {
            throw new BioclipseException("Cannot save to a NULL file.");
        }

        String content = downloadAsString(cid, type);

        boolean targetPresent = ui.fileExists(target);
        if (!targetPresent) {
            ui.newFile(target, content);
            return target;
        }

        // Existing file: renew it first, then add the fresh content.
        // NOTE(review): presumably renewFile() empties the file -- confirm in UIManager.
        ui.renewFile(target);
        ui.append(target, content);
        return target;
    }

    /**
     * Downloads the RDF/XML description of a compound from PubChem RDF and
     * writes it to {@code target} through the UI manager.
     *
     * @param cid    PubChem compound identifier (CID)
     * @param target file to write to; must not be {@code null}
     * @return {@code target}
     * @throws IOException        if the download fails
     * @throws BioclipseException if {@code target} is {@code null} or the download
     *                            reports a problem
     * @throws CoreException      declared by the signature
     */
    public String loadCompoundRDF(int cid, String target)
        throws IOException, BioclipseException, CoreException {
        if (target == null) {
            throw new BioclipseException("Cannot save to a NULL file.");
        }

        String rdfXml = downloadAsString(
            PUBCHEMRDF_URL_BASE + "CID" + cid,
            "application/rdf+xml"
        );

        boolean targetPresent = ui.fileExists(target);
        if (!targetPresent) {
            ui.newFile(target, rdfXml);
            return target;
        }

        // Existing file: renew it first, then add the fresh content.
        // NOTE(review): presumably renewFile() empties the file -- confirm in UIManager.
        ui.renewFile(target);
        ui.append(target, rdfXml);
        return target;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy