All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.io.mmcif.DownloadChemCompProvider Maven / Gradle / Ivy

There is a newer version: 7.2.2
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io.mmcif;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.GZIPOutputStream;

import org.biojava.nbio.core.util.InputStreamProvider;
import org.biojava.nbio.structure.align.util.HTTPConnectionTools;
import org.biojava.nbio.structure.align.util.UserConfiguration;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;



/** This provider of chemical components can download and cache chemical component definition files from the RCSB PDB web site.
 *  It is the default way to access these definitions.
 *  By default only the definitions that are actually requested are downloaded, one file per component.
 *  If {@link #setDownloadAll(boolean)} has been set to true, the first request for a missing component
 *  will download and install all chemical component definitions in a local directory.
 *  Once the definition files have been installed, it has quick startup time and low memory requirements.
 *
 *  An alternative provider, that keeps all definitions in memory is the {@link AllChemCompProvider}. Another provider, that
 *  does not require any network access, but only can support a limited set of chemical component definitions, is the {@link ReducedChemCompProvider}.
 *
 *  Note that the cache location is held in a static field and is therefore shared by all instances of this class.
 *
 * @author Andreas Prlic
 *
 */
public class DownloadChemCompProvider implements ChemCompProvider {

	private static final Logger logger = LoggerFactory.getLogger(DownloadChemCompProvider.class);

	/** Name of the sub-directory (below the cache path) that holds the individual definition files. */
	public static final String CHEM_COMP_CACHE_DIRECTORY = "chemcomp";

	/** Base URL from which single chemical component definitions are fetched. */
	public static final String SERVER_LOCATION = "http://www.rcsb.org/pdb/files/ligand/";


	/** Root of the local cache; static, i.e. shared by all instances. Lazily set by {@link #initPath()}. */
	private static File path;
	//private static final String FILE_SEPARATOR = System.getProperty("file.separator");
	private static final String NEWLINE = System.getProperty("line.separator");


	// flag to make sure there is only one thread running that is loading the dictionary
	static AtomicBoolean loading = new AtomicBoolean(false);

	/**
	 * Component IDs that get a "_" prefix in their local file name
	 * (see {@link #getLocalFileName(String)}).
	 */
	static final List<String> protectedIDs = new ArrayList<String>();
	static {
		protectedIDs.add("CON");
		protectedIDs.add("PRN");
		protectedIDs.add("AUX");
		protectedIDs.add("NUL");
	}

	/** by default we will download only some of the files. User has to request that all files should be downloaded...
	 *
	 */
	boolean downloadAll = false;

	/**
	 * Creates a provider that caches files below the cache path of the default
	 * {@link UserConfiguration}, unless a cache path has already been set.
	 */
	public DownloadChemCompProvider(){
		logger.debug("Initialising DownloadChemCompProvider");

		// note that path is static, so this is just to make sure that all non-static methods will have path initialised
		initPath();
	}

	/**
	 * Creates a provider that caches files below the given directory.
	 *
	 * @param cacheFilePath directory below which the {@value #CHEM_COMP_CACHE_DIRECTORY} directory is placed
	 */
	public DownloadChemCompProvider(String cacheFilePath){
		logger.debug("Initialising DownloadChemCompProvider");

		// note that path is static: this replaces the cache location for ALL instances of this class
		path = new File(cacheFilePath);
	}

	/** Sets the static cache path from a default {@link UserConfiguration} if it has not been set yet. */
	private static void initPath(){

		if (path==null) {
			UserConfiguration config = new UserConfiguration();
			path = new File(config.getCacheFilePath());
		}
	}

	/**
	 * Checks if the chemical components already have been installed into the PDB directory.
	 * If not, will download the chemical components definitions file and split it up into small
	 * subfiles.
	 * Does nothing unless {@link #setDownloadAll(boolean)} has been set to true.
	 */
	public void checkDoFirstInstall(){

		if ( ! downloadAll ) {
			return;
		}


		// this makes sure there is a file separator between every component,
		// if path has a trailing file separator or not, it will work for both cases
		File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY);
		File f = new File(dir, "components.cif.gz");

		if ( ! f.exists()) {

			downloadAllDefinitions();

		} else {
			// file exists.. did it get extracted?

			FilenameFilter filter =new FilenameFilter() {

				@Override
				public boolean accept(File dir, String file) {
					return file.endsWith(".cif.gz");
				}
			};
			String[] files = dir.list(filter);
			// File.list returns null on an I/O error; treat that like "not unpacked" instead of failing with a NPE
			if ( files == null || files.length < 500) {
				// not all did get unpacked
				try {
					split();
				} catch (IOException e) {
					logger.error("Could not split file {} into individual chemical component files. Error: {}",
							f.toString(), e.getMessage());
				}
			}
		}
	}

	/**
	 * Splits the cached components.cif.gz into one gzipped file per chemical component.
	 * A new record starts at every line beginning with "data_"; the text after that
	 * prefix is used as the component ID.
	 *
	 * @throws IOException if the combined file cannot be read or a component file cannot be written
	 */
	private void split() throws IOException {

		logger.info("Installing individual chem comp files ...");

		File dir = new File(path, CHEM_COMP_CACHE_DIRECTORY);
		File f = new File(dir, "components.cif.gz");


		int counter = 0;
		InputStreamProvider prov = new InputStreamProvider();

		try( BufferedReader buf = new BufferedReader (new InputStreamReader (prov.getInputStream(f)));
				) {
			String line = buf.readLine ();
			StringWriter writer = new StringWriter();

			String currentID = null;
			while (line != null){

				if ( line.startsWith("data_")) {
					// a new record found!

					if ( currentID != null) {
						writeID(writer.toString(), currentID);
						counter++;
					}

					currentID = line.substring(5);
					writer = new StringWriter();
				}

				writer.append(line);
				writer.append(NEWLINE);

				line = buf.readLine ();
			}

			// write the last record, unless the file did not contain any record at all
			if ( currentID != null) {
				writeID(writer.toString(),currentID);
				counter++;
			}

		}

		logger.info("Created {} chemical component files.", counter);
	}

	/**
	 * Output chemical contents to a file
	 * @param contents File contents
	 * @param currentID Chemical ID, used to determine the filename
	 * @throws IOException if the file cannot be written
	 */
	private void writeID(String contents, String currentID) throws IOException{

		String localName = DownloadChemCompProvider.getLocalFileName(currentID);

		// Write straight to the gzip stream: a PrintWriter would silently swallow I/O errors
		// (e.g. disk full) and leave a truncated file behind. getBytes() uses the platform
		// default charset, which is what PrintWriter(OutputStream) uses as well.
		try ( GZIPOutputStream out = new GZIPOutputStream(new FileOutputStream(localName)) ) {
			out.write(contents.getBytes());
		}
	}

	/**
	 * Loads the definitions for this {@link ChemComp} from a local file and instantiates a new object.
	 * If the file is not available locally it is downloaded first. If it can neither be
	 * downloaded nor parsed, the lookup is delegated to a {@link ReducedChemCompProvider}.
	 *
	 * @param recordName the ID of the {@link ChemComp}
	 * @return a new {@link ChemComp} definition, or null if recordName is "?".
	 */
	@Override
	public  ChemComp getChemComp(String recordName) {

		// make sure we work with upper case records;
		// Locale.ROOT keeps the result independent of the default locale (e.g. Turkish dotted/dotless i)
		recordName = recordName.toUpperCase(Locale.ROOT).trim();

		if ( recordName.equals("?")){
			return null;
		}

		boolean haveFile = true;

		if ( ! fileExists(recordName)) {
			// check if we should install all components
			checkDoFirstInstall();
		}
		if ( ! fileExists(recordName)) {
			// we previously have installed already the definitions,
			// just do an incremental update
			haveFile = downloadChemCompRecord(recordName);
		}

		// Added check that download was successful and chemical component is available.
		if (haveFile) {
			String filename = getLocalFileName(recordName);
			InputStream inStream = null;
			try {

				InputStreamProvider isp = new InputStreamProvider();

				inStream = isp.getInputStream(filename);

				MMcifParser parser = new SimpleMMcifParser();

				ChemCompConsumer consumer = new ChemCompConsumer();

				// The Consumer builds up the BioJava - structure object.
				// you could also hook in your own and build up you own data model.
				parser.addMMcifConsumer(consumer);

				parser.parse(new BufferedReader(new InputStreamReader(inStream)));

				ChemicalComponentDictionary dict = consumer.getDictionary();

				return dict.getChemComp(recordName);

			} catch (IOException e) {

				logger.error("Could not parse chemical component file {}. Error: {}. "
						+ "There will be no chemical component info available for {}", filename, e.getMessage(), recordName);

			}
			finally{
				// Now close it
				if(inStream!=null){
					try {
						inStream.close();
					} catch (IOException e) {
						// This would be weird...
						logger.error("Could not close chemical component file {}. A resource leak could occur!!", filename);
					}
				}

			}
		}

		// see https://github.com/biojava/biojava/issues/315
		// probably a network error happened. Try to use the ReducedChemCompProvider
		ReducedChemCompProvider reduced = new ReducedChemCompProvider();

		return reduced.getChemComp(recordName);

	}

	/** Returns the file name that contains the definition for this {@link ChemComp}.
	 * As a side effect the cache directory is created if it does not exist yet.
	 *
	 * @param recordName the ID of the {@link ChemComp}
	 * @return full path to the file
	 */
	public static String getLocalFileName(String recordName){

		if ( protectedIDs.contains(recordName)){
			recordName = "_" + recordName;
		}

		initPath();

		File f = new File(path, CHEM_COMP_CACHE_DIRECTORY);
		if (! f.exists()){
			logger.info("Creating directory " + f);

			// mkdirs also creates a missing parent directory. A false return value is only an
			// error if the directory still does not exist afterwards: another thread may have
			// created it between the exists() check and this call.
			boolean success = f.mkdirs();
			if (!success && !f.isDirectory()) logger.error("Directory {} could not be created",f);

		}

		File theFile = new File(f,recordName + ".cif.gz");

		return theFile.toString();
	}

	/** Tells whether the local definition file for the given component ID exists. */
	private static  boolean fileExists(String recordName){

		String fileName = getLocalFileName(recordName);

		File f = new File(fileName);

		return f.exists();

	}

	/**
	 * Downloads the definition of a single chemical component, gzips it into a temporary
	 * file and then moves that file to its final location in the local cache.
	 *
	 * @param recordName : three-letter name
	 * @return true if successful download
	 */
	private static boolean downloadChemCompRecord(String recordName) {

		String localName = getLocalFileName(recordName);
		File newFile;
		try{
			newFile = File.createTempFile("chemcomp"+recordName, "cif");
		}
		catch(IOException e){
			logger.error("Could not write to temp directory {} to create the chemical component download temp file", System.getProperty("java.io.tmpdir"));
			return false;
		}
		String u = SERVER_LOCATION + recordName + ".cif";

		logger.debug("downloading " + u);

		try {
			URL url = new URL(u);

			HttpURLConnection uconn = HTTPConnectionTools.openHttpURLConnection(url);

			// Write straight to the gzip stream so that I/O errors are reported instead of being
			// swallowed by a PrintWriter. Default charset and line separator are the same that
			// PrintWriter.println would use.
			try( GZIPOutputStream out = new GZIPOutputStream(new FileOutputStream(newFile));
					BufferedReader fileBuffer = new BufferedReader(new InputStreamReader(uconn.getInputStream()));
					) {

				byte[] newline = NEWLINE.getBytes();
				String line;

				while ((line = fileBuffer.readLine()) != null) {
					out.write(line.getBytes());
					out.write(newline);
				}
			}

			// Only now, with the streams closed and the gzip trailer written, we move the file across
			// to where it actually wants to be. Files.move (unlike File.renameTo) also works if the
			// temp directory is on a different file system than the cache directory.
			Files.move(newFile.toPath(), new File(localName).toPath(), StandardCopyOption.REPLACE_EXISTING);

			return true;

		}  catch (IOException e){
			// use the URL string here: the URL object does not exist if the URL was malformed
			logger.error("Could not download "+u+" OR store locally to "+localName+" Error ="+e.getMessage());
			if ( ! newFile.delete() && newFile.exists()) {
				logger.debug("Could not delete temp file {}", newFile);
			}
		}
		return false;
	}

	/**
	 * Downloads the file with all chemical component definitions and splits it into
	 * individual files. Only one thread at a time performs the installation; a thread
	 * that finds an installation in progress waits for it to finish and then returns.
	 */
	private void downloadAllDefinitions() {

		// atomically claim the installation; if this fails another thread is already at work
		if ( ! loading.compareAndSet(false, true)) {

			logger.info("Waiting for other thread to install chemical components...");

			while ( loading.get() ) {

				// another thread is already downloading the components definitions
				// wait for the other thread to finish...

				try {
					// wait half a second

					Thread.sleep(500);
				} catch (InterruptedException e) {
					// restore the interrupt flag for the caller and stop waiting
					Thread.currentThread().interrupt();
					logger.error("Thread interrupted "+e.getMessage());
					return;
				}
			}

			logger.info("Another thread installed the chemical components.");
			return;
		}

		try {
			long timeS = System.currentTimeMillis();

			logger.info("Performing first installation of chemical components.");
			logger.info("Downloading components.cif.gz ...");


			try {
				AllChemCompProvider.downloadFile();
			} catch (IOException e){
				logger.error("Could not download the all chemical components file. Error: {}. "
						+ "Chemical components information won't be available", e.getMessage());
				// no point in trying to split if the file could not be downloaded
				return;
			}
			try {
				split();
			} catch (IOException e) {
				logger.error("Could not split all chem comp file into individual chemical component files. Error: {}",
					 e.getMessage());
				// no point in reporting time
				return;
			}
			long timeE = System.currentTimeMillis();
			logger.info("time to install chem comp dictionary: " + (timeE - timeS) / 1000 + " sec.");

		} finally {
			// always release the flag, also on unexpected (unchecked) exceptions,
			// otherwise waiting threads would wait forever
			loading.set(false);
		}

	}

	/** By default this provider will download only some of the {@link ChemComp} files.
	 * The user has to request that all files should be downloaded by setting this parameter to true.
	 *
	 *  @return flag if the all components should be downloaded and installed at startup. (default: false)
	 */
	public boolean isDownloadAll() {
		return downloadAll;
	}

	/** By default this provider will download only some of the {@link ChemComp} files.
	 * The user has to request that all files should be downloaded by setting this parameter to true.
	 *
	 * @param downloadAll flag if the all components should be downloaded and installed at startup. (default: false)
	 */
	public void setDownloadAll(boolean downloadAll) {
		this.downloadAll = downloadAll;
	}





}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy