All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.io.mmcif.AllChemCompProvider Maven / Gradle / Ivy

There is a newer version: 7.2.2
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io.mmcif;

import org.biojava.nbio.structure.align.util.UserConfiguration;
import org.biojava.nbio.structure.io.LocalPDBDirectory;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.core.util.InputStreamProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.URL;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * A ChemComp provider that downloads and caches the components.cif file from the wwPDB site. It then loads
 * all chemical components at startup and keeps them in memory. This provider is not used as a default
 * since it is slower at startup and requires more memory than the {@link DownloadChemCompProvider} that is used by default.
 * <p>
 * The dictionary is held in a static field and therefore shared by all instances. Creating an instance
 * starts a background thread that loads the dictionary, unless it is already loaded or another
 * thread is currently loading it. {@link #getChemComp(String)} blocks until loading has finished.
 *
 * @author Andreas Prlic
 *
 */
public class AllChemCompProvider implements ChemCompProvider, Runnable{

	private static final Logger logger = LoggerFactory.getLogger(AllChemCompProvider.class);

	/** Location of the components file, relative to the server name. */
	public static final String COMPONENTS_FILE_LOCATION = "pub/pdb/data/monomers/components.cif.gz";


	// local cache directory, lazily taken from UserConfiguration (see initPath)
	private static String path;

	// remote server, lazily taken from LocalPDBDirectory (see initServerName)
	private static String serverName;


	// there will be only one copy of the dictionary across all instances
	// to reduce memory impact
	static ChemicalComponentDictionary dict;

	// flags to make sure there is only one thread running that is loading the dictionary
	static AtomicBoolean loading       = new AtomicBoolean(false);
	static AtomicBoolean isInitialized = new AtomicBoolean(false);

	/**
	 * Creates a provider and, if the shared dictionary has neither been loaded nor is being loaded,
	 * starts a background thread (running {@link #run()}) that loads it.
	 */
	public AllChemCompProvider(){

		if ( isInitialized.get())
			return;

		// Atomically claim the loader role. A separate get() followed by set(true)
		// would allow two threads to both pass the check and both start loading.
		if ( ! loading.compareAndSet(false, true)) {
			logger.warn("other thread is already loading all chemcomps, no need to init twice");
			return;
		}

		// A loader may have finished between the isInitialized check and the
		// compareAndSet above; in that case release the flag and do not load again.
		if ( isInitialized.get()) {
			loading.set(false);
			return;
		}

		Thread t = new Thread(this);
		t.start();

	}


	/** make sure all paths are initialized correctly
	 *
	 */
	private static void initPath(){

		if (path==null) {
			UserConfiguration config = new UserConfiguration();
			path = config.getCacheFilePath();
		}
	}

	/** make sure the name of the download server is initialized
	 *
	 */
	private static void initServerName() {

		if (serverName==null) {
			serverName = LocalPDBDirectory.getServerName();
		}
	}

	/** Downloads the components file if there is no usable local copy.
	 * A failed download is logged and not propagated.
	 */
	private void ensureFileExists() {

		String fileName = getLocalFileName();
		File f = new File(fileName);

		// a zero-length file can only be the remnant of an aborted download
		if ( ! f.exists() || f.length() == 0) {
			try {
				downloadFile();
			} catch (IOException e) {
				logger.error("Caught IOException",e);
			}
		}

	}

	/** Downloads the components.cif.gz file from the wwPDB site.
	 *
	 * @throws IOException if the file can not be downloaded or written to the local cache
	 */
	public static void downloadFile() throws IOException {

		initPath();

		initServerName();

		String localName = getLocalFileName();

		String u = serverName + "/" + COMPONENTS_FILE_LOCATION;

		downloadFileFromRemote(new URL(u), new File(localName));


	}


	/** Copies the content of a remote URL into a local file.
	 * The data is first written to a temporary file in the target directory and only moved to
	 * its final name once the transfer is complete, so that an aborted download never leaves a
	 * truncated file under the final name.
	 *
	 * @param remoteURL the URL to read from
	 * @param localFile the file to write to
	 * @throws IOException if reading, writing or the final move fails
	 */
	private static  void downloadFileFromRemote(URL remoteURL, File localFile) throws FileNotFoundException, IOException{
		logger.info("Downloading " + remoteURL + " to: " + localFile);

		File tmpFile = File.createTempFile("chemcomp", ".tmp", localFile.getAbsoluteFile().getParentFile());

		try {
			// both streams are closed even if the copy fails half way
			try (InputStream in = remoteURL.openStream();
					FileOutputStream out = new FileOutputStream(tmpFile)) {

				byte[] buf = new byte[4 * 1024]; // 4K buffer
				int bytesRead;
				while ((bytesRead = in.read(buf)) != -1) {
					out.write(buf, 0, bytesRead);
				}
			}

			// File.renameTo might not succeed if the destination already exists
			if (localFile.exists() && ! localFile.delete()) {
				throw new IOException("Could not replace existing file " + localFile);
			}
			if ( ! tmpFile.renameTo(localFile)) {
				throw new IOException("Could not move " + tmpFile + " to " + localFile);
			}

		} finally {
			// after a successful move the temporary file no longer exists;
			// otherwise remove the incomplete download
			if (tmpFile.exists() && ! tmpFile.delete()) {
				logger.warn("Could not delete temporary file {}", tmpFile);
			}
		}

	}


	/** Returns the location of the local copy of the components file and creates the
	 * cache directory (including missing parent directories) if it does not exist yet.
	 *
	 * @return path of components.cif.gz inside the chem comp cache directory
	 */
	private static String getLocalFileName(){

		File dir = new File(path, DownloadChemCompProvider.CHEM_COMP_CACHE_DIRECTORY);

		if (! dir.exists()){
			logger.info("Creating directory {}", dir.toString());
			// mkdirs also creates missing parents; another thread may have created
			// the directory in the meantime, hence the second exists() check
			if ( ! dir.mkdirs() && ! dir.exists()) {
				logger.warn("Could not create directory {}", dir);
			}
		}

		return new File(dir, "components.cif.gz").toString();
	}

	/** Load all {@link ChemComp} definitions into memory.
	 *
	 * @throws IOException if the local components file can not be read
	 */
	private void loadAllChemComps() throws IOException {
		String fileName = getLocalFileName();
		logger.debug("Loading {}", fileName);
		InputStreamProvider isp = new InputStreamProvider();

		// the stream is closed even if parsing fails
		try (InputStream inStream = isp.getInputStream(fileName)) {

			MMcifParser parser = new SimpleMMcifParser();

			ChemCompConsumer consumer = new ChemCompConsumer();

			// The Consumer builds up the BioJava - structure object.
			// you could also hook in your own and build up you own data model.
			parser.addMMcifConsumer(consumer);

			parser.parse(new BufferedReader(new InputStreamReader(inStream)));

			dict = consumer.getDictionary();
		}

	}


	/** {@inheritDoc}
	 * <p>
	 * Blocks while the dictionary is being loaded. If the calling thread is interrupted
	 * while waiting, it keeps waiting and the interrupt status is restored before returning.
	 * If loading failed, the shared dictionary is still {@code null} and this method throws
	 * a {@link NullPointerException}.
	 */
	@Override
	public ChemComp getChemComp(String recordName) {

		boolean interrupted = false;

		try {
			while ( loading.get()) {

				// another thread is still initializing the definitions
				try {
					// wait half a second
					Thread.sleep(500);
				} catch (InterruptedException e) {
					logger.error("Interrepted thread while waiting: "+e.getMessage());
					// remember the interrupt; re-asserting it right away would make
					// every following sleep() fail immediately
					interrupted = true;
				}
			}
		} finally {
			if (interrupted) {
				// do not hide the interrupt from the caller
				Thread.currentThread().interrupt();
			}
		}

		return dict.getChemComp(recordName);
	}


	/** Do the actual loading of the dictionary in a thread.
	 * The flags are always updated at the end, even if loading fails, so that callers
	 * waiting in {@link #getChemComp(String)} are released.
	 */
	@Override
	public void run() {
		long timeS = System.currentTimeMillis();

		try {
			// kept inside the try block: an unexpected runtime exception here must
			// not leave the loading flag set forever
			initPath();

			ensureFileExists();

			loadAllChemComps();

			long timeE = System.currentTimeMillis();
			logger.debug("Time to init chem comp dictionary: " + (timeE - timeS) / 1000 + " sec.");


		} catch (IOException e) {
			logger.error("Could not load chemical components definition file {}. Error: {}", getLocalFileName(), e.getMessage(), e);

		} finally {
			// mark as initialized before releasing the loading flag, so that a
			// constructor running concurrently never sees both flags as false
			isInitialized.set(true);
			loading.set(false);
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy