
org.biojava.nbio.structure.io.mmcif.AllChemCompProvider Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.io.mmcif;
import org.biojava.nbio.structure.align.util.UserConfiguration;
import org.biojava.nbio.structure.io.LocalPDBDirectory;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.core.util.InputStreamProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.net.URL;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* A ChemComp provider that downloads and caches the components.cif file from the wwPDB site. It then loads
* all chemical components at startup and keeps them in memory. This provider is not used as a default
* since it is slower at startup and requires more memory than the {@link DownloadChemCompProvider} that is used by default.
*
* @author Andreas Prlic
*
*/
public class AllChemCompProvider implements ChemCompProvider, Runnable{
private static final Logger logger = LoggerFactory.getLogger(AllChemCompProvider.class);
public static final String COMPONENTS_FILE_LOCATION = "pub/pdb/data/monomers/components.cif.gz";
private static String path;
private static String serverName;
// there will be only one copy of the dictionary across all instances
// to reduce memory impact
static ChemicalComponentDictionary dict;
// flags to make sure there is only one thread running that is loading the dictionary
static AtomicBoolean loading = new AtomicBoolean(false);
static AtomicBoolean isInitialized = new AtomicBoolean(false);
public AllChemCompProvider(){
if ( loading.get()) {
logger.warn("other thread is already loading all chemcomps, no need to init twice");
return;
}
if ( isInitialized.get())
return;
loading.set(true);
Thread t = new Thread(this);
t.start();
}
/** make sure all paths are initialized correctly
*
*/
private static void initPath(){
if (path==null) {
UserConfiguration config = new UserConfiguration();
path = config.getCacheFilePath();
}
}
private static void initServerName() {
if (serverName==null) {
serverName = LocalPDBDirectory.getServerName();
}
}
private void ensureFileExists() {
String fileName = getLocalFileName();
File f = new File(fileName);
if ( ! f.exists()) {
try {
downloadFile();
} catch (IOException e) {
logger.error("Caught IOException",e);
}
}
}
/** Downloads the components.cif.gz file from the wwPDB site.
*
*/
public static void downloadFile() throws IOException {
initPath();
initServerName();
String localName = getLocalFileName();
String u = serverName + "/" + COMPONENTS_FILE_LOCATION;
downloadFileFromRemote(new URL(u), new File(localName));
}
private static void downloadFileFromRemote(URL remoteURL, File localFile) throws FileNotFoundException, IOException{
logger.info("Downloading " + remoteURL + " to: " + localFile);
FileOutputStream out = new FileOutputStream(localFile);
InputStream in = remoteURL.openStream();
byte[] buf = new byte[4 * 1024]; // 4K buffer
int bytesRead;
while ((bytesRead = in.read(buf)) != -1) {
out.write(buf, 0, bytesRead);
}
in.close();
out.close();
}
private static String getLocalFileName(){
File dir = new File(path, DownloadChemCompProvider.CHEM_COMP_CACHE_DIRECTORY);
if (! dir.exists()){
logger.info("Creating directory {}", dir.toString());
dir.mkdir();
}
String fileName = new File(dir, "components.cif.gz").toString();
return fileName;
}
/** Load all {@link ChemComp} definitions into memory.
*
*/
private void loadAllChemComps() throws IOException {
String fileName = getLocalFileName();
logger.debug("Loading " + fileName);
InputStreamProvider isp = new InputStreamProvider();
InputStream inStream = isp.getInputStream(fileName);
MMcifParser parser = new SimpleMMcifParser();
ChemCompConsumer consumer = new ChemCompConsumer();
// The Consumer builds up the BioJava - structure object.
// you could also hook in your own and build up you own data model.
parser.addMMcifConsumer(consumer);
parser.parse(new BufferedReader(new InputStreamReader(inStream)));
dict = consumer.getDictionary();
inStream.close();
}
/** {@inheritDoc}
*
*/
@Override
public ChemComp getChemComp(String recordName) {
while ( loading.get()) {
// another thread is still initializing the definitions
try {
// wait half a second
Thread.sleep(500);
} catch (InterruptedException e) {
logger.error("Interrepted thread while waiting: "+e.getMessage());
//e.printStackTrace();
}
}
return dict.getChemComp(recordName);
}
/** Do the actual loading of the dictionary in a thread.
*
*/
@Override
public void run() {
long timeS = System.currentTimeMillis();
initPath();
ensureFileExists();
try {
loadAllChemComps();
long timeE = System.currentTimeMillis();
logger.debug("Time to init chem comp dictionary: " + (timeE - timeS) / 1000 + " sec.");
} catch (IOException e) {
logger.error("Could not load chemical components definition file {}. Error: {}", getLocalFileName(), e.getMessage());
} finally {
loading.set(false);
isInitialized.set(true);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy