
org.biojava.nbio.structure.io.mmcif.ZipChemCompProvider Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.io.mmcif;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.HashSet;
import java.util.Set;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.biojava.nbio.structure.io.mmcif.chem.PolymerType;
import org.biojava.nbio.structure.io.mmcif.chem.ResidueType;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** This chemical component provider retrieves and caches chemical component definition files from a
* zip archive specified in its construction. If the archive does not contain the record, an attempt is
* made to download it using DownloadChemCompProvider. The downloaded file is then added to the archive.
*
* The class is thread-safe and the same ZipChemCompProvider should be used by all threads to prevent
* simultaneous read or write to the zip archive. A zip archive will be created if missing.
*
* @author edlunde
* @author larsonm
* @since 12/05/12
* updated 3/5/2016 for Java 7 ZipFileSystem
*/
public class ZipChemCompProvider implements ChemCompProvider{
private static final Logger s_logger = LoggerFactory.getLogger(ZipChemCompProvider.class);
private final Path m_tempDir; // Base path where $m_zipRootDir/ will be downloaded to.
private final Path m_zipRootDir;
private final Path m_zipFile;
private final DownloadChemCompProvider m_dlProvider;
private boolean m_removeCif;
// Missing IDs from library that cannot be download added here to prevent delays.
private Set unavailable = new HashSet();
/**
* ZipChemCompProvider is a Chemical Component provider that stores chemical components
* in a zip archive. Missing chemical components are downloaded and appended to the
* archive. If non-existent a new zip archive will be created.
*
* @param chemicalComponentDictionaryFile : path to zip archive for chemical components.
* @param tempDir : path for temporary directory, (null) defaults to path in property "java.io.tmpdir".
* @throws IOException
*/
public ZipChemCompProvider(String chemicalComponentDictionaryFile, String tempDir) throws IOException {
this.m_zipFile = Paths.get(chemicalComponentDictionaryFile);
// Use a default temporary directory if not passed a value.
if (tempDir == null || tempDir.equals("")) {
this.m_tempDir = Paths.get(System.getProperty("java.io.tmpdir"));
} else {
this.m_tempDir = Paths.get(tempDir);
}
this.m_zipRootDir = Paths.get("chemcomp");
// Setup an instance of the download chemcomp provider.
this.m_dlProvider = new DownloadChemCompProvider(m_tempDir.toString());
this.m_removeCif = true;
initializeZip();
}
// See comments in addToZipFileSystem for why initialization is required with
// ZipFileSystems - due to URI issues in Java7.
private void initializeZip() throws IOException {
s_logger.info("Using chemical component dictionary: " + m_zipFile.toString());
final File f = m_zipFile.toFile();
if (!f.exists()) {
s_logger.info("Creating missing zip archive: " + m_zipFile.toString());
FileOutputStream fo = new FileOutputStream(f);
ZipOutputStream zip = new ZipOutputStream(new BufferedOutputStream(fo));
try {
zip.putNextEntry(new ZipEntry("chemcomp/"));
zip.closeEntry();
} finally {
zip.close();
}
}
}
/**
* Remove downloaded .cif.gz after adding to zip archive?
* Default is true.
* @param doRemove
*/
public void setRemoveCif(boolean doRemove) {
m_removeCif = doRemove;
}
/* (non-Javadoc)
* @see org.biojava.nbio.structure.io.mmcif.ChemCompProvider#getChemComp(java.lang.String)
*
* @param recordName : three letter PDB name for a residue
* @return ChemComp from .zip or ChemComp from repository. Will return empty ChemComp when unable to find a residue and will return null if not provided a valid recordName.
*/
@Override
public ChemComp getChemComp(String recordName) {
if (null == recordName) return null;
// handle non-existent ChemComp codes and do not repeatedly attempt to add these.
for (String str : unavailable) {
if (recordName.equals(str)) return getEmptyChemComp(recordName);
}
// Try to pull from zip, if fail then download.
ChemComp cc = getFromZip(recordName);
if (cc == null) {
s_logger.info("File "+recordName+" not found in archive. Attempting download from PDB.");
cc = downloadAndAdd(recordName);
}
// If a null record or an empty chemcomp, return a default ChemComp and blacklist.
if (cc == null || (null == cc.getName() && cc.getAtoms().size() == 0)) {
s_logger.info("Unable to find or download " + recordName + " - excluding from future searches.");
unavailable.add(recordName);
return getEmptyChemComp(recordName);
}
return cc;
}
/** Use DownloadChemCompProvider to grab a gzipped cif record from the PDB.
* Zip all downloaded cif.gz files into the dictionary.
*
* @param recordName is the three-letter chemical component code (i.e. residue name).
* @return ChemComp matching recordName
*/
private ChemComp downloadAndAdd(String recordName){
final ChemComp cc = m_dlProvider.getChemComp(recordName);
// final File [] files = finder(m_tempDir.resolve("chemcomp").toString(), "cif.gz");
final File [] files = new File[1];
Path cif = m_tempDir.resolve("chemcomp").resolve(recordName + ".cif.gz");
files[0] = cif.toFile();
if (files != null) {
addToZipFileSystem(m_zipFile, files, m_zipRootDir);
if (m_removeCif) for (File f : files) f.delete();
}
return cc;
}
/**
* Cleanup chemical component (.cif.gz) files downloaded to tmpdir.
* @param tempdir : path to temporary directory for chemical components
*/
public static void purgeTempFiles(String tempdir) {
if (tempdir == null) return;
s_logger.info("Removing: "+tempdir);
Path dlPath = Paths.get(tempdir).resolve("chemcomp");
File[] chemCompOutFiles = finder(dlPath.toString(), "cif.gz");
if (null != chemCompOutFiles) for (File f : chemCompOutFiles) f.delete();
dlPath.toFile().delete();
}
/**
* Return an empty ChemComp group for a three-letter resName.
* @param resName
* @return
*/
private ChemComp getEmptyChemComp(String resName){
String pdbName = ""; // Empty string is default
if (null != resName && resName.length() >= 3) {
pdbName = resName.substring(0,3);
}
final ChemComp comp = new ChemComp();
comp.setOne_letter_code("?");
comp.setThree_letter_code(pdbName);
comp.setPolymerType(PolymerType.unknown);
comp.setResidueType(ResidueType.atomn);
return comp;
}
/**
* Return File(s) in dirName that match suffix.
* @param dirName
* @param suffix
* @return
*/
static private File[] finder( String dirName, final String suffix){
if (null == dirName || null == suffix) {
return null;
}
final File dir = new File(dirName);
return dir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String filename)
{ return filename.endsWith(suffix); }
} );
}
/**
* This is synchronized, along with addToFileSystem to prevent simulatenous reading/writing.
* @param recordName to find in zipfile.
* @return ChemComp if found or null if missing.
*/
private synchronized ChemComp getFromZip(String recordName) {
ChemComp cc = null;
if (!m_zipFile.toFile().exists()) return cc;
final String filename = "chemcomp/" + recordName+".cif.gz";
// try with resources block to read from the filesystem.
try (FileSystem fs = FileSystems.newFileSystem(m_zipFile, null)) {
Path cif = fs.getPath(filename);
if (Files.exists(cif)) {
final InputStream zipStream = Files.newInputStream(cif);
final InputStream inputStream = new GZIPInputStream(zipStream);
s_logger.debug("reading " + recordName + " from " + m_zipFile);
final MMcifParser parser = new SimpleMMcifParser();
final ChemCompConsumer consumer = new ChemCompConsumer();
parser.addMMcifConsumer(consumer);
parser.parse(inputStream);
inputStream.close();
final ChemicalComponentDictionary dict = consumer.getDictionary();
cc = dict.getChemComp(recordName);
}
} catch (IOException e) {
s_logger.error("Unable to read from zip file : " + e.getMessage());
}
return cc;
}
/**
* Add an array of files to a zip archive.
* Synchronized to prevent simultaneous reading/writing.
*
* @param zipFile is a destination zip archive
* @param files is an array of files to be added
* @param pathWithinArchive is the path within the archive to add files to
* @return true if successfully appended these files.
*/
private synchronized boolean addToZipFileSystem(Path zipFile, File[] files, Path pathWithinArchive) {
boolean ret = false;
/* URIs in Java 7 cannot have spaces, must use Path instead
* and so, cannot use the properties map to describe need to create
* a new zip archive. ZipChemCompProvider.initilizeZip to creates the
* missing zip file */
/*
// convert the filename to a URI
String uriString = "jar:file:" + zipFile.toUri().getPath();
final URI uri = URI.create(uriString);
// if filesystem doesn't exist, create one.
final Map env = new HashMap<>();
// Create a new zip if one isn't present.
if (!zipFile.toFile().exists()) {
System.out.println("Need to create " + zipFile.toString());
}
env.put("create", String.valueOf(!zipFile.toFile().exists()));
// Specify the encoding as UTF -8
env.put("encoding", "UTF-8");
*/
// Copy in each file.
try (FileSystem zipfs = FileSystems.newFileSystem(zipFile, null)) {
Files.createDirectories(pathWithinArchive);
for (File f : files) {
if (!f.isDirectory() && f.exists()) {
Path externalFile = f.toPath();
Path pathInZipFile = zipfs.getPath(pathWithinArchive.resolve(f.getName()).toString());
Files.copy(externalFile, pathInZipFile,
StandardCopyOption.REPLACE_EXISTING);
}
}
ret = true;
} catch (IOException ex) {
s_logger.error("Unable to add entries to Chemical Component zip archive : " + ex.getMessage());
ret = false;
}
return ret;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy