All downloads are free. The search and download features use the official Maven repository.

org.biojava.nbio.structure.chem.ZipChemCompProvider Maven / Gradle / Ivy

The newest version!
package org.biojava.nbio.structure.chem;

import org.biojava.nbio.structure.io.cif.ChemCompConverter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

/**
 * This chemical component provider retrieves and caches chemical component definition files from a
 * zip archive specified in its construction.  If the archive does not contain the record, an attempt is
 * made to download it using DownloadChemCompProvider. The downloaded file is then added to the archive.
 *
 * The same ZipChemCompProvider should be used by all threads: reads from and writes to the zip
 * archive are synchronized on the provider instance, and the set of unavailable record names is a
 * concurrent set.  A zip archive will be created if missing.
 *
 * @author edlunde
 * @author larsonm
 * @since 12/05/12
 * updated 3/5/2016 for Java 7 ZipFileSystem
 */
public class ZipChemCompProvider implements ChemCompProvider{
    private static final Logger s_logger = LoggerFactory.getLogger(ZipChemCompProvider.class);

    // Name of the directory that holds the .cif.gz records, both below the temporary
    // directory and inside the zip archive.
    private static final String CHEMCOMP_DIR = "chemcomp";

    private final Path m_tempDir;  // Base path where $m_zipRootDir/ will be downloaded to.
    private final Path m_zipRootDir;
    private final Path m_zipFile;
    private final DownloadChemCompProvider m_dlProvider;

    // volatile: may be changed through setRemoveCif while other threads are downloading.
    private volatile boolean m_removeCif;

    // Missing IDs from library that cannot be downloaded are added here to prevent delays.
    // A concurrent set, because getChemComp reads and updates it without holding a lock.
    private final Set<String> unavailable = ConcurrentHashMap.newKeySet();

    /**
     * ZipChemCompProvider is a Chemical Component provider that stores chemical components
     * in a zip archive.  Missing chemical components are downloaded and appended to the
     * archive.  If non-existent a new zip archive will be created.
     *
     * @param chemicalComponentDictionaryFile : path to zip archive for chemical components.
     * @param tempDir : path for temporary directory, (null or empty) defaults to path in property "java.io.tmpdir".
     * @throws IOException if the missing zip archive cannot be created.
     */
    public ZipChemCompProvider(String chemicalComponentDictionaryFile, String tempDir) throws IOException {
        this.m_zipFile = Paths.get(chemicalComponentDictionaryFile);

        // Use a default temporary directory if not passed a value.
        if (tempDir == null || tempDir.isEmpty()) {
            this.m_tempDir = Paths.get(System.getProperty("java.io.tmpdir"));
        } else {
            this.m_tempDir = Paths.get(tempDir);
        }

        this.m_zipRootDir = Paths.get(CHEMCOMP_DIR);

        // Setup an instance of the download chemcomp provider.
        this.m_dlProvider = new DownloadChemCompProvider(m_tempDir.toString());
        this.m_removeCif = true;
        initializeZip();
    }

    /**
     * Create the zip archive, containing only the "chemcomp/" directory entry, when it
     * does not exist yet.  See comments in addToZipFileSystem for why initialization is
     * required with ZipFileSystems - due to URI issues in Java7.
     *
     * @throws IOException if the archive cannot be created or written.
     */
    private void initializeZip() throws IOException {
        s_logger.info("Using chemical component dictionary: {}", m_zipFile);
        final File f = m_zipFile.toFile();
        if (f.exists()) {
            return;
        }

        s_logger.info("Creating missing zip archive: {}", m_zipFile);
        // Both streams are managed resources so the file handle is released even if
        // wrapping or writing fails.
        try (FileOutputStream fo = new FileOutputStream(f);
             ZipOutputStream zip = new ZipOutputStream(new BufferedOutputStream(fo))) {
            zip.putNextEntry(new ZipEntry("chemcomp/"));
            zip.closeEntry();
        }
    }

    /**
     * Remove downloaded .cif.gz after adding to zip archive?
     * Default is true.
     * @param doRemove true to delete the downloaded file once it was handed to the archive.
     */
    public void setRemoveCif(boolean doRemove) {
        m_removeCif = doRemove;
    }

    /**
     * (non-Javadoc)
     * @see ChemCompProvider#getChemComp(java.lang.String)
     *
     * @param recordName : three letter PDB name for a residue
     * @return ChemComp from .zip or ChemComp from repository.  Will return empty ChemComp when unable to find a residue and will return null if not provided a valid recordName.
     */
    @Override
    public ChemComp getChemComp(String recordName) {
        if (null == recordName) {
            return null;
        }

        // handle non-existent ChemComp codes and do not repeatedly attempt to add these.
        if (unavailable.contains(recordName)) {
            return getEmptyChemComp(recordName);
        }

        // Try to pull from zip, if fail then download.
        ChemComp cc = getFromZip(recordName);
        if (cc == null) {
            s_logger.info("File {} not found in archive. Attempting download from PDB.", recordName);
            cc = downloadAndAdd(recordName);
        }

        // If a null record or an empty chemcomp, return a default ChemComp and blacklist.
        if (cc == null || (null == cc.getName() && cc.getAtoms().size() == 0)) {
            s_logger.info("Unable to find or download {} - excluding from future searches.", recordName);
            unavailable.add(recordName);
            return getEmptyChemComp(recordName);
        }
        return cc;
    }

    /** Use DownloadChemCompProvider to grab a gzipped cif record from the PDB.
     *  Zip the downloaded cif.gz file into the dictionary.
     *
     * @param recordName is the three-letter chemical component code (i.e. residue name).
     * @return ChemComp matching recordName, as returned by the download provider.
     */
    private ChemComp downloadAndAdd(String recordName){
        final ChemComp cc = m_dlProvider.getChemComp(recordName);

        // The archive is only touched when this file is actually present below the temp dir.
        final File cif = m_tempDir.resolve(CHEMCOMP_DIR).resolve(recordName + ".cif.gz").toFile();
        if (!cif.isFile()) {
            return cc;
        }

        // Failures are logged inside addToZipFileSystem; the component itself is still returned.
        addToZipFileSystem(m_zipFile, new File[] { cif }, m_zipRootDir);
        if (m_removeCif && !cif.delete()) {
            s_logger.warn("Unable to remove temporary file {}", cif);
        }
        return cc;
    }

    /**
     * Cleanup chemical component (.cif.gz) files downloaded to tmpdir.
     * Deletion is best effort: files that cannot be removed are skipped, and the
     * "chemcomp" directory is only removed if it ends up empty.
     *
     * @param tempdir : path to temporary directory for chemical components
     */
    public static void purgeTempFiles(String tempdir) {
        if (tempdir == null) {
            return;
        }

        s_logger.info("Removing: {}", tempdir);
        final Path dlPath = Paths.get(tempdir).resolve(CHEMCOMP_DIR);
        final File[] chemCompOutFiles = finder(dlPath.toString(), "cif.gz");
        if (null != chemCompOutFiles) {
            for (File f : chemCompOutFiles) {
                f.delete();
            }
        }
        dlPath.toFile().delete();
    }

    /**
     * Return an empty ChemComp group for a three-letter resName.
     * The three-letter code is the first three characters of resName, or the empty
     * string when resName is null or shorter than three characters.
     *
     * @param resName residue name the placeholder is created for.
     * @return a ChemComp with one-letter code "?", unknown polymer type and residue type atomn.
     */
    private ChemComp getEmptyChemComp(String resName){
        String pdbName = ""; // Empty string is default
        if (null != resName && resName.length() >= 3) {
            pdbName = resName.substring(0,3);
        }
        final ChemComp comp = new ChemComp();
        comp.setOneLetterCode("?");
        comp.setThreeLetterCode(pdbName);
        comp.setPolymerType(PolymerType.unknown);
        comp.setResidueType(ResidueType.atomn);
        return comp;
    }

    /**
     * Return File(s) in dirName that match suffix.
     * @param dirName directory to list.
     * @param suffix required ending of the file name.
     * @return the matching files, or null if an argument is null or the directory cannot be listed.
     */
    static private File[] finder(String dirName, final String suffix) {
        if (null == dirName || null == suffix) {
            return null;
        }

        final File dir = new File(dirName);
        return dir.listFiles((dir1, filename) -> filename.endsWith(suffix));
    }

    /**
     * This is synchronized, along with addToZipFileSystem, to prevent simultaneous reading/writing.
     * @param recordName to find in zipfile.
     * @return ChemComp if found or null if missing or if the archive cannot be read.
     */
    private synchronized ChemComp getFromZip(String recordName) {
        if (!m_zipFile.toFile().exists()) {
            return null;
        }
        final String filename = "chemcomp/" + recordName + ".cif.gz";

        // try with resources block to read from the filesystem.
        // Don't remove the (ClassLoader) cast! It is required for openjdk 11.
        try (FileSystem fs = FileSystems.newFileSystem(m_zipFile, (ClassLoader)null)) {
            final Path cif = fs.getPath(filename);

            if (Files.exists(cif)) {
                s_logger.debug("reading {} from {}", recordName, m_zipFile);
                final ChemicalComponentDictionary dict = ChemCompConverter.fromPath(cif);
                return dict.getChemComp(recordName);
            }
        } catch (IOException e) {
            // Trailing throwable argument makes SLF4J log the stack trace as well.
            s_logger.error("Unable to read from zip file : {}", e.getMessage(), e);
        }

        return null;
    }

    /**
     * Add an array of files to a zip archive.
     * Synchronized to prevent simultaneous reading/writing.
     *
     * @param zipFile is a destination zip archive
     * @param files is an array of files to be added
     * @param pathWithinArchive is the path within the archive to add files to
     * @return true if successfully appended these files.
     */
    private synchronized boolean addToZipFileSystem(Path zipFile, File[] files, Path pathWithinArchive) {
        /* URIs in Java 7 cannot have spaces, must use Path instead
         * and so, cannot use the properties map to describe need to create
         * a new zip archive.  ZipChemCompProvider.initializeZip creates the
         * missing zip file */

        // Don't remove the (ClassLoader) cast! It is required for openjdk 11.
        try (FileSystem zipfs = FileSystems.newFileSystem(zipFile, (ClassLoader)null)) {
            // pathWithinArchive belongs to the default file system.  Rebuild it on the zip
            // file system, element by element, so that the directory is created inside the
            // archive (not in the working directory) and no platform-specific separator
            // ends up in an entry name.
            final StringBuilder dirName = new StringBuilder();
            for (Path element : pathWithinArchive) {
                if (dirName.length() > 0) {
                    dirName.append(zipfs.getSeparator());
                }
                dirName.append(element);
            }
            final Path dirInZip = zipfs.getPath(dirName.toString());
            Files.createDirectories(dirInZip);

            // Copy in each file.
            for (File f : files) {
                if (!f.isDirectory() && f.exists()) {
                    Files.copy(f.toPath(), dirInZip.resolve(f.getName()),
                            StandardCopyOption.REPLACE_EXISTING);
                }
            }
            return true;
        } catch (IOException ex) {
            // Trailing throwable argument makes SLF4J log the stack trace as well.
            s_logger.error("Unable to add entries to Chemical Component zip archive : {}", ex.getMessage(), ex);
            return false;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy