All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.StructureIO Maven / Gradle / Ivy

There is a newer version: 7.2.2
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure;

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.MMCIFFileReader;
import org.biojava.nbio.structure.io.PDBFileReader;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.quaternary.io.BioUnitDataProvider;
import org.biojava.nbio.structure.quaternary.io.BioUnitDataProviderFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A class that provides static access methods for easy lookup of protein structure related components
 *
 * @author Andreas Prlic
 *
 * @since 3.0.5
 */
public class StructureIO {

	private static final Logger logger = LoggerFactory.getLogger(StructureIO.class);

	private static AtomCache cache ;


	/** Loads a structure based on a name. Supported naming conventions are:
	 *
	 *  * 
		Formal specification for how to specify the name:

		name     := pdbID
		               | pdbID '.' chainID
		               | pdbID '.' range
		               | scopID
		               | biol
		               | pdp
		range         := '('? range (',' range)? ')'?
		               | chainID
		               | chainID '_' resNum '-' resNum
		pdbID         := [0-9][a-zA-Z0-9]{3}
		chainID       := [a-zA-Z0-9]
		scopID        := 'd' pdbID [a-z_][0-9_]
		biol		  := 'BIOL:' pdbID [:]? [0-9]+
		pdp			  := 'PDP:' pdbID[A-Za-z0-9_]+
		resNum        := [-+]?[0-9]+[A-Za-z]?


		Example structures:
		1TIM     	#whole structure - asym unit
		4HHB.C     	#single chain
		4GCR.A_1-83 #one domain, by residue number
		3AA0.A,B    #two chains treated as one structure
		d2bq6a1     #scop domain
		BIOL:1fah   #biological assembly nr 1 for 1fah
		BIOL:1fah:0 #asym unit for 1fah
		BIOL:1fah:1 #biological assembly nr 1 for 1fah
		BIOL:1fah:2 #biological assembly nr 2 for 1fah

		
* * With the additional set of rules: * *
    *
  • If only a PDB code is provided, the whole structure will be return including ligands, but the first model only (for NMR). *
  • Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A
  • *
  • To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names, see {@link #setStrictSCOP(boolean)}
  • *
  • URLs are accepted as well
  • *
* * @param name * @return a Structure object, or null if name appears improperly formated (eg too short, etc) * @throws IOException The PDB file cannot be cached due to IO errors * @throws StructureException The name appeared valid but did not correspond to a structure. * Also thrown by some submethods upon errors, eg for poorly formatted subranges. */ public static Structure getStructure(String name) throws IOException, StructureException{ checkInitAtomCache(); // delegate this functionality to AtomCache... return cache.getStructure(name); } private static void checkInitAtomCache() { if ( cache == null){ cache = new AtomCache(); } } public static void setAtomCache(AtomCache c){ cache = c; } /** * Returns the first biologicalAssembly that is available for a protein structure. For more documentation on quaternary structures see: * {@link http://www.pdb.org/pdb/101/static101.do?p=education_discussion/Looking-at-Structures/bioassembly_tutorial.html} * * * @param pdbId * @return a Structure object or null if that assembly is not available * @throws StructureException * @throws IOException */ public static Structure getBiologicalAssembly(String pdbId) throws IOException, StructureException{ return getBiologicalAssembly(pdbId,1); } /** * By default the getStructure method loads asym units. This access method allows to recreate the quaternary structure for a protein if it is available. * * @param pdbId * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit). * @return a Structure object or null if that assembly is not available * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it * @throws IOException */ public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr) throws IOException, StructureException { checkInitAtomCache(); return getBiologicalAssembly(pdbId,biolAssemblyNr,StructureIO.cache); } public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, AtomCache cache) throws StructureException { BioUnitDataProvider provider = null; try { provider = BioUnitDataProviderFactory.getBioUnitDataProvider(); provider.setAtomCache(cache); Structure bio = getBiologicalAssembly(pdbId, biolAssemblyNr,cache,BioUnitDataProviderFactory.getBioUnitDataProvider()); return bio; } finally { if(provider != null) { //cleanup to avoid memory leaks provider.setAsymUnit(null); provider.setAtomCache(null); } } } public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, AtomCache cache, BioUnitDataProvider provider) throws StructureException { pdbId = pdbId.toLowerCase(); Structure asymUnit = provider.getAsymUnit(pdbId); // 0 ... asym unit if ( biolAssemblyNr == 0) { logger.info("Requested biological assembly 0 for PDB id "+pdbId+", returning asymmetric unit"); return asymUnit; } // does it exist? if (!asymUnit.getPDBHeader().getBioAssemblies().containsKey(biolAssemblyNr)) { throw new StructureException("No biological assembly available for biological assembly nr " + biolAssemblyNr + " of " + pdbId); } List transformations = asymUnit.getPDBHeader().getBioAssemblies().get(biolAssemblyNr).getTransforms(); if ( transformations == null || transformations.size() == 0){ throw new StructureException("Could not load transformations to recreate biological assembly nr " + biolAssemblyNr + " of " + pdbId); } BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder(); return builder.rebuildQuaternaryStructure(asymUnit, transformations); } /** * Does the provider PDB ID have a biological assembly? * * @param pdbId * @return flag if one or more biological assemblies are available */ public static boolean hasBiologicalAssembly(String pdbId){ pdbId = pdbId.toLowerCase(); BioUnitDataProvider provider = BioUnitDataProviderFactory.getBioUnitDataProvider(); checkInitAtomCache(); provider.setAtomCache(cache); return provider.hasBiolAssembly(pdbId); } public static int getNrBiologicalAssemblies(String pdbId){ pdbId = pdbId.toLowerCase(); BioUnitDataProvider provider = BioUnitDataProviderFactory.getBioUnitDataProvider(); checkInitAtomCache(); provider.setAtomCache(cache); return provider.getNrBiolAssemblies(pdbId); } private static final String FILE_SEPARATOR = System.getProperty("file.separator"); /** * Utility method to set the location where PDB files can be found * * @param pathToPDBFiles */ public static void setPdbPath(String pathToPDBFiles){ if ( ! pathToPDBFiles.endsWith(FILE_SEPARATOR)) pathToPDBFiles += FILE_SEPARATOR; } public static enum StructureFiletype { PDB( (new PDBFileReader()).getExtensions()), CIF( new MMCIFFileReader().getExtensions()), UNKNOWN(Collections.emptyList()); private List extensions; /** * @param extensions List of supported extensions, including leading period */ private StructureFiletype(List extensions) { this.extensions = extensions; } /** * @return a list of file extensions associated with this type */ public List getExtensions() { return extensions; } } /** * Attempts to guess the type of a structure file based on the extension * @param filename * @return */ public static StructureFiletype guessFiletype(String filename) { String lower = filename.toLowerCase(); for(StructureFiletype type : StructureFiletype.values()) { for(String ext : type.getExtensions()) { if(lower.endsWith(ext.toLowerCase())) { return type; } } } return StructureFiletype.UNKNOWN; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy