All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.StructureIO Maven / Gradle / Ivy

There is a newer version: 7.1.3
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure;

import org.biojava.nbio.structure.align.util.AtomCache;
import org.biojava.nbio.structure.io.StructureFiletype;

import java.io.IOException;
import java.util.List;
import java.util.Locale;

/**
 * A class that provides static access methods for easy lookup of protein structure related components
 *
 * @author Andreas Prlic
 *
 * @since 3.0.5
 */
public class StructureIO {
	private static AtomCache cache ;

	/**
	 * Loads a structure based on a name. Supported naming conventions are:
	 *
	 *  
		Formal specification for how to specify the name:

		name     := pdbID
					   | pdbID '.' chainID
					   | pdbID '.' range
					   | scopID
					   | biol
					   | pdp
		range         := '('? range (',' range)? ')'?
					   | chainID
					   | chainID '_' resNum '-' resNum
		pdbID         := [1-9][a-zA-Z0-9]{3}
					   | PDB_[a-zA-Z0-9]{8}
		chainID       := [a-zA-Z0-9]
		scopID        := 'd' pdbID [a-z_][0-9_]
		biol		  := 'BIO:' pdbID [:]? [0-9]+
		resNum        := [-+]?[0-9]+[A-Za-z]?


		Example structures:
		1TIM                #whole structure - asym unit (short format)
		4HHB.C              #single chain
		4GCR.A_1-83         #one domain, by residue number
		3AA0.A,B            #two chains treated as one structure
		PDB_00001TIM        #whole structure - asym unit (extended format)
		PDB_00004HHB.C      #single chain
		PDB_00004GCR.A_1-83 #one domain, by residue number
		PDB_00003AA0.A,B    #two chains treated as one structure
		d2bq6a1     #scop domain
		BIO:1fah   #biological assembly nr 1 for 1fah
		BIO:1fah:0 #asym unit for 1fah
		BIO:1fah:1 #biological assembly nr 1 for 1fah
		BIO:1fah:2 #biological assembly nr 2 for 1fah

	 * 
* * With the additional set of rules: * *
    *
  • If only a PDB code is provided, the whole structure will be return including ligands, but the first model only (for NMR). *
  • Chain IDs are case sensitive, PDB ids are not. To specify a particular chain write as: 4hhb.A or 4HHB.A
  • *
  • To specify a SCOP domain write a scopId e.g. d2bq6a1. Some flexibility can be allowed in SCOP domain names, see {@link #setStrictSCOP(boolean)}
  • *
  • URLs are accepted as well
  • *
* * @param name * @return a Structure object, or null if name appears improperly formated (eg too short, etc) * @throws IOException The PDB file cannot be cached due to IO errors * @throws StructureException The name appeared valid but did not correspond to a structure. * Also thrown by some submethods upon errors, eg for poorly formatted subranges. */ public static Structure getStructure(String name) throws IOException, StructureException { checkInitAtomCache(); // delegate this functionality to AtomCache... return cache.getStructure(name); } private static void checkInitAtomCache() { if (cache == null) { cache = new AtomCache(); } } public static void setAtomCache(AtomCache c){ cache = c; } public static AtomCache getAtomCache() { checkInitAtomCache(); return cache; } /** * Returns the first biological assembly that is available for the given PDB id. *

* The output Structure will be different depending on the multiModel parameter: *

  • * the symmetry-expanded chains are added as new models, one per transformId. All original models but * the first one are discarded. *
  • *
  • * as original with symmetry-expanded chains added with renamed chain ids and names (in the form * originalAsymId_transformId and originalAuthId_transformId) *
  • *

    * For more documentation on quaternary structures see: * {@link http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies} * * * @param pdbId * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). * @return a Structure object or null if that assembly is not available * @throws StructureException * @throws IOException */ public static Structure getBiologicalAssembly(String pdbId, boolean multiModel) throws IOException, StructureException { checkInitAtomCache(); pdbId = pdbId.toLowerCase(); return cache.getBiologicalAssembly(pdbId, multiModel); } /** * Returns the first biological assembly that is available for the given PDB id, * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE} *

    * For more documentation on quaternary structures see: * {@link http://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/biological-assemblies} * * * @param pdbId * @return a Structure object or null if that assembly is not available * @throws StructureException * @throws IOException */ public static Structure getBiologicalAssembly(String pdbId) throws IOException, StructureException { return getBiologicalAssembly(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE); } /** * Returns the biological assembly for the given PDB id and bioassembly identifier. *

    * The output Structure will be different depending on the multiModel parameter: *

  • * the symmetry-expanded chains are added as new models, one per transformId. All original models but * the first one are discarded. *
  • *
  • * as original with symmetry-expanded chains added with renamed chain ids and names (in the form * originalAsymId_transformId and originalAuthId_transformId) *
  • * @param pdbId * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit). * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). * @return a Structure object or null if that assembly is not available * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it * @throws IOException */ public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr, boolean multiModel) throws IOException, StructureException { checkInitAtomCache(); pdbId = pdbId.toLowerCase(); return cache.getBiologicalAssembly(pdbId, biolAssemblyNr, multiModel); } /** * Returns the biological assembly for the given PDB id and bioassembly identifier, * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE} * @param pdbId * @param biolAssemblyNr - the ith biological assembly that is available for a PDB ID (we start counting at 1, 0 represents the asym unit). * @return a Structure object or null if that assembly is not available * @throws StructureException if there is no bioassembly available for given biolAssemblyNr or some other problems encountered while loading it * @throws IOException */ public static Structure getBiologicalAssembly(String pdbId, int biolAssemblyNr) throws IOException, StructureException { return getBiologicalAssembly(pdbId, biolAssemblyNr, AtomCache.DEFAULT_BIOASSEMBLY_STYLE); } /** * Returns all biological assemblies for the given PDB id. *

    * The output Structure will be different depending on the multiModel parameter: *

  • * the symmetry-expanded chains are added as new models, one per transformId. All original models but * the first one are discarded. *
  • *
  • * as original with symmetry-expanded chains added with renamed chain ids and names (in the form * originalAsymId_transformId and originalAuthId_transformId) *
  • * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead. * @param pdbId * @param multiModel if true the output Structure will be a multi-model one with one transformId per model, * if false the outputStructure will be as the original with added chains with renamed asymIds (in the form originalAsymId_transformId and originalAuthId_transformId). * @return * @throws IOException * @throws StructureException * @since 5.0 */ public static List getBiologicalAssemblies(String pdbId, boolean multiModel) throws IOException, StructureException { checkInitAtomCache(); pdbId = pdbId.toLowerCase(); return cache.getBiologicalAssemblies(pdbId, multiModel); } /** * Returns all biological assemblies for the given PDB id, * using multiModel={@value AtomCache#DEFAULT_BIOASSEMBLY_STYLE} *

    * If only one biological assembly is required use {@link #getBiologicalAssembly(String)} or {@link #getBiologicalAssembly(String, int)} instead. * @param pdbId * @return * @throws IOException * @throws StructureException * @since 5.0 */ public static List getBiologicalAssemblies(String pdbId) throws IOException, StructureException { return getBiologicalAssemblies(pdbId, AtomCache.DEFAULT_BIOASSEMBLY_STYLE); } /** * Attempts to guess the type of a structure file based on the extension * @param filename * @return */ public static StructureFiletype guessFiletype(String filename) { String lower = filename.toLowerCase(); for (StructureFiletype type : StructureFiletype.values()) { for (String ext : type.getExtensions()) { if (lower.endsWith(ext.toLowerCase())) { return type; } } } return StructureFiletype.UNKNOWN; } }





    © 2015 - 2024 Weber Informatics LLC | Privacy Policy