All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.io.BondMaker Maven / Gradle / Ivy

There is a newer version: 7.2.2
Show newest version
/*
 *                  BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on Mar. 6, 2014
 *
 */
package org.biojava.nbio.structure.io;

import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.io.mmcif.ChemCompGroupFactory;
import org.biojava.nbio.structure.io.mmcif.ChemCompProvider;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond;
import org.biojava.nbio.structure.io.mmcif.model.StructConn;
import org.biojava.nbio.structure.io.util.PDBTemporaryStorageUtils.LinkRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Adds polymer bonds for peptides and nucleotides based on distance cutoffs and
 * intra-group (residue) bonds based on data from the Chemical Component Dictionary
 * to the Structure object.
 *
 * TODO the current implementation adds bonds to the first model only. This
 * should be sufficient for homogeneous models, but here are a few inhomogeneous models
 * in the PDB. A better handling of models should be considered in the future.
 *
 * @author Peter Rose
 * @author Ulysse Carion
 *
 */
public class BondMaker {


	private static final Logger logger = LoggerFactory.getLogger(BondMaker.class);

	/**
	 * The types of bonds that are read from struct_conn (type specified in field conn_type_id)
	 */
	public static final Set BOND_TYPES_TO_PARSE;
	static {
		BOND_TYPES_TO_PARSE = new HashSet<>();
		BOND_TYPES_TO_PARSE.add("disulf");
		BOND_TYPES_TO_PARSE.add("covale");
		BOND_TYPES_TO_PARSE.add("covale_base");
		BOND_TYPES_TO_PARSE.add("covale_phosphate");
		BOND_TYPES_TO_PARSE.add("covale_sugar");
		BOND_TYPES_TO_PARSE.add("modres");
	}


	/**
	 * Maximum peptide (C - N) bond length considered for bond formation
	 */
	private static final double MAX_PEPTIDE_BOND_LENGTH = 1.8;
	/**
	 * Maximum nucleotide (P - O3') bond length considered for bond formation
	 */
	private static final double MAX_NUCLEOTIDE_BOND_LENGTH = 2.1;

	private Structure structure;
	private FileParsingParameters params;

	public BondMaker(Structure structure, FileParsingParameters params) {
		this.structure = structure;
		this.params = params;
	}

	/**
	 * Creates bond objects and corresponding references in Atom objects:
	 * 
  • * peptide bonds: inferred from sequence and distances *
  • *
  • * nucleotide bonds: inferred from sequence and distances *
  • *
  • * intra-group (residue) bonds: read from the chemical component dictionary, via {@link ChemCompProvider} *
  • */ public void makeBonds() { formPeptideBonds(); formNucleotideBonds(); formIntraResidueBonds(); trimBondLists(); } private void formPeptideBonds() { for (Chain chain : structure.getChains()) { List groups = chain.getSeqResGroups(); for (int i = 0; i < groups.size() - 1; i++) { if (!(groups.get(i) instanceof AminoAcidImpl) || !(groups.get(i + 1) instanceof AminoAcidImpl)) continue; AminoAcidImpl tail = (AminoAcidImpl) groups.get(i); AminoAcidImpl head = (AminoAcidImpl) groups.get(i + 1); // atoms with no residue number don't have atom information if (tail.getResidueNumber() == null || head.getResidueNumber() == null) { continue; } Atom carboxylC; Atom aminoN; carboxylC = tail.getC(); aminoN = head.getN(); if (carboxylC == null || aminoN == null) { // some structures may be incomplete and not store info // about all of their atoms continue; } if (Calc.getDistance(carboxylC, aminoN) < MAX_PEPTIDE_BOND_LENGTH) { new BondImpl(carboxylC, aminoN, 1); } } } } private void formNucleotideBonds() { for (Chain chain : structure.getChains()) { List groups = chain.getSeqResGroups(); for (int i = 0; i < groups.size() - 1; i++) { if (!(groups.get(i) instanceof NucleotideImpl) || !(groups.get(i + 1) instanceof NucleotideImpl)) continue; NucleotideImpl tail = (NucleotideImpl) groups.get(i); NucleotideImpl head = (NucleotideImpl) groups.get(i + 1); // atoms with no residue number don't have atom information if (tail.getResidueNumber() == null || head.getResidueNumber() == null) { continue; } Atom phosphorous = head.getP(); Atom oThreePrime = tail.getO3Prime(); if (phosphorous == null || oThreePrime == null) { continue; } if (Calc.getDistance(phosphorous, oThreePrime) < MAX_NUCLEOTIDE_BOND_LENGTH) { new BondImpl(phosphorous, oThreePrime, 1); } } } } private void formIntraResidueBonds() { for (Chain chain : structure.getChains()) { List groups = chain.getAtomGroups(); for (Group mainGroup : groups) { // atoms with no residue number don't have atom information if (mainGroup.getResidueNumber() == null) { continue; } // Now add support for altLocGroup List totList = new ArrayList(); totList.add(mainGroup); for(Group altLoc: mainGroup.getAltLocs()){ totList.add(altLoc); } // Now iterate through this list for(Group group : totList){ ChemComp aminoChemComp = ChemCompGroupFactory.getChemComp(group .getPDBName()); for (ChemCompBond chemCompBond : aminoChemComp.getBonds()) { Atom a = group.getAtom(chemCompBond.getAtom_id_1()); Atom b = group.getAtom(chemCompBond.getAtom_id_2()); if ( a != null && b != null){ int bondOrder = chemCompBond.getNumericalBondOrder(); new BondImpl(a, b, bondOrder); } else { // Some of the atoms were missing. That's fine, there's // nothing to do in this case. } } } } } } private void trimBondLists() { for (Chain chain : structure.getChains()) { for (Group group : chain.getAtomGroups()) { for (Atom atom : group.getAtoms()) { if (atom.getBonds()!=null && atom.getBonds().size() > 0) { ((ArrayList) atom.getBonds()).trimToSize(); } } } } } /** * Creates disulfide bond objects and references in the corresponding Atoms objects, given * a list of {@link SSBondImpl}s parsed from a PDB/mmCIF file. * @param disulfideBonds */ public void formDisulfideBonds(List disulfideBonds) { List bonds = new ArrayList<>(); for (SSBondImpl disulfideBond : disulfideBonds) { Bond bond = formDisulfideBond(disulfideBond); if (bond!=null) bonds.add(bond); } structure.setSSBonds(bonds); } private Bond formDisulfideBond(SSBondImpl disulfideBond) { try { Atom a = getAtomFromRecord("SG", "", "CYS", disulfideBond.getChainID1(), disulfideBond.getResnum1(), disulfideBond.getInsCode1()); Atom b = getAtomFromRecord("SG", "", "CYS", disulfideBond.getChainID2(), disulfideBond.getResnum2(), disulfideBond.getInsCode2()); Bond ssbond = new BondImpl(a, b, 1); structure.addSSBond(ssbond); return ssbond; } catch (StructureException e) { // Note, in Calpha only mode the CYS SG's are not present. if (! params.isParseCAOnly()) { logger.warn("Could not find atoms specified in SSBOND record: {}",disulfideBond.toString()); } else { logger.debug("Could not find atoms specified in SSBOND record while parsing in parseCAonly mode."); } return null; } } /** * Creates bond objects from a LinkRecord as parsed from a PDB file * @param linkRecord */ public void formLinkRecordBond(LinkRecord linkRecord) { // only work with atoms that aren't alternate locations if (linkRecord.getAltLoc1().equals(" ") || linkRecord.getAltLoc2().equals(" ")) return; try { Atom a = getAtomFromRecord(linkRecord.getName1(), linkRecord.getAltLoc1(), linkRecord.getResName1(), linkRecord.getChainID1(), linkRecord.getResSeq1(), linkRecord.getiCode1()); Atom b = getAtomFromRecord(linkRecord.getName2(), linkRecord.getAltLoc2(), linkRecord.getResName2(), linkRecord.getChainID2(), linkRecord.getResSeq2(), linkRecord.getiCode2()); // TODO determine what the actual bond order of this bond is; for // now, we're assuming they're single bonds new BondImpl(a, b, 1); } catch (StructureException e) { // Note, in Calpha only mode the link atoms may not be present. if (! params.isParseCAOnly()) { logger.warn("Could not find atoms specified in LINK record: {}",linkRecord.toString()); } else { logger.debug("Could not find atoms specified in LINK record while parsing in parseCAonly mode."); } } } public void formBondsFromStructConn(List structConn) { final String symop = "1_555"; // For now - accept bonds within origin asymmetric unit. List ssbonds = new ArrayList<>(); for (StructConn conn : structConn) { if (!BOND_TYPES_TO_PARSE.contains(conn.getConn_type_id())) continue; String chainId1; String chainId2; if(params.isUseInternalChainId()){ chainId1 = conn.getPtnr1_label_asym_id(); chainId2 = conn.getPtnr2_label_asym_id(); } else{ chainId1 = conn.getPtnr1_auth_asym_id(); chainId2 = conn.getPtnr2_auth_asym_id(); } String insCode1 = ""; if (!conn.getPdbx_ptnr1_PDB_ins_code().equals("?")) insCode1 = conn.getPdbx_ptnr1_PDB_ins_code(); String insCode2 = ""; if (!conn.getPdbx_ptnr2_PDB_ins_code().equals("?")) insCode2 = conn.getPdbx_ptnr2_PDB_ins_code(); String seqId1 = conn.getPtnr1_auth_seq_id(); String seqId2 = conn.getPtnr2_auth_seq_id(); String resName1 = conn.getPtnr1_label_comp_id(); String resName2 = conn.getPtnr2_label_comp_id(); String atomName1 = conn.getPtnr1_label_atom_id(); String atomName2 = conn.getPtnr2_label_atom_id(); String altLoc1 = ""; if (!conn.getPdbx_ptnr1_label_alt_id().equals("?")) altLoc1 = conn.getPdbx_ptnr1_label_alt_id(); String altLoc2 = ""; if (!conn.getPdbx_ptnr2_label_alt_id().equals("?")) altLoc2 = conn.getPdbx_ptnr2_label_alt_id(); // TODO: when issue 220 is implemented, add robust symmetry handling to allow bonds between symmetry-related molecules. if (!conn.getPtnr1_symmetry().equals(symop) || !conn.getPtnr2_symmetry().equals(symop) ) { logger.info("Skipping bond between atoms {}(residue {}{}) and {}(residue {}{}) belonging to different symmetry partners, because it is not supported yet", atomName1, seqId1, insCode1, atomName2, seqId2, insCode2); continue; } String altLocStr1 = altLoc1.isEmpty()? "" : "(alt loc "+altLoc1+")"; String altLocStr2 = altLoc2.isEmpty()? "" : "(alt loc "+altLoc2+")"; Atom a1 = null; Atom a2 = null; try { a1 = getAtomFromRecord(atomName1, altLoc1, resName1, chainId1, seqId1, insCode1); } catch (StructureException e) { logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId1, insCode1, resName1, chainId1, atomName1, altLocStr1); continue; } try { a2 = getAtomFromRecord(atomName2, altLoc2, resName2, chainId2, seqId2, insCode2); } catch (StructureException e) { logger.warn("Could not find atom specified in struct_conn record: {}{}({}) in chain {}, atom {} {}", seqId2, insCode2, resName2, chainId2, atomName2, altLocStr2); continue; } if (a1==null) { // we couldn't find the atom, something must be wrong with the file logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName1, altLocStr1, seqId1, insCode1, resName1, chainId1); continue; } if (a2==null) { // we couldn't find the atom, something must be wrong with the file logger.warn("Could not find atom {} {} from residue {}{}({}) in chain {} to create bond specified in struct_conn", atomName2, altLocStr2, seqId2, insCode2, resName2, chainId2); continue; } // assuming order 1 for all bonds, no information is provided by struct_conn Bond bond = new BondImpl(a1, a2, 1); if (conn.getConn_type_id().equals("disulf")) { ssbonds.add(bond); } } // only for ss bonds we add a specific map in structure, all the rests are linked only from Atom.getBonds structure.setSSBonds(ssbonds); } private Atom getAtomFromRecord(String name, String altLoc, String resName, String chainID, String resSeq, String iCode) throws StructureException { if (iCode==null || iCode.isEmpty()) { iCode = " "; // an insertion code of ' ' is ignored } Chain chain = structure.getChainByPDB(chainID); ResidueNumber resNum = new ResidueNumber(chainID, Integer.parseInt(resSeq), iCode.charAt(0)); Group group = chain.getGroupByPDB(resNum); Group g = group; // there is an alternate location if (!altLoc.isEmpty()) { g = group.getAltLocGroup(altLoc.charAt(0)); if (g==null) throw new StructureException("Could not find altLoc code "+altLoc+" in group "+resSeq+iCode+" of chain "+ chainID); } return g.getAtom(name); } }




    © 2015 - 2025 Weber Informatics LLC | Privacy Policy