
org.biojava.nbio.structure.io.mmcif.SimpleMMcifConsumer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
* created at Apr 26, 2008
*/
package org.biojava.nbio.structure.io.mmcif;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.vecmath.Matrix4d;
import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.AtomImpl;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.ChainImpl;
import org.biojava.nbio.structure.EntityInfo;
import org.biojava.nbio.structure.EntityType;
import org.biojava.nbio.structure.DBRef;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.HetatomImpl;
import org.biojava.nbio.structure.NucleotideImpl;
import org.biojava.nbio.structure.PDBHeader;
import org.biojava.nbio.structure.ResidueNumber;
import org.biojava.nbio.structure.SeqMisMatch;
import org.biojava.nbio.structure.SeqMisMatchImpl;
import org.biojava.nbio.structure.Site;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureImpl;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.io.BondMaker;
import org.biojava.nbio.structure.io.ChargeAdder;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.SeqRes2AtomAligner;
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
import org.biojava.nbio.structure.io.mmcif.model.AuditAuthor;
import org.biojava.nbio.structure.io.mmcif.model.Cell;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompDescriptor;
import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBremark;
import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBrev;
import org.biojava.nbio.structure.io.mmcif.model.DatabasePdbrevRecord;
import org.biojava.nbio.structure.io.mmcif.model.Entity;
import org.biojava.nbio.structure.io.mmcif.model.EntityPolySeq;
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcGen;
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcNat;
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcSyn;
import org.biojava.nbio.structure.io.mmcif.model.Exptl;
import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompDescriptor;
import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompIdentifier;
import org.biojava.nbio.structure.io.mmcif.model.PdbxEntityNonPoly;
import org.biojava.nbio.structure.io.mmcif.model.PdbxNonPolyScheme;
import org.biojava.nbio.structure.io.mmcif.model.PdbxPolySeqScheme;
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly;
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen;
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList;
import org.biojava.nbio.structure.io.mmcif.model.Refine;
import org.biojava.nbio.structure.io.mmcif.model.Struct;
import org.biojava.nbio.structure.io.mmcif.model.StructAsym;
import org.biojava.nbio.structure.io.mmcif.model.StructConn;
import org.biojava.nbio.structure.io.mmcif.model.StructKeywords;
import org.biojava.nbio.structure.io.mmcif.model.StructNcsOper;
import org.biojava.nbio.structure.io.mmcif.model.StructRef;
import org.biojava.nbio.structure.io.mmcif.model.StructRefSeq;
import org.biojava.nbio.structure.io.mmcif.model.StructRefSeqDif;
import org.biojava.nbio.structure.io.mmcif.model.StructSite;
import org.biojava.nbio.structure.io.mmcif.model.StructSiteGen;
import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.biojava.nbio.structure.xtal.SymoplibParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A MMcifConsumer implementation that builds an in-memory representation of the
* content of a mmcif file as a BioJava Structure object.
*
* @author Andreas Prlic
* @since 1.7
*/
public class SimpleMMcifConsumer implements MMcifConsumer {
private static final Logger logger = LoggerFactory.getLogger(SimpleMMcifConsumer.class);
private Structure structure;
private Chain current_chain;
private Group current_group;
private List current_model;
private List entities;
private List strucRefs;
private List seqResChains;
private List entityChains; // needed to link entities, chains and compounds...
private List structAsyms; // needed to link entities, chains and compounds...
private List structOpers ; //
private List strucAssemblies;
private List strucAssemblyGens;
private List entitySrcGens;
private List entitySrcNats;
private List entitySrcSyns;
private List structConn;
private List structNcsOper;
private List sequenceDifs;
private List structSiteGens;
/**
* A map of asym ids (internal chain ids) to strand ids (author chain ids)
* extracted from pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme categories
*/
private Map asymStrandId;
/**
* A map of asym ids (internal chain ids) to strand ids (author chain ids)
* extracted from the information in _atom_sites category. Will be used
* if no mapping is found in pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme
*/
private Map asymId2StrandIdFromAtomSites;
/**
* A map of asym ids (internal chain ids) to entity ids extracted from
* the _struct_asym category
*/
private Map asymId2entityId;
private String current_nmr_model ;
private FileParsingParameters params;
public SimpleMMcifConsumer(){
params = new FileParsingParameters();
documentStart();
}
@Override
public void newEntity(Entity entity) {
logger.debug("New entity: {}",entity.toString());
entities.add(entity);
}
@Override
public void newPdbxStructOperList(PdbxStructOperList structOper){
structOpers.add(structOper);
}
@Override
public void newStructAsym(StructAsym sasym){
structAsyms.add(sasym);
}
private Entity getEntity(int entity_id){
try {
for (Entity e: entities){
int eId = Integer.parseInt(e.getId());
if (eId== entity_id){
return e;
}
}
} catch (NumberFormatException e) {
logger.warn("Entity id does not look like a number:", e.getMessage());
}
return null;
}
@Override
public void newStructKeywords(StructKeywords kw){
PDBHeader header = structure.getPDBHeader();
if ( header == null)
header = new PDBHeader();
header.setDescription(kw.getPdbx_keywords());
header.setClassification(kw.getPdbx_keywords());
}
@Override
public void setStruct(Struct struct) {
PDBHeader header = structure.getPDBHeader();
if ( header == null)
header = new PDBHeader();
header.setTitle(struct.getTitle());
header.setIdCode(struct.getEntry_id());
//header.setDescription(struct.getPdbx_descriptor());
//header.setClassification(struct.getPdbx_descriptor());
//header.setDescription(struct.getPdbx_descriptor());
structure.setPDBHeader(header);
structure.setPDBCode(struct.getEntry_id());
}
/** initiate new group, either Hetatom, Nucleotide, or AminoAcid */
private Group getNewGroup(String recordName,Character aminoCode1, long seq_id,String groupCode3) {
Group g = ChemCompGroupFactory.getGroupFromChemCompDictionary(groupCode3);
if ( g != null && !g.getChemComp().isEmpty()) {
if ( g instanceof AminoAcidImpl) {
AminoAcidImpl aa = (AminoAcidImpl) g;
aa.setId(seq_id);
} else if ( g instanceof NucleotideImpl) {
NucleotideImpl nuc = (NucleotideImpl) g;
nuc.setId(seq_id);
} else if ( g instanceof HetatomImpl) {
HetatomImpl het = (HetatomImpl)g;
het.setId(seq_id);
}
return g;
}
Group group;
if ( recordName.equals("ATOM") ) {
if (StructureTools.isNucleotide(groupCode3)) {
// it is a nucleotide
NucleotideImpl nu = new NucleotideImpl();
group = nu;
nu.setId(seq_id);
} else if (aminoCode1==null || aminoCode1 == StructureTools.UNKNOWN_GROUP_LABEL){
HetatomImpl h = new HetatomImpl();
h.setId(seq_id);
group = h;
} else {
AminoAcidImpl aa = new AminoAcidImpl() ;
aa.setAminoType(aminoCode1);
aa.setId(seq_id);
group = aa ;
}
}
else {
if (StructureTools.isNucleotide(groupCode3)) {
// it is a nucleotide
NucleotideImpl nu = new NucleotideImpl();
group = nu;
nu.setId(seq_id);
}
else if (aminoCode1 != null ) {
AminoAcidImpl aa = new AminoAcidImpl() ;
aa.setAminoType(aminoCode1);
aa.setId(seq_id);
group = aa ;
} else {
HetatomImpl h = new HetatomImpl();
h.setId(seq_id);
group = h;
}
}
return group ;
}
/**
* Test if the given chainID is already present in the list of chains given. If yes, returns the chain
* otherwise returns null.
*/
private static Chain isKnownChain(String chainID, List chains){
for (int i = 0; i< chains.size();i++){
Chain testchain = chains.get(i);
//System.out.println("comparing chainID >"+chainID+"< against testchain " + i+" >" +testchain.getName()+"<");
if (chainID.equals(testchain.getChainID())) {
//System.out.println("chain "+ chainID+" already known ...");
return testchain;
}
}
return null;
}
@Override
public void newAtomSite(AtomSite atom) {
if (params.isHeaderOnly()) return;
// Warning: getLabel_asym_id is not the "chain id" in the PDB file
// it is the internally used chain id.
// later on we will fix this...
// later one needs to map the asym id to the pdb_strand_id
//TODO: add support for FileParsingParams.getMaxAtoms()
boolean startOfNewChain = false;
String chain_id = atom.getLabel_asym_id();
String recordName = atom.getGroup_PDB();
String residueNumberS = atom.getAuth_seq_id();
Integer residueNrInt = Integer.parseInt(residueNumberS);
// the 3-letter name of the group:
String groupCode3 = atom.getLabel_comp_id();
Character aminoCode1 = null;
if ( recordName.equals("ATOM") )
aminoCode1 = StructureTools.get1LetterCodeAmino(groupCode3);
else {
aminoCode1 = StructureTools.get1LetterCodeAmino(groupCode3);
// for nucleotides this will be null..
if (aminoCode1 != null && aminoCode1.equals(StructureTools.UNKNOWN_GROUP_LABEL))
aminoCode1 = null;
}
String insCodeS = atom.getPdbx_PDB_ins_code();
Character insCode = null;
if (! insCodeS.equals("?")) {
insCode = insCodeS.charAt(0);
}
// we store the internal seq id in the Atom._id field
// this is not a PDB file field but we need this to internally assign the insertion codes later
// from the pdbx_poly_seq entries..
long seq_id = -1;
try {
seq_id = Long.parseLong(atom.getLabel_seq_id());
} catch (NumberFormatException e){
// non polymer chains (ligands and small molecules) will have a label_seq_id set to '.', thus it is ok to
// silently ignore this
//logger.debug("Could not parse number for _atom_site.label_seq_id: "+e.getMessage());
}
String nmrModel = atom.getPdbx_PDB_model_num();
if ( current_nmr_model == null) {
current_nmr_model = nmrModel;
}
if (! current_nmr_model.equals(nmrModel)){
current_nmr_model = nmrModel;
// add previous data
if ( current_chain != null ) {
current_chain.addGroup(current_group);
current_group.trimToSize();
}
// we came to the beginning of a new NMR model
structure.addModel(current_model);
current_model = new ArrayList();
current_chain = null;
current_group = null;
}
if (current_chain == null) {
current_chain = new ChainImpl();
current_chain.setChainID(chain_id);
current_model.add(current_chain);
startOfNewChain = true;
}
//System.out.println("BEFORE: " + chain_id + " " + current_chain.getName());
if ( ! chain_id.equals(current_chain.getChainID()) ) {
startOfNewChain = true;
// end up old chain...
current_chain.addGroup(current_group);
// see if old chain is known ...
Chain testchain ;
testchain = isKnownChain(current_chain.getChainID(),current_model);
//System.out.println("trying to re-using known chain " + current_chain.getName() + " " + chain_id);
if ( testchain != null && testchain.getChainID().equals(chain_id)){
//System.out.println("re-using known chain " + current_chain.getName() + " " + chain_id);
} else {
testchain = isKnownChain(chain_id,current_model);
}
if ( testchain == null) {
//System.out.println("unknown chain. creating new chain.");
current_chain = new ChainImpl();
current_chain.setChainID(chain_id);
} else {
current_chain = testchain;
}
if ( ! current_model.contains(current_chain))
current_model.add(current_chain);
}
ResidueNumber residueNumber = new ResidueNumber(chain_id,residueNrInt, insCode);
if (current_group == null) {
current_group = getNewGroup(recordName,aminoCode1,seq_id, groupCode3);
current_group.setResidueNumber(residueNumber);
current_group.setPDBName(groupCode3);
}
// SET UP THE ALT LOC GROUP
Group altGroup = null;
String altLocS = atom.getLabel_alt_id();
Character altLoc = ' ';
if ( altLocS.length()>0) {
altLoc = altLocS.charAt(0);
if ( altLoc.equals('.') )
altLoc = ' ';
}
// If it's the start of the new chain
if ( startOfNewChain){
current_group = getNewGroup(recordName,aminoCode1,seq_id, groupCode3);
current_group.setResidueNumber(residueNumber);
current_group.setPDBName(groupCode3);
}
// ANTHONY BRADLEY ADDED THIS -> WE ONLY WAN'T TO CHECK FOR ALT LOCS WHEN IT's NOT THE FIRST GROUP IN CHAIN
else{
// check if residue number is the same ...
// insertion code is part of residue number
if ( ! residueNumber.equals(current_group.getResidueNumber())) {
//System.out.println("end of residue: "+current_group.getPDBCode()+" "+residueNrInt);
current_chain.addGroup(current_group);
current_group.trimToSize();
current_group = getNewGroup(recordName,aminoCode1,seq_id,groupCode3);
current_group.setPDBName(groupCode3);
current_group.setResidueNumber(residueNumber);
// System.out.println("Made new group: " + groupCode3 + " " + resNum + " " + iCode);
} else {
// same residueNumber, but altLocs...
// test altLoc
if ( ! altLoc.equals(' ') && ( ! altLoc.equals('.'))) {
logger.debug("found altLoc! " + altLoc + " " + current_group + " " + altGroup);
altGroup = getCorrectAltLocGroup( altLoc,recordName,aminoCode1,groupCode3, seq_id);
if (altGroup.getChain()==null) {
altGroup.setChain(current_chain);
}
}
}
}
//atomCount++;
//System.out.println("fixing atom name for >" + atom.getLabel_atom_id() + "< >" + fullname + "<");
if ( params.isParseCAOnly() ){
// yes , user wants to get CA only
// only parse CA atoms...
if (! (atom.getLabel_atom_id().equals(StructureTools.CA_ATOM_NAME) && atom.getType_symbol().equals("C"))) {
//System.out.println("ignoring " + line);
//atomCount--;
return;
}
}
// filling the map in case there's no pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme in the file
asymId2StrandIdFromAtomSites.put(atom.getLabel_asym_id(), atom.getAuth_asym_id());
//see if chain_id is one of the previous chains ...
Atom a = convertAtom(atom);
//see if chain_id is one of the previous chains ...
if ( altGroup != null) {
altGroup.addAtom(a);
altGroup = null;
}
else {
current_group.addAtom(a);
}
// make sure that main group has all atoms
// GitHub issue: #76
// Unless it's microheterogenity https://github.com/rcsb/codec-devel/issues/81
if ( ! current_group.hasAtom(a.getName())) {
if (current_group.getPDBName().equals(a.getGroup().getPDBName())) {
current_group.addAtom(a);
}
}
//System.out.println(">" + atom.getLabel_atom_id()+"< " + a.getGroup().getPDBName() + " " + a.getGroup().getChemComp() );
//System.out.println(current_group);
}
/** convert a MMCif AtomSite object to a BioJava Atom object
*
* @param atom the mmmcif AtomSite record
* @return an Atom
*/
private Atom convertAtom(AtomSite atom){
Atom a = new AtomImpl();
a.setPDBserial(Integer.parseInt(atom.getId()));
a.setName(atom.getLabel_atom_id());
double x = Double.parseDouble (atom.getCartn_x());
double y = Double.parseDouble (atom.getCartn_y());
double z = Double.parseDouble (atom.getCartn_z());
a.setX(x);
a.setY(y);
a.setZ(z);
float occupancy = Float.parseFloat (atom.getOccupancy());
a.setOccupancy(occupancy);
float temp = Float.parseFloat (atom.getB_iso_or_equiv());
a.setTempFactor(temp);
String alt = atom.getLabel_alt_id();
if (( alt != null ) && ( alt.length() > 0) && (! alt.equals("."))){
a.setAltLoc(new Character(alt.charAt(0)));
} else {
a.setAltLoc(new Character(' '));
}
Element element = Element.R;
try {
element = Element.valueOfIgnoreCase(atom.getType_symbol());
} catch (IllegalArgumentException e) {
logger.info("Element {} was not recognised as a BioJava-known element, the element will be represented as the generic element {}", atom.getType_symbol(), Element.R.name());
}
a.setElement(element);
return a;
}
private Group getCorrectAltLocGroup( Character altLoc,
String recordName, Character aminoCode1, String groupCode3, long seq_id) {
// see if we know this altLoc already;
List atoms = current_group.getAtoms();
if ( atoms.size() > 0) {
Atom a1 = atoms.get(0);
// we are just adding atoms to the current group
// probably there is a second group following later...
if (a1.getAltLoc().equals(altLoc)) {
return current_group;
}
}
List altLocs = current_group.getAltLocs();
for ( Group altLocG : altLocs ){
atoms = altLocG.getAtoms();
if ( atoms.size() > 0) {
for ( Atom a1 : atoms) {
if (a1.getAltLoc().equals( altLoc)) {
return altLocG;
}
}
}
}
// no matching altLoc group found.
// build it up.
if ( groupCode3.equals(current_group.getPDBName())) {
if ( current_group.getAtoms().size() == 0) {
//System.out.println("current group is empty " + current_group + " " + altLoc);
return current_group;
}
//System.out.println("cloning current group " + current_group + " " + current_group.getAtoms().get(0).getAltLoc() + " altLoc " + altLoc);
Group altLocG = (Group) current_group.clone();
// drop atoms from cloned group...
// https://redmine.open-bio.org/issues/3307
altLocG.setAtoms(new ArrayList());
altLocG.getAltLocs().clear();
current_group.addAltLoc(altLocG);
return altLocG;
}
// System.out.println("new group " + recordName + " " + aminoCode1 + " " +groupCode3);
//String recordName,Character aminoCode1, long seq_id,String groupCode3) {
Group altLocG = getNewGroup(recordName,aminoCode1,seq_id,groupCode3);
altLocG.setPDBName(groupCode3);
altLocG.setResidueNumber(current_group.getResidueNumber());
current_group.addAltLoc(altLocG);
return altLocG;
}
/** Start the parsing
*
*/
@Override
public void documentStart() {
structure = new StructureImpl();
current_chain = null;
current_group = null;
current_nmr_model = null;
//atomCount = 0;
current_model = new ArrayList();
entities = new ArrayList();
strucRefs = new ArrayList();
seqResChains = new ArrayList();
entityChains = new ArrayList();
structAsyms = new ArrayList();
asymStrandId = new HashMap();
asymId2StrandIdFromAtomSites = new HashMap();
asymId2entityId = new HashMap();
structOpers = new ArrayList();
strucAssemblies = new ArrayList();
strucAssemblyGens = new ArrayList();
entitySrcGens = new ArrayList();
entitySrcNats = new ArrayList();
entitySrcSyns = new ArrayList();
structConn = new ArrayList();
structNcsOper = new ArrayList();
sequenceDifs = new ArrayList();
structSiteGens = new ArrayList();
}
@Override
public void documentEnd() {
// Expected that there is one current_chain that needs to be added to the model
// When in headerOnly mode, no Atoms are read, and there will not be an active
// current_chain.
if ( current_chain != null ) {
current_chain.addGroup(current_group);
if (isKnownChain(current_chain.getChainID(),current_model) == null) {
current_model.add(current_chain);
}
} else if (!params.isHeaderOnly()){
logger.warn("current chain is null at end of document.");
}
structure.addModel(current_model);
// Goal is to reproduce the PDB files exactly:
// What has to be done is to use the auth_mon_id for the assignment. For this
// map entities to Chains and Compound objects...
for (StructAsym asym : structAsyms) {
logger.debug("Entity {} matches asym_id: {}", asym.getEntity_id(), asym.getId() );
asymId2entityId.put(asym.getId(), asym.getEntity_id());
Chain s = getEntityChain(asym.getEntity_id());
Chain seqres = (Chain)s.clone();
// to solve issue #160 (e.g. 3u7t)
seqres = removeSeqResHeterogeneity(seqres);
seqres.setChainID(asym.getId());
seqResChains.add(seqres);
logger.debug(" seqres: " + asym.getId() + " " + seqres + "<") ;
// adding the compounds (entities)
addCompounds(asym);
}
if (structAsyms.isEmpty()) {
logger.warn("No _struct_asym category in file, no SEQRES groups will be added.");
}
// Only align if requested (default) and not when headerOnly mode with no Atoms.
// Otherwise, we store the empty SeqRes Groups unchanged in the right chains.
if ( params.isAlignSeqRes() && !params.isHeaderOnly() ){
logger.debug("Parsing mode align_seqres, will parse SEQRES and align to ATOM sequence");
alignSeqRes();
} else {
logger.debug("Parsing mode unalign_seqres, will parse SEQRES but not align it to ATOM sequence");
SeqRes2AtomAligner.storeUnAlignedSeqRes(structure, seqResChains, params.isHeaderOnly());
}
if (asymStrandId.isEmpty()) {
logger.warn("No pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme categories present. Will use chain id mapping from _atom_sites category");
asymStrandId = asymId2StrandIdFromAtomSites;
}
// If we only parse the header - we have no option but to use the other mapping (which can be broken)
if (asymId2StrandIdFromAtomSites.isEmpty()){
logger.warn("No _atom_sites category auth to asymid mappings. Will use chain id mapping from pdbx_poly_seq_scheme/pdbx_non_poly_seq_scheme categories");
asymId2StrandIdFromAtomSites = asymStrandId;
}
// mismatching Author assigned chain IDS and PDB internal chain ids:
// fix the chain IDS in the current model:
if(params.isUseInternalChainId()==false){
for (int i =0; i< structure.nrModels() ; i++){
List model = structure.getModel(i);
List pdbChains = new ArrayList();
for (Chain chain : model) {
for (String asym : asymId2StrandIdFromAtomSites.keySet()) {
if ( chain.getChainID().equals(asym)){
String newChainId = asymId2StrandIdFromAtomSites.get(asym);
logger.debug("Renaming chain with asym_id {} ({} atom groups) to author_asym_id/strand_id {}",
asym, chain.getAtomGroups().size(), newChainId);
chain.setChainID(newChainId);
chain.setInternalChainID(asym);
// set chain of all groups
for(Group g : chain.getAtomGroups()) {
ResidueNumber resNum = g.getResidueNumber();
if(resNum != null)
resNum.setChainId(newChainId);
}
for(Group g : chain.getSeqResGroups()) {
ResidueNumber resNum = g.getResidueNumber();
if(resNum != null)
resNum.setChainId(newChainId);
}
Chain known = isKnownChain(chain.getChainID(), pdbChains);
if ( known == null ){
pdbChains.add(chain);
} else {
// and now we join the 2 chains together again, because in cif files the data can be split up...
for ( Group g : chain.getAtomGroups()){
known.addGroup(g);
}
}
break;
}
}
}
structure.setModel(i,pdbChains);
}
}
else{
// Just set the internal id as the auth id -> if we're using the asymid
for (int i =0; i< structure.nrModels() ; i++){
List model = structure.getModel(i);
for (Chain chain : model) {
for (String asym : asymId2StrandIdFromAtomSites.keySet()) {
if (chain.getChainID().equals(asym)){
String authChainId = asymId2StrandIdFromAtomSites.get(asym);
chain.setInternalChainID(authChainId);
break;
}
}
}
}
}
// Now make sure all altlocgroups have all the atoms in all the groups
correctAltLocGroups();
// NOTE bonds and charges can only be done at this point that the chain id mapping is properly sorted out
if (!params.isHeaderOnly()) {
if ( params.shouldCreateAtomBonds()) {
addBonds();
}
if ( params.shouldCreateAtomCharges()) {
addCharges();
}
}
// compounds (entities)
// In addCompounds above we created the compounds if they were present in the file
// Now we need to make sure that they are linked to chains and also that if they are not present in the file we need to add them now
linkEntities();
if (!params.isHeaderOnly()) {
// Do structure.setSites(sites) after any chain renaming to be like PDB.
addSites();
}
// set the oligomeric state info in the header...
if (params.isParseBioAssembly()) {
// the more detailed mapping of chains to rotation operations happens in StructureIO...
Map bioAssemblies = new HashMap();
for ( PdbxStructAssembly psa : strucAssemblies){
List psags = new ArrayList(1);
for ( PdbxStructAssemblyGen psag: strucAssemblyGens ) {
if ( psag.getAssembly_id().equals(psa.getId())) {
psags.add(psag);
}
}
BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();
// these are the transformations that need to be applied to our model
List transformations = builder.getBioUnitTransformationList(psa, psags, structOpers);
int mmSize = 0;
int bioAssemblyId = -1;
try {
bioAssemblyId = Integer.parseInt(psa.getId());
} catch (NumberFormatException e) {
logger.info("Could not parse a numerical bio assembly id from '{}'",psa.getId());
}
try {
mmSize = Integer.parseInt(psa.getOligomeric_count());
} catch (NumberFormatException e) {
if (bioAssemblyId!=-1)
// if we have a numerical id, then it's unusual to have no oligomeric size: we warn about it
logger.warn("Could not parse oligomeric count from '{}' for biological assembly id {}",
psa.getOligomeric_count(),psa.getId());
else
// no numerical id (PAU,XAU in virus entries), it's normal to have no oligomeric size
logger.info("Could not parse oligomeric count from '{}' for biological assembly id {}",
psa.getOligomeric_count(),psa.getId());
}
// if bioassembly id is not numerical we throw it away
// this happens usually for viral capsid entries, like 1ei7
// see issue #230 in github
if (bioAssemblyId!=-1) {
BioAssemblyInfo bioAssembly = new BioAssemblyInfo();
bioAssembly.setId(bioAssemblyId);
bioAssembly.setMacromolecularSize(mmSize);
bioAssembly.setTransforms(transformations);
bioAssemblies.put(bioAssemblyId,bioAssembly);
}
}
structure.getPDBHeader().setBioAssemblies(bioAssemblies);
}
ArrayList ncsOperators = new ArrayList();
for (StructNcsOper sNcsOper:structNcsOper) {
if (sNcsOper.getCode().equals("generate")) {
ncsOperators.add(sNcsOper.getOperator());
}
}
// we only set it if not empty, otherwise remains null
if (ncsOperators.size()>0) {
structure.getCrystallographicInfo().setNcsOperators(
ncsOperators.toArray(new Matrix4d[ncsOperators.size()]));
}
Map> misMatchMap = new HashMap>();
for (StructRefSeqDif sdif : sequenceDifs) {
SeqMisMatch misMatch = new SeqMisMatchImpl();
misMatch.setDetails(sdif.getDetails());
String insCode = sdif.getPdbx_pdb_ins_code();
if ( insCode != null && insCode.equals("?"))
insCode = null;
misMatch.setInsCode(insCode);
misMatch.setOrigGroup(sdif.getDb_mon_id());
misMatch.setPdbGroup(sdif.getMon_id());
misMatch.setPdbResNum(sdif.getPdbx_auth_seq_num());
misMatch.setUniProtId(sdif.getPdbx_seq_db_accession_code());
misMatch.setSeqNum(sdif.getSeq_num());
List mms = misMatchMap.get(sdif.getPdbx_pdb_strand_id());
if ( mms == null) {
mms = new ArrayList();
misMatchMap.put(sdif.getPdbx_pdb_strand_id(),mms);
}
mms.add(misMatch);
}
for (String chainId : misMatchMap.keySet()){
try {
Chain c = structure.getChainByPDB(chainId);
c.setSeqMisMatches(misMatchMap.get(chainId));
} catch (Exception e){
logger.warn("could not set mismatches for chain " + chainId);
}
}
}
/**
* Ensure that all the alt loc groups have all the atoms in the main group
*/
private void correctAltLocGroups() {
for (int i =0; i< structure.nrModels() ; i++){
for (Chain chain : structure.getModel(i)) {
for (Group group : chain.getAtomGroups()) {
for (Group altLocGroup : group.getAltLocs()) {
for ( Atom groupAtom : group.getAtoms()) {
// If this alt loc doesn't have this atom
if (! altLocGroup.hasAtom(groupAtom.getName())) {
if (altLocGroup.getPDBName().equals(group.getPDBName())) {
altLocGroup.addAtom(groupAtom);
}
}
}
}
}
}
}
}
/**
* Here we link entities to chains.
* Also if entities are not present in file, this initialises the entities with some heuristics, see {@link EntityFinder}
*/
private void linkEntities() {
for (int i =0; i< structure.nrModels() ; i++){
for (Chain chain : structure.getModel(i)) {
String entityId;
if( params.isUseInternalChainId()){
entityId = asymId2entityId.get(chain.getChainID());
}
else{
entityId = asymId2entityId.get(chain.getInternalChainID());
}
if (entityId==null) {
// this can happen for instance if the cif file didn't have _struct_asym category at all
// and thus we have no asymId2entityId mapping at all
logger.warn("No entity id could be found for chain {}", chain.getInternalChainID());
continue;
}
int eId = Integer.parseInt(entityId);
// Entities are not added for non-polymeric entities, if a chain is non-polymeric its entity won't be found.
// TODO: add all entities and unique compounds and add methods to directly get polymer or non-polymer
// asyms (chains). Either create a unique StructureImpl or modify existing for a better representation of the
// mmCIF internal data structures but is compatible with Structure interface.
// Some examples of PDB entries with this kind of problem:
// - 2uub: asym_id X, chainId Z, entity_id 24: fully non-polymeric but still with its own chainId
// - 3o6j: asym_id K, chainId Z, entity_id 6 : a single water molecule
// - 1dz9: asym_id K, chainId K, entity_id 6 : a potassium ion alone
EntityInfo entityInfo = structure.getEntityById(eId);
if (entityInfo==null) {
// Supports the case where the only chain members were from non-polymeric entity that is missing.
// Solved by creating a new Compound(entity) to which this chain will belong.
logger.warn("Could not find an Entity for entity_id {}, for chain id {}, creating a new Entity.",
eId, chain.getChainID());
entityInfo = new EntityInfo();
entityInfo.setMolId(eId);
entityInfo.addChain(chain);
if (StructureTools.isChainWaterOnly(chain)) {
entityInfo.setType(EntityType.WATER);
} else {
entityInfo.setType(EntityType.NONPOLYMER);
}
chain.setEntityInfo(entityInfo);
structure.addEntityInfo(entityInfo);
} else {
logger.debug("Adding chain with chain id {} (asym id {}) to Entity with entity_id {}",
chain.getChainID(), chain.getInternalChainID(), eId);
entityInfo.addChain(chain);
chain.setEntityInfo(entityInfo);
}
}
}
// to make sure we have Entities linked to chains, we call getEntityInfos() which will lazily initialise the
// compounds using heuristics (see EntityFinder) in the case that they were not explicitly present in the file
List entities = structure.getEntityInfos();
// final sanity check: it can happen that from the annotated entities some are not linked to any chains
// e.g. 3s26: a sugar entity does not have any chains associated to it (it seems to be happening with many sugar compounds)
// we simply log it, this can sign some other problems if the entities are used down the line
for (EntityInfo e:entities) {
if (e.getChains().isEmpty()) {
logger.info("Entity {} '{}' has no chains associated to it",
e.getId()==null?"with no entity id":e.getId(), e.getDescription());
}
}
}
private void addCharges() {
ChargeAdder adder = new ChargeAdder(structure);
adder.addCharges();
}
/**
* The method will return a new reference to a Chain with any consecutive groups
* having same residue numbers removed.
* This is necessary to solve the microheterogeneity issue in entries like 3u7t (see github issue #160)
* @param c
* @return
*/
private Chain removeSeqResHeterogeneity(Chain c) {
Chain trimmedChain = new ChainImpl();
ResidueNumber lastResNum = null;
for (Group g:c.getAtomGroups()) {
// note we have to deep copy this, otherwise they stay linked and would get altered in addGroup(g)
ResidueNumber currentResNum = new ResidueNumber(
g.getResidueNumber().getChainId(),
g.getResidueNumber().getSeqNum(),
g.getResidueNumber().getInsCode());
if (lastResNum == null || !lastResNum.equals(currentResNum) ) {
trimmedChain.addGroup(g);
} else {
logger.debug("Removing seqres group because it seems to be repeated in entity_poly_seq, most likely has hetero='y': "+g);
}
lastResNum = currentResNum;
}
return trimmedChain;
}
private void addBonds() {
BondMaker maker = new BondMaker(structure, params);
maker.makeBonds();
maker.formBondsFromStructConn(structConn);
}
private void alignSeqRes() {
logger.debug("Parsing mode align_seqres, will align to ATOM to SEQRES sequence");
// fix SEQRES residue numbering for all models
for (int model=0;model atomList = structure.getModel(model);
for (Chain seqResChain: seqResChains){
// this extracts the matching atom chain from atomList
Chain atomChain = SeqRes2AtomAligner.getMatchingAtomRes(seqResChain, atomList);
if (atomChain == null) {
// most likely there's no observed residues at all for the seqres chain: can't map
// e.g. 3zyb: chains with asym_id L,M,N,O,P have no observed residues
logger.warn("Could not map SEQRES chain with asym_id={} to any ATOM chain. Most likely there's no observed residues in the chain.",
seqResChain.getChainID());
continue;
}
//map the atoms to the seqres...
// we need to first clone the seqres so that they stay independent for different models
List seqResGroups = new ArrayList();
for (int i=0;i revRecords = header.getRevisionRecords();
if ( revRecords == null) {
revRecords = new ArrayList();
header.setRevisionRecords(revRecords);
}
revRecords.add(record);
}
@Override
public void newDatabasePDBrev(DatabasePDBrev dbrev) {
//System.out.println("got a database revision:" + dbrev);
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd",Locale.US);
PDBHeader header = structure.getPDBHeader();
if ( header == null) {
header = new PDBHeader();
}
if (dbrev.getNum().equals("1")){
try {
Date dep = dateFormat.parse(dbrev.getDate_original());
header.setDepDate(dep);
} catch (ParseException e){
logger.warn("Could not parse date string '{}', deposition date will be unavailable", dbrev.getDate_original());
}
try {
Date mod = dateFormat.parse(dbrev.getDate());
header.setModDate(mod);
} catch (ParseException e){
logger.warn("Could not parse date string '{}', modification date will be unavailable", dbrev.getDate());
}
} else {
try {
Date mod = dateFormat.parse(dbrev.getDate());
header.setModDate(mod);
} catch (ParseException e){
logger.warn("Could not parse date string '{}', modification date will be unavailable", dbrev.getDate());
}
}
structure.setPDBHeader(header);
}
@Override
public void newDatabasePDBremark(DatabasePDBremark remark) {
//System.out.println(remark);
String id = remark.getId();
if (id.equals("2")){
//this remark field contains the resolution information:
String line = remark.getText();
int i = line.indexOf("ANGSTROM");
if ( i > 5) {
// line contains ANGSTROM info...
String resolution = line.substring(i-5,i).trim();
// convert string to float
float res = 99 ;
try {
res = Float.parseFloat(resolution);
} catch (NumberFormatException e) {
logger.info("could not parse resolution from line and ignoring it " + line);
return ;
}
// support for old style header
PDBHeader pdbHeader = structure.getPDBHeader();
pdbHeader.setResolution(res);
}
}
}
@Override
public void newRefine(Refine r){
PDBHeader pdbHeader = structure.getPDBHeader();
// RESOLUTION
// in very rare cases (for instance hybrid methods x-ray + neutron diffraction, e.g. 3ins, 4n9m)
// there are 2 resolution values, one for each method
// we take the last one found so that behaviour is like in PDB file parsing
if (pdbHeader.getResolution()!=PDBHeader.DEFAULT_RESOLUTION) {
logger.warn("More than 1 resolution value present, will use last one {} and discard previous {} "
,r.getLs_d_res_high(), String.format("%4.2f",pdbHeader.getResolution()));
}
try {
pdbHeader.setResolution(Float.parseFloat(r.getLs_d_res_high()));
} catch (NumberFormatException e){
logger.info("Could not parse resolution from " + r.getLs_d_res_high() + " " + e.getMessage());
}
// RFREE
if (pdbHeader.getRfree()!=PDBHeader.DEFAULT_RFREE) {
logger.warn("More than 1 Rfree value present, will use last one {} and discard previous {} ",
r.getLs_R_factor_R_free(), String.format("%4.2f",pdbHeader.getRfree()));
}
if (r.getLs_R_factor_R_free()==null) {
// some entries like 2ifo haven't got this field at all
logger.info("_refine.ls_R_factor_R_free not present, not parsing Rfree value");
} else {
try {
pdbHeader.setRfree(Float.parseFloat(r.getLs_R_factor_R_free()));
} catch (NumberFormatException e){
// no rfree present ('?') is very usual, that's why we set it to debug
logger.debug("Could not parse Rfree from string '{}'", r.getLs_R_factor_R_free());
}
}
}
@Override
public void newAuditAuthor(AuditAuthor aa){
String name = aa.getName();
StringBuffer famName = new StringBuffer();
StringBuffer initials = new StringBuffer();
boolean afterComma = false;
for ( char c: name.toCharArray()) {
if ( c == ' ')
continue;
if ( c == ','){
afterComma = true;
continue;
}
if ( afterComma)
initials.append(c);
else
famName.append(c);
}
StringBuffer newaa = new StringBuffer();
newaa.append(initials);
newaa.append(famName);
PDBHeader header = structure.getPDBHeader();
String auth = header.getAuthors();
if (auth == null) {
header.setAuthors(newaa.toString());
}else {
auth += "," + newaa.toString();
header.setAuthors(auth);
}
}
@Override
public void newExptl(Exptl exptl) {
PDBHeader pdbHeader = structure.getPDBHeader();
String method = exptl.getMethod();
pdbHeader.setExperimentalTechnique(method);
}
@Override
public void newCell(Cell cell) {
try {
float a = Float.parseFloat(cell.getLength_a());
float b = Float.parseFloat(cell.getLength_b());
float c = Float.parseFloat(cell.getLength_c());
float alpha = Float.parseFloat(cell.getAngle_alpha());
float beta = Float.parseFloat(cell.getAngle_beta());
float gamma = Float.parseFloat(cell.getAngle_gamma());
CrystalCell xtalCell = new CrystalCell();
xtalCell.setA(a);
xtalCell.setB(b);
xtalCell.setC(c);
xtalCell.setAlpha(alpha);
xtalCell.setBeta(beta);
xtalCell.setGamma(gamma);
if (!xtalCell.isCellReasonable()) {
// If the entry describes a structure determined by a technique other than X-ray crystallography,
// cell is (sometimes!) a = b = c = 1.0, alpha = beta = gamma = 90 degrees
// if so we don't add and CrystalCell will be null
logger.debug("The crystal cell read from file does not have reasonable dimensions (at least one dimension is below {}), discarding it.",
CrystalCell.MIN_VALID_CELL_SIZE);
return;
}
structure.getPDBHeader().getCrystallographicInfo().setCrystalCell(xtalCell);
} catch (NumberFormatException e){
structure.getPDBHeader().getCrystallographicInfo().setCrystalCell(null);
logger.info("could not parse some cell parameters ("+e.getMessage()+"), ignoring _cell ");
}
}
@Override
public void newSymmetry(Symmetry symmetry) {
String spaceGroup = symmetry.getSpace_group_name_H_M();
SpaceGroup sg = SymoplibParser.getSpaceGroup(spaceGroup);
if (sg==null) logger.warn("Space group '"+spaceGroup+"' not recognised as a standard space group");
structure.getPDBHeader().getCrystallographicInfo().setSpaceGroup(sg);
}
@Override
public void newStructNcsOper(StructNcsOper sNcsOper) {
structNcsOper.add(sNcsOper);
}
@Override
public void newStructRef(StructRef sref) {
logger.debug(sref.toString());
strucRefs.add(sref);
}
private StructRef getStructRef(String ref_id){
for (StructRef structRef : strucRefs) {
if (structRef.getId().equals(ref_id)){
return structRef;
}
}
return null;
}
/**
* create a DBRef record from the StrucRefSeq record:
*
* PDB record DBREF
* Field Name mmCIF Data Item
* Section n.a.
* PDB_ID_Code _struct_ref_seq.pdbx_PDB_id_code
* Strand_ID _struct_ref_seq.pdbx_strand_id
* Begin_Residue_Number _struct_ref_seq.pdbx_auth_seq_align_beg
* Begin_Ins_Code _struct_ref_seq.pdbx_seq_align_beg_ins_code
* End_Residue_Number _struct_ref_seq.pdbx_auth_seq_align_end
* End_Ins_Code _struct_ref_seq.pdbx_seq_align_end_ins_code
* Database _struct_ref.db_name
* Database_Accession_No _struct_ref_seq.pdbx_db_accession
* Database_ID_Code _struct_ref.db_code
* Database_Begin_Residue_Number _struct_ref_seq.db_align_beg
* Databaes_Begin_Ins_Code _struct_ref_seq.pdbx_db_align_beg_ins_code
* Database_End_Residue_Number _struct_ref_seq.db_align_end
* Databaes_End_Ins_Code _struct_ref_seq.pdbx_db_align_end_ins_code
*
*
*
*/
@Override
public void newStructRefSeq(StructRefSeq sref) {
//if (DEBUG)
// System.out.println(sref);
DBRef r = new DBRef();
//if (DEBUG)
// System.out.println( " " + sref.getPdbx_PDB_id_code() + " " + sref.getPdbx_db_accession());
r.setIdCode(sref.getPdbx_PDB_id_code());
r.setDbAccession(sref.getPdbx_db_accession());
r.setDbIdCode(sref.getPdbx_db_accession());
r.setChainId(sref.getPdbx_strand_id());
StructRef structRef = getStructRef(sref.getRef_id());
if (structRef == null){
logger.warn("could not find StructRef " + sref.getRef_id() + " for StructRefSeq " + sref);
} else {
r.setDatabase(structRef.getDb_name());
r.setDbIdCode(structRef.getDb_code());
}
int seqbegin = Integer.parseInt(sref.getPdbx_auth_seq_align_beg());
int seqend = Integer.parseInt(sref.getPdbx_auth_seq_align_end());
Character begin_ins_code = new Character(sref.getPdbx_seq_align_beg_ins_code().charAt(0));
Character end_ins_code = new Character(sref.getPdbx_seq_align_end_ins_code().charAt(0));
if (begin_ins_code == '?')
begin_ins_code = ' ';
if (end_ins_code == '?')
end_ins_code = ' ';
r.setSeqBegin(seqbegin);
r.setInsertBegin(begin_ins_code);
r.setSeqEnd(seqend);
r.setInsertEnd(end_ins_code);
int dbseqbegin = Integer.parseInt(sref.getDb_align_beg());
int dbseqend = Integer.parseInt(sref.getDb_align_end());
Character db_begin_in_code = new Character(sref.getPdbx_db_align_beg_ins_code().charAt(0));
Character db_end_in_code = new Character(sref.getPdbx_db_align_end_ins_code().charAt(0));
if (db_begin_in_code == '?')
db_begin_in_code = ' ';
if (db_end_in_code == '?')
db_end_in_code = ' ';
r.setDbSeqBegin(dbseqbegin);
r.setIdbnsBegin(db_begin_in_code);
r.setDbSeqEnd(dbseqend);
r.setIdbnsEnd(db_end_in_code);
List dbrefs = structure.getDBRefs();
if ( dbrefs == null)
dbrefs = new ArrayList();
dbrefs.add(r);
logger.debug(r.toPDB());
structure.setDBRefs(dbrefs);
}
@Override
public void newStructRefSeqDif(StructRefSeqDif sref) {
sequenceDifs.add(sref);
}
private Chain getEntityChain(String entity_id){
for (Chain chain : entityChains) {
if ( chain.getChainID().equals(entity_id)){
return chain;
}
}
// does not exist yet, so create...
Chain chain = new ChainImpl();
chain.setChainID(entity_id);
entityChains.add(chain);
return chain;
}
//private Chain getSeqResChain(String chainID){
// return getChainFromList(seqResChains, chainID);
//}
/**
* Data items in the ENTITY_SRC_GEN category record details of
* the source from which the entity was obtained in cases
* where the source was genetically manipulated. The
* following are treated separately: items pertaining to the tissue
* from which the gene was obtained, items pertaining to the host
* organism for gene expression and items pertaining to the actual
* producing organism (plasmid).
*/
@Override
public void newEntitySrcGen(EntitySrcGen entitySrcGen){
// add to internal list. Map to Compound object later on...
entitySrcGens.add(entitySrcGen);
}
@Override
public void newEntitySrcNat(EntitySrcNat entitySrcNat){
// add to internal list. Map to Compound object later on...
entitySrcNats.add(entitySrcNat);
}
@Override
public void newEntitySrcSyn(EntitySrcSyn entitySrcSyn){
// add to internal list. Map to Compound object later on...
entitySrcSyns.add(entitySrcSyn);
}
/**
* The EntityPolySeq object provide the amino acid sequence objects for the Entities.
* Later on the entities are mapped to the BioJava {@link Chain} and {@link EntityInfo} objects.
* @param epolseq the EntityPolySeq record for one amino acid
*/
@Override
public void newEntityPolySeq(EntityPolySeq epolseq) {
logger.debug("NEW entity poly seq " + epolseq);
int eId = -1;
try {
eId = Integer.parseInt(epolseq.getEntity_id());
} catch (NumberFormatException e) {
logger.warn("Could not parse entity id from EntityPolySeq: "+e.getMessage());
}
Entity e = getEntity(eId);
if (e == null){
logger.info("Could not find entity "+ epolseq.getEntity_id()+". Can not match sequence to it.");
return;
}
Chain entityChain = getEntityChain(epolseq.getEntity_id());
// first we check through the chemcomp provider, if it fails we do some heuristics to guess the type of group
// TODO some of this code is analogous to getNewGroup() and we should try to unify them - JD 2016-03-08
Group g = ChemCompGroupFactory.getGroupFromChemCompDictionary(epolseq.getMon_id());
//int seqId = Integer.parseInt(epolseq.getNum());
if ( g != null && !g.getChemComp().isEmpty()) {
if ( g instanceof AminoAcidImpl) {
AminoAcidImpl aa = (AminoAcidImpl) g;
aa.setRecordType(AminoAcid.SEQRESRECORD);
//aa.setId(seqId);
}
} else {
if (epolseq.getMon_id().length()==3 && StructureTools.get1LetterCodeAmino(epolseq.getMon_id())!=null){
AminoAcidImpl a = new AminoAcidImpl();
a.setRecordType(AminoAcid.SEQRESRECORD);
Character code1 = StructureTools.get1LetterCodeAmino(epolseq.getMon_id());
a.setAminoType(code1);
g = a;
} else if ( StructureTools.isNucleotide(epolseq.getMon_id())) {
// the group is actually a nucleotide group...
NucleotideImpl n = new NucleotideImpl();
g = n;
} else {
logger.debug("Residue {} {} is not a standard aminoacid or nucleotide, will create a het group for it", epolseq.getNum(),epolseq.getMon_id());
HetatomImpl h = new HetatomImpl();
g = h;
}
}
// at this stage we don't know about author residue numbers (insertion codes)
// we abuse now the ResidueNumber field setting the internal residue numbers (label_seq_id, strictly sequential and follow the seqres sequence 1 to n)
// later the actual ResidueNumbers (author residue numbers) have to be corrected in alignSeqRes()
g.setResidueNumber(ResidueNumber.fromString(epolseq.getNum()));
g.setPDBName(epolseq.getMon_id());
entityChain.addGroup(g);
}
@Override
public void newPdbxPolySeqScheme(PdbxPolySeqScheme ppss) {
//if ( headerOnly)
// return;
// replace the group asym ids with the real PDB ids!
// replaceGroupSeqPos(ppss); // This might be incorrect in some pdb, to use auth_seq_id of the pdbx_poly_seq_scheme.
// merge the EntityPolySeq info and the AtomSite chains into one...
//already known ignore:
if (asymStrandId.containsKey(ppss.getAsym_id()))
return;
// this is one of the internal mmcif rules it seems...
if ( ppss.getPdb_strand_id() == null) {
asymStrandId.put(ppss.getAsym_id(), ppss.getAuth_mon_id());
return;
}
//System.out.println(ppss.getAsym_id() + " = " + ppss.getPdb_strand_id());
asymStrandId.put(ppss.getAsym_id(), ppss.getPdb_strand_id());
}
@Override
public void newPdbxNonPolyScheme(PdbxNonPolyScheme ppss) {
//if (headerOnly)
// return;
// merge the EntityPolySeq info and the AtomSite chains into one...
//already known ignore:
if (asymStrandId.containsKey(ppss.getAsym_id()))
return;
// this is one of the interal mmcif rules it seems...
if ( ppss.getPdb_strand_id() == null) {
asymStrandId.put(ppss.getAsym_id(), ppss.getAsym_id());
return;
}
asymStrandId.put(ppss.getAsym_id(), ppss.getPdb_strand_id());
}
@Override
public void newPdbxEntityNonPoly(PdbxEntityNonPoly pen){
// TODO: do something with them...
// not implemented yet...
//System.out.println(pen.getEntity_id() + " " + pen.getName() + " " + pen.getComp_id());
}
@Override
public void newChemComp(ChemComp c) {
// TODO: do something with them...
}
@Override
public void newGenericData(String category, List loopFields,
List lineData) {
//logger.debug("unhandled category so far: " + category);
}
@Override
public FileParsingParameters getFileParsingParameters()
{
return params;
}
@Override
public void setFileParsingParameters(FileParsingParameters params)
{
this.params = params;
}
@Override
public void newChemCompDescriptor(ChemCompDescriptor ccd) {
// TODO nothing happening here yet.
}
public List getStructOpers() {
return structOpers;
}
@Override
public void newPdbxStrucAssembly(PdbxStructAssembly strucAssembly) {
strucAssemblies.add(strucAssembly);
}
public List getStructAssemblies(){
return strucAssemblies;
}
@Override
public void newPdbxStrucAssemblyGen(PdbxStructAssemblyGen strucAssembly) {
strucAssemblyGens.add(strucAssembly);
}
public List getStructAssemblyGens(){
return strucAssemblyGens;
}
@Override
public void newChemCompAtom(ChemCompAtom atom) {
}
@Override
public void newPdbxChemCompIndentifier(PdbxChemCompIdentifier id) {
}
@Override
public void newChemCompBond(ChemCompBond bond) {
}
@Override
public void newPdbxChemCompDescriptor(PdbxChemCompDescriptor desc) {
}
@Override
public void newStructConn(StructConn structConn) {
this.structConn.add(structConn);
}
@Override
public void newStructSiteGen(StructSiteGen siteGen) { this.structSiteGens.add(siteGen); }
@Override
public void newStructSite(StructSite structSite) {
if (params.isHeaderOnly()) {
return;
}
// Simply implement the method.
List sites = structure.getSites();
if (sites == null) sites = new ArrayList();
Site site = null;
for (Site asite : sites) {
if (asite.getSiteID().equals(structSite.getId())) {
site = asite; // Prevent duplicate siteIds
}
}
boolean addSite = false;
if (site == null) { site = new Site(); addSite = true; }
site.setSiteID(structSite.getId());
site.setDescription(structSite.getDetails());
// site.setPdbxEvidenceCode(structSite.getPdbxEvidenceCode()); // TODO - add addition fields in Sites
if (addSite) sites.add(site);
structure.setSites(sites);
}
/**
* Build sites in a BioJava Structure using the original author chain id & residue numbers.
* Sites are built from struct_site_gen records that have been parsed.
*/
private void addSites() {
List sites = structure.getSites();
if (sites == null) sites = new ArrayList();
for (StructSiteGen siteGen : structSiteGens) {
// For each StructSiteGen, find the residues involved, if they exist then
String site_id = siteGen.getSite_id(); // multiple could be in same site.
if (site_id == null) site_id = "";
String comp_id = siteGen.getLabel_comp_id(); // PDBName
// Assumption: the author chain ID and residue number for the site is consistent with the original
// author chain id and residue numbers.
String chain_id;
if (params.isUseInternalChainId()){
chain_id = siteGen.getLabel_asym_id();
}
else {
chain_id = siteGen.getAuth_asym_id(); // ChainID
}
String auth_seq_id = siteGen.getAuth_seq_id(); // Res num
String insCode = siteGen.getPdbx_auth_ins_code();
if ( insCode != null && insCode.equals("?"))
insCode = null;
// Look for asymID = chainID and seqID = seq_ID. Check that comp_id matches the resname.
Group g = null;
try {
Chain chain = structure.getChainByPDB(chain_id);
if (null != chain) {
try {
Character insChar = null;
if (null != insCode && insCode.length() > 0) insChar = insCode.charAt(0);
g = chain.getGroupByPDB(new ResidueNumber(chain_id, Integer.parseInt(auth_seq_id), insChar));
} catch (NumberFormatException e) {
logger.warn("Could not lookup residue : " + chain_id + auth_seq_id);
}
}
} catch (StructureException e) {
logger.warn("Problem finding residue in site entry " + siteGen.getSite_id() + " - " + e.getMessage(), e.getMessage());
}
if (g != null) {
// 2. find the site_id, if not existing, create anew.
Site site = null;
for (Site asite: sites) {
if (site_id.equals(asite.getSiteID())) site = asite;
}
boolean addSite = false;
// 3. add this residue to the site.
if (site == null) {
addSite = true;
site = new Site();
site.setSiteID(site_id);
}
List groups = site.getGroups();
if (groups == null) groups = new ArrayList();
// Check the self-consistency of the residue reference from auth_seq_id and chain_id
if (!comp_id.equals(g.getPDBName())) {
logger.warn("comp_id doesn't match the residue at " + chain_id + auth_seq_id + " - skipping");
} else {
groups.add(g);
site.setGroups(groups);
}
if (addSite) sites.add(site);
}
}
structure.setSites(sites);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy