// org.biojava.nbio.structure.io.cif.AbstractCifFileSupplier
// Part of the biojava-structure artifact: the protein structure modules of BioJava.
package org.biojava.nbio.structure.io.cif;
import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.cif.CifBuilder;
import org.rcsb.cif.model.Category;
import org.rcsb.cif.model.CifFile;
import org.rcsb.cif.model.FloatColumnBuilder;
import org.rcsb.cif.model.IntColumnBuilder;
import org.rcsb.cif.model.StrColumnBuilder;
import org.rcsb.cif.schema.StandardSchemata;
import org.rcsb.cif.schema.mm.MmCifBlockBuilder;
import org.rcsb.cif.schema.mm.MmCifCategoryBuilder;
import org.rcsb.cif.schema.mm.MmCifFileBuilder;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.stream.Collector;
import java.util.stream.Collectors;
/**
* Convert a BioJava object to a CifFile.
* @author Sebastian Bittrich
* @since 5.3.0
*/
public abstract class AbstractCifFileSupplier implements CifFileSupplier {
protected CifFile getInternal(Structure structure, List wrappedAtoms) {
// for now BioJava only considered 3 categories for create a Cif representation of a structure
// cell
CrystalCell crystalCell = structure.getPDBHeader().getCrystallographicInfo().getCrystalCell();
// symmetry
SpaceGroup spaceGroup = structure.getPDBHeader().getCrystallographicInfo().getSpaceGroup();
// atom_site
Category atomSite = wrappedAtoms.stream().collect(toAtomSite());
// entity information
List entityInfos = structure.getEntityInfos();
MmCifBlockBuilder blockBuilder = CifBuilder.enterFile(StandardSchemata.MMCIF)
.enterBlock(structure.getPdbId() == null? "" : structure.getPdbId().getId());
blockBuilder.enterStructKeywords().enterText()
.add(String.join(", ", structure.getPDBHeader().getKeywords()))
.leaveColumn().leaveCategory();
if (atomSite.isDefined() && atomSite.getRowCount() > 0) {
// set atom site
blockBuilder.addCategory(atomSite);
}
if (crystalCell != null) {
// set cell category
blockBuilder.enterCell()
.enterLengthA()
.add(crystalCell.getA())
.leaveColumn()
.enterLengthB()
.add(crystalCell.getB())
.leaveColumn()
.enterLengthC()
.add(crystalCell.getC())
.leaveColumn()
.enterAngleAlpha()
.add(crystalCell.getAlpha())
.leaveColumn()
.enterAngleBeta()
.add(crystalCell.getBeta())
.leaveColumn()
.enterAngleGamma()
.add(crystalCell.getGamma())
.leaveColumn()
.leaveCategory();
}
if (spaceGroup != null) {
// set symmetry category
blockBuilder.enterSymmetry()
.enterSpaceGroupNameH_M()
.add(spaceGroup.getShortSymbol())
.leaveColumn()
.leaveCategory();
}
if (entityInfos != null) {
String[] entityIds = new String[entityInfos.size()];
String[] entityTypes = new String[entityInfos.size()];
String[] entityDescriptions = new String[entityInfos.size()];
for (int i=0; i e.getType() == EntityType.POLYMER).map(e -> Integer.toString(e.getMolId())).toArray(String[]::new);
String[] polyEntitySeqs = entityInfos.stream().filter(e -> e.getType() == EntityType.POLYMER).map(e -> e.getChains().get(0).getSeqResSequence()).toArray(String[]::new);
blockBuilder.enterEntity()
.enterId()
.add(entityIds)
.leaveColumn()
.enterType()
.add(entityTypes)
.leaveColumn()
.enterPdbxDescription()
.add(entityDescriptions)
.leaveColumn()
.leaveCategory();
blockBuilder.enterEntityPoly()
.enterEntityId()
.add(polyEntityIds)
.leaveColumn()
.enterPdbxSeqOneLetterCodeCan()
.add(polyEntitySeqs)
.leaveColumn()
.leaveCategory();
}
return blockBuilder.leaveBlock().leaveFile();
}
protected void handleChain(Chain chain, int model, List wrappedAtoms) {
final String chainName = chain.getName();
final String chainId = chain.getId();
for (Group group : chain.getAtomGroups()) {
// The alt locs can have duplicates, since at parsing time we make sure that all alt loc groups have
// all atoms (see StructureTools#cleanUpAltLocs)
// Thus we have to remove duplicates here by using the atom id
// See issue https://github.com/biojava/biojava/issues/778 and
// TestAltLocs.testMmcifWritingAllAltlocs/testMmcifWritingPartialAltlocs
Map uniqueAtoms = new LinkedHashMap<>();
for (int atomIndex = 0; atomIndex < group.size(); atomIndex++) {
Atom atom = group.getAtom(atomIndex);
if (atom == null) {
continue;
}
uniqueAtoms.put(atom.getPDBserial(), new WrappedAtom(model, chainName, chainId, atom, atom.getPDBserial()));
}
if (group.hasAltLoc()) {
for (Group alt : group.getAltLocs()) {
for (int atomIndex = 0; atomIndex < alt.size(); atomIndex++) {
Atom atom = alt.getAtom(atomIndex);
if (atom == null) {
continue;
}
uniqueAtoms.put(atom.getPDBserial(), new WrappedAtom(model, chainName, chainId, atom, atom.getPDBserial()));
}
}
}
wrappedAtoms.addAll(uniqueAtoms.values());
}
}
/**
* Wrapped atoms represent individual atoms enriched with model- and chain-level information. Also, gives control
* over the atomId field. Useful to convert structures (and subsets thereof) to their mmCIF representation.
*/
public static class WrappedAtom {
private final int model;
private final String chainName;
private final String chainId;
private final Atom atom;
private final int atomId;
/**
* Construct a new atoms.
* @param model the model number
* @param chainName the label_asym_id
* @param chainId the auth_asym_id
* @param atom the atom instance itself
* @param atomId the label_atom_id
*/
public WrappedAtom(int model, String chainName, String chainId, Atom atom, int atomId) {
this.model = model;
this.chainName = chainName;
this.chainId = chainId;
this.atom = atom;
this.atomId = atomId;
}
public int getModel() {
return model;
}
public String getChainName() {
return chainName;
}
public String getChainId() {
return chainId;
}
public Atom getAtom() {
return atom;
}
public int getAtomId() {
return atomId;
}
}
/**
* Collects {@link WrappedAtom} instances into one {@link org.rcsb.cif.schema.mm.AtomSite}.
* @return an atom site record containing all atoms
*/
public static Collector toAtomSite() {
return Collector.of(AtomSiteCollector::new,
AtomSiteCollector::accept,
AtomSiteCollector::combine,
AtomSiteCollector::get);
}
static class AtomSiteCollector implements Consumer {
private final MmCifCategoryBuilder.AtomSiteBuilder atomSiteBuilder;
private final StrColumnBuilder groupPDB;
private final IntColumnBuilder id;
private final StrColumnBuilder typeSymbol;
private final StrColumnBuilder labelAtomId;
private final StrColumnBuilder labelAltId;
private final StrColumnBuilder labelCompId;
private final StrColumnBuilder labelAsymId;
private final StrColumnBuilder labelEntityId;
private final IntColumnBuilder labelSeqId;
private final StrColumnBuilder pdbxPDBInsCode;
private final FloatColumnBuilder cartnX;
private final FloatColumnBuilder cartnY;
private final FloatColumnBuilder cartnZ;
private final FloatColumnBuilder occupancy;
private final FloatColumnBuilder bIsoOrEquiv;
private final IntColumnBuilder authSeqId;
private final StrColumnBuilder authCompId;
private final StrColumnBuilder authAsymId;
private final StrColumnBuilder authAtomId;
private final IntColumnBuilder pdbxPDBModelNum;
AtomSiteCollector() {
this.atomSiteBuilder = new MmCifCategoryBuilder.AtomSiteBuilder(null);
this.groupPDB = atomSiteBuilder.enterGroupPDB();
this.id = atomSiteBuilder.enterId();
this.typeSymbol = atomSiteBuilder.enterTypeSymbol();
this.labelAtomId = atomSiteBuilder.enterLabelAtomId();
this.labelAltId = atomSiteBuilder.enterLabelAltId();
this.labelCompId = atomSiteBuilder.enterLabelCompId();
this.labelAsymId = atomSiteBuilder.enterLabelAsymId();
this.labelEntityId = atomSiteBuilder.enterLabelEntityId();
this.labelSeqId = atomSiteBuilder.enterLabelSeqId();
this.pdbxPDBInsCode = atomSiteBuilder.enterPdbxPDBInsCode();
this.cartnX = atomSiteBuilder.enterCartnX();
this.cartnY = atomSiteBuilder.enterCartnY();
this.cartnZ = atomSiteBuilder.enterCartnZ();
this.occupancy = atomSiteBuilder.enterOccupancy();
this.bIsoOrEquiv = atomSiteBuilder.enterBIsoOrEquiv();
this.authSeqId = atomSiteBuilder.enterAuthSeqId();
this.authCompId = atomSiteBuilder.enterAuthCompId();
this.authAsymId = atomSiteBuilder.enterAuthAsymId();
this.authAtomId = atomSiteBuilder.enterAuthAtomId();
this.pdbxPDBModelNum = atomSiteBuilder.enterPdbxPDBModelNum();
}
@Override
public void accept(WrappedAtom wrappedAtom) {
Atom atom = wrappedAtom.getAtom();
Group group = atom.getGroup();
Chain chain = group.getChain();
groupPDB.add(group.getType().equals(GroupType.HETATM) ? "HETATM" : "ATOM");
id.add(wrappedAtom.getAtomId());
Element element = atom.getElement();
typeSymbol.add(element.equals(Element.R) ? "X" : element.toString().toUpperCase());
labelAtomId.add(atom.getName());
Character altLoc = atom.getAltLoc();
if (altLoc == null || altLoc == ' ') {
labelAltId.markNextNotPresent();
} else {
labelAltId.add(String.valueOf(altLoc));
}
labelCompId.add(group.getPDBName());
labelAsymId.add(wrappedAtom.getChainId());
String entityId = "0";
int seqId = group.getResidueNumber().getSeqNum();
if (chain.getEntityInfo() != null) {
entityId = Integer.toString(chain.getEntityInfo().getMolId());
if (chain.getEntityInfo().getType() == EntityType.POLYMER) {
// this only makes sense for polymeric chains, non-polymer chains will never have seqres groups and
// there's no point in calling getAlignedResIndex
seqId = chain.getEntityInfo().getAlignedResIndex(group, chain);
}
}
labelEntityId.add(entityId);
labelSeqId.add(seqId);
String insCode = "";
if (group.getResidueNumber().getInsCode() != null) {
insCode = Character.toString(group.getResidueNumber().getInsCode());
}
if (insCode.isEmpty()) {
pdbxPDBInsCode.markNextUnknown();
} else {
pdbxPDBInsCode.add(insCode);
}
cartnX.add(atom.getX());
cartnY.add(atom.getY());
cartnZ.add(atom.getZ());
occupancy.add(atom.getOccupancy());
bIsoOrEquiv.add(atom.getTempFactor());
authSeqId.add(group.getResidueNumber().getSeqNum());
authCompId.add(group.getPDBName());
authAsymId.add(wrappedAtom.getChainName());
authAtomId.add(atom.getName());
pdbxPDBModelNum.add(wrappedAtom.getModel());
}
AtomSiteCollector combine(AtomSiteCollector other) {
throw new UnsupportedOperationException("impl by calling addAll for all collections");
}
Category get() {
groupPDB.leaveColumn();
id.leaveColumn();
typeSymbol.leaveColumn();
labelAtomId.leaveColumn();
labelAltId.leaveColumn();
labelCompId.leaveColumn();
labelAsymId.leaveColumn();
labelEntityId.leaveColumn();
labelSeqId.leaveColumn();
pdbxPDBInsCode.leaveColumn();
cartnX.leaveColumn();
cartnY.leaveColumn();
cartnZ.leaveColumn();
occupancy.leaveColumn();
bIsoOrEquiv.leaveColumn();
authSeqId.leaveColumn();
authCompId.leaveColumn();
authAsymId.leaveColumn();
authAtomId.leaveColumn();
pdbxPDBModelNum.leaveColumn();
return atomSiteBuilder.build();
}
}
}