All downloads are free. The search and download functionality uses the official Maven repository.

org.biojava.nbio.structure.io.cif.AbstractCifFileSupplier Maven / Gradle / Ivy

There is a newer version: 7.1.3
Show newest version
package org.biojava.nbio.structure.io.cif;

import org.biojava.nbio.structure.*;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.cif.CifBuilder;
import org.rcsb.cif.model.Category;
import org.rcsb.cif.model.CifFile;
import org.rcsb.cif.model.FloatColumnBuilder;
import org.rcsb.cif.model.IntColumnBuilder;
import org.rcsb.cif.model.StrColumnBuilder;
import org.rcsb.cif.schema.StandardSchemata;
import org.rcsb.cif.schema.mm.MmCifBlockBuilder;
import org.rcsb.cif.schema.mm.MmCifCategoryBuilder;
import org.rcsb.cif.schema.mm.MmCifFileBuilder;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.stream.Collector;
import java.util.stream.Collectors;

/**
 * Convert a BioJava object to a CifFile.
 * @author Sebastian Bittrich
 * @since 5.3.0
 */
public abstract class AbstractCifFileSupplier implements CifFileSupplier {
    protected CifFile getInternal(Structure structure, List wrappedAtoms) {
        // for now BioJava only considered 3 categories for create a Cif representation of a structure

        // cell
        CrystalCell crystalCell = structure.getPDBHeader().getCrystallographicInfo().getCrystalCell();
        // symmetry
        SpaceGroup spaceGroup = structure.getPDBHeader().getCrystallographicInfo().getSpaceGroup();
        // atom_site
        Category atomSite = wrappedAtoms.stream().collect(toAtomSite());
        // entity information
        List entityInfos = structure.getEntityInfos();

        MmCifBlockBuilder blockBuilder = CifBuilder.enterFile(StandardSchemata.MMCIF)
                .enterBlock(structure.getPdbId() == null? "" : structure.getPdbId().getId());

        blockBuilder.enterStructKeywords().enterText()
        .add(String.join(", ", structure.getPDBHeader().getKeywords()))
        .leaveColumn().leaveCategory();

        if (atomSite.isDefined() && atomSite.getRowCount() > 0) {
            // set atom site
            blockBuilder.addCategory(atomSite);
        }

        if (crystalCell != null) {
            // set cell category
            blockBuilder.enterCell()
                    .enterLengthA()
                    .add(crystalCell.getA())
                    .leaveColumn()

                    .enterLengthB()
                    .add(crystalCell.getB())
                    .leaveColumn()

                    .enterLengthC()
                    .add(crystalCell.getC())
                    .leaveColumn()

                    .enterAngleAlpha()
                    .add(crystalCell.getAlpha())
                    .leaveColumn()

                    .enterAngleBeta()
                    .add(crystalCell.getBeta())
                    .leaveColumn()

                    .enterAngleGamma()
                    .add(crystalCell.getGamma())
                    .leaveColumn()
                    .leaveCategory();
        }

        if (spaceGroup != null) {
            // set symmetry category
            blockBuilder.enterSymmetry()
                    .enterSpaceGroupNameH_M()
                    .add(spaceGroup.getShortSymbol())
                    .leaveColumn()
                    .leaveCategory();
        }

        if (entityInfos != null) {

            String[] entityIds = new String[entityInfos.size()];
            String[] entityTypes = new String[entityInfos.size()];
            String[] entityDescriptions = new String[entityInfos.size()];

            for (int i=0; i e.getType() == EntityType.POLYMER).map(e -> Integer.toString(e.getMolId())).toArray(String[]::new);
            String[] polyEntitySeqs = entityInfos.stream().filter(e -> e.getType() == EntityType.POLYMER).map(e -> e.getChains().get(0).getSeqResSequence()).toArray(String[]::new);

            blockBuilder.enterEntity()
                    .enterId()
                    .add(entityIds)
                    .leaveColumn()

                    .enterType()
                    .add(entityTypes)
                    .leaveColumn()

                    .enterPdbxDescription()
                    .add(entityDescriptions)
                    .leaveColumn()

                    .leaveCategory();

            blockBuilder.enterEntityPoly()
                    .enterEntityId()
                    .add(polyEntityIds)
                    .leaveColumn()

                    .enterPdbxSeqOneLetterCodeCan()
                    .add(polyEntitySeqs)
                    .leaveColumn()

                    .leaveCategory();
        }

        return blockBuilder.leaveBlock().leaveFile();
    }

    /**
     * Wraps every atom of a chain and appends the wrapped atoms to the given list.
     * <p>
     * Groups are visited in the order returned by {@link Chain#getAtomGroups()}. For each group the atoms of the
     * group itself are wrapped first, followed by the atoms of its alternate-location groups.
     *
     * @param chain the chain whose atoms are collected
     * @param model the model number to record on every wrapped atom
     * @param wrappedAtoms the list that receives the wrapped atoms; it is modified in place
     */
    protected void handleChain(Chain chain, int model, List<WrappedAtom> wrappedAtoms) {
        final String chainName = chain.getName();
        final String chainId = chain.getId();
        for (Group group : chain.getAtomGroups()) {
            // The alt locs can have duplicates, since at parsing time we make sure that all alt loc groups have
            // all atoms (see StructureTools#cleanUpAltLocs)
            // Thus we have to remove duplicates here by using the atom id
            // See issue https://github.com/biojava/biojava/issues/778 and
            // TestAltLocs.testMmcifWritingAllAltlocs/testMmcifWritingPartialAltlocs
            Map<Integer, WrappedAtom> uniqueAtoms = new LinkedHashMap<>();
            wrapAtomsOfGroup(group, model, chainName, chainId, uniqueAtoms);

            if (group.hasAltLoc()) {
                for (Group alt : group.getAltLocs()) {
                    wrapAtomsOfGroup(alt, model, chainName, chainId, uniqueAtoms);
                }
            }

            wrappedAtoms.addAll(uniqueAtoms.values());
        }
    }

    /**
     * Wraps each non-null atom of one group and stores it under its PDB serial number. An atom whose serial is
     * already present replaces the earlier entry but keeps that entry's position in the map.
     */
    private static void wrapAtomsOfGroup(Group group, int model, String chainName, String chainId,
                                         Map<Integer, WrappedAtom> uniqueAtoms) {
        for (int atomIndex = 0; atomIndex < group.size(); atomIndex++) {
            Atom atom = group.getAtom(atomIndex);
            if (atom == null) {
                continue;
            }

            uniqueAtoms.put(atom.getPDBserial(), new WrappedAtom(model, chainName, chainId, atom, atom.getPDBserial()));
        }
    }

    /**
     * Wrapped atoms represent individual atoms enriched with model- and chain-level information. Also, gives control
     * over the atomId field. Useful to convert structures (and subsets thereof) to their mmCIF representation.
     * <p>
     * Instances are immutable holders; the wrapped {@link Atom} itself is stored by reference.
     */
    public static class WrappedAtom {
        private final int model;
        private final String chainName;
        private final String chainId;
        private final Atom atom;
        private final int atomId;

        /**
         * Construct a new wrapped atom.
         * <p>
         * The column names below are the atom_site columns that {@code AtomSiteCollector} fills from each value.
         * @param model the model number (pdbx_PDB_model_num)
         * @param chainName the chain name (auth_asym_id)
         * @param chainId the chain identifier (label_asym_id)
         * @param atom the atom instance itself
         * @param atomId the atom identifier (atom_site.id)
         */
        public WrappedAtom(int model, String chainName, String chainId, Atom atom, int atomId) {
            this.model = model;
            this.chainName = chainName;
            this.chainId = chainId;
            this.atom = atom;
            this.atomId = atomId;
        }

        /**
         * @return the model number
         */
        public int getModel() {
            return model;
        }

        /**
         * @return the chain name, written as auth_asym_id
         */
        public String getChainName() {
            return chainName;
        }

        /**
         * @return the chain identifier, written as label_asym_id
         */
        public String getChainId() {
            return chainId;
        }

        /**
         * @return the wrapped atom
         */
        public Atom getAtom() {
            return atom;
        }

        /**
         * @return the identifier written to the id column of atom_site
         */
        public int getAtomId() {
            return atomId;
        }
    }

    /**
     * Collects {@link WrappedAtom} instances into one {@link org.rcsb.cif.schema.mm.AtomSite}.
     * <p>
     * The collector's combiner is not implemented and throws {@link UnsupportedOperationException}, so it is meant
     * for sequential streams only.
     * @return an atom site record containing all atoms
     */
    public static Collector<WrappedAtom, ?, Category> toAtomSite() {
        return Collector.of(AtomSiteCollector::new,
                AtomSiteCollector::accept,
                AtomSiteCollector::combine,
                AtomSiteCollector::get);
    }

    /**
     * Mutable accumulator behind {@link #toAtomSite()}: holds one column builder per atom_site column, appends one
     * value to each column for every accepted atom and finally builds the category.
     */
    static class AtomSiteCollector implements Consumer<WrappedAtom> {
        private final MmCifCategoryBuilder.AtomSiteBuilder atomSiteBuilder;
        private final StrColumnBuilder groupPDB;
        private final IntColumnBuilder id;
        private final StrColumnBuilder typeSymbol;
        private final StrColumnBuilder labelAtomId;
        private final StrColumnBuilder labelAltId;
        private final StrColumnBuilder labelCompId;
        private final StrColumnBuilder labelAsymId;
        private final StrColumnBuilder labelEntityId;
        private final IntColumnBuilder labelSeqId;
        private final StrColumnBuilder pdbxPDBInsCode;
        private final FloatColumnBuilder cartnX;
        private final FloatColumnBuilder cartnY;
        private final FloatColumnBuilder cartnZ;
        private final FloatColumnBuilder occupancy;
        private final FloatColumnBuilder bIsoOrEquiv;
        private final IntColumnBuilder authSeqId;
        private final StrColumnBuilder authCompId;
        private final StrColumnBuilder authAsymId;
        private final StrColumnBuilder authAtomId;
        private final IntColumnBuilder pdbxPDBModelNum;

        /**
         * Creates the category builder and opens every column that {@link #accept(WrappedAtom)} fills.
         */
        AtomSiteCollector() {
            // NOTE(review): null is passed as the builder's constructor argument, presumably because the category
            // is built standalone and attached to a block later - confirm against the ciftools API.
            this.atomSiteBuilder = new MmCifCategoryBuilder.AtomSiteBuilder(null);
            this.groupPDB = atomSiteBuilder.enterGroupPDB();
            this.id = atomSiteBuilder.enterId();
            this.typeSymbol = atomSiteBuilder.enterTypeSymbol();
            this.labelAtomId = atomSiteBuilder.enterLabelAtomId();
            this.labelAltId = atomSiteBuilder.enterLabelAltId();
            this.labelCompId = atomSiteBuilder.enterLabelCompId();
            this.labelAsymId = atomSiteBuilder.enterLabelAsymId();
            this.labelEntityId = atomSiteBuilder.enterLabelEntityId();
            this.labelSeqId = atomSiteBuilder.enterLabelSeqId();
            this.pdbxPDBInsCode = atomSiteBuilder.enterPdbxPDBInsCode();
            this.cartnX = atomSiteBuilder.enterCartnX();
            this.cartnY = atomSiteBuilder.enterCartnY();
            this.cartnZ = atomSiteBuilder.enterCartnZ();
            this.occupancy = atomSiteBuilder.enterOccupancy();
            this.bIsoOrEquiv = atomSiteBuilder.enterBIsoOrEquiv();
            this.authSeqId = atomSiteBuilder.enterAuthSeqId();
            this.authCompId = atomSiteBuilder.enterAuthCompId();
            this.authAsymId = atomSiteBuilder.enterAuthAsymId();
            this.authAtomId = atomSiteBuilder.enterAuthAtomId();
            this.pdbxPDBModelNum = atomSiteBuilder.enterPdbxPDBModelNum();
        }

        /**
         * Appends one row describing the given atom: exactly one value (or one "not present" / "unknown" marker)
         * is added to every column.
         *
         * @param wrappedAtom the atom to append, together with its model and chain information
         */
        @Override
        public void accept(WrappedAtom wrappedAtom) {
            Atom atom = wrappedAtom.getAtom();
            Group group = atom.getGroup();
            Chain chain = group.getChain();

            groupPDB.add(group.getType().equals(GroupType.HETATM) ? "HETATM" : "ATOM");
            id.add(wrappedAtom.getAtomId());
            Element element = atom.getElement();
            // Locale.ROOT keeps the symbol independent of the default locale (e.g. Turkish dotted capital I)
            typeSymbol.add(element.equals(Element.R) ? "X" : element.toString().toUpperCase(Locale.ROOT));
            labelAtomId.add(atom.getName());
            Character altLoc = atom.getAltLoc();
            if (altLoc == null || altLoc == ' ') {
                labelAltId.markNextNotPresent();
            } else {
                labelAltId.add(String.valueOf(altLoc));
            }
            labelCompId.add(group.getPDBName());
            labelAsymId.add(wrappedAtom.getChainId());
            // defaults used when the chain has no entity information: entity "0" and the author residue number
            String entityId = "0";
            int seqId = group.getResidueNumber().getSeqNum();
            if (chain.getEntityInfo() != null) {
                entityId = Integer.toString(chain.getEntityInfo().getMolId());
                if (chain.getEntityInfo().getType() == EntityType.POLYMER) {
                    // this only makes sense for polymeric chains, non-polymer chains will never have seqres groups and
                    // there's no point in calling getAlignedResIndex
                    seqId = chain.getEntityInfo().getAlignedResIndex(group, chain);
                }
            }
            labelEntityId.add(entityId);
            labelSeqId.add(seqId);
            String insCode = "";
            if (group.getResidueNumber().getInsCode() != null) {
                insCode = Character.toString(group.getResidueNumber().getInsCode());
            }
            if (insCode.isEmpty()) {
                pdbxPDBInsCode.markNextUnknown();
            } else {
                pdbxPDBInsCode.add(insCode);
            }
            cartnX.add(atom.getX());
            cartnY.add(atom.getY());
            cartnZ.add(atom.getZ());
            occupancy.add(atom.getOccupancy());
            bIsoOrEquiv.add(atom.getTempFactor());
            authSeqId.add(group.getResidueNumber().getSeqNum());
            authCompId.add(group.getPDBName());
            authAsymId.add(wrappedAtom.getChainName());
            authAtomId.add(atom.getName());
            pdbxPDBModelNum.add(wrappedAtom.getModel());
        }

        /**
         * Merging two partial results is not implemented.
         *
         * @param other the accumulator that would be merged into this one
         * @return never returns normally
         * @throws UnsupportedOperationException always
         */
        AtomSiteCollector combine(AtomSiteCollector other) {
            throw new UnsupportedOperationException("impl by calling addAll for all collections");
        }

        /**
         * Closes every column and builds the category.
         *
         * @return the atom_site category holding all accepted atoms
         */
        Category get() {
            groupPDB.leaveColumn();
            id.leaveColumn();
            typeSymbol.leaveColumn();
            labelAtomId.leaveColumn();
            labelAltId.leaveColumn();
            labelCompId.leaveColumn();
            labelAsymId.leaveColumn();
            labelEntityId.leaveColumn();
            labelSeqId.leaveColumn();
            pdbxPDBInsCode.leaveColumn();
            cartnX.leaveColumn();
            cartnY.leaveColumn();
            cartnZ.leaveColumn();
            occupancy.leaveColumn();
            bIsoOrEquiv.leaveColumn();
            authSeqId.leaveColumn();
            authCompId.leaveColumn();
            authAsymId.leaveColumn();
            authAtomId.leaveColumn();
            pdbxPDBModelNum.leaveColumn();
            return atomSiteBuilder.build();
        }
    }
}