All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.structure.io.mmtf.MmtfUtils Maven / Gradle / Ivy

There is a newer version: 7.1.3
Show newest version
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */
package org.biojava.nbio.structure.io.mmtf;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Bond;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.ExperimentalTechnique;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.NucleotideImpl;
import org.biojava.nbio.structure.PDBCrystallographicInfo;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.chem.ChemComp;
import org.biojava.nbio.structure.chem.ChemCompGroupFactory;
import org.biojava.nbio.structure.chem.ChemCompTools;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.secstruc.SecStrucCalc;
import org.biojava.nbio.structure.secstruc.SecStrucState;
import org.biojava.nbio.structure.secstruc.SecStrucType;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.mmtf.dataholders.DsspType;
import org.rcsb.mmtf.utils.CodecUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A utils class of functions needed for Biojava to read and write to mmtf.
 * @author Anthony Bradley
 *
 */
public class MmtfUtils {

	private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class);

	/**
	 * This sets all microheterogeneous groups
	 * (previously alternate location groups) as separate groups.
	 * This is required because mmtf groups cannot have multiple HET codes.
	 * @param bioJavaStruct
	 */
	public static void fixMicroheterogenity(Structure bioJavaStruct) {
		// Loop through the models
		for (int i=0; i chains = bioJavaStruct.getModel(i);
			for (Chain c : chains) {
				// Build a new list of groups
				List outGroups = new ArrayList<>();
				for (Group g : c.getAtomGroups()) {
					List removeList = new ArrayList<>();
					for (Group altLoc : g.getAltLocs()) {
						// Check if they are not equal -> microheterogenity
						if(! altLoc.getPDBName().equals(g.getPDBName())) {
							// Now add this group to the main list
							removeList.add(altLoc);
						}
					}
					// Add this group
					outGroups.add(g);
					// Remove any microhet alt locs
					g.getAltLocs().removeAll(removeList);
					// Add these microhet alt locs
					outGroups.addAll(removeList);
				}
				c.setAtomGroups(outGroups);
			}
		}
	}


	/**
	 * Run the secondary structure calculation on a Biojava structure.
	 * A {@link StructureException} raised by the calculation is not propagated;
	 * a warning is logged instead.
	 * @param bioJavaStruct the Biojava structure to run the calculation on
	 */
	public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) {
		try {
			new SecStrucCalc().calculate(bioJavaStruct, true);
		} catch (StructureException e) {
			LOGGER.warn("Could not calculate secondary structure (error {}). Secondary structure annotation will be missing.", e.getMessage());
		}
	}

	/**
	 * Render a space group as its short symbol.
	 * @param spaceGroup the input SpaceGroup object, may be null
	 * @return the short symbol of the space group, or "NA" when the input is null
	 */
	public static String getSpaceGroupAsString(SpaceGroup spaceGroup) {
		return spaceGroup == null ? "NA" : spaceGroup.getShortSymbol();
	}

	/**
	 * Pack the unit cell parameters into a float array of length six,
	 * ordered a, b, c, alpha, beta, gamma.
	 * @param xtalInfo the input PDBCrystallographicInfo object
	 * @return the length six float array, or null when the crystallographic
	 * info carries no crystal cell
	 */
	public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) {
		CrystalCell cell = xtalInfo.getCrystalCell();
		if (cell == null) {
			return null;
		}
		return new float[] {
			(float) cell.getA(),
			(float) cell.getB(),
			(float) cell.getC(),
			(float) cell.getAlpha(),
			(float) cell.getBeta(),
			(float) cell.getGamma()
		};
	}

	/**
	 * Converts the set of experimental techniques to an array of strings.
	 * The array follows the iteration order of the input set.
	 * @param experimentalTechniques the input set of experimental techniques, may be null
	 * @return the names of the techniques; an empty array when the input is null
	 */
	public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) {
		if (experimentalTechniques == null) {
			return new String[0];
		}
		String[] outArray = new String[experimentalTechniques.size()];
		int index = 0;
		for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) {
			outArray[index++] = experimentalTechnique.getName();
		}
		return outArray;
	}

	/**
	 * Convert a Date object to an ISO date string ({@code yyyy-MM-dd}).
	 * <p>
	 * The formatter is pinned to {@link Locale#ROOT} so the output does not
	 * depend on the JVM's default locale (some default locales select a
	 * different calendar system and would emit a different year). The JVM
	 * default time zone is still used to decide the calendar day.
	 * @param inputDate The input date object
	 * @return the date in ISO format
	 */
	public static String dateToIsoString(Date inputDate) {
		// A fresh formatter per call: SimpleDateFormat is not thread-safe.
		DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
		return dateStringFormat.format(inputDate);
	}

	/**
	 * Convert a bioassembly information into a map of transform, chainindices it relates to.
	 * @param bioassemblyInfo  the bioassembly info object for this structure
	 * @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to.
	 * @return the bioassembly information (as primitive types).
	 */
	public static Map getTransformMap(BioAssemblyInfo bioassemblyInfo, Map chainIdToIndexMap) {
	    Map> matMap = new LinkedHashMap<>();
		List transforms = bioassemblyInfo.getTransforms();
		for (BiologicalAssemblyTransformation transformation : transforms) {
			Matrix4d transMatrix = transformation.getTransformationMatrix();
			String transChainId = transformation.getChainId();
			if (!chainIdToIndexMap.containsKey(transChainId)){
				continue;
			}
			int chainIndex = chainIdToIndexMap.get(transformation.getChainId());
			if(matMap.containsKey(transMatrix)){
				matMap.get(transMatrix).add(chainIndex);
			}
			else{
				List chainIdList = new ArrayList<>();
				chainIdList.add(chainIndex);
				matMap.put(transMatrix, chainIdList);
			}
		}

	    Map outMap = new LinkedHashMap<>();
		for (Entry> entry : matMap.entrySet()) {
			outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue()));
		}
		return outMap;
	}

	/**
	 * Flatten a 4x4 matrix into a double array, row by row.
	 * Element (row, col) ends up at index {@code row*4 + col}.
	 * @param transformationMatrix the input matrix4d object
	 * @return the double array (16 long).
	 */
	public static double[] convertToDoubleArray(Matrix4d transformationMatrix) {
		double[] flat = new double[16];
		for (int k = 0; k < flat.length; k++) {
			// k / 4 is the row, k % 4 the column
			flat[k] = transformationMatrix.getElement(k / 4, k % 4);
		}
		return flat;
	}

	/**
	 * Count the total number of groups in the structure
	 * @param structure the input structure
	 * @return the total number of groups
	 */
	public static int getNumGroups(Structure structure) {
		int count = 0;
		for(int i=0; i getAtomsForGroup(Group inputGroup) {
		Set uniqueAtoms = new HashSet<>();
		List theseAtoms = new ArrayList<>();
		for(Atom a: inputGroup.getAtoms()){
			theseAtoms.add(a);
			uniqueAtoms.add(a);
		}
		List altLocs = inputGroup.getAltLocs();
		for(Group thisG: altLocs){
			for(Atom a: thisG.getAtoms()){
				if(uniqueAtoms.contains(a)){
					continue;
				}
				theseAtoms.add(a);
			}
		}
		return theseAtoms;
	}

	/**
	 * Find the number of bonds in a group
	 * @param atomsInGroup the list of atoms in the group
	 * @return the number of atoms in the group
	 */
	public static int getNumBondsInGroup(List atomsInGroup) {
		int bondCounter = 0;
		for(Atom atom : atomsInGroup) {
			if(atom.getBonds()==null){
				continue;
			}
			for(Bond bond : atom.getBonds()) {
				// Now set the bonding information.
				Atom other = bond.getOther(atom);
				// If both atoms are in the group
				if (atomsInGroup.indexOf(other)!=-1){
					Integer firstBondIndex = atomsInGroup.indexOf(atom);
					Integer secondBondIndex = atomsInGroup.indexOf(other);
					// Don't add the same bond twice
					if (firstBondIndex theseAtoms = new ArrayList<>();
		List allChains = new ArrayList<>();
		Map chainIdToIndexMap = new LinkedHashMap<>();
		int chainCounter = 0;
		int bondCount = 0;
		mmtfSummaryDataBean.setAllAtoms(theseAtoms);
		mmtfSummaryDataBean.setAllChains(allChains);
		mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap);
		for (int i=0; i chains = structure.getModel(i);
			allChains.addAll(chains);
			for (Chain chain : chains) {
				String idOne = chain.getId();
				if (!chainIdToIndexMap.containsKey(idOne)) {
					chainIdToIndexMap.put(idOne, chainCounter);
				}
				chainCounter++;
				for (Group g : chain.getAtomGroups()) {
					for(Atom atom: getAtomsForGroup(g)){
						theseAtoms.add(atom);
						// If both atoms are in the group
						if (atom.getBonds()!=null){
							bondCount+=atom.getBonds().size();
						}
					}
				}
			}
		}
		// Assumes all bonds are referenced twice
		mmtfSummaryDataBean.setNumBonds(bondCount/2);
		return mmtfSummaryDataBean;

	}

	/**
	 * Get a list of N 4*4 matrices from a single list of doubles of length 16*N.
	 * @param ncsOperMatrixList the input list of doubles
	 * @return the list of 4*4 matrics
	 */
	public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) {
		if(ncsOperMatrixList==null){
			return null;
		}
		int numMats = ncsOperMatrixList.length;
		if(numMats==0){
			return null;
		}
		if(numMats==1 && ncsOperMatrixList[0].length==0){
			return null;
		}
		Matrix4d[] outList = new Matrix4d[numMats];
		for(int i=0; i seqResGroups = chain.getSeqResGroups();
		addGroupAtId(seqResGroups, group, sequenceIndexId);
	}

	/**
	 * Add the missing groups to the SeqResGroups.
	 * @param modelChain the chain to add the information for
	 * @param sequence the sequence of the construct
	 */
	public static void addSeqRes(Chain modelChain, String sequence) {

		List seqResGroups = modelChain.getSeqResGroups();
		GroupType chainType = getChainType(modelChain.getAtomGroups());

		for(int i=0; i i) {
				group=seqResGroups.get(i);
			}
			if(group!=null){
				continue;
			}

			group = getSeqResGroup(singleLetterCode, chainType);
			addGroupAtId(seqResGroups, group, i);
		}
	}

	/**
	 * Determine the type of a chain from its groups.
	 * @param groups the groups of the chain; null entries are ignored
	 * @return the type of the first non-null group that is not a HETATM,
	 * or {@link GroupType#HETATM} when there is none
	 */
	private static GroupType getChainType(List<Group> groups) {
		for (Group group : groups) {
			if (group != null && group.getType() != GroupType.HETATM) {
				return group.getType();
			}
		}
		return GroupType.HETATM;
	}

	/**
	 * Store an element at the given index of a list, growing the list as needed.
	 * The list is padded with nulls until the index is valid. A negative
	 * index leaves the list unchanged.
	 * @param seqResGroups the list to store into
	 * @param group the element to store (may be null)
	 * @param sequenceIndexId the index to store it at
	 */
	private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) {
		while (seqResGroups.size() <= sequenceIndexId) {
			seqResGroups.add(null);
		}
		if (sequenceIndexId >= 0) {
			seqResGroups.set(sequenceIndexId, group);
		}
	}

	/**
	 * Build a SEQRES group for a one-letter code.
	 * @param singleLetterCode the one-letter code of the residue
	 * @param type the type of the chain the residue belongs to
	 * @return an amino acid or nucleotide group carrying the matching name
	 * and chemical component, or null when the type is neither amino acid
	 * nor nucleotide or the code has no matching name
	 */
	private static Group getSeqResGroup(char singleLetterCode, GroupType type) {

		if (type == GroupType.AMINOACID) {
			String aminoName = ChemCompTools.getAminoThreeLetter(singleLetterCode);
			if (aminoName == null) {
				return null;
			}
			ChemComp aminoComp = ChemCompGroupFactory.getChemComp(aminoName);

			AminoAcidImpl aminoAcid = new AminoAcidImpl();
			aminoAcid.setRecordType(AminoAcid.SEQRESRECORD);
			aminoAcid.setAminoType(singleLetterCode);
			aminoAcid.setPDBName(aminoName);
			aminoAcid.setChemComp(aminoComp);
			return aminoAcid;
		}

		if (type == GroupType.NUCLEOTIDE) {
			String nucleotideName = ChemCompTools.getDNATwoLetter(singleLetterCode);
			if (nucleotideName == null) {
				return null;
			}
			ChemComp nucleotideComp = ChemCompGroupFactory.getChemComp(nucleotideName);

			NucleotideImpl nucleotide = new NucleotideImpl();
			nucleotide.setPDBName(nucleotideName);
			nucleotide.setChemComp(nucleotideComp);
			return nucleotide;
		}

		// Any other chain type has no SEQRES group representation here
		return null;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy