org.biojava.nbio.structure.io.mmtf.MmtfUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.io.mmtf;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import javax.vecmath.Matrix4d;
import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.Bond;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.ExperimentalTechnique;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.NucleotideImpl;
import org.biojava.nbio.structure.PDBCrystallographicInfo;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.chem.ChemComp;
import org.biojava.nbio.structure.chem.ChemCompGroupFactory;
import org.biojava.nbio.structure.chem.ChemCompTools;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.secstruc.SecStrucCalc;
import org.biojava.nbio.structure.secstruc.SecStrucState;
import org.biojava.nbio.structure.secstruc.SecStrucType;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.rcsb.mmtf.dataholders.DsspType;
import org.rcsb.mmtf.utils.CodecUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A utils class of functions needed for Biojava to read and write to mmtf.
* @author Anthony Bradley
*
*/
public class MmtfUtils {
private static final Logger LOGGER = LoggerFactory.getLogger(MmtfUtils.class);
/**
* This sets all microheterogeneous groups
* (previously alternate location groups) as separate groups.
* This is required because mmtf groups cannot have multiple HET codes.
* @param bioJavaStruct
*/
public static void fixMicroheterogenity(Structure bioJavaStruct) {
// Loop through the models
for (int i=0; i chains = bioJavaStruct.getModel(i);
for (Chain c : chains) {
// Build a new list of groups
List outGroups = new ArrayList<>();
for (Group g : c.getAtomGroups()) {
List removeList = new ArrayList<>();
for (Group altLoc : g.getAltLocs()) {
// Check if they are not equal -> microheterogenity
if(! altLoc.getPDBName().equals(g.getPDBName())) {
// Now add this group to the main list
removeList.add(altLoc);
}
}
// Add this group
outGroups.add(g);
// Remove any microhet alt locs
g.getAltLocs().removeAll(removeList);
// Add these microhet alt locs
outGroups.addAll(removeList);
}
c.setAtomGroups(outGroups);
}
}
}
/**
 * Runs the secondary structure calculation on a Biojava structure.
 * <p>
 * A {@link StructureException} raised by the calculation is not propagated:
 * a warning is logged and the method returns normally.
 *
 * @param bioJavaStruct the Biojava structure to run the calculation on
 */
public static void calculateDsspSecondaryStructure(Structure bioJavaStruct) {
    final SecStrucCalc calculator = new SecStrucCalc();
    try {
        calculator.calculate(bioJavaStruct, true);
    } catch (StructureException failure) {
        // Best effort: report the problem and carry on without annotation.
        final String reason = failure.getMessage();
        LOGGER.warn("Could not calculate secondary structure (error {}). Secondary structure annotation will be missing.", reason);
    }
}
/**
 * Returns the short symbol of a space group.
 *
 * @param spaceGroup the input SpaceGroup object, may be {@code null}
 * @return the short symbol of the space group, or {@code "NA"} when
 *         {@code spaceGroup} is {@code null}
 */
public static String getSpaceGroupAsString(SpaceGroup spaceGroup) {
    return spaceGroup == null ? "NA" : spaceGroup.getShortSymbol();
}
/**
 * Packs the unit cell parameters into a length-six float array.
 * <p>
 * The order is a, b, c, alpha, beta, gamma; each value is narrowed to
 * {@code float}.
 *
 * @param xtalInfo the input PDBCrystallographicInfo object
 * @return the length six float array, or {@code null} when the
 *         crystallographic info carries no crystal cell
 */
public static float[] getUnitCellAsArray(PDBCrystallographicInfo xtalInfo) {
    final CrystalCell cell = xtalInfo.getCrystalCell();
    if (cell == null) {
        return null;
    }
    return new float[] {
        (float) cell.getA(),
        (float) cell.getB(),
        (float) cell.getC(),
        (float) cell.getAlpha(),
        (float) cell.getBeta(),
        (float) cell.getGamma()
    };
}
/**
 * Converts the set of experimental techniques to an array of their names.
 * <p>
 * The array follows the iteration order of the input set.
 *
 * @param experimentalTechniques the input set of experimental techniques,
 *        may be {@code null}
 * @return the names of the techniques; an empty array when the input is
 *         {@code null}
 */
public static String[] techniquesToStringArray(Set<ExperimentalTechnique> experimentalTechniques) {
    if (experimentalTechniques == null) {
        return new String[0];
    }
    String[] outArray = new String[experimentalTechniques.size()];
    int index = 0;
    for (ExperimentalTechnique experimentalTechnique : experimentalTechniques) {
        outArray[index] = experimentalTechnique.getName();
        index++;
    }
    return outArray;
}
/**
 * Convert a Date object to an ISO calendar date string ({@code yyyy-MM-dd}).
 * <p>
 * The date is rendered in the JVM default time zone. The formatter is pinned
 * to {@code Locale.ROOT} so that the output always uses the Gregorian calendar
 * and ASCII digits, regardless of the default locale of the JVM.
 *
 * @param inputDate The input date object
 * @return the date formatted as {@code yyyy-MM-dd}
 */
public static String dateToIsoString(Date inputDate) {
    // A fresh formatter per call: SimpleDateFormat is not thread-safe.
    // Locale.ROOT avoids locale-specific calendars (e.g. Buddhist era years)
    // and locale-specific digits leaking into the ISO string.
    DateFormat dateStringFormat = new SimpleDateFormat("yyyy-MM-dd", java.util.Locale.ROOT);
    return dateStringFormat.format(inputDate);
}
/**
* Convert a bioassembly information into a map of transform, chainindices it relates to.
* @param bioassemblyInfo the bioassembly info object for this structure
* @param chainIdToIndexMap the map of chain ids to the index that chain corresponds to.
* @return the bioassembly information (as primitive types).
*/
public static Map getTransformMap(BioAssemblyInfo bioassemblyInfo, Map chainIdToIndexMap) {
Map> matMap = new LinkedHashMap<>();
List transforms = bioassemblyInfo.getTransforms();
for (BiologicalAssemblyTransformation transformation : transforms) {
Matrix4d transMatrix = transformation.getTransformationMatrix();
String transChainId = transformation.getChainId();
if (!chainIdToIndexMap.containsKey(transChainId)){
continue;
}
int chainIndex = chainIdToIndexMap.get(transformation.getChainId());
if(matMap.containsKey(transMatrix)){
matMap.get(transMatrix).add(chainIndex);
}
else{
List chainIdList = new ArrayList<>();
chainIdList.add(chainIndex);
matMap.put(transMatrix, chainIdList);
}
}
Map outMap = new LinkedHashMap<>();
for (Entry> entry : matMap.entrySet()) {
outMap.put(convertToDoubleArray(entry.getKey()), CodecUtils.convertToIntArray(entry.getValue()));
}
return outMap;
}
/**
 * Flattens a 4x4 matrix into a 16-element array, row by row.
 * <p>
 * Element (row, col) of the matrix ends up at index {@code row*4 + col}.
 *
 * @param transformationMatrix the input matrix4d object
 * @return the double array (16 long).
 */
public static double[] convertToDoubleArray(Matrix4d transformationMatrix) {
    final double[] packed = new double[16];
    int cursor = 0;
    for (int row = 0; row < 4; row++) {
        for (int col = 0; col < 4; col++) {
            // cursor walks 0..15 in step with (row, col), i.e. row*4 + col
            packed[cursor++] = transformationMatrix.getElement(row, col);
        }
    }
    return packed;
}
/**
* Count the total number of groups in the structure
* @param structure the input structure
* @return the total number of groups
*/
public static int getNumGroups(Structure structure) {
int count = 0;
for(int i=0; i getAtomsForGroup(Group inputGroup) {
Set uniqueAtoms = new HashSet<>();
List theseAtoms = new ArrayList<>();
for(Atom a: inputGroup.getAtoms()){
theseAtoms.add(a);
uniqueAtoms.add(a);
}
List altLocs = inputGroup.getAltLocs();
for(Group thisG: altLocs){
for(Atom a: thisG.getAtoms()){
if(uniqueAtoms.contains(a)){
continue;
}
theseAtoms.add(a);
}
}
return theseAtoms;
}
/**
* Find the number of bonds in a group
* @param atomsInGroup the list of atoms in the group
* @return the number of atoms in the group
*/
public static int getNumBondsInGroup(List atomsInGroup) {
int bondCounter = 0;
for(Atom atom : atomsInGroup) {
if(atom.getBonds()==null){
continue;
}
for(Bond bond : atom.getBonds()) {
// Now set the bonding information.
Atom other = bond.getOther(atom);
// If both atoms are in the group
if (atomsInGroup.indexOf(other)!=-1){
Integer firstBondIndex = atomsInGroup.indexOf(atom);
Integer secondBondIndex = atomsInGroup.indexOf(other);
// Don't add the same bond twice
if (firstBondIndex theseAtoms = new ArrayList<>();
List allChains = new ArrayList<>();
Map chainIdToIndexMap = new LinkedHashMap<>();
int chainCounter = 0;
int bondCount = 0;
mmtfSummaryDataBean.setAllAtoms(theseAtoms);
mmtfSummaryDataBean.setAllChains(allChains);
mmtfSummaryDataBean.setChainIdToIndexMap(chainIdToIndexMap);
for (int i=0; i chains = structure.getModel(i);
allChains.addAll(chains);
for (Chain chain : chains) {
String idOne = chain.getId();
if (!chainIdToIndexMap.containsKey(idOne)) {
chainIdToIndexMap.put(idOne, chainCounter);
}
chainCounter++;
for (Group g : chain.getAtomGroups()) {
for(Atom atom: getAtomsForGroup(g)){
theseAtoms.add(atom);
// If both atoms are in the group
if (atom.getBonds()!=null){
bondCount+=atom.getBonds().size();
}
}
}
}
}
// Assumes all bonds are referenced twice
mmtfSummaryDataBean.setNumBonds(bondCount/2);
return mmtfSummaryDataBean;
}
/**
* Get a list of N 4*4 matrices from a single list of doubles of length 16*N.
* @param ncsOperMatrixList the input list of doubles
* @return the list of 4*4 matrics
*/
public static Matrix4d[] getNcsAsMatrix4d(double[][] ncsOperMatrixList) {
if(ncsOperMatrixList==null){
return null;
}
int numMats = ncsOperMatrixList.length;
if(numMats==0){
return null;
}
if(numMats==1 && ncsOperMatrixList[0].length==0){
return null;
}
Matrix4d[] outList = new Matrix4d[numMats];
for(int i=0; i seqResGroups = chain.getSeqResGroups();
addGroupAtId(seqResGroups, group, sequenceIndexId);
}
/**
* Add the missing groups to the SeqResGroups.
* @param modelChain the chain to add the information for
* @param sequence the sequence of the construct
*/
public static void addSeqRes(Chain modelChain, String sequence) {
List seqResGroups = modelChain.getSeqResGroups();
GroupType chainType = getChainType(modelChain.getAtomGroups());
for(int i=0; i i) {
group=seqResGroups.get(i);
}
if(group!=null){
continue;
}
group = getSeqResGroup(singleLetterCode, chainType);
addGroupAtId(seqResGroups, group, i);
}
}
/**
 * Determine the type of a chain from its groups.
 *
 * @param groups the groups of the chain; {@code null} entries are ignored
 * @return the type of the first non-null group that is not
 *         {@link GroupType#HETATM}, or {@link GroupType#HETATM} when there
 *         is no such group
 */
private static GroupType getChainType(List<Group> groups) {
    for (Group group : groups) {
        if (group != null && group.getType() != GroupType.HETATM) {
            return group.getType();
        }
    }
    return GroupType.HETATM;
}
/**
 * Store an element at the given index of a list, growing the list as needed.
 * <p>
 * The list is padded with {@code null} entries until the index exists. A
 * negative index leaves the list unchanged.
 *
 * @param <T> the element type of the list
 * @param seqResGroups the list to write into
 * @param group the element to store
 * @param sequenceIndexId the index to store the element at
 */
private static <T> void addGroupAtId(List<T> seqResGroups, T group, int sequenceIndexId) {
    // Pad with nulls so that set() below never runs past the end.
    while (seqResGroups.size() <= sequenceIndexId) {
        seqResGroups.add(null);
    }
    if (sequenceIndexId >= 0) {
        seqResGroups.set(sequenceIndexId, group);
    }
}
/**
 * Build a SeqRes group for a one-letter code.
 * <p>
 * For {@link GroupType#AMINOACID} an {@link AminoAcidImpl} is created, for
 * {@link GroupType#NUCLEOTIDE} a {@link NucleotideImpl}; in both cases the
 * PDB name and chemical component are set from the looked-up residue name.
 *
 * @param singleLetterCode the one-letter code of the residue
 * @param type the type of the chain the residue belongs to
 * @return the new group, or {@code null} when the type is neither amino acid
 *         nor nucleotide, or when the code has no matching residue name
 */
private static Group getSeqResGroup(char singleLetterCode, GroupType type) {
    if (type == GroupType.AMINOACID) {
        final String aminoName = ChemCompTools.getAminoThreeLetter(singleLetterCode);
        if (aminoName == null) {
            return null;
        }
        final ChemComp definition = ChemCompGroupFactory.getChemComp(aminoName);
        final AminoAcidImpl residue = new AminoAcidImpl();
        residue.setRecordType(AminoAcid.SEQRESRECORD);
        residue.setAminoType(singleLetterCode);
        residue.setPDBName(aminoName);
        residue.setChemComp(definition);
        return residue;
    }
    if (type == GroupType.NUCLEOTIDE) {
        final String nucleotideName = ChemCompTools.getDNATwoLetter(singleLetterCode);
        if (nucleotideName == null) {
            return null;
        }
        final ChemComp definition = ChemCompGroupFactory.getChemComp(nucleotideName);
        final NucleotideImpl base = new NucleotideImpl();
        base.setPDBName(nucleotideName);
        base.setChemComp(definition);
        return base;
    }
    // Any other chain type has no SeqRes representation here.
    return null;
}
}