
com.actelion.research.chem.io.pdb.parser.StructureAssembler Maven / Gradle / Ivy
package com.actelion.research.chem.io.pdb.parser;
import com.actelion.research.chem.Molecule3D;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
* @author JW
* December 2019
* The StructureAssembler class takes a list of AtomRecords and constructs the 3D-Molecules. Protein atoms are grouped
* together, HETATM records are grouped according to their connectivity into non-bonded fragments, HETATM molecules that are
* connected to the protein are merged with it.
*
*/
public class StructureAssembler {
public static final String PROTEIN_GROUP = "protein";
public static final String SOLVENT_GROUP = "water";
public static final String LIGAND_GROUP = "ligand";
private Map> groups;
private List bondList;
private List atomRecords;
private List hetAtomRecords;
Map> mols;
public StructureAssembler(List bondList, List atomRecords, List hetAtomRecords) {
this.bondList = bondList;
this.atomRecords = atomRecords;
this.hetAtomRecords = hetAtomRecords;
groups = new HashMap>();
mols = new HashMap>();
}
public Map> assemble() {
group();
List protMols = new ArrayList<>();
mols.putIfAbsent(SOLVENT_GROUP, new ArrayList());
mols.putIfAbsent(LIGAND_GROUP, new ArrayList());
protMols.add(buildProtein());
mols.put(PROTEIN_GROUP, protMols);
buildHetResidues();
mols.forEach((k,v) -> v.forEach(e -> coupleBonds(e)));
return mols;
}
private void group() {
groups.put(PROTEIN_GROUP, atomRecords);
hetAtomRecords.forEach(e -> {
String s = e.getString();
if(groups.get(s)!=null) {
List li = groups.get(s);
li.add(e);
}
else {
List li = new ArrayList();
li.add(e);
groups.put(s, li);
}
});
for(int[] bond : bondList) {
try {
processBond(bond);
}
catch(Exception e) {
continue;
}
}
}
private Molecule3D buildProtein() {
ProteinSynthesizer proteinSynthesizer = new ProteinSynthesizer();
Map> residues_;
List proteinRecords = groups.get(PROTEIN_GROUP);
residues_ = proteinRecords.stream().collect(Collectors.groupingBy(AtomRecord::getString));
List residues = residues_.values().stream().map(v -> new Residue(v)).collect(Collectors.toList());
residues.sort((c1,c2) -> {
if(!c1.getChainID().equals(c2.getChainID())) //different chains
return c1.getChainID().compareTo(c2.getChainID());
else { //same chain
if(c1.getResnum()!=c2.getResnum())
return Integer.compare(c1.getResnum(), c2.getResnum());
else { //same chain, same residue number -> check insertion code
return c1.getInsertionCode().compareTo(c2.getInsertionCode());
}
}
});
List protMols = new ArrayList();
for(Residue residue : residues) {
Molecule3D fragment = residue.getMolecule();
if(fragment.getAtomAmino(0).trim().equals("ACT") || fragment.getAtomAmino(0).trim().equals("LIG")) {
mols.get(LIGAND_GROUP).add(fragment);
continue;
}
else if(fragment.getAtomAmino(0).trim().equals("HOH")) {
mols.get(SOLVENT_GROUP).add(fragment);
continue;
}
boolean coupled = proteinSynthesizer.addResidue(fragment);
if(coupled) {
if(residue.isTerminal()) {
protMols.add(proteinSynthesizer.retrieveProtein());
proteinSynthesizer = new ProteinSynthesizer();
}
else
continue;
}
else { //coupling failed
protMols.add(proteinSynthesizer.retrieveProtein());
proteinSynthesizer = new ProteinSynthesizer();
proteinSynthesizer.addResidue(fragment);
}
}
Molecule3D nextMol = proteinSynthesizer.retrieveProtein();
if(nextMol!=null && !protMols.contains(nextMol))
protMols.add(nextMol);
Molecule3D protein = protMols.stream().reduce((mol1,mol2) ->{
mol1.addMolecule(mol2);
return mol1;})
.get();
// protein.ensureHelperArrays(Molecule.cHelperCIP); // very expensive. Should not be done here just in case somebody might need parities
return protein;
}
private void buildHetResidues() {
for(String group : groups.keySet()) {
if(group.equals(PROTEIN_GROUP))
continue;
else {
List records = groups.get(group);
Residue atomGroup = new Residue(records);
Molecule3D fragment = atomGroup.getMolecule();
if(fragment.getAtomAmino(0).equals("HOH")) {
mols.putIfAbsent(SOLVENT_GROUP, new ArrayList());
mols.get(SOLVENT_GROUP).add(fragment);
}
else {
mols.putIfAbsent(LIGAND_GROUP, new ArrayList());
mols.get(LIGAND_GROUP).add(fragment);
}
}
}
}
private void coupleBonds(Molecule3D mol) {
for(int[] bond:bondList) {
int [] bondedAtoms = {-1,-1};
IntStream.range(0,mol.getAllAtoms()).forEach( e -> {
int pdbAtomID = mol.getAtomSequence(e);
if(pdbAtomID==bond[0])
bondedAtoms[0]=e;
else if(pdbAtomID==bond[1])
bondedAtoms[1]=e;
});
if(bondedAtoms[0]!=-1 && bondedAtoms[1]!=-1)
mol.addBond(bondedAtoms[0], bondedAtoms[1]);
}
}
/**
* merge atom groups that are connected by a bond
* @param bond
*/
private void processBond(int[] bond) {
int atom1 = bond[0];
int atom2 = bond[1];
String[] grps = new String[2];
groups.forEach((k,v) -> {
List atoms = v.stream().map(e -> e.getSerialId()).collect(Collectors.toList());
if(atoms.contains(atom1))
grps[0] = k;
if(atoms.contains(atom2))
grps[1] = k;
});
if(grps[0].equals(grps[1]))
return;
else {
if(grps[0].equals(PROTEIN_GROUP)) {
groups.get(grps[0]).addAll(groups.get(grps[1]));
groups.remove(grps[1]);
}
else if(grps[1].equals(PROTEIN_GROUP)) {
groups.get(grps[1]).addAll(groups.get(grps[0]));
groups.remove(grps[0]);
}
else {
groups.get(grps[0]).addAll(groups.get(grps[1]));
groups.remove(grps[1]);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy