// All downloads are free. Search and download functionality uses the official Maven repository.
//
// com.actelion.research.chem.io.pdb.parser.StructureAssembler (Maven / Gradle / Ivy)
//
// There is a newer version: 2024.12.1
// Show newest version
package com.actelion.research.chem.io.pdb.parser;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import com.actelion.research.chem.Coordinates;
import com.actelion.research.chem.Molecule;
import com.actelion.research.chem.Molecule3D;
import com.actelion.research.chem.io.pdb.converter.AminoAcidsLabeledContainer;
import com.actelion.research.chem.io.pdb.converter.BondsCalculator;
import com.actelion.research.chem.Molecule3D;

/**
 * @author JW
 * December 2019
 * The StructureAssembler class takes a list of AtomRecords and constructs the 3D-Molecules. Protein atoms are grouped 
 * together, HETATM records are grouped according to their connectivity into non-bonded fragments, HETATM molecules that are
 * connected to the protein are merged with it. 
 * 
 */


public class StructureAssembler {
	
	public static final String PROTEIN_GROUP = "protein";
	public static final String SOLVENT_GROUP = "water";
	public static final String LIGAND_GROUP = "ligand";
	
	private Map> groups;
	private List bondList;
	private List atomRecords;
	private List hetAtomRecords;
	Map> mols;

	
	public StructureAssembler(List bondList, List atomRecords, List hetAtomRecords) {
		this.bondList = bondList;
		this.atomRecords = atomRecords;
		this.hetAtomRecords = hetAtomRecords;
		groups = new HashMap>();
		mols = new HashMap>();
		
	}
	
	
	public Map> assemble() {
		group();
		List protMols = new ArrayList<>();
		mols.putIfAbsent(SOLVENT_GROUP, new ArrayList());
		mols.putIfAbsent(LIGAND_GROUP, new ArrayList());
		protMols.add(buildProtein());
		mols.put(PROTEIN_GROUP, protMols);
		buildHetResidues();
		mols.forEach((k,v) -> v.forEach(e -> coupleBonds(e)));
		return mols;
		
	}
	
	private void group() {
		groups.put(PROTEIN_GROUP, atomRecords);
		
		hetAtomRecords.forEach(e -> { 
			String s = e.getString();
			if(groups.get(s)!=null) {
				List li = groups.get(s);
				li.add(e);
			}
			else { 
				List li = new ArrayList();
				li.add(e);
				groups.put(s, li);
			}
		});
		for(int[] bond : bondList) {
			try {
				 processBond(bond);
			}
			catch(Exception e) {
				continue;
			}
			
		}
		
			
	
	}
	
	
	private Molecule3D buildProtein() {
		ProteinSynthesizer proteinSynthesizer = new ProteinSynthesizer();
		Map> residues_;
		List proteinRecords = groups.get(PROTEIN_GROUP);
		residues_ = proteinRecords.stream().collect(Collectors.groupingBy(AtomRecord::getString));
		List residues = residues_.values().stream().map(v -> new Residue(v)).collect(Collectors.toList());
		residues.sort((c1,c2) -> {
				if(!c1.getChainID().equals(c2.getChainID())) //different chains
					return c1.getChainID().compareTo(c2.getChainID());
				else { //same chain
					if(c1.getResnum()!=c2.getResnum())
						return Integer.compare(c1.getResnum(), c2.getResnum());
					else { //same chain, same residue number -> check insertion code
						return c1.getInsertionCode().compareTo(c2.getInsertionCode());
					}
				}
			});
		
		List protMols = new ArrayList();
		for(Residue residue : residues) {
				Molecule3D fragment = residue.getMolecule();
				if(fragment.getAtomAmino(0).trim().equals("ACT") || fragment.getAtomAmino(0).trim().equals("LIG")) {
					mols.get(LIGAND_GROUP).add(fragment);
					continue;
				}
				else if(fragment.getAtomAmino(0).trim().equals("HOH")) {
					mols.get(SOLVENT_GROUP).add(fragment);
					continue;
				}
				boolean coupled = proteinSynthesizer.addResidue(fragment);
				if(coupled) { 
					if(residue.isTerminal()) {
						protMols.add(proteinSynthesizer.retrieveProtein());
						proteinSynthesizer = new ProteinSynthesizer();
					}
					else 
						continue;
				}
				else { //coupling failed
					protMols.add(proteinSynthesizer.retrieveProtein());
					proteinSynthesizer = new ProteinSynthesizer();
					proteinSynthesizer.addResidue(fragment);
						
				}
			}
		Molecule3D nextMol = proteinSynthesizer.retrieveProtein();
		if(nextMol!=null && !protMols.contains(nextMol))
				protMols.add(nextMol);
		Molecule3D protein = protMols.stream().reduce((mol1,mol2) ->{
			mol1.addMolecule(mol2);
			return mol1;})
				.get();
		protein.ensureHelperArrays(Molecule.cHelperCIP);
		return protein;
		}
	
	private void buildHetResidues() {
		for(String group : groups.keySet()) {
			if(group.equals(PROTEIN_GROUP))
				continue;
			else {
				List records = groups.get(group);
				Residue atomGroup = new Residue(records);
				Molecule3D fragment = atomGroup.getMolecule();
				if(fragment.getAtomAmino(0).equals("HOH")) {
					mols.putIfAbsent(SOLVENT_GROUP, new ArrayList());
					mols.get(SOLVENT_GROUP).add(fragment);
				}
				else {
					mols.putIfAbsent(LIGAND_GROUP, new ArrayList());
					mols.get(LIGAND_GROUP).add(fragment);
				}
					
				
				
			}
		}

	}
				
	
		
	
	private void coupleBonds(Molecule3D mol) {
		for(int[] bond:bondList) {
			int [] bondedAtoms = {-1,-1};
			IntStream.range(0,mol.getAllAtoms()).forEach( e -> {
				int pdbAtomID = mol.getAtomSequence(e);
				if(pdbAtomID==bond[0])
					bondedAtoms[0]=e;
				else if(pdbAtomID==bond[1])
					bondedAtoms[1]=e;
			});
			if(bondedAtoms[0]!=-1 && bondedAtoms[1]!=-1)
				mol.addBond(bondedAtoms[0], bondedAtoms[1]);		
		}
	}
	
	/**
	 * merge atom groups that are connected by a bond
	 * @param bond
	 */
	private void processBond(int[] bond) {
		int atom1 = bond[0];
		int atom2 = bond[1];
		String[] grps = new String[2];
		groups.forEach((k,v) -> {
			List atoms = v.stream().map(e -> e.getSerialId()).collect(Collectors.toList());
			if(atoms.contains(atom1))
				grps[0] = k;
			if(atoms.contains(atom2))
				grps[1] = k;
			});

		if(grps[0].equals(grps[1]))
			return;
		else {
			if(grps[0].equals(PROTEIN_GROUP)) {
				groups.get(grps[0]).addAll(groups.get(grps[1]));
				groups.remove(grps[1]);
				}
			else if(grps[1].equals(PROTEIN_GROUP)) {
				groups.get(grps[1]).addAll(groups.get(grps[0]));
				groups.remove(grps[0]);
				}
			else {
				groups.get(grps[0]).addAll(groups.get(grps[1]));
				groups.remove(grps[1]);
				}
		}
	}
	


	
	

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy