/*
 * All downloads are FREE. Search and download functionality uses the official Maven repository.
 *
 * com.actelion.research.chem.io.pdb.parser.StructureAssembler Maven / Gradle / Ivy
 *
 * There is a newer version: 2024.11.2
 * Show newest version
 */
package com.actelion.research.chem.io.pdb.parser;

import com.actelion.research.chem.Molecule3D;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
 * The StructureAssembler class takes lists of AtomRecords and constructs the 3D molecules.
 * Protein atoms are grouped together; HETATM records are grouped according to their
 * connectivity into non-bonded fragments; HETATM molecules that are connected to the
 * protein are merged with it.
 *
 * @author JW, December 2019
 */


public class StructureAssembler {
	
	public static final String PROTEIN_GROUP = "protein";
	public static final String SOLVENT_GROUP = "water";
	public static final String LIGAND_GROUP = "ligand";
	
	private Map> groups;
	private final List bondList;
	private final List protAtomRecords;
	private final List hetAtomRecords;
	private boolean detachCovalentLigands;
	private Map> mols;


	public StructureAssembler(List bondList, List protAtomRecords, List hetAtomRecords) {
		this.bondList = bondList;
		this.protAtomRecords = protAtomRecords;
		this.hetAtomRecords = hetAtomRecords;
	}

	public void setDetachCovalentLigands(boolean b) {
		detachCovalentLigands = b;
	}

	public Map> assemble() {
		groups = new HashMap<>();
		mols = new HashMap<>();

		group();
		List protMols = new ArrayList<>();
		mols.putIfAbsent(SOLVENT_GROUP, new ArrayList<>());
		mols.putIfAbsent(LIGAND_GROUP, new ArrayList<>());
		protMols.add(buildProtein());
		mols.put(PROTEIN_GROUP, protMols);
		buildHetResidues();
		mols.forEach((k,v) -> v.forEach(e -> coupleBonds(e)));
		return mols;
	}
	
	private void group() {
		groups.put(PROTEIN_GROUP, new ArrayList<>(protAtomRecords));
		
		hetAtomRecords.forEach(e -> {
			List li = groups.computeIfAbsent(e.getString(), k -> new ArrayList<>());
			li.add(e);
		});

		for(int[] bond : bondList)
			processBond(bond);
	}
	
	private Molecule3D buildProtein() {
		ProteinSynthesizer proteinSynthesizer = new ProteinSynthesizer();
		Map> residues_;
		List proteinRecords = groups.get(PROTEIN_GROUP);
		residues_ = proteinRecords.stream().collect(Collectors.groupingBy(AtomRecord::getString));
		List residues = residues_.values().stream().map(v -> new Residue(v)).collect(Collectors.toList());
		residues.sort((c1,c2) -> {
				if(!c1.getChainID().equals(c2.getChainID())) //different chains
					return c1.getChainID().compareTo(c2.getChainID());
				else { //same chain
					if(c1.getResnum()!=c2.getResnum())
						return Integer.compare(c1.getResnum(), c2.getResnum());
					else { //same chain, same residue number -> check insertion code
						return c1.getInsertionCode().compareTo(c2.getInsertionCode());
					}
				}
			});
		
		List protMols = new ArrayList();
		for(Residue residue : residues) {
				Molecule3D fragment = residue.getMolecule();
				if(fragment.getAtomAmino(0).trim().equals("ACT") || fragment.getAtomAmino(0).trim().equals("LIG")) {
					mols.get(LIGAND_GROUP).add(fragment);
					continue;
				}
				else if(fragment.getAtomAmino(0).trim().equals("HOH")) {
					mols.get(SOLVENT_GROUP).add(fragment);
					continue;
				}
				boolean coupled = proteinSynthesizer.addResidue(fragment);
				if(coupled) { 
					if(residue.isTerminal()) {
						protMols.add(proteinSynthesizer.retrieveProtein());
						proteinSynthesizer = new ProteinSynthesizer();
					}
				}
				else { //coupling failed
					protMols.add(proteinSynthesizer.retrieveProtein());
					proteinSynthesizer = new ProteinSynthesizer();
					proteinSynthesizer.addResidue(fragment);
				}
			}
		Molecule3D nextMol = proteinSynthesizer.retrieveProtein();
		if(nextMol!=null && !protMols.contains(nextMol))
				protMols.add(nextMol);
		Molecule3D protein = protMols.stream().reduce((mol1,mol2) ->{
			mol1.addMolecule(mol2);
			return mol1;})
				.get();
		return protein;
		}
	
	private void buildHetResidues() {
		for(String group : groups.keySet()) {
			if(!group.equals(PROTEIN_GROUP)) {
				List records = groups.get(group);
				Residue atomGroup = new Residue(records);
				Molecule3D fragment = atomGroup.getMolecule();
				if(fragment.getAtomAmino(0).equals("HOH")) {
					mols.putIfAbsent(SOLVENT_GROUP, new ArrayList());
					mols.get(SOLVENT_GROUP).add(fragment);
				}
				else {
					mols.putIfAbsent(LIGAND_GROUP, new ArrayList());
					mols.get(LIGAND_GROUP).add(fragment);
				}
			}
		}
	}
	
	private void coupleBonds(Molecule3D mol) {
		for(int[] bond:bondList) {
			int [] bondedAtoms = {-1,-1};
			IntStream.range(0,mol.getAllAtoms()).forEach( e -> {
				int pdbAtomID = mol.getAtomSequence(e);
				if(pdbAtomID==bond[0])
					bondedAtoms[0]=e;
				else if(pdbAtomID==bond[1])
					bondedAtoms[1]=e;
			});
			if(bondedAtoms[0]!=-1 && bondedAtoms[1]!=-1)
				mol.addBond(bondedAtoms[0], bondedAtoms[1]);		
		}
	}
	
	/**
	 * merge atom groups that are connected by a bond
	 * @param bond
	 */
	private void processBond(int[] bond) {
		int atom1 = bond[0];
		int atom2 = bond[1];
		String[] grps = new String[2];
		groups.forEach((k,v) -> {
			List atoms = v.stream().map(e -> e.getSerialId()).collect(Collectors.toList());
			if(atoms.contains(atom1))
				grps[0] = k;
			if(atoms.contains(atom2))
				grps[1] = k;
			});

		if(!grps[0].equals(grps[1])) {
			if(grps[0].equals(PROTEIN_GROUP)) {
				if (!detachCovalentLigands) {
					groups.get(grps[0]).addAll(groups.get(grps[1]));
					groups.remove(grps[1]);
				}
			}
			else if(grps[1].equals(PROTEIN_GROUP)) {
				if (!detachCovalentLigands) {
					groups.get(grps[1]).addAll(groups.get(grps[0]));
					groups.remove(grps[0]);
				}
			}
			else {
				groups.get(grps[0]).addAll(groups.get(grps[1]));
				groups.remove(grps[1]);
			}
		}
	}
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy