// org.biojava.nbio.structure.cluster.SubunitClusterer Maven / Gradle / Ivy
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.cluster;
import java.util.ArrayList;
import java.util.List;
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.symmetry.core.Stoichiometry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The SubunitClusterer takes as input a collection of {@link Subunit} and
 * returns a collection of {@link SubunitCluster}, wrapped in a
 * {@link Stoichiometry}.
 * <p>
 * Clustering starts with one cluster per subunit and then, depending on the
 * supplied {@link SubunitClustererParameters}, merges clusters by sequence,
 * by structure, and optionally divides them by internal symmetry.
 *
 * @author Aleix Lafita
 * @since 5.0.0
 *
 */
public class SubunitClusterer {

    private static final Logger logger = LoggerFactory
            .getLogger(SubunitClusterer.class);

    /** Prevent instantiation **/
    private SubunitClusterer() {
    }

    /**
     * Extracts the subunits of a structure and clusters them.
     * <p>
     * The subunits are obtained from
     * {@code SubunitExtractor.extractSubunits} using the sequence length
     * thresholds stored in {@code params}, and are then passed to
     * {@link #cluster(List, SubunitClustererParameters)}.
     *
     * @param structure
     *            the structure whose subunits are to be clustered
     * @param params
     *            clustering parameters
     * @return the {@link Stoichiometry} built from the resulting clusters
     */
    public static Stoichiometry cluster(Structure structure,
            SubunitClustererParameters params) {
        List<Subunit> subunits = SubunitExtractor.extractSubunits(structure,
                params.getAbsoluteMinimumSequenceLength(),
                params.getMinimumSequenceLengthFraction(),
                params.getMinimumSequenceLength());
        return cluster(subunits, params);
    }

    /**
     * Clusters the given subunits.
     * <p>
     * Steps, in order:
     * <ol>
     * <li>one {@link SubunitCluster} is created per input subunit;</li>
     * <li>if the clusterer method is {@code SEQUENCE} or
     * {@code SEQUENCE_STRUCTURE}, clusters are merged pairwise by
     * sequence;</li>
     * <li>if the clusterer method is {@code STRUCTURE} or
     * {@code SEQUENCE_STRUCTURE}, clusters are merged pairwise by
     * structure;</li>
     * <li>if internal symmetry is enabled, every cluster is divided
     * internally and the clusters are merged by structure once more.</li>
     * </ol>
     * Failures of individual merge or division attempts are logged as
     * warnings and do not abort the clustering.
     *
     * @param subunits
     *            the subunits to cluster; an empty list yields a
     *            {@link Stoichiometry} over an empty cluster list
     * @param params
     *            clustering parameters
     * @return the {@link Stoichiometry} built from the resulting clusters
     */
    public static Stoichiometry cluster(List<Subunit> subunits,
            SubunitClustererParameters params) {

        List<SubunitCluster> clusters = new ArrayList<>();
        if (subunits.isEmpty()) {
            return new Stoichiometry(clusters);
        }

        // First generate a new cluster for each Subunit
        for (Subunit subunit : subunits) {
            clusters.add(new SubunitCluster(subunit));
        }

        SubunitClustererMethod method = params.getClustererMethod();

        if (method == SubunitClustererMethod.SEQUENCE
                || method == SubunitClustererMethod.SEQUENCE_STRUCTURE) {
            mergeBySequence(clusters, params);
        }

        if (method == SubunitClustererMethod.STRUCTURE
                || method == SubunitClustererMethod.SEQUENCE_STRUCTURE) {
            mergeByStructure(clusters, params);
        }

        if (params.isInternalSymmetry()) {
            divideByInternalSymmetry(clusters, params);
            // After internal symmetry merge again by structural similarity
            // Use case: C8 propeller with 3 chains with 3+3+2 repeats each
            mergeByStructure(clusters, params);
        }

        return new Stoichiometry(clusters);
    }

    /**
     * Merges every pair of clusters that match by sequence, removing the
     * absorbed cluster from the list in place.
     */
    private static void mergeBySequence(List<SubunitCluster> clusters,
            SubunitClustererParameters params) {

        boolean useEntityId = params.isUseEntityIdForSeqIdentityDetermination();

        for (int c1 = 0; c1 < clusters.size(); c1++) {
            // Walk c2 downwards so that removing index c2 never shifts an
            // element that still has to be visited.
            for (int c2 = clusters.size() - 1; c2 > c1; c2--) {
                SubunitCluster target = clusters.get(c1);
                SubunitCluster candidate = clusters.get(c2);
                try {
                    if (useEntityId
                            && target.mergeIdenticalByEntityId(candidate)) {
                        // We only do this if the switch for entity id
                        // comparison is on. In some cases it can save
                        // enormous amounts of time, e.g. for clustering full
                        // chains of deposited PDB entries. For instance for
                        // 6NHJ: with pure alignments it takes ~ 6 hours, with
                        // entity id comparisons it takes 2 minutes.
                        clusters.remove(c2);
                    } else if (target.mergeSequence(candidate, params)) {
                        clusters.remove(c2);
                    }
                } catch (CompoundNotFoundException e) {
                    logger.warn("Could not merge by Sequence. {}",
                            e.getMessage());
                }
            }
        }
    }

    /**
     * Merges every pair of clusters that match by structure, removing the
     * absorbed cluster from the list in place.
     */
    private static void mergeByStructure(List<SubunitCluster> clusters,
            SubunitClustererParameters params) {

        for (int c1 = 0; c1 < clusters.size(); c1++) {
            // Walk c2 downwards so that removing index c2 never shifts an
            // element that still has to be visited.
            for (int c2 = clusters.size() - 1; c2 > c1; c2--) {
                try {
                    if (clusters.get(c1).mergeStructure(clusters.get(c2),
                            params)) {
                        clusters.remove(c2);
                    }
                } catch (StructureException e) {
                    logger.warn("Could not merge by Structure. {}",
                            e.getMessage());
                }
            }
        }
    }

    /**
     * Asks every cluster to divide itself by its internal symmetry; a failure
     * on one cluster is logged and the remaining clusters are still processed.
     */
    private static void divideByInternalSymmetry(
            List<SubunitCluster> clusters, SubunitClustererParameters params) {

        for (int c = 0; c < clusters.size(); c++) {
            try {
                clusters.get(c).divideInternally(params);
            } catch (StructureException e) {
                logger.warn("Error analyzing internal symmetry. {}",
                        e.getMessage());
            }
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy