org.biojava.nbio.structure.align.ClusterAltAligs Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of biojava-structure Show documentation
Show all versions of biojava-structure Show documentation
The protein structure modules of BioJava.
/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.nbio.structure.align;
import org.biojava.nbio.structure.align.pairwise.AlternativeAlignment;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
* A class that clusters alternative alignments according to their
* similarity.
*
* @author Andreas Prlic
* @since 1.5
* @version %I% %G%
*/
public class ClusterAltAligs {

    /**
     * Default similarity threshold in percent. An alignment joins a cluster
     * only when its similarity to the cluster seed is strictly greater than
     * this value.
     */
    public static final int DEFAULT_CLUSTER_CUTOFF = 95;

    /**
     * Clusters the alignments using {@link #DEFAULT_CLUSTER_CUTOFF}.
     *
     * @param aligs the alignments to cluster; each one gets its cluster
     *              number assigned through {@code setCluster(int)}
     */
    public static void cluster(AlternativeAlignment[] aligs) {
        cluster(aligs, DEFAULT_CLUSTER_CUTOFF);
    }

    /**
     * Greedily groups the alignments into clusters and stores the 1-based
     * cluster number on every clustered alignment via {@code setCluster(int)}.
     *
     * <p>Alignments are visited in array order. The first alignment that is
     * not yet part of a cluster becomes the seed of a new one; every other
     * unclustered alignment whose similarity to the seed is strictly greater
     * than {@code cutoff} joins it. Similarity is the number of matching
     * entries between the two {@code getIdx1()} arrays, expressed as a
     * percentage of the seed's array length, so the measure is not symmetric.
     *
     * @param aligs  the alignments to cluster; modified in place through
     *               {@code setCluster(int)}
     * @param cutoff similarity threshold in percent
     */
    public static void cluster(AlternativeAlignment[] aligs, int cutoff) {
        // Each cluster is a list of indices into aligs.
        List<List<Integer>> clusters = new ArrayList<>();
        // Alignments already assigned to a cluster. Membership is decided by
        // List.contains, i.e. by equals(), exactly as before.
        List<AlternativeAlignment> excludeList = new ArrayList<>();

        for (int i = 0; i < aligs.length; i++) {
            AlternativeAlignment seed = aligs[i];
            if (excludeList.contains(seed)) {
                continue;
            }

            int[] idxA = seed.getIdx1();

            List<Integer> currentCluster = new ArrayList<>();
            currentCluster.add(i);
            excludeList.add(seed);

            // Becomes true as soon as one candidate fails to join this cluster.
            boolean unclusteredLeft = false;

            for (int j = 0; j < aligs.length; j++) {
                AlternativeAlignment candidate = aligs[j];
                if (excludeList.contains(candidate)) {
                    continue;
                }

                int samepos = countSharedPositions(idxA, candidate.getIdx1());
                // Float division: an empty idxA yields NaN, which never
                // exceeds the cutoff, so the candidate simply stays unclustered.
                float perpos = (samepos / (float) idxA.length) * 100;

                if (perpos > cutoff) {
                    currentCluster.add(j);
                    excludeList.add(candidate);
                } else {
                    unclusteredLeft = true;
                }
            }

            clusters.add(currentCluster);

            // Every alignment has found a cluster; nothing left to seed.
            if (!unclusteredLeft) {
                break;
            }
        }

        // Publish the result: clusters are numbered starting at 1, in the
        // order in which they were created.
        int cpos = 0;
        for (List<Integer> cluster : clusters) {
            cpos++;
            for (int memberIndex : cluster) {
                aligs[memberIndex].setCluster(cpos);
            }
        }
    }

    /**
     * Counts the (x, y) pairs for which {@code idxA[x] == idxB[y]}. Every
     * matching pair is counted, so repeated values contribute more than once.
     */
    private static int countSharedPositions(int[] idxA, int[] idxB) {
        int samepos = 0;
        for (int x = 0; x < idxA.length; x++) {
            int p1 = idxA[x];
            for (int y = 0; y < idxB.length; y++) {
                if (p1 == idxB[y]) {
                    samepos++;
                }
            }
        }
        return samepos;
    }
}