All Downloads are FREE. The search and download functionality uses the official Maven repository.

com.actelion.research.chem.Clusterer Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
/*
 * Copyright 2017 Idorsia Pharmaceuticals Ltd., Hegenheimermattweg 91, CH-4123 Allschwil, Switzerland
 *
 * This file is part of DataWarrior.
 * 
 * DataWarrior is free software: you can redistribute it and/or modify it under the terms of the
 * GNU General Public License as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 * 
 * DataWarrior is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with DataWarrior.
 * If not, see http://www.gnu.org/licenses/.
 *
 * @author Thomas Sander
 */

package com.actelion.research.chem;

import com.actelion.research.calc.DataProcessor;
import com.actelion.research.chem.descriptor.DescriptorHandler;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;

public class Clusterer extends DataProcessor {
	// --- State read by the worker threads (declared volatile). ---
	// Note: volatile applies to the array references only, not to the array elements.

	// Per-compound arrays allocated in cluster(), both sized to mNoOfCompounds:
	// the cluster number of every compound and the member count of every cluster.
	private volatile int[]			mClusterNo,mNoOfMembers;
	// Number of descriptors handed to the constructor (mDescriptor.length).
	private volatile int			mNoOfCompounds;
	// Similarity values, one row per compound; visible code indexes it as [cluster2][cluster1].
	private volatile float[][]		mSimilarityMatrix;
	// The descriptors to be clustered, as passed to the constructor.
	private volatile T[]			mDescriptor;
	// Handler passed to the constructor; each worker asks it for a thread-safe copy in initJob().
	private volatile DescriptorHandler mDescriptorHandler;
	// Shared work counter: re-created in ClusterWorker.initJob() and decremented by workers in run().
	private volatile AtomicInteger	mSMPCompoundIndex;

	// --- State not declared volatile. ---

	// NOTE(review): not referenced in the visible part of this file; presumably flags the
	// compounds chosen as cluster representatives -- confirm against the full source.
	private boolean[]				mIsRepresentative;
	// mThreadCount == 1 selects the single-threaded code path in cluster().
	// NOTE(review): mNoOfClusters is not referenced in the visible part of this file.
	private int						mNoOfClusters,mThreadCount;
	// Runs the ClusterWorker instances.
	private ExecutorService			mExecutor;
	// Workers that are initialised with a job, submitted to mExecutor and then queried for their results.
	private ClusterWorker[]		mClusterWorker;

    /**
     * Creates a clusterer for the given descriptors.
     * The constructor stores the handler and the descriptor array, takes the number of
     * compounds from the array length and allocates the outer array of the similarity
     * matrix with one (initially null) row per compound.
     * <p>
     * NOTE(review): the for-loop header below is truncated in this copy of the file
     * (everything between the loop variable and a trailing "();" is missing, including
     * any generic type arguments). Whatever the loop body and the statements after it
     * did cannot be determined from here -- restore them from the upstream source.
     *
     * @param descriptorHandler handler kept for later use; workers request thread-safe copies from it
     * @param descriptor the descriptors to cluster; its length defines the number of compounds
     */
    @SuppressWarnings("unchecked")
    public Clusterer(DescriptorHandler descriptorHandler, T[] descriptor) {
		mDescriptorHandler = descriptorHandler;
		mDescriptor = descriptor;
		mNoOfCompounds = mDescriptor.length;

		// Only the outer array is allocated here; the rows are still null at this point.
		mSimilarityMatrix = new float[mNoOfCompounds][];
		// NOTE(review): truncated line -- loop condition, loop body and following statements are missing.
		for (int i=1; i();
			}
		}

	/**
	 * Defines the criteria for stopping the clustering and runs the clustering.
	 * At least one of the two limits must be in the applicable valid range.
	 * <p>
	 * Visible behaviour: the similarity matrix is calculated first; if cancellation is
	 * detected afterwards, progress is stopped with the message "clustering cancelled"
	 * and the method returns. In the merge loop the pair of clusters with the highest
	 * similarity is determined, either directly (one thread) or by taking the best result
	 * reported by the workers; the loop is left as soon as that similarity is below
	 * similarityLimit.
	 * <p>
	 * NOTE(review): this copy of the file is damaged. Several loop headers are truncated
	 * and the code below runs on into fragments of other methods whose headers are
	 * missing (representative selection, worker start-up). The affected lines are marked;
	 * restore them from the upstream source before relying on this method.
	 *
	 * @param similarityLimit >0...<=1.0 or 0.0 if not applied
	 * @param clusterCountLimit >=2...objectCount or -1 if not applied
	 */
	public void cluster(double similarityLimit, int clusterCountLimit) {
		// presumably fills mSimilarityMatrix; the meaning of the boolean argument is not visible here
		calculateSimilarityMatrix(false);
		// Give up if cancellation was requested in the meantime.
		if (threadMustDie()) {
		    stopProgress("clustering cancelled");
			return;
			}

		mNoOfMembers = new int[mNoOfCompounds];	// initialize no of cluster members
		mClusterNo = new int[mNoOfCompounds];	// initialize compound's cluster numbers
		// NOTE(review): truncated line -- the text between the loop variable and
		// "clusterCountLimit" is missing; the brace opened here encloses the merge loop body.
		for (int i=0; i clusterCountLimit) {
			float maxSimValue = 0;		// find highest similarity level
			int maxCluster1 = -1;
			int maxCluster2 = -1;
			// With a single thread the search for the most similar pair is done right here.
			if (mThreadCount == 1) {
				// NOTE(review): truncated line -- loop condition and the start of the guard are missing.
				for (int cluster2=1; cluster2 0) {
						// NOTE(review): truncated line -- the inner loop, the end of the single-threaded
						// branch and the start of the multi-threaded branch are missing. What remains
						// iterates over all workers and keeps the best pair any of them reported.
						for (int cluster1=0; cluster1 worker:mClusterWorker) {
					if (maxSimValue < worker.getMaxSimilarity()) {
						maxSimValue = worker.getMaxSimilarity();
						maxCluster1 = worker.getCluster1();
						maxCluster2 = worker.getCluster2();
						}
					}
				}

			// Stop merging once the best remaining pair is less similar than the limit.
			if (maxSimValue < similarityLimit)
				break;

			// NOTE(review): truncated line -- the rest of the merge step, the end of cluster() and
			// the header of the following method are missing. The surviving statements record a
			// similarity value per cluster in lowSim (indexed by mClusterNo[cluster1]) and add every
			// pairwise similarity to the sums of both compounds involved.
			for (int i=0; i mSimilarityMatrix[cluster2][cluster1])
						lowSim[mClusterNo[cluster1]] = mSimilarityMatrix[cluster2][cluster1];
					simSum[cluster1] += mSimilarityMatrix[cluster2][cluster1];
					simSum[cluster2] += mSimilarityMatrix[cluster2][cluster1];
					}
				}
			}

		int[] representative = new int[mNoOfCompounds];
		// NOTE(review): truncated line -- the remainder of the representative selection and the
		// header of the worker start-up method are missing. What remains hands the job type and
		// the latch to every worker and submits it to the executor.
		for (int i=0; i worker:mClusterWorker) {
			worker.initJob(whatToDo, doneSignal);
			mExecutor.execute(worker);
			}
		// Block until the latch has been counted down to zero.
		try {
			doneSignal.await();
			}
		// NOTE(review): the interruption is silently dropped and the interrupt flag is not restored.
		catch (InterruptedException e) {}
		}


	private class ClusterWorker implements Runnable {
		// Job selectors: one of these is passed to initJob() and dispatched on in run().
		private static final int CALC_ALL_SIMILARITIES = 1;
		private static final int CALC_CLUSTER_SIMILARITIES = 2;
		private static final int FIND_MAXIMUM_SIMILARITY = 3;

		// Latch handed over in initJob(); the code that starts the workers waits on the same latch.
		private CountDownLatch mDoneSignal;
		// mWhatToDo holds the current job selector.
		// NOTE(review): mCluster1/mCluster2 presumably back getCluster1()/getCluster2(), which the
		// enclosing class calls on each worker; the accessors are not visible here -- confirm.
		private int mWhatToDo,mCluster1,mCluster2;
		// NOTE(review): presumably backs getMaxSimilarity(); the accessor is not visible here -- confirm.
		private float mMaxSimilarity;
		// Result of mDescriptorHandler.getThreadSafeCopy(), fetched anew on every initJob() call.
		private DescriptorHandler mThreadSafeDH;

		/**
		 * Prepares this worker for its next run.
		 * Remembers the job type and the completion latch, replaces the work counter of
		 * the enclosing Clusterer with a fresh one starting at the compound count, and
		 * fetches a thread-safe copy of the enclosing instance's descriptor handler.
		 * <p>
		 * NOTE(review): the work counter is a field of the enclosing instance, not of this
		 * worker, so every call replaces the counter seen by all workers -- confirm that
		 * no worker of the same Clusterer is already running when this is called.
		 *
		 * @param whatToDo job selector, one of the CALC_... / FIND_... constants of this class
		 * @param doneSignal latch associated with this job
		 */
		public void initJob(int whatToDo, CountDownLatch doneSignal) {
			// Worker-local job description.
			this.mWhatToDo = whatToDo;
			this.mDoneSignal = doneSignal;

			// Shared state of the enclosing Clusterer: restart the countdown at the compound count.
			Clusterer.this.mSMPCompoundIndex = new AtomicInteger(Clusterer.this.mNoOfCompounds);

			// Private handler copy for use by this worker.
			this.mThreadSafeDH = Clusterer.this.mDescriptorHandler.getThreadSafeCopy();
			}

		public void run() {
			switch (mWhatToDo) {
			case CALC_ALL_SIMILARITIES:
				int compound2 = mSMPCompoundIndex.decrementAndGet();
				while (compound2 >= 1 && !threadMustDie()) {
					for (int compound1=0; compound1= 1 && !threadMustDie()) {
					for (int compound1=0; compound1= 1 && !threadMustDie()) {
					if (mNoOfMembers[cluster2] > 0) {
						for (int cluster1=0; cluster1