All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.actelion.research.chem.Clusterer Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
/*
 * Copyright (c) 1997 - 2016
 * Actelion Pharmaceuticals Ltd.
 * Gewerbestrasse 16
 * CH-4123 Allschwil, Switzerland
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the copyright holder nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @author Thomas Sander
 */

package com.actelion.research.chem;

import com.actelion.research.calc.DataProcessor;
import com.actelion.research.chem.descriptor.DescriptorHandler;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Clusters a set of compounds based on the pairwise similarities of their
 * descriptors. The similarities are kept in {@code mSimilarityMatrix}; the
 * work can be distributed over several {@code ClusterWorker} instances that
 * are run through an {@code ExecutorService}.
 *
 * NOTE(review): the type {@code T} is used below but no type parameter is
 * declared in the visible class header. It looks as if generic type
 * arguments were lost from this copy of the file - confirm against the
 * original source before compiling.
 */
public class Clusterer extends DataProcessor {
	// The following fields are volatile, presumably because they are also
	// accessed from the ClusterWorker threads - TODO confirm.

	// mClusterNo: cluster number of every compound;
	// mNoOfMembers: member count of every cluster (a count > 0 is tested by the workers)
	private volatile int[]			mClusterNo,mNoOfMembers;
	// number of compounds, taken from the length of the descriptor array
	private volatile int			mNoOfCompounds;
	// pairwise similarities, addressed as [cluster2][cluster1];
	// only the outer array is allocated in the visible constructor code
	private volatile float[][]		mSimilarityMatrix;
	// descriptors as handed to the constructor
	private volatile T[]			mDescriptor;
	// handler from which every worker takes its own thread-safe copy
	private volatile DescriptorHandler mDescriptorHandler;
	// shared countdown index from which the workers fetch their next row via decrementAndGet()
	private volatile AtomicInteger	mSMPCompoundIndex;

	// usage of mIsRepresentative and mNoOfClusters is not visible in this part of the file
	private boolean[]				mIsRepresentative;
	// mThreadCount == 1 selects the single-threaded code path in cluster()
	private int						mNoOfClusters,mThreadCount;
	// executes the ClusterWorker jobs
	private ExecutorService			mExecutor;
	// workers that are initialized with a job and then handed to mExecutor
	private ClusterWorker[]		mClusterWorker;

    /**
     * Creates a clusterer for the given descriptors.
     * Stores the handler and the descriptor array, derives the number of
     * compounds from the array length and allocates the outer dimension of
     * the similarity matrix.
     *
     * NOTE(review): the loop header below is truncated and the remainder of
     * the constructor is missing from this copy of the file; whatever
     * originally followed (presumably row allocation and worker/executor
     * setup) cannot be verified from here.
     *
     * @param descriptorHandler handler from which each worker later takes a thread-safe copy
     * @param descriptor descriptor array; its length defines the number of compounds
     */
    @SuppressWarnings("unchecked")
    public Clusterer(DescriptorHandler descriptorHandler, T[] descriptor) {
		mDescriptorHandler = descriptorHandler;
		mDescriptor = descriptor;
		mNoOfCompounds = mDescriptor.length;

		// only the outer array is created here; the rows are not allocated in the visible code
		mSimilarityMatrix = new float[mNoOfCompounds][];
		// NOTE(review): corrupted line - text between '<' and '>' appears to have been lost
		for (int i=1; i();
			}
		}

	/**
	 * Performs the clustering and defines the criteria for stopping it.
	 * The similarity matrix is calculated first; if cancellation is detected
	 * afterwards, the method reports "clustering cancelled" and returns.
	 * In every round the pair of clusters with the highest similarity is
	 * determined; the loop is left as soon as that similarity is below
	 * <code>similarityLimit</code>.
	 * At least one of the two limits must be in the applicable valid range.
	 *
	 * NOTE(review): several lines of this method are corrupted in this copy
	 * of the file (text between '<' and '>' appears to have been lost), so
	 * loop headers are fused and the end of the method runs into code that
	 * seems to belong to a different method. Restore from the original
	 * source before relying on the details below.
	 *
	 * @param similarityLimit >0...<=1.0 or 0.0 if not applied
	 * @param clusterCountLimit >=2...objectCount or -1 if not applied
	 */
	public void cluster(double similarityLimit, int clusterCountLimit) {
		calculateSimilarityMatrix(false);
		// give up early if the job was cancelled while the matrix was calculated
		if (threadMustDie()) {
		    stopProgress("clustering cancelled");
			return;
			}

		mNoOfMembers = new int[mNoOfCompounds];	// initialize no of cluster members
		mClusterNo = new int[mNoOfCompounds];	// initialize compound's cluster numbers
		// NOTE(review): corrupted line - an initialization loop and the header of
		// the main loop (bounded by clusterCountLimit) appear to be fused here
		for (int i=0; i clusterCountLimit) {
			float maxSimValue = 0;		// find highest similarity level
			int maxCluster1 = -1;
			int maxCluster2 = -1;
			// one thread: search the matrix directly; otherwise take the best result of all workers
			if (mThreadCount == 1) {
				// NOTE(review): corrupted lines - the single-threaded search and the
				// start of the multi-threaded branch are fused into the next two lines
				for (int cluster2=1; cluster2 0) {
						for (int cluster1=0; cluster1 worker:mClusterWorker) {
					// keep the cluster pair of the worker that reports the highest similarity
					if (maxSimValue < worker.getMaxSimilarity()) {
						maxSimValue = worker.getMaxSimilarity();
						maxCluster1 = worker.getCluster1();
						maxCluster2 = worker.getCluster2();
						}
					}
				}

			// stop criterion: the most similar pair is less similar than the limit
			if (maxSimValue < similarityLimit)
				break;

			// NOTE(review): corrupted line - the code that follows uses lowSim and
			// simSum, which are not declared in the visible code; it presumably
			// belongs to a different method whose header was lost
			for (int i=0; i mSimilarityMatrix[cluster2][cluster1])
						lowSim[mClusterNo[cluster1]] = mSimilarityMatrix[cluster2][cluster1];
					// accumulate the pair's similarity for both partners
					simSum[cluster1] += mSimilarityMatrix[cluster2][cluster1];
					simSum[cluster2] += mSimilarityMatrix[cluster2][cluster1];
					}
				}
			}

		// NOTE(review): this fragment is corrupted - the code following the
		// 'representative' array and the header of a method that dispatches
		// jobs to the workers (using whatToDo and doneSignal, neither declared
		// in the visible code) appear to be fused on the second line below.
		int[] representative = new int[mNoOfCompounds];
		for (int i=0; i worker:mClusterWorker) {
			// hand the job type and the shared latch to the worker, then schedule it
			worker.initJob(whatToDo, doneSignal);
			mExecutor.execute(worker);
			}
		// block until the latch is released
		try {
			doneSignal.await();
			}
		// NOTE(review): the interruption is silently discarded here; consider
		// restoring the flag with Thread.currentThread().interrupt() so that
		// callers can still observe it.
		catch (InterruptedException e) {}
		}


	/**
	 * Runnable that performs one kind of job, selected through
	 * {@link #initJob(int, CountDownLatch)}, when executed. Being an inner
	 * class, it works directly on the fields of the enclosing Clusterer.
	 */
	private class ClusterWorker implements Runnable {
		// job selectors accepted by initJob() and evaluated in run()
		private static final int CALC_ALL_SIMILARITIES = 1;
		private static final int CALC_CLUSTER_SIMILARITIES = 2;
		private static final int FIND_MAXIMUM_SIMILARITY = 3;

		// latch received with the current job
		private CountDownLatch mDoneSignal;
		// mWhatToDo: selector of the current job;
		// mCluster1/mCluster2/mMaxSimilarity: presumably the result of a
		// FIND_MAXIMUM_SIMILARITY job as returned by the worker's getters - TODO confirm
		private int mWhatToDo,mCluster1,mCluster2;
		private float mMaxSimilarity;
		// this worker's own thread-safe copy of the descriptor handler
		private DescriptorHandler mThreadSafeDH;

		/**
		 * Prepares this worker for its next job. Must be called before the
		 * worker is handed to the executor.
		 * Besides storing the job parameters, this resets the index counter
		 * shared by all workers to the number of compounds and fetches a
		 * thread-safe copy of the descriptor handler for this worker.
		 *
		 * NOTE(review): the counter is shared by all workers but replaced on
		 * every call. If the caller initializes and starts the workers one
		 * after another, a later call may replace the counter while an earlier
		 * worker is already consuming it - presumably causing rows to be
		 * processed more than once. Consider resetting it once per job in the
		 * dispatching code instead; verify against the caller.
		 *
		 * @param whatToDo one of the job selector constants of this class
		 * @param doneSignal latch associated with the job
		 */
		public void initJob(int whatToDo, CountDownLatch doneSignal) {
			this.mDoneSignal = doneSignal;
			this.mWhatToDo = whatToDo;

			// shared state of the enclosing Clusterer, not of this worker
			final AtomicInteger restartedIndex = new AtomicInteger(Clusterer.this.mNoOfCompounds);
			Clusterer.this.mSMPCompoundIndex = restartedIndex;

			this.mThreadSafeDH = Clusterer.this.mDescriptorHandler.getThreadSafeCopy();
			}

		public void run() {
			switch (mWhatToDo) {
			case CALC_ALL_SIMILARITIES:
				int compound2 = mSMPCompoundIndex.decrementAndGet();
				while (compound2 >= 1 && !threadMustDie()) {
					for (int compound1=0; compound1= 1 && !threadMustDie()) {
					for (int compound1=0; compound1= 1 && !threadMustDie()) {
					if (mNoOfMembers[cluster2] > 0) {
						for (int cluster1=0; cluster1