All downloads are free. The search and download functionality uses the official Maven repository.

com.actelion.research.calc.SimilarityMulticore Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
package com.actelion.research.calc;

import com.actelion.research.chem.descriptor.ISimilarityCalculator;
import com.actelion.research.util.Pipeline;
import com.actelion.research.util.datamodel.IIdentifiedObject;
import com.actelion.research.util.datamodel.IdentifiedObject;
import com.actelion.research.util.datamodel.ScorePoint;

import java.awt.*;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;


/**
 * 
 * SimilarityMulticore
 * T is the descriptor object class
 * @author Modest von Korff
 * @version 1.0
 * 10 Dec 2010 MvK: Start implementation
 * Nov 2011 MvK: Generalization via interface definitions.
 * 24 Apr 2013 MvK: Some improvements on the thread handling.
 * 04 Dec 2014 MvK: Some improvements on the thread handling.
 * 09.05.2016 MvK: Calculates now the similarity matrix.
 * 26.04.2017 MvK: Using ExecutorServices now.
 * 26.11.2018 code changed. Similarity for identically labeled descriptors will now be calculated.
 */
public class SimilarityMulticore {
	
	// Upper limit for the default number of worker tasks chosen by the one-argument constructor.
	private static final int MAX_KERNELS = 80;
	
	// private static boolean VERBOSE = false;
	
	// private static final double DEFAULT_SIMILARITY = 1.0;
	
	// NOTE(review): not referenced in the visible part of this file; presumably used as a
	// lower similarity threshold further down — TODO confirm.
	private static final double DEFAULT_MINIMUM_SIMILARITY = 0.01;
	
	// Value stored in 'sleep' at the end of a run; also the time in milliseconds a worker
	// sleeps when it polled no index pair from the queue.
	private static final long SLEEP_SHORT = 10;
	
	// Value stored in 'sleep' at the start of a run.
	private static final long SLEEP_ULTRA_SHORT = 1;
	
	// NOTE(review): "List>" below is not valid Java; the generic type arguments appear to have
	// been lost in this copy of the file — TODO restore from the original source.

	// Calculator handed to every worker task created in run.
	private ISimilarityCalculator similarityCalculator;
	// First input list of the current run; its size gives the number of matrix rows.
	private List> liDescriptor1;
	// Second input list of the current run; its size gives the number of matrix columns.
	private List> liDescriptor2;
	// Number of worker tasks and size of the thread pool created per run.
	private int kernels;
	// Set to SLEEP_ULTRA_SHORT when a run starts and to SLEEP_SHORT when it ends.
	private AtomicLong sleep;
	// Queue of index pairs (java.awt.Point: x = index in list 1, y = index in list 2) still to be processed.
	private Pipeline queueIndices;
	// Result queue read by hasMoreResults() and getNextResult(); cleared at the start of each run.
	private ConcurrentLinkedQueue queueScore;
	// Worker tasks of the latest run; stays null until run is called for the first time.
	private List liRun;
	// Number of similarities to calculate as computed by the fillCalculationIndexQueue... methods.
	private int similarities2Calculate;
	// Calculations per second of the latest run; -1 if the run took less than one second.
	private AtomicLong calculationsPerSecond;
	// Similarity matrix allocated at the start of each run.
	private Matrix maSimilarity;
	// If true, progress and statistics are printed to System.out.
	private boolean verbose;

	/**
	 * Creates a calculator whose number of worker tasks is derived from the machine:
	 * one fewer than the number of available processors, but at least 1 and at most MAX_KERNELS.
	 *
	 * @param similarityCalculator calculator that is handed on to the two-argument constructor.
	 */
	public SimilarityMulticore(ISimilarityCalculator similarityCalculator) {
		// availableProcessors()-1 evaluates to 0 on a single-core machine. A thread pool of size 0
		// is rejected by Executors.newFixedThreadPool with an IllegalArgumentException,
		// so at least one worker is always requested.
		this(similarityCalculator, Math.max(1, Math.min(Runtime.getRuntime().availableProcessors()-1, MAX_KERNELS)));
	}
	
	/**
	 * Creates a calculator with an explicit number of worker tasks.
	 * The index queue, the result queue and the counters are created empty; verbose output is off.
	 *
	 * @param similarityCalculator calculator that is handed to the worker tasks created in run.
	 * @param kernels number of worker tasks and size of the thread pool created per run.
	 */
	public SimilarityMulticore(ISimilarityCalculator similarityCalculator, int kernels) {
		verbose = false;
		calculationsPerSecond = new AtomicLong();
		sleep = new AtomicLong();
		queueScore = new ConcurrentLinkedQueue();
		queueIndices = new Pipeline();
		this.kernels = kernels;
		this.similarityCalculator = similarityCalculator;
	}

	/**
	 * Switches on the output of progress and statistics to System.out.
	 */
	public void setVerbose() {
		verbose = true;
	}

	/**
	 * Compares a single descriptor with all entries of a list.
	 * The descriptor is wrapped into a one-element list and handed to the two-list run method,
	 * so the resulting similarity matrix has one row.
	 * NOTE(review): "List>" is not valid Java; the generic type arguments appear to have been
	 * lost in this copy of the file — TODO restore from the original source.
	 *
	 * @param descriptor single descriptor, becomes the only entry of the first list.
	 * @param liDescriptor2 list with descriptors the single descriptor is compared with.
	 */
	public void run(IdentifiedObject descriptor, List> liDescriptor2) {
		List> liOneSample = new ArrayList>();
		liOneSample.add(descriptor);
		run(liOneSample, liDescriptor2);
	}

	/**
	 * Compares the entries of one list with each other.
	 * The list is used as first and as second list and the run is started in single list mode.
	 * NOTE(review): "List>" is not valid Java; the generic type arguments appear to have been
	 * lost in this copy of the file — TODO restore from the original source.
	 *
	 * @param liDescriptor list with descriptors.
	 */
	public void run(List> liDescriptor) {
		run(liDescriptor, liDescriptor, true);
	}

	/**
	 * Compares every entry of the first list with every entry of the second list.
	 * Delegates to the private run method with single list mode switched off.
	 * NOTE(review): "List>" is not valid Java; the generic type arguments appear to have been
	 * lost in this copy of the file — TODO restore from the original source.
	 *
	 * @param liDescriptor1 list with descriptors, defines the rows of the similarity matrix.
	 * @param liDescriptor2 list with descriptors, defines the columns of the similarity matrix.
	 */
	public void run(List> liDescriptor1, List> liDescriptor2) {
		run(liDescriptor1, liDescriptor2, false);
	}

	/**
	 * Compares liDescriptor1 with liDescriptor2 via the ISimilarityCalculator given in the constructor.
	 * The method fills the index queue, starts 'kernels' worker tasks on a fixed thread pool and
	 * blocks until all of them have terminated.
	 * The result is a similarity matrix with rows = liDescriptor1.size() and cols = liDescriptor2.size().
	 * If the calling thread is interrupted while waiting, the method still waits for the workers
	 * and restores the interrupt status of the thread afterwards.
	 * NOTE(review): "List>" in the signature is not valid Java; the generic type arguments appear
	 * to have been lost in this copy of the file — TODO restore from the original source.
	 *
	 * @param liDescriptor1 list with descriptors,
	 * @param liDescriptor2 list with descriptors,
	 * @param singleList if true the index queue is filled by fillCalculationIndexQueueSingleList(),
	 * otherwise by fillCalculationIndexQueueTwoLists().
	 */
	private void run(List> liDescriptor1, List> liDescriptor2, boolean singleList) {

		// -1 marks 'not available'; the value is only replaced if the run takes at least one second.
		calculationsPerSecond.set(-1);

		long t1 = System.currentTimeMillis();
		
		sleep.set(SLEEP_ULTRA_SHORT);
		
		if(verbose) {
			System.out.println("SimilarityMulticore start.");
			System.out.println("SimilarityMulticore kernels\t" + kernels);
		}

		this.liDescriptor1 = liDescriptor1;
		
		this.liDescriptor2 = liDescriptor2;

		if(verbose){
			System.out.println("liDescriptor1 " + liDescriptor1.size() + " liDescriptor2 " + liDescriptor2.size() + ".");
		}
		
		// Results of a previous run are discarded.
		queueScore.clear();

		maSimilarity = new Matrix(liDescriptor1.size(), liDescriptor2.size());

		if(singleList){
			fillCalculationIndexQueueSingleList();
		} else {
			fillCalculationIndexQueueTwoLists();
		}

		liRun = new ArrayList();

		ExecutorService executorService = Executors.newFixedThreadPool(kernels);

		for (int i = 0; i < kernels; i++) {
			RunSimilarityCalc rsc = new RunSimilarityCalc(i, similarityCalculator, queueIndices, liDescriptor1, liDescriptor2, maSimilarity, singleList, queueScore);
			liRun.add(rsc);
			executorService.execute(rsc);
		}
		// No further tasks are accepted; the submitted workers run until they are done.
		executorService.shutdown();

		// Thread.sleep clears the interrupt status when it throws. The status is remembered here
		// and restored after the wait, so that the caller can still see the interrupt.
		boolean interrupted = false;
		while(!executorService.isTerminated()){
			try {
				Thread.sleep(1);
			} catch (InterruptedException e) {
				interrupted = true;
			}
		}
		if(interrupted){
			Thread.currentThread().interrupt();
		}
		
		long t2 = System.currentTimeMillis();
		long sec = (t2-t1) / 1000;

		if(sec!=0){
			calculationsPerSecond.set(getCalculatedSimilarityValues() / sec);
		}
		
		if(verbose){
			System.out.println("Similarity calculations " + getCalculatedSimilarityValues());
			System.out.println("Similarity calculations per second " + calculationsPerSecond.get());
									
			// long, to agree with the sum in getCalculatedSimilarityValues() and to avoid a silent narrowing.
			long sumCalc = 0;
			for (int i = 0; i < liRun.size(); i++) {
				RunSimilarityCalc rsc = liRun.get(i);
				sumCalc += rsc.getNSimilarityCalculations();
				System.out.println("Thread " + rsc.getIndexThread() + " calcs " + rsc.getNSimilarityCalculations());
			}
			
			System.out.println("Sum calcs " + sumCalc + ".");
		}
		
		sleep.set(SLEEP_SHORT);

	}
	
	/**
	 * @return calculations per second of the latest run, -1 if no value was determined.
	 */
	public long getCalculationsPerSecond(){
		return calculationsPerSecond.longValue();
	}
	
	/**
	 * @return number of similarities to calculate as determined when the index queue was filled.
	 */
	public int getSimilarities2Calculate(){
		return this.similarities2Calculate;
	}
	

	/**
	 * Sums up the number of similarity calculations reported by the worker tasks of the latest run.
	 *
	 * @return total number of calculations, 0 if no run was started so far.
	 */
	public long getCalculatedSimilarityValues(){
		
		// The list of worker tasks is created inside run. Without this check a call before
		// the first run fails with a NullPointerException.
		if(liRun == null){
			return 0;
		}
		
		long ccCalc = 0;
		
		for (RunSimilarityCalc rsc : liRun) {
			ccCalc += rsc.getNSimilarityCalculations();
		}
		
		return ccCalc;
	}
	
	/**
	 * Checks whether the index queue was filled completely, is empty again, and
	 * the result queue holds as many entries as similarities were to calculate.
	 *
	 * @return true if all three conditions hold.
	 */
	private boolean isFinished() {
		// Evaluated left to right; the later checks are skipped as soon as one fails.
		return queueIndices.isAllDataIn()
				&& queueIndices.isEmpty()
				&& queueScore.size() == similarities2Calculate;
	}

	/**
	 * @return true if the result queue holds at least one entry.
	 */
	public boolean hasMoreResults() {
		boolean empty = queueScore.isEmpty();
		return !empty;
	}
	
	/**
	 * Removes the head of the result queue and returns it.
	 *
	 * @return similarity score with the id numbers of the compared input objects, null if the queue is empty.
	 * The x value is the identifier of the object from liDescriptor1 and the y value the one from liDescriptor2.
	 */
	public ScorePoint getNextResult() {
		return this.queueScore.poll();
	}

	/**
	 * Fills the index queue with all index pairs (i,j), i being an index in liDescriptor1 and
	 * j an index in liDescriptor2. The number of pairs is stored in similarities2Calculate.
	 * The queue is marked as incomplete while it is filled and as complete afterwards.
	 */
	private void fillCalculationIndexQueueTwoLists(){

		queueIndices.setAllDataIn(false);

		final int rows = liDescriptor1.size();
		final int cols = liDescriptor2.size();

		similarities2Calculate = rows * cols;

		for (int row = 0; row < rows; row++) {
			for (int col = 0; col < cols; col++) {
				queueIndices.addData(new Point(row, col));
			}
		}

		queueIndices.setAllDataIn(true);

		if(verbose){
			System.out.println("SimilarityMulticore sim to calc " + similarities2Calculate + ".");
		}

	}

	/**
	 * Fills the index queue for the comparison of liDescriptor1 with itself.
	 * Only index pairs (i,j) with {@code j >= i} are added. The queue is marked as incomplete
	 * while it is filled and as complete afterwards.
	 * NOTE(review): the loop also adds the diagonal pairs (i == j), which are n pairs more than
	 * the value stored in similarities2Calculate — confirm whether the count is meant to exclude them.
	 */
	private void fillCalculationIndexQueueSingleList(){

		queueIndices.setAllDataIn(false);

		final int n = liDescriptor1.size();

		// Number of pairs without the diagonal: (n*n - n) / 2.
		similarities2Calculate = ((n * n) - n) / 2;

		for (int first = 0; first < n; first++) {
			for (int second = first; second < n; second++) {
				queueIndices.addData(new Point(first, second));
			}
		}

		queueIndices.setAllDataIn(true);

		if(verbose){
			System.out.println("SimilarityMulticore sim to calc " + similarities2Calculate + ".");
		}
	}


	/**
	 * @return similarity matrix allocated by the latest run, null if no run was started so far.
	 */
	public Matrix getSimilarityMatrix() {
		return this.maSimilarity;
	}

	private static class RunSimilarityCalc implements Runnable {
		
		// NOTE(review): "List>" below is not valid Java; the generic type arguments appear to have
		// been lost in this copy of the file — TODO restore from the original source.

		// Copy of the calculator obtained via getThreadSafeCopy() in the constructor.
		private ISimilarityCalculator iSimilarityCalculator;
		// Queue the index pairs are polled from.
		private Pipeline queueIndices;
		// List indexed with the x value of a polled index pair.
		private List> liDescriptor1;
		// List indexed with the y value of a polled index pair.
		private List> liDescriptor2;
		// Similarity matrix handed in by the enclosing class.
		private Matrix maSimilarity;
		// Single list mode flag handed in by the enclosing class.
		private boolean singleList;
		// Result queue handed in by the enclosing class.
		private ConcurrentLinkedQueue queueScore;
		// Counter created in the constructor, starts at 0.
		private AtomicLong calculatedSimilarities;
		// Index of this worker, given in the constructor.
		private int indexThread;
		
		/**
		 * Creates a worker task. All arguments are stored as given, except the similarity calculator:
		 * the worker keeps the object returned by getThreadSafeCopy() instead of the instance handed in.
		 * NOTE(review): "List>" is not valid Java; the generic type arguments appear to have been
		 * lost in this copy of the file — TODO restore from the original source.
		 *
		 * @param indexThread index of this worker.
		 * @param similarityCalculator calculator from which the copy for this worker is taken.
		 * @param queueIndices queue with the index pairs to process.
		 * @param liDescriptor1 first list with descriptors.
		 * @param liDescriptor2 second list with descriptors.
		 * @param maSimilarity similarity matrix.
		 * @param singleList single list mode flag.
		 * @param queueScore result queue.
		 */
		public RunSimilarityCalc(int indexThread,
								 ISimilarityCalculator similarityCalculator,
								 Pipeline queueIndices,
								 List> liDescriptor1,
								 List> liDescriptor2,
								 Matrix maSimilarity,
								 boolean singleList,
								 ConcurrentLinkedQueue queueScore) {
			
			this.indexThread = indexThread;
			// Every worker gets its own copy of the calculator.
			this.iSimilarityCalculator = similarityCalculator.getThreadSafeCopy();
			this.queueIndices = queueIndices;
			this.liDescriptor1 = liDescriptor1;
			this.liDescriptor2 = liDescriptor2;
			this.maSimilarity = maSimilarity;
			this.singleList = singleList;
			this.queueScore = queueScore;
			calculatedSimilarities = new AtomicLong();
		}
		
		public void run() {
			
			while(!queueIndices.wereAllDataFetched()) {

				Point p = queueIndices.pollData();

				if(p == null) {
					try {Thread.sleep(SLEEP_SHORT);} catch (InterruptedException e) {}
					continue;
				}

				int indexX = p.x;
				int indexY = p.y;

				IIdentifiedObject idObj1 = null;
				idObj1 = liDescriptor1.get(indexX);
				IIdentifiedObject idObj2 = null;
				idObj2 = liDescriptor2.get(indexY);
				
				ScorePoint sp = new ScorePoint((int)idObj1.getId(), (int)idObj2.getId());
					
				try {
					double sc = iSimilarityCalculator.getSimilarity(idObj1.getData(), idObj2.getData());
					if(sc




© 2015 - 2025 Weber Informatics LLC | Privacy Policy