All downloads are free. Search and download functionality uses the official Maven repository.

com.actelion.research.chem.StructureSearch Maven / Gradle / Ivy

There is a newer version: 2024.11.2
Show newest version
/*
 * Copyright (c) 1997 - 2016
 * Actelion Pharmaceuticals Ltd.
 * Gewerbestrasse 16
 * CH-4123 Allschwil, Switzerland
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the the copyright holder nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @author Thomas Sander
 */

package com.actelion.research.chem;

import com.actelion.research.calc.ProgressController;
import com.actelion.research.chem.descriptor.*;
import com.actelion.research.util.ByteArrayComparator;

import java.nio.charset.StandardCharsets;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicInteger;

public class StructureSearch {
	// Search status values held in mStatus.
	// SEARCH_RUNNING is set by start() right before the search threads are created.
	public static final int SEARCH_RUNNING = -1;
	// Initial status assigned by the constructor.
	public static final int SEARCH_PENDING = 0;
	// Set when the progress controller reports threadMustDie().
	public static final int SEARCH_STOPPED = 1;
	// Set by start() when a structure search is requested, but the specification contains no query structure.
	public static final int QUERY_MISSING = 2;
	// Set by start() when the data source does not support the specified search type.
	public static final int SEARCH_TYPE_NOT_SUPPORTED = 3;
	public static final int SUCCESSFUL_COMPLETION = 4;
	// Set when the match count exceeds a non-zero limit defined with setMatchLimit().
	public static final int COUNT_LIMIT_EXCEEDED = 5;
	// Set when the current time passes mStopTime.
	public static final int TIME_LIMIT_EXCEEDED = 6;
	// Status texts indexed by the status values SEARCH_PENDING (0) to TIME_LIMIT_EXCEEDED (6).
	// There is no entry for SEARCH_RUNNING (-1).
	public static final String[] COMPLETION_TEXT = { "not started", "stopped", "query missing", "unsupported search type", "successful", "count limit hit", "time limit hit" };

	// Collaborators passed to the constructor; search controller and progress controller may be null.
	private final StructureSearchSpecification mSpecification;
	private final StructureSearchDataSource mDataSource;
	private final StructureSearchController mSearchController;
	private final ProgressController mProgressController;
	// Query data prepared by start(). For substructure searches mQueryFragment is allocated
	// with one entry per query structure.
	// NOTE(review): the use of mDoubleQueryFragment, mIDCodeComparator, mQueryDescriptor,
	// mQueryHashCode and mQueryIDCode is not visible in this part of the file.
	private volatile StereoMolecule[] mQueryFragment,mDoubleQueryFragment;
	private volatile ByteArrayComparator mIDCodeComparator;
	// Chosen by the constructor for similarity and substructure searches; stays null otherwise.
	private volatile DescriptorHandler mDescriptorHandler;
	private volatile Object[] mQueryDescriptor;
	private volatile long[] mQueryHashCode;
	private volatile byte[][] mQueryIDCode;
	// Reset to -1 by start(); for substructure searches set to the data source's FFP512 descriptor column.
	private volatile int mDescriptorColumn;
	// Match limits set by setMatchLimit(), where 0 means no limit; mStatus holds one of the status values above.
	private volatile int mMaxSSSMatches,mMaxNonSSSMatches, mStatus;
	// mMaxMillis is set by setTimeLimit() (0: no limit). mStopTime is computed from it in start()
	// and is Long.MAX_VALUE if no time limit is defined.
	private volatile long mStopTime,mMaxMillis;
	// NOTE(review): raw type; an element type argument may have been lost when this source was extracted.
	// The code using this queue is not visible here, presumably it collects matching rows -- confirm.
	private ConcurrentLinkedQueue mResultQueue;
	// mMatchCount is reset to 0 at the beginning of start() and compared against the match limits.
	// NOTE(review): mSMPIndex usage is not visible here; presumably the shared row index of the search threads -- confirm.
	private AtomicInteger mSMPIndex,mMatchCount;

	/**
	 * Constructs a new structure search. The search itself is not executed before
	 * start() is called, which then searches the structure rows provided by dataSource
	 * using multiple threads. Until then the status is SEARCH_PENDING.
	 * If a searchController is given, it is asked for every row whether that row
	 * meets all preconditions and qualifies for the search.
	 * For similarity searches the descriptor handler is taken from the factory using the
	 * descriptor short name of the specification. For substructure searches the default
	 * DescriptorHandlerLongFFP512 instance is used. If specification is null,
	 * no descriptor handler is prepared.
	 * @param specification defines search type and query
	 * @param dataSource provides the structure rows to be searched
	 * @param searchController may be null, if all rows need to be searched
	 * @param progressController may be null
	 * @param dhFactory if null then the default DescriptorHandlerStandard2DFactory is used
	 */
	public StructureSearch(StructureSearchSpecification specification,
						   StructureSearchDataSource dataSource,
						   StructureSearchController searchController,
						   ProgressController progressController,
						   DescriptorHandlerFactory dhFactory) {
		mSpecification = specification;
		mDataSource = dataSource;
		mSearchController = searchController;
		mProgressController = progressController;
		mStatus = SEARCH_PENDING;

		// without a specification there is no search type to select a descriptor handler for
		if (specification == null)
			return;

		if (specification.isSimilaritySearch()) {
			// fall back to the standard 2D factory, if the caller didn't supply one
			DescriptorHandlerFactory factory = dhFactory;
			if (factory == null)
				factory = DescriptorHandlerStandard2DFactory.getFactory();

			mDescriptorHandler = factory.getDefaultDescriptorHandler(specification.getDescriptorShortName());
			}
		else if (specification.isSubstructureSearch()) {
			mDescriptorHandler = DescriptorHandlerLongFFP512.getDefaultInstance();
			}
		}

	/**
	 * Defines the maximum number of matches a search may produce before it is aborted.
	 * Call this method before starting the search.
	 * In case a search would return more than the defined maximum of allowed matches,
	 * then the search would stop at the allowed maximum and return those matches.
	 * The status is then COUNT_LIMIT_EXCEEDED.
	 * @param maxSSSMatches maximum number of allowed substructure search matches (0: no limit)
	 * @param maxNonSSSMatches maximum number of allowed matches for other search types (0: no limit)
	 */
	public void setMatchLimit(int maxSSSMatches, int maxNonSSSMatches) {
		mMaxNonSSSMatches = maxNonSSSMatches;
		mMaxSSSMatches = maxSSSMatches;
		}

	/**
	 * If the search shall be aborted once it exceeds a given elapsed time limit,
	 * then define the maximum allowed search time in milliseconds.
	 * The limit is converted into a stop time when start() is called, so this method
	 * must be called before starting the search to have an effect on it.
	 * If a search time limit is reached, then the search would return all matches found
	 * and the status is TIME_LIMIT_EXCEEDED.
	 * @param maxMillis maximum allowed elapsed search milliseconds (0: no limit)
	 */
	public void setTimeLimit(long maxMillis) {
		mMaxMillis = maxMillis;
		}

	/**
	 * Returns a short human readable text describing the current status of this search.
	 * For all status values from SEARCH_PENDING to TIME_LIMIT_EXCEEDED this is the
	 * respective COMPLETION_TEXT entry. While the search is running, "running" is returned.
	 * @return status text, never null
	 */
	public String getCompletionStatus() {
		// mStatus is volatile and is updated while a search runs: read it once,
		// such that the check and the array access see the same value
		final int status = mStatus;

		// SEARCH_RUNNING is -1 and has no entry in COMPLETION_TEXT;
		// indexing the array with it would throw an ArrayIndexOutOfBoundsException
		if (status == SEARCH_RUNNING)
			return "running";

		return COMPLETION_TEXT[status];
		}

	public int[] start() {
		if (!mDataSource.isSupportedSearchType(mSpecification)) {
			mStatus = SEARCH_TYPE_NOT_SUPPORTED;
			return null;
			}

		mMatchCount = new AtomicInteger(0);

		if (!mSpecification.isNoStructureSearch()) {
			final int queryStructureCount = mSpecification.getStructureCount();
			if (queryStructureCount == 0) {
				mStatus = QUERY_MISSING;
				return null;
				}

			mDescriptorColumn = -1;
	        boolean largestFragmentOnly = mSpecification.isLargestFragmentOnly();

			if (mSpecification.isSubstructureSearch() || mSpecification.isSimilaritySearch()) {
				if (mSpecification.isSubstructureSearch()) {
					mDescriptorColumn = mDataSource.getDescriptorColumn(DescriptorConstants.DESCRIPTOR_FFP512.shortName);
					mQueryFragment = new StereoMolecule[queryStructureCount];
					for (int i=0; i();

		if (mProgressController != null && mSpecification.getStructureCount() > 1023)
			mProgressController.startProgress("Searching structures", 0, mSpecification.getStructureCount());

		mStopTime = (mMaxMillis == 0) ? Long.MAX_VALUE : System.currentTimeMillis() + mMaxMillis;
		mStatus = SEARCH_RUNNING;

		int threadCount = Runtime.getRuntime().availableProcessors();
    	SearchThread[] t = new SearchThread[threadCount];
    	for (int i=0; i= 0) {
				if ((mProgressController != null && mProgressController.threadMustDie())) {
					mStatus = SEARCH_STOPPED;
					break;
					}

				if (System.currentTimeMillis() > mStopTime) {
					mStatus = TIME_LIMIT_EXCEEDED;
					break;
					}

				if (mProgressController != null && row%1024==1023)
					mProgressController.updateProgress(mSpecification.getStructureCount()-row);

				if (mSearchController == null || mSearchController.rowQualifies(row)) {
					boolean isMatch = false;

					if (mSpecification.isSubstructureSearch()) {
						if (mMaxSSSMatches != 0 && mMatchCount.get() > mMaxSSSMatches) {
							mStatus = COUNT_LIMIT_EXCEEDED;
							break;
							}

						for (int s=0; !isMatch && s mMaxNonSSSMatches) {
							mStatus = COUNT_LIMIT_EXCEEDED;
							break;
							}

						if (mSpecification.isNoStructureSearch()) {
							isMatch = true;
							}
						else if (mSpecification.isSimilaritySearch()) {
							for (int s=0; !isMatch && s=mSpecification.getSimilarityThreshold()) {
										isMatch = true;
										break;
									}
								}
								}
							}
						else if (mSpecification.isExactSearch()) {
							for (int s=0; !isMatch && s




© 2015 - 2024 Weber Informatics LLC | Privacy Policy