
// Source listing: com.actelion.research.chem.reaction.RSSSearcher (Maven / Gradle / Ivy)
/*
* Project: DD_core
* @(#)ReactionRSS.java
*
* Copyright (c) 1997- 2014
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All Rights Reserved.
*
* This software is the proprietary information of Actelion Pharmaceuticals, Ltd.
* Use is subject to license terms.
*
* Author: Christian Rufener
*/
package com.actelion.research.chem.reaction;
import com.actelion.research.chem.SSSearcher;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.util.ArrayUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Reaction sub-structure search (RSS): checks whether a query reaction is contained in a
 * target reaction.
 *
 * Algorithm (the original author notes that it is not 100% correct):
 * <pre>
 * let ML = {}
 * for each query reactant:
 *     if query reactant matches SSS in target reactant
 *         let QL = matching atoms in target (match list)
 *         let AM = list of atom maps of QL
 *         let ML += AM
 *
 * let MP = {}
 * for each query product:
 *     if query product matches SSS in target product
 *         let QL = matching atoms in target (match list)
 *         let AM = list of atom maps of QL
 *         let MP += AM
 *
 * ML = eliminate 0 map nos from ML and sort
 * MP = eliminate 0 map nos from MP and sort
 *
 * if (ML == MP) => MATCH else => NO MATCH
 * </pre>
 *
 * The code is kept Java 1.5 compliant (required for the ORACLE db cartridge, see the note on
 * {@code copyOf}), therefore no diamond operator, lambdas or {@code Arrays.copyOf} are used.
 *
 * Created by rufenec on 9/30/2014.
 */
public class RSSSearcher
{
    /** Switches the coarse progress trace on standard output on or off. */
    private static final boolean debug = false;
    /** Switches the detailed per-molecule trace on standard output on or off. */
    private static final boolean debug2 = false;

    /**
     * Prints a formatted trace message if {@link #debug} is enabled.
     *
     * @param format printf style format string
     * @param args   arguments referenced by the format string
     */
    private static void debug(String format, Object... args)
    {
        if (debug) {
            System.out.printf(format, args);
        }
    }

    /**
     * Prints a formatted trace message if {@link #debug2} is enabled.
     *
     * @param format printf style format string
     * @param args   arguments referenced by the format string
     */
    private static void debug2(String format, Object... args)
    {
        if (debug2) {
            System.out.printf(format, args);
        }
    }

    /**
     * A list of mapping number arrays with a readable {@link #toString()} for tracing.
     */
    private static class MapsList extends ArrayList<int[]>
    {
        /**
         * @return every contained array on its own line in the form "{ 1 2 3}"
         */
        @Override
        public String toString()
        {
            StringBuilder s = new StringBuilder();
            for (int[] a : this) {
                s.append("\n {");
                for (int i : a) {
                    s.append(' ').append(i);
                }
                s.append('}');
            }
            return s.toString();
        }
    }

    /**
     * Tries to match a query reaction in a target reaction.
     * First the molecule counts are compared: the query must not have more reactants than the
     * target, and likewise for the products. Then the best mapping number lists are collected
     * for the reactant side and for the product side. The reactions match if, after sorting,
     * any reactant mapping number array equals any product mapping number array.
     *
     * @param queryRxn  the reaction to look for
     * @param targetRxn the reaction to search in
     * @return true if a reactant side and a product side mapping number array are equal;
     *         false otherwise, in particular if either side yields no mapping number array
     */
    public static boolean match(Reaction queryRxn, Reaction targetRxn)
    {
        int numOfQueryReactants = queryRxn.getReactants();
        int numOfTargetReactants = targetRxn.getReactants();
        int numOfQueryProducts = queryRxn.getProducts();
        int numOfTargetProducts = targetRxn.getProducts();
        if (numOfQueryReactants > numOfTargetReactants || numOfQueryProducts > numOfTargetProducts) {
            return false;
        }

        debug("MATCHING Start\n");

        debug("MATCHING Reactants\n");
        List<int[]> reactantMatchList =
            findBestMatchList(getReactants(queryRxn), getReactants(targetRxn), "reactant");
        debug("Reactant matchlist %d\n", reactantMatchList.size());

        debug("MATCHING PRODUCTS\n");
        List<int[]> productMatchList =
            findBestMatchList(getProducts(queryRxn), getProducts(targetRxn), "product");
        debug("Product matchlist %d\n", productMatchList.size());

        // The order of the mapping numbers depends on the atom order of the molecules,
        // so both sides are sorted before they are compared.
        debug2("Sorting\n");
        for (int[] rs : reactantMatchList) {
            Arrays.sort(rs);
        }
        for (int[] ps : productMatchList) {
            Arrays.sort(ps);
        }

        for (int[] rs : reactantMatchList) {
            for (int[] ps : productMatchList) {
                if (Arrays.equals(rs, ps)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Pairs the query molecules with every ordering of the target molecules and returns the
     * combined mapping number lists of the best ordering. An ordering is considered better if
     * its longest mapping number array is longer than the longest one found so far, so on a
     * tie the ordering tried first wins.
     *
     * @param queryMols  molecules of one side of the query reaction
     * @param targetMols molecules of the same side of the target reaction; must contain at
     *                   least as many molecules as queryMols
     * @param side       name of the reaction side, used for trace output only
     * @return the mapping number arrays of the best ordering, empty if nothing was matched
     */
    private static List<int[]> findBestMatchList(List<StereoMolecule> queryMols,
                                                 List<StereoMolecule> targetMols,
                                                 String side)
    {
        List<int[]> best = new ArrayList<int[]>();
        ListPermutator<StereoMolecule> permutator = new ListPermutator<StereoMolecule>(targetMols);
        while (permutator.hasNext()) {
            List<StereoMolecule> permutation = permutator.next();
            List<int[]> matchList = new ArrayList<int[]>();
            for (int i = 0; i < queryMols.size(); i++) {
                // Since the target list is a permutation,
                // we use the same index as for the query
                debug2("Matching %s\n", side);
                MapsList matched = findMatchingMaps(queryMols.get(i), permutation.get(i));
                if (matched == null) {
                    // Molecules did not even match on SSS, so try another permutation.
                    // NOTE(review): the mapping numbers collected so far for this permutation
                    // are still considered below - confirm that this is intended.
                    debug("Did not match SSS\n");
                    break;
                }
                if (matched.size() > 0) {
                    debug("Matched with matchlist\n");
                    matchList = add(matchList, matched);
                } else {
                    // An empty list is not a failure, so the permutation is not abandoned.
                    debug("NO match with matchlist\n");
                }
            }
            // Choose the best match: a longer match list is considered the better solution
            if (getMaxSize(matchList) > getMaxSize(best)) {
                best = matchList;
            }
        }
        return best;
    }

    /**
     * Returns a list of arrays containing the mapping numbers of the target molecule
     * which have been matched by the query sub-structure, one array per sub-structure match.
     * The list contains only valid mapping numbers: a target mapping number is kept only if
     * both the query atom and the matched target atom carry a mapping number (not 0).
     * The arrays are in query atom order; they are not sorted here.
     *
     * @param query  the sub-structure; its fragment flag is set for the search and restored
     *               afterwards
     * @param target the molecule to search in
     * @return null if the sub-structure search found no match; an empty list if no search was
     *         done because the query has more atoms than the target or has no mapped atom
     */
    private static MapsList findMatchingMaps(StereoMolecule query, StereoMolecule target)
    {
        MapsList ret = new MapsList();

        // First performance check: don't SSS for bigger query mols
        if (query.getAllAtoms() > target.getAllAtoms()) {
            return ret;
        }

        // Leave if the query has no maps
        int[] queryMaps = getMapList(query);
        boolean hasMaps = false;
        for (int mapNo : queryMaps) {
            if (mapNo != 0) {
                hasMaps = true;
                break;
            }
        }
        if (!hasMaps) {
            return ret;
        }

        SSSearcher searcher = new SSSearcher();
        boolean wasFragment = query.isFragment();
        query.setFragment(true);
        try {
            searcher.setMol(query, target);
            int count = searcher.findFragmentInMolecule();
            if (count <= 0) {
                return null; // signal not found!
            }

            // The query was found {count} times in the target.
            // Each entry holds the indices of the matched target atoms:
            // query atom[n] matched target atom matchedSSSAtoms[n]
            List<int[]> sssMatchList = searcher.getMatchList();
            for (int i = 0; i < count; i++) {
                int[] matchedSSSAtoms = sssMatchList.get(i);
                // The mapping numbers of the matched target atoms
                int[] targetMaps = getMapList(target, matchedSSSAtoms);
                if (debug2) {
                    debug2("Query Map Arr\t: %s\n", ArrayUtils.toString(queryMaps));
                    debug2("Target Map Arr\t: %s\n", ArrayUtils.toString(targetMaps));
                }
                int[] mapList = new int[matchedSSSAtoms.length];
                int index = 0;
                for (int k = 0; k < matchedSSSAtoms.length; k++) {
                    if (targetMaps[k] != 0 && queryMaps[k] != 0) {
                        mapList[index++] = targetMaps[k];
                    }
                }
                // Cut off the unused tail, only non zero mapping numbers were stored
                mapList = copyOf(mapList, index);
                if (debug2) {
                    debug2("Query Map List\t: %s\n", ArrayUtils.toString(removeZeros(queryMaps)));
                    debug2("Target Map List\t: %s\n", ArrayUtils.toString(mapList));
                }
                ret.add(mapList);
            }
            return ret;
        } finally {
            // Restore the flag even if the search throws
            query.setFragment(wasFragment);
        }
    }

    /**
     * Compares two key arrays element by element.
     *
     * @param tK key of the target
     * @param qK key of the query
     * @return true if both arrays are non null, have the same length and no query element is
     *         greater than the target element at the same index (signed byte comparison)
     */
    public static boolean matchKeys(byte[] tK, byte[] qK)
    {
        if (qK == null || tK == null || qK.length != tK.length) {
            return false;
        }
        for (int i = 0; i < qK.length; i++) {
            if (qK[i] > tK[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * @param lists the arrays to inspect
     * @return the length of the longest array, 0 if the list is empty
     */
    private static int getMaxSize(List<int[]> lists)
    {
        int size = 0;
        for (int[] k : lists) {
            size = Math.max(size, k.length);
        }
        return size;
    }

    /**
     * @param r the reaction
     * @return a new list containing the products of the reaction in index order
     */
    private static List<StereoMolecule> getProducts(Reaction r)
    {
        List<StereoMolecule> list = new ArrayList<StereoMolecule>();
        for (int i = 0; i < r.getProducts(); i++) {
            list.add(r.getProduct(i));
        }
        return list;
    }

    /**
     * @param r the reaction
     * @return a new list containing the reactants of the reaction in index order
     */
    private static List<StereoMolecule> getReactants(Reaction r)
    {
        List<StereoMolecule> list = new ArrayList<StereoMolecule>();
        for (int i = 0; i < r.getReactants(); i++) {
            list.add(r.getReactant(i));
        }
        return list;
    }

    /**
     * Appends the arrays of listToAdd to the arrays of sourceList, effectively building the
     * cross product of both lists. This makes the final check for equality of the reactant
     * and product mapping simple.
     * <pre>
     * sourceList          listToAdd            result
     * {3,2,1}             {7,8,9,4,5}          {3,2,1,7,8,9,4,5}
     *                     {4,3,4,5,2}          {3,2,1,4,3,4,5,2}
     * </pre>
     * The result is ordered by the arrays of listToAdd first and by the arrays of sourceList
     * second. If sourceList is empty the arrays of listToAdd are taken over as they are; if
     * listToAdd is empty sourceList is left untouched.
     *
     * @param sourceList the list to extend; it is modified in place
     * @param listToAdd  the arrays to append
     * @return sourceList
     */
    private static List<int[]> add(List<int[]> sourceList, List<int[]> listToAdd)
    {
        if (listToAdd.isEmpty()) {
            return sourceList;
        }
        if (sourceList.isEmpty()) {
            sourceList.addAll(listToAdd);
            return sourceList;
        }

        List<int[]> combined = new ArrayList<int[]>(sourceList.size() * listToAdd.size());
        for (int[] t : listToAdd) {
            for (int[] s : sourceList) {
                // Create a new array holding the s.length elements of s and
                // append the t array at the end
                int[] q = copyOf(s, s.length + t.length);
                System.arraycopy(t, 0, q, s.length, t.length);
                combined.add(q);
            }
        }
        sourceList.clear();
        sourceList.addAll(combined);
        return sourceList;
    }

    /**
     * @param array the array to filter
     * @return a new array containing the non zero elements of array in their original order
     */
    private static int[] removeZeros(int[] array)
    {
        int count = 0;
        int[] t = new int[array.length];
        for (int i = 0; i < array.length; i++) {
            if (array[i] != 0) {
                t[count++] = array[i];
            }
        }
        return copyOf(t, count);
    }

    /**
     * Returns the mapping numbers of the given atoms of a molecule.
     *
     * @param m     the molecule
     * @param atoms atom indices into the molecule
     * @return an array where element i is the mapping number of atom atoms[i];
     *         unmapped mapping numbers are included
     */
    static int[] getMapList(StereoMolecule m, int[] atoms)
    {
        int[] ret = new int[atoms.length];
        for (int i = 0; i < atoms.length; i++) {
            ret[i] = m.getAtomMapNo(atoms[i]);
        }
        return ret;
    }

    /**
     * Returns an array of mapping numbers for this molecule,
     * the index into this array corresponds to the atom index in the molecule.
     * Note: Unmapped mapping numbers are included
     *
     * @param m the molecule
     * @return the mapping numbers of all atoms of the molecule
     */
    static int[] getMapList(StereoMolecule m)
    {
        int atoms = m.getAllAtoms();
        int[] ret = new int[atoms];
        for (int i = 0; i < atoms; i++) {
            ret[i] = m.getAtomMapNo(i);
        }
        return ret;
    }

    /**
     * Enumerates all orderings of a list by swapping two adjacent elements per step
     * (Steinhaus-Johnson-Trotter scheme, run from the left end of the list).
     * The first call of {@link #next()} returns the initial ordering. For lists of up to
     * three elements the sequence is the one produced by cycling the swap position
     * 0, 1, ..., n-2, which however does not reach all orderings of longer lists.
     * Every call of {@link #next()} returns the same internal list instance, so a returned
     * ordering is only valid until the next call.
     */
    private static class ListPermutator<T>
    {
        /** Private copy of the source list, permuted in place. */
        private final List<T> list;
        /** rank[pos] is the rank of the element at pos; the element initially first has the highest rank. */
        private final int[] rank;
        /** direction[r] is the direction (+1 right, -1 left) the element of rank r moves next. */
        private final int[] direction;
        /** True until the initial ordering has been handed out. */
        private boolean first = true;

        /**
         * @param list the elements to permute; the list itself is not modified
         */
        ListPermutator(List<T> list)
        {
            this.list = new ArrayList<T>(list);
            int n = this.list.size();
            rank = new int[n];
            direction = new int[n];
            for (int pos = 0; pos < n; pos++) {
                rank[pos] = n - 1 - pos;
                direction[pos] = 1;
            }
        }

        /**
         * @return true if {@link #next()} can deliver another ordering
         */
        boolean hasNext()
        {
            return first || findMobilePosition() >= 0;
        }

        /**
         * @return the next ordering; always the same list instance
         * @throws java.util.NoSuchElementException if all orderings have been delivered
         */
        List<T> next()
        {
            if (first) {
                first = false;
                return list;
            }
            int from = findMobilePosition();
            if (from < 0) {
                throw new java.util.NoSuchElementException("all permutations have been delivered");
            }
            int moved = rank[from];
            int to = from + direction[moved];
            rank[from] = rank[to];
            rank[to] = moved;
            java.util.Collections.swap(list, from, to);
            // All elements ranked higher than the moved one turn around
            for (int r = moved + 1; r < direction.length; r++) {
                direction[r] = -direction[r];
            }
            return list;
        }

        /**
         * An element is mobile if the neighbour it is heading for exists and has a lower rank.
         *
         * @return the position of the mobile element with the highest rank, -1 if there is none
         */
        private int findMobilePosition()
        {
            int best = -1;
            for (int pos = 0; pos < rank.length; pos++) {
                int neighbour = pos + direction[rank[pos]];
                if (neighbour >= 0 && neighbour < rank.length
                    && rank[neighbour] < rank[pos]
                    && (best < 0 || rank[pos] > rank[best])) {
                    best = pos;
                }
            }
            return best;
        }
    }

    /**
     * Replacement for Arrays.copyOf:
     * We need these since we need 1.5 compliance for ORACLE db i.e Cartridge
     *
     * @param original  the array to copy
     * @param newLength the length of the copy
     * @return a new array of length newLength holding the leading elements of original,
     *         padded with zeros if newLength exceeds original.length
     */
    private static int[] copyOf(int[] original, int newLength)
    {
        int[] copy = new int[newLength];
        System.arraycopy(original, 0, copy, 0,
            Math.min(original.length, newLength));
        return copy;
    }

    /**
     * Replacement for Arrays.copyOfRange, see {@link #copyOf(int[], int)}.
     *
     * @param original the array to copy from
     * @param from     index of the first element to copy
     * @param to       index after the last element to copy
     * @return a new array of length to - from
     * @throws IllegalArgumentException if from is greater than to
     */
    private static int[] copyOfRange(int[] original, int from, int to)
    {
        int newLength = to - from;
        if (newLength < 0) {
            throw new IllegalArgumentException(from + " > " + to);
        }
        int[] copy = new int[newLength];
        System.arraycopy(original, from, copy, 0,
            Math.min(original.length - from, newLength));
        return copy;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy