
// com.actelion.research.chem.chemicalspaces.ptree.search.FragmentPTreeSearch Maven / Gradle / Ivy
package com.actelion.research.chem.chemicalspaces.ptree.search;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Set;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.chem.chemicalspaces.ptree.PharmTreeSynthonReactionHelper;
import com.actelion.research.chem.chemicalspaces.ptree.synthon.PharmTreeSynthon;
import com.actelion.research.chem.chemicalspaces.ptree.synthon.PharmTreeSynthonLibrary;
import com.actelion.research.chem.chemicalspaces.synthon.SynthonReactor;
import com.actelion.research.chem.descriptor.DescriptorHandlerSkeletonSpheres;
import com.actelion.research.chem.descriptor.pharmacophoretree.DescriptorHandlerPTree;
import com.actelion.research.chem.descriptor.pharmacophoretree.HungarianAlgorithm;
import com.actelion.research.chem.descriptor.pharmacophoretree.PharmacophoreNode;
import com.actelion.research.chem.descriptor.pharmacophoretree.PharmacophoreTree;
import com.actelion.research.chem.descriptor.pharmacophoretree.PharmacophoreTreeGenerator;
import com.actelion.research.chem.descriptor.pharmacophoretree.TreeMatcher;
import com.actelion.research.chem.descriptor.pharmacophoretree.TreeUtils;
import com.actelion.research.chem.descriptor.pharmacophoretree.TreeMatcher.FeatureMatch;
import com.actelion.research.chem.descriptor.pharmacophoretree.TreeMatcher.TreeMatching;
/**
* Based on: https://doi.org/10.1023/A:1011144622059
* Given a query molecule on one hand and lists of building blocks that can react according to given rules defined
* by a rxnHelper, the FragmentPTreeSearch searches the virtual space defined by the fragments and the reactions for compounds
* that have similar features compared to the query. The first step is the construction of the potential solution space,
* consisting of an edge-link table, which stores for every edge and direction in the query tree a list of matched fragments
* together with the score of the matchings, matched to the subtree of the query tree created by cutting this edge.
* Compatible fragments from the high-scoring solutions are reacted (product enumeration) and the similarity to the query is
* calculated. Solutions with similarities higher than a defined cutoff are stored and returned.
* Two Modes: Scaffold Hopping returns results with high similarity regarding PharmTree, Hit Expansion aims to find
* hits with high chemical similarity (SkelSpheres)
* In order to make the search more efficient, initially only a chemically diverse set of fragments is matched (cluster centroids).
* Only if the similarity of the cluster centroid is below a given threshold, the remaining members of the cluster are matched.
*
* @author Joel Wahl
*
*/
public class FragmentPTreeSearch {
// synthon id and linker id are merged into one integer, in order to have a fast lookup for compatible synthons
private static final int LINKER_ID_MASK = 7;
private static final int SYNTHON_ID_MASK = 56;
private static final int SYNTHON_SHIFT = 3;
public static final double RECURSION_BREAK_SIMILARITY = 0.5;//if similarity of extension match is below this, the recursion breaks
public static final double RECURSION_BREAK_SIZE_CUTOFF = 6;
private static final double SUBTREE_MATCHING_BUFFER = 0.3;
private PharmacophoreTree queryTree;
private PharmTreeSynthonLibrary synthonLib;
private PharmTreeSynthonReactionHelper rxnHelper;
private SearchResult[][] edgeLinkTable;
private double pTreeSimilarityCutoff;
private Map> linkerToSynthons; //maps the linkerID+synthonID to the corresponding trees, the index of the linker node and the index of the edge attached to the linker node are stored
private int[] cuts;
private int nBestSolutions;
/**
 * Creates a search over the given synthon library for the given query tree.
 * The edge-link table is not built here; it is filled when the search runs.
 *
 * @param queryMol query molecule; not read by this constructor
 * @param queryTree pharmacophore tree of the query
 * @param synthonLib synthon library, which also supplies the reaction helper
 * @param pTreeSimilarityCutoff similarity cutoff used when filtering matchings and hits
 */
public FragmentPTreeSearch(StereoMolecule queryMol,PharmacophoreTree queryTree, PharmTreeSynthonLibrary synthonLib,
        double pTreeSimilarityCutoff) {
    this.queryTree = queryTree;
    this.synthonLib = synthonLib;
    this.rxnHelper = synthonLib.getReactionHelper();
    this.pTreeSimilarityCutoff = pTreeSimilarityCutoff;
    // position 0 = CUT_LEFT, position 1 = CUT_RIGHT; the position is used as cutDirIndex
    this.cuts = new int[] {PharmacophoreTree.CUT_LEFT, PharmacophoreTree.CUT_RIGHT};
    this.nBestSolutions = 2000;
    // diamond replaces the malformed "new HashMap>()" left behind by stripped type arguments
    this.linkerToSynthons = new HashMap<>();
}
/**
 * Allocates the edge-link table and fills linkerToSynthons.
 * NOTE(review): the loop headers of this method are truncated in this copy of the file
 * (the text between "<" and ">" is missing), so only the visible statements are described.
 */
private void processFragments() {
// collect every linker id known to the reaction helper and determine the largest one
Set allLinkers = new HashSet();
int highestLinkerID = 0;
rxnHelper.getReactantsWithLinkers().values().stream().forEach(e -> allLinkers.addAll(e));
for(int linkerID : allLinkers)
if(linkerID>highestLinkerID)
highestLinkerID = linkerID;
List> allSynthons = synthonLib.getSynthons();
// one row per (linker id, synthon set) pair, two columns (one per cut direction) per query-tree edge
edgeLinkTable = new SearchResult[highestLinkerID*allSynthons.size()][queryTree.getEdges().size()*2];
// NOTE(review): loop over the synthon sets; the header and the start of its body are truncated here
for(int i=0;i synthons = allSynthons.get(i);
// NOTE(review): inner loop header truncated; presumably it iterates the synthons and their linker nodes
// and computes id, n and linkerEdge - confirm against the original source
for(int j=0;j());
// register the synthon under its key together with {linker node index, linker edge index}
Map map = linkerToSynthons.get(id);
map.put(synthon,new int[] {n,linkerEdge});
}
}
}
}
}
/**
 * Runs the search: builds the lookup structures, matches the synthons to both sides of the cut query tree,
 * stores the matchings in the edge-link table, collects the hits via getHits and returns them sorted by
 * descending value.
 * NOTE(review): several loop headers are truncated in this copy of the file (text between "<" and ">" is missing).
 *
 * @return the hits, ordered from highest to lowest value
 */
public Map search() {
LinkedHashMap hits = new LinkedHashMap();
processFragments();
// NOTE(review): loop header truncated; i is later passed to initialCut as the cut edge index
for(int i=0;i querySourceTreeEdgeIndeces = new ArrayList();
List queryTargetTreeEdgeIndeces = new ArrayList();
List querySourceTreeEdgeParentIndeces = new ArrayList();
List queryTargetTreeEdgeParentIndeces = new ArrayList();
// cut the query tree at edge i; the four lists are handed to initialCut for the source and target side
int [] headNodes = queryTree.initialCut(cuts[0],i,querySourceTreeEdgeIndeces, querySourceTreeEdgeParentIndeces,
queryTargetTreeEdgeIndeces , queryTargetTreeEdgeParentIndeces);
for(int l : linkerToSynthons.keySet()) {
// decode the combined key into synthon id and linker id
int synthonID = (l & SYNTHON_ID_MASK)>>SYNTHON_SHIFT ;
int linkerID = l & LINKER_ID_MASK;
// NOTE(review): loop header truncated; cutDirIndex indexes the cuts array
for(int cutDirIndex=0;cutDirIndex querySubTreeEdgeIndeces;
List querySubTreeEdgeParentIndeces;
int queryCutDir;
int querySubTreeHeadNode;
//match to source side of cut query tree
if(cuts[cutDirIndex] == PharmacophoreTree.CUT_LEFT) {
querySubTreeEdgeIndeces = querySourceTreeEdgeIndeces;
querySubTreeEdgeParentIndeces = querySourceTreeEdgeParentIndeces;
queryCutDir = PharmacophoreTree.CUT_LEFT;
querySubTreeHeadNode = headNodes[0];
}
else {
//match to target side of cut query tree
querySubTreeEdgeIndeces = queryTargetTreeEdgeIndeces;
querySubTreeEdgeParentIndeces = queryTargetTreeEdgeParentIndeces;
queryCutDir = PharmacophoreTree.CUT_RIGHT;
querySubTreeHeadNode = headNodes[1];
}
// NOTE(review): the declaration of "result" is not visible here (presumably lost in the truncated line above)
result = new SearchResult();
Map treeToLinkerHead = linkerToSynthons.get(l);
for(PharmTreeSynthon synthon: treeToLinkerHead.keySet()) {
FragmentMatching matching = matchFragmentSubtree(synthon, treeToLinkerHead,synthonID, linkerID, queryCutDir,
querySubTreeHeadNode, i, querySubTreeEdgeIndeces,
querySubTreeEdgeParentIndeces);
if(matching.sim > (pTreeSimilarityCutoff-SUBTREE_MATCHING_BUFFER)) {
// keep the matching: its similarity exceeds the cutoff relaxed by SUBTREE_MATCHING_BUFFER
result.addResult(matching);
}
}
if(result.getResults().size()> nBestSolutions) { //prune matchings, store only the best solutions
// the results are kept sorted best-first by addResult, so the head of the list holds the best matchings
List prunedMatchings = result.getResults().subList(0, nBestSolutions);
result.setResult(prunedMatchings);
}
edgeLinkTable[(linkerID-1)*synthonLib.getSynthons().size()+synthonID][2*i+cutDirIndex] = result;
}
}
}
}
getHits(hits);
// re-collect into a LinkedHashMap ordered by descending value; on duplicate keys the first entry wins
hits = hits.entrySet().stream().sorted(Map.Entry.comparingByValue().reversed()).collect(Collectors.toMap(Map.Entry::getKey,Map.Entry::getValue,
(e1, e2) -> e1, LinkedHashMap::new));
return hits;
}
/**
 * Matches the pharmacophore tree of one synthon against one side of the cut query tree.
 *
 * @param synthon synthon whose tree is matched
 * @param treeToLinkerHead maps each synthon to {linker node index, linker edge index}
 * @param synthonID id of the synthon set the synthon belongs to
 * @param linkerID id of the linker the synthon is attached by
 * @param queryCutDir cut direction of the query sub-tree
 * @param queryTreeHeadNode head node of the query sub-tree
 * @param queryCutEdge index of the query edge that was cut
 * @param querySubTreeEdgeIndeces edge indices of the query sub-tree
 * @param querySubTreeEdgeParentIndeces parent edge indices of the query sub-tree
 * @return the matching produced by the fragment match search
 */
private FragmentMatching matchFragmentSubtree(PharmTreeSynthon synthon, Map<PharmTreeSynthon,int[]> treeToLinkerHead,
        int synthonID, int linkerID, int queryCutDir, int queryTreeHeadNode, int queryCutEdge, List<Integer> querySubTreeEdgeIndeces,
        List<Integer> querySubTreeEdgeParentIndeces){
    // single lookup: [0] = linker node index, [1] = index of the edge attached to the linker node
    int[] linkerInfo = treeToLinkerHead.get(synthon);
    List<Integer> fragmentTreeEdgeIndeces = new ArrayList<>();
    List<Integer> fragmentTreeEdgeParentIndeces = new ArrayList<>();
    PharmacophoreTree fragmentTree = synthon.getPharmTree();
    // fills the two edge lists and returns the head node of the fragment tree
    int fragTreeHeadNode = processFragmentTree(linkerInfo[0], fragmentTree, fragmentTreeEdgeIndeces, fragmentTreeEdgeParentIndeces);
    FragmentMatchSearch matchSearch = new FragmentMatchSearch(this, queryTree, synthon, synthonID, linkerID, queryTreeHeadNode,
            fragTreeHeadNode, queryCutEdge, linkerInfo[1], queryCutDir, querySubTreeEdgeIndeces, fragmentTreeEdgeIndeces,
            querySubTreeEdgeParentIndeces, fragmentTreeEdgeParentIndeces);
    return matchSearch.matchSearch();
}
/**
 * Combines matchings from the edge-link table into complete solutions, reacts their synthons to products
 * and records the products in the given map.
 * NOTE(review): several loop headers and one condition are truncated in this copy of the file
 * (text between "<" and ">" is missing), so only the visible statements are described.
 *
 * @param hits map that receives the hits; the key is the product id code followed by the synthon ids,
 *             each terminated by "____"
 */
public void getHits(LinkedHashMap hits) {
// combined solutions scoring below pTreeSimilarityCutoff minus this buffer end the inner loop
final double buffer = 0.05;
// NOTE(review): loop header(s) truncated; i addresses a row of edgeLinkTable (see linkerID/synthonID below)
for(int i=0;i> solutions = constructSolutions(result);
// inverse of the row addressing used when the table is filled
int linkerID = (i/synthonLib.getSynthons().size()) + 1;
int synthonID = i%synthonLib.getSynthons().size();
// find compatible matches
int compatibleCutDirIndex = cutDirIndex == 0 ? 1 : 0;
// NOTE(review): loop header truncated; it yields compatibleResult for the opposite cut direction
for(int k=0;k> compatibleSolutions = constructSolutions(compatibleResult);
for(List solution1 : solutions) {
double bestScore = 0.0;
for(List solution2 : compatibleSolutions) {
List combinedSolution = new ArrayList();
combinedSolution.addAll(solution1);
combinedSolution.addAll(solution2);
double sim = getTotalSimilarity(combinedSolution);
// stop scanning the partners of solution1 at the first combination below the relaxed cutoff
if(sim<(pTreeSimilarityCutoff-buffer))
break;
else {
List reactants = combinedSolution.stream().map(r -> r.synthon.getStructure()).collect(Collectors.toList());
// only combinations that supply one reactant per synthon set are reacted
if(reactants.size()!=synthonLib.getSynthons().size())
continue;
StereoMolecule product = SynthonReactor.react(reactants);
// placeholder flag: currently every product is accepted
boolean accept = true;
if(accept) {
StringBuilder resultString = new StringBuilder();
resultString.append(product.getIDCode());
resultString.append("____");
combinedSolution.stream().forEach(r -> {
resultString.append(r.synthon.getId());
resultString.append("____");
});
String rs = resultString.toString();
if(hits.containsKey(rs)) {
double oldSim = hits.get(rs);
// NOTE(review): condition truncated; the code that stores the hit in the map is not visible in this copy
if(oldSimbestScore) {
bestScore = sim;
}
}
}
}
}
}
}
}
}
private List> constructSolutions(SearchResult result) {
List> allSolutions = new ArrayList>();
List matchings = result.getResults();
for(FragmentMatching fm : matchings) {
List> solutionSet = new ArrayList>();
List solution = new ArrayList();
solution.add(fm);
solutionSet.add(solution);
if(fm.getFurtherMatches()!=null) {
Map> furtherMatches = fm.furtherMatchings;
for(int key : furtherMatches.keySet()) {
List> toBeDeleted = new ArrayList>();
List> toBeAdded = new ArrayList>();
for(List oneSolution : solutionSet) {
toBeDeleted.add(oneSolution);
for(FragmentMatching ffm : furtherMatches.get(key)) {
List newSolution = new ArrayList(oneSolution);
newSolution.add(ffm);
toBeAdded.add(newSolution);
}
}
solutionSet.removeAll(toBeDeleted);
solutionSet.addAll(toBeAdded);
}
}
allSolutions.addAll(solutionSet);
}
return allSolutions;
}
/**
 * NOTE(review): in this copy of the file the body of processFragmentTree is truncated and runs directly
 * into the signature of matchCompatibleFragments (text between "<" and ">" is missing). The comments
 * below describe only what is visible.
 *
 * processFragmentTree: takes the linker node of a fragment tree plus two lists for the tree edges and
 * their parents, and returns an int (the caller uses it as the head node of the fragment tree).
 *
 * matchCompatibleFragments: collects, for a query sub-tree, the matchings of all synthon sets other than
 * fragmentID that carry the given linker, reusing or filling the corresponding edge-link table cell.
 *
 * @param linkerNode index of the linker node in the fragment tree (processFragmentTree)
 * @param fragmentTree fragment tree to process (processFragmentTree)
 * @param treeEdges list for the tree edges (processFragmentTree)
 * @param treeEdgeParents list for the parents of the tree edges (processFragmentTree)
 */
private int processFragmentTree(int linkerNode, PharmacophoreTree fragmentTree, List treeEdges,List treeEdgeParents) {
int cutEdge = -1;
int headNode = -1;
// NOTE(review): the rest of processFragmentTree and the return type of matchCompatibleFragments are lost in the next line
for(int e=0;e matchCompatibleFragments(PharmacophoreTree queryTree, int querySubTreeHeadNode,
int querySubtreeCutEdge, int querySubtreeCutEdgeDir,int linkerID, int fragmentID) {
List querySubTreeEdgeIndeces = new ArrayList();
List querySubTreeEdgeParentIndeces = new ArrayList();
// collect the edges of the query sub-tree below the cut edge
queryTree.treeWalkBFS(querySubTreeHeadNode, querySubtreeCutEdge, querySubTreeEdgeIndeces, querySubTreeEdgeParentIndeces);
List allCompatibleMatchings = new ArrayList();
// column offset inside the edge-link table: 0 for cuts[0], 1 otherwise
int cutDirIndex = querySubtreeCutEdgeDir == cuts[0] ? 0: 1;
// NOTE(review): loop header truncated; i is used as a synthon-set index
for(int i=0;i matchings = new ArrayList();
if(fragmentID==i) //fragments with same ID are not compatible
continue;
// NOTE(review): expression truncated; presumably combines i and linkerID into the linkerToSynthons key - confirm
int index = (i< compatibleTrees = linkerToSynthons.get(index);
if(compatibleTrees==null)
continue;
SearchResult result = edgeLinkTable[(linkerID-1)*synthonLib.getSynthons().size()+i][2*querySubtreeCutEdge+cutDirIndex];
// reuse the cached matchings if this table cell has already been filled
if(result!=null)
matchings = result.getResults();
else {
for(PharmTreeSynthon synthon : compatibleTrees.keySet()) {
PharmacophoreTree fragTree = synthon.getPharmTree();
List fragmentTreeEdgeIndeces = new ArrayList();
List fragmentTreeEdgeParentIndeces = new ArrayList();
// res = {linker node index, linker edge index}
int[] res = compatibleTrees.get(synthon);
int fragmentHeadLinkerNode = res[0];
int fragmentLinkerEdge = res[1];
int[] linkerEdge = fragTree.getEdges().get(fragmentLinkerEdge);
// the head node is the end of the linker edge that is not the linker node itself
int fragmentHeadNode = linkerEdge[0] == fragmentHeadLinkerNode ? linkerEdge[1] : linkerEdge [0];
fragTree.treeWalkBFS(fragmentHeadNode, fragmentLinkerEdge,
fragmentTreeEdgeIndeces, fragmentTreeEdgeParentIndeces);
FragmentMatchSearch fms = new FragmentMatchSearch(this,queryTree, synthon,i, linkerID, querySubTreeHeadNode,fragmentHeadNode,
querySubtreeCutEdge, fragmentLinkerEdge,querySubtreeCutEdgeDir,querySubTreeEdgeIndeces,fragmentTreeEdgeIndeces,
querySubTreeEdgeParentIndeces,fragmentTreeEdgeParentIndeces);
FragmentMatching matching = fms.matchSearch();
matching.calculate();
matchings.add(matching);
}
matchings.sort((e1,e2) -> {
return Double.compare(e2.sim, e1.sim);}); // reverse order;
// keep only the nBestSolutions best matchings
if(matchings.size()> nBestSolutions)
matchings = matchings.subList(0,nBestSolutions);
// cache the matchings in the edge-link table for later calls
SearchResult sr = new SearchResult();
sr.setResult(matchings);
edgeLinkTable[(linkerID-1)*synthonLib.getSynthons().size()+i][2*querySubtreeCutEdge+cutDirIndex] = sr;
}
allCompatibleMatchings.addAll(matchings);
}
return allCompatibleMatchings;
}
/**
 * Calculates the total similarity of a combined solution from the feature matches of all its matchings.
 * Each feature match contributes its similarity weighted by the sum of its two sizes. The weighted sum
 * is normalized by a blend of the larger and the smaller total size, controlled by
 * TreeMatcher.NULL_MATCH_SCALING.
 *
 * @param matchings matchings that together form one solution
 * @return the total similarity, or 0.0 when both total sizes are zero (instead of the NaN that 0/0
 *         would give), consistent with FragmentMatching.calculate()
 */
private double getTotalSimilarity(List<FragmentMatching> matchings) {
    double weightedSim = 0.0;
    double size1 = 0.0;
    double size2 = 0.0;
    for(FragmentMatching matching : matchings) {
        for(FeatureMatch match : matching.getTreeMatching().getMatches()) {
            double[] sizes = match.getSizes();
            weightedSim += (sizes[0]+sizes[1])*match.getSim();
            size1 += sizes[0];
            size2 += sizes[1];
        }
    }
    // Nothing was matched on either side: 0/0 would be NaN, and NaN passes the "sim < cutoff" test
    // in the caller as "not below cutoff".
    if(size1==0 && size2==0)
        return 0.0;
    double normalization = TreeMatcher.NULL_MATCH_SCALING*Math.max(size1, size2)
            + (1.0-TreeMatcher.NULL_MATCH_SCALING)*Math.min(size1, size2);
    return 0.5*weightedSim/normalization;
}
public static class FragmentMatchSearch {
public static final int EXTENSION_MATCHES = 3; //number of explicitly considered extension matches at every recursion step
// In scoreExtensionMatch the extension score is weighted by ALPHA and the source score by (1-ALPHA).
public static final double ALPHA = 0.8; //weighting of source-tree match vs extension-tree match, takes values from 0 to 1
// NOTE(review): not referenced in the visible code, which uses TreeMatcher.NULL_MATCH_SCALING instead.
public static final double NULL_MATCH_SCALING = 0.3;
// NOTE(review): not referenced in the visible code.
public static final double SIMILARITY_SCALING_SPLIT_SCORE = 0.6;
// Two sizes count as balanced while their ratio stays within [1/MATCH_BALANCE, MATCH_BALANCE].
public static final double MATCH_BALANCE = 2.0; //named beta in the original publication
// NOTE(review): presumably the size/node-count limits used by assessMatch and assessExtensionMatch;
// the conditions that would use them are truncated in this copy - confirm.
public static final double MATCH_SIZE_LIMIT = 3.0;
public static final int MATCH_NODE_NR_LIMIT = 2;
public static final int EXTENSION_MATCH_NODE_NR_LIMIT = 3;
// Query tree and the tree of the synthon that is matched against it.
private PharmacophoreTree queryTree;
private PharmacophoreTree fragmentTree;
private PharmTreeSynthon synthon;
// Ids handed to every FragmentMatching created by this search.
private int fragmentTreeSynthonID;
private int fragmentTreeLinkerID;
// Start configuration of the recursion on the query side.
private int queryTreeHeadNode;
private int cutEdgeQueryTree;
private int cutEdgeFragmentTree;
private int cutDirQueryTree;
// Derived in the constructor from the orientation of the fragment cut edge.
private int cutDirFragmentTree;
private List querySubTreeEdgeIndeces;
private List querySubTreeEdgeParentIndeces;
// Start configuration of the recursion on the fragment side.
private int fragTreeHeadNode;
private List fragTreeEdgeIndeces;
private List fragTreeEdgeParentIndeces;
// Cache of already computed tree matchings, indexed by [2*queryEdge(+1)][2*fragmentEdge(+1)].
private TreeMatching[][] dpMatchMatrix;
// Enclosing search, used to look up compatible fragments when a link node is reached.
private FragmentPTreeSearch pTreeSearch;
// Node lists of the query tree and of the fragment tree.
private List queryNodes;
private List fragmentNodes;
public FragmentMatchSearch(FragmentPTreeSearch pTreeSearch, PharmacophoreTree queryTree, PharmTreeSynthon synthon, int fragmentTreeSynthonID, int fragmentTreeLinkerID, int queryTreeHeadNode, int fragTreeHeadNode, int cutEdgeQueryTree,
int cutEdgeFragmentTree, int cutDirQueryTree, List querySubTreeEdgeIndeces, List fragTreeEdgeIndeces,
List querySubTreeEdgeParentIndeces, List fragTreeEdgeParentIndeces) {
this.queryTree = queryTree;
this.synthon = synthon;
this.queryTreeHeadNode = queryTreeHeadNode;
this.cutEdgeQueryTree = cutEdgeQueryTree;
this.cutEdgeFragmentTree = cutEdgeFragmentTree;
this.cutDirQueryTree = cutDirQueryTree;
this.querySubTreeEdgeIndeces = querySubTreeEdgeIndeces;
this.querySubTreeEdgeParentIndeces = querySubTreeEdgeParentIndeces;
this.fragTreeHeadNode = fragTreeHeadNode;
this.fragTreeEdgeIndeces = fragTreeEdgeIndeces;
this.fragTreeEdgeParentIndeces = fragTreeEdgeParentIndeces ;
this.fragmentTreeSynthonID = fragmentTreeSynthonID;
this.fragmentTreeLinkerID = fragmentTreeLinkerID;
this.pTreeSearch = pTreeSearch;
this.queryNodes = queryTree.getNodes();
this.fragmentTree = synthon.getPharmTree();
this.fragmentNodes = fragmentTree.getNodes();
if(fragmentTree.getEdges().get(cutEdgeFragmentTree)[1] == fragTreeHeadNode)
cutDirFragmentTree = PharmacophoreTree.CUT_RIGHT;
else
cutDirFragmentTree = PharmacophoreTree.CUT_LEFT;
dpMatchMatrix = new TreeMatching[2*queryTree.getEdges().size()][2*fragmentTree.getEdges().size()];
}
/**
 * Starts the recursive match search from the configuration given to the constructor.
 *
 * @return the matching found for the configured query sub-tree and fragment tree
 */
public FragmentMatching matchSearch() {
    FragmentMatching rootMatching = recMatchSearch(
            queryTreeHeadNode, fragTreeHeadNode,
            cutEdgeQueryTree, cutEdgeFragmentTree,
            cutDirQueryTree, cutDirFragmentTree,
            querySubTreeEdgeIndeces, fragTreeEdgeIndeces,
            querySubTreeEdgeParentIndeces, fragTreeEdgeParentIndeces);
    return rootMatching;
}
/**
* Recursive part of the match-search algorithm, as described in:
* https://doi.org/10.1023/A:1008068904628
* The algorithm uses a dynamic-programming approach, whereby the results of matching subtrees are stored in a matrix
* and can be reused for increased performance.
* Index 1 refers to the query tree, index 2 to the fragment tree.
* NOTE(review): several loop headers and statements of this method are truncated in this copy of the file
* (text between "<" and ">" is missing); the comments describe only what is visible.
* @param headNode1 head node of the query sub-tree
* @param headNode2 head node of the fragment sub-tree
* @param cutEdge1 cut edge of the query tree
* @param cutEdge2 cut edge of the fragment tree
* @param cutDir1 cut direction in the query tree
* @param cutDir2 cut direction in the fragment tree
* @param subTreeEdgeIndeces1 edge indices of the query sub-tree
* @param subTreeEdgeIndeces2 edge indices of the fragment sub-tree
* @param subTreeEdgeParentIndeces1 parent edge indices of the query sub-tree
* @param subTreeEdgeParentIndeces2 parent edge indices of the fragment sub-tree
* @return the fragment matching of the two sub-trees, with calculate() already applied
*/
private FragmentMatching recMatchSearch(int headNode1, int headNode2, int cutEdge1, int cutEdge2, int cutDir1, int cutDir2,List subTreeEdgeIndeces1,
List subTreeEdgeIndeces2,List subTreeEdgeParentIndeces1, List subTreeEdgeParentIndeces2) {
TreeMatching treeMatching = new TreeMatching();
FragmentMatching fragmentMatching = new FragmentMatching(synthon, fragmentTreeSynthonID, fragmentTreeLinkerID );
// position in the dynamic-programming matrix: 2*edge for CUT_LEFT, 2*edge+1 otherwise
int index1 = cutDir1 == PharmacophoreTree.CUT_LEFT ? cutEdge1*2 : cutEdge1*2+1;
int index2 = cutDir2 == PharmacophoreTree.CUT_LEFT ? cutEdge2*2 : cutEdge2*2+1;
// node sets of both sub-trees, including their head nodes
Set nodes1 = queryTree.getNodesFromEdges(subTreeEdgeIndeces1);
nodes1.add(headNode1);
Set nodes2 = fragmentTree.getNodesFromEdges(subTreeEdgeIndeces2);
nodes2.add(headNode2);
if(fragmentTree.getNodes().get(headNode2).isLinkNode() && nodes2.size()==1) { //node is link node -> needs a new call for a match search
// look for fragments that have compatible links!
// the linker id is read from the first functionality entry of the link node
int linkerID = fragmentTree.getNodes().get(headNode2).getFunctionalities()[0];
List matchings = pTreeSearch.matchCompatibleFragments(queryTree, headNode1,
cutEdge1, cutDir1,linkerID,fragmentTreeSynthonID);
// this fragment itself contributes an empty tree matching; the rest comes from the further matches
fragmentMatching.setTreeMatching(treeMatching);
fragmentMatching.addFurtherMatches(linkerID, matchings);
}
else {
if(dpMatchMatrix[index1][index2]!= null) {
//result found in dynamic-programing matrix
treeMatching = dpMatchMatrix[index1][index2];
fragmentMatching.setTreeMatching(treeMatching);
}
else {
//check if match fulfills criteria, if not, create extension match
List matches = assessMatch(nodes1,nodes2);
if(matches!=null) {
treeMatching = new TreeMatching();
for(FeatureMatch fmatch : matches)
treeMatching.addFeatureMatch(fmatch);
treeMatching.calculate();
dpMatchMatrix[index1][index2] = treeMatching;
fragmentMatching.setTreeMatching(treeMatching);
}
else { // create extension match
List cuts1 = queryTree.getExtensionCuts(subTreeEdgeIndeces1,subTreeEdgeParentIndeces1);
List cuts2 = fragmentTree.getExtensionCuts(subTreeEdgeIndeces2,subTreeEdgeParentIndeces2);
// quick score for every pair of extension cuts
double[][] scores = new double[cuts1.size()][cuts2.size()];
// NOTE(review): loop header truncated; cut1 is presumably cuts1.get(i)
for(int i=0;i extensionNodes1 = new HashSet();
Set sourceNodes1 = new HashSet();
queryTree.enumerateExtensionCutFast(headNode1,cut1, subTreeEdgeIndeces1,
extensionNodes1, sourceNodes1);
// NOTE(review): loop header truncated; cut2 is presumably cuts2.get(j)
for(int j=0;j extensionNodes2 = new HashSet();
Set sourceNodes2 = new HashSet();
fragmentTree.enumerateExtensionCutFast(headNode2,cut2, subTreeEdgeIndeces2,
extensionNodes2, sourceNodes2);
scores[i][j] = scoreExtensionMatch(queryTree,fragmentTree,extensionNodes1, extensionNodes2,
sourceNodes1, sourceNodes2);
}
}
int[][] bestCuts = new int[cuts1.size()*cuts2.size()][2];
double[] bestScores = new double[cuts1.size()*cuts2.size()];
TreeUtils.retrieveHighestValuesFrom2DArray(scores, bestScores, bestCuts);
double bestScore = -Double.MAX_VALUE;
FragmentMatching bestMatching = null;
int counter = 0;
// fully enumerate best extension cuts and find the one with the best match
for(int[] cut:bestCuts) {
// NOTE(review): counter starts at 0 and is incremented after this check, so up to
// EXTENSION_MATCHES+1 candidates are evaluated - confirm whether ">=" was intended
if(counter>EXTENSION_MATCHES)
break;
// entries with index -1 are skipped
if(cut[0]==-1 || cut[1]==-1)
continue;
int[] cut1 = cuts1.get(cut[0]);
int[] cut2 = cuts2.get(cut[1]);
List> sourceTreeEdgeIndeces1 = new ArrayList>();
List> sourceTreeEdgeParentIndeces1 = new ArrayList>();
List sourceTreeHeadNodes1 = new ArrayList();
Set extensionNodes1 = new HashSet();
List cutEdges1 = new ArrayList();
List cutDirs1 = new ArrayList();
queryTree.enumerateExtensionCutFull(headNode1,cut1, subTreeEdgeIndeces1,
subTreeEdgeParentIndeces1, sourceTreeEdgeIndeces1,sourceTreeEdgeParentIndeces1,
sourceTreeHeadNodes1,extensionNodes1, cutEdges1, cutDirs1);
List> sourceTreeEdgeIndeces2 = new ArrayList>();
List> sourceTreeEdgeParentIndeces2 = new ArrayList>();
List sourceTreeHeadNodes2 = new ArrayList();
Set extensionNodes2 = new HashSet();
List cutEdges2 = new ArrayList();
List cutDirs2 = new ArrayList();
fragmentTree.enumerateExtensionCutFull(headNode2,cut2, subTreeEdgeIndeces2,
subTreeEdgeParentIndeces2, sourceTreeEdgeIndeces2,sourceTreeEdgeParentIndeces2,
sourceTreeHeadNodes2,extensionNodes2, cutEdges2, cutDirs2);
// candidates whose extension nodes cannot be matched are skipped and do not count
FeatureMatch extensionMatch = assessExtensionMatch(extensionNodes1,extensionNodes2);
if(extensionMatch==null)
continue;
counter++;
FragmentMatching[][] sourceTreeMatches = new FragmentMatching[sourceTreeHeadNodes1.size()][sourceTreeHeadNodes2.size()];
double[][] sourceTreeScores = new double[sourceTreeHeadNodes1.size()][sourceTreeHeadNodes2.size()];
// NOTE(review): truncated; the code that fills sourceTreeMatches/sourceTreeScores and declares
// "transpose" and "assignment" is not visible
for(int i=0;i 0 && sourceTreeScores[0].length>0) {
if (sourceTreeScores.length > sourceTreeScores[0].length)
{ //Cols must be >= Rows.
sourceTreeScores = HungarianAlgorithm.transpose(sourceTreeScores);
transpose = true;
}
// NOTE(review): the trailing ";" makes this "if" an empty statement, so the assignment below
// is not guarded by it
if(sourceTreeScores.length>0 && sourceTreeScores[0].length>0);
assignment = HungarianAlgorithm.hgAlgorithm(sourceTreeScores, "max");
if(transpose) {
// undo the transposition of the score matrix
sourceTreeScores = HungarianAlgorithm.transpose(sourceTreeScores);
// NOTE(review): truncated; the handling of the assignment is not visible
for(int a=0;a matchedSourceTrees1 = new HashSet();
Set matchedSourceTrees2 = new HashSet();
FragmentMatching extensionMatching = new FragmentMatching(synthon,fragmentTreeSynthonID,fragmentTreeLinkerID);
TreeMatching extensionTreeMatching = new TreeMatching();
extensionMatching.setTreeMatching(extensionTreeMatching);
extensionTreeMatching.addFeatureMatch(extensionMatch);
// NOTE(review): truncated; the visible remainder adds null matches to the extension tree matching
for(int i=0;i());
extensionTreeMatching.addFeatureMatch(nullMatch);
}
}
// NOTE(review): truncated; same as above for the source trees of the fragment side
for(int j=0;j(),
sourceTreeHeadNodes2.get(j),sourceTreeEdgeIndeces2.get(j));
extensionTreeMatching.addFeatureMatch(nullMatch);
}
}
extensionTreeMatching.calculate();
extensionMatching.calculate();
double extensionScore = extensionMatching.sim;
// ">=" lets a later candidate with an equal score replace an earlier one
if(extensionScore>=bestScore) {
bestScore = extensionScore;
bestMatching = extensionMatching;
}
}
// NOTE(review): bestMatching stays null when no candidate yields an extension match; in the visible
// code this would make the calculate() call below throw a NullPointerException - confirm
fragmentMatching = bestMatching;
}
}
}
fragmentMatching.calculate();
return fragmentMatching;
}
/**
* Decides whether two node sets are matched directly.
* Accept the match if:
* the match is a null match,
* at least one of the subtrees has a size of less than 3 atoms AND the trees are balanced, or
* at least one of the subtrees contains only one node AND the trees are balanced.
* If the number-of-nodes and size criteria are fulfilled but the trees are not balanced,
* null matches are formed.
* NOTE(review): the condition implementing these criteria is truncated in this copy of the file
* (text between "<" and ">" is missing).
* @param nodes1 node indices of the query sub-tree
* @param nodes2 node indices of the fragment sub-tree
* @return the feature matches to use, or null if the sets are not matched directly
*         (always null when nodes2 contains a link node)
*/
private List assessMatch(Set nodes1,
Set nodes2) {
List matches = null;
double size1 = getSizeOfNodeSet(nodes1,queryTree);
double size2 = getSizeOfNodeSet(nodes2,fragmentTree);
boolean balanced = isMatchBalanced(size1,size2);
boolean containsLinkNodes = false; //if one of the two collections contain a link nodes, they cannot be matched directly
// only the fragment-side nodes are checked for link nodes
for(int n : nodes2) {
if(fragmentTree.getNodes().get(n).isLinkNode()) {
containsLinkNodes = true;
break;
}
}
if(!containsLinkNodes) {
// NOTE(review): condition truncated; the visible remainder creates a single direct match of both node sets
if ((size1();
matches.add(getMatch(nodes1, nodes2));
}
else {
// two null matches: each node set is matched against an empty set
matches = new ArrayList();
matches.add(getMatch(nodes1,new HashSet()));
matches.add(getMatch(new HashSet(),nodes2));
}
}
}
return matches;
}
// NOTE(review): in this copy of the file assessExtensionMatch is truncated and runs directly into the
// parameter list of a getMatch overload taking (head node, edge list) pairs (text between "<" and ">"
// is missing). The comments describe only what is visible.
// assessExtensionMatch: checks whether the extension node sets of query (nodes1) and fragment (nodes2)
// may be matched; node sets containing a link node on the fragment side are not matched directly.
private FeatureMatch assessExtensionMatch( Set nodes1,
Set nodes2) {
FeatureMatch match = null;
double size1, size2;
size1 = getSizeOfNodeSet(nodes1,queryTree);
size2 = getSizeOfNodeSet(nodes2,fragmentTree);
boolean containsLinkNodes = false; //if one of the two collections contain a link nodes, they cannot be matched directly
for(int n : nodes2) {
if(fragmentTree.getNodes().get(n).isLinkNode()) {
containsLinkNodes = true;
break;
}
}
if(!containsLinkNodes) {
// both node sets must be non-empty
if(nodes1.size()!=0 && nodes2.size()!=0) {
// NOTE(review): condition truncated; the rest of assessExtensionMatch and the start of the getMatch overload are lost here
if ((size1 subTreeEdgeIndeces1,
int headNode2,List subTreeEdgeIndeces2) {
// getMatch overload: builds a feature match from two sub-trees given by head node and edge list;
// a head node of -1 marks the corresponding side as empty (null match)
FeatureMatch m = null;
int[][] match = new int[2][];
if(headNode1==-1) {
// query side empty: only the fragment nodes take part
Set nodes2 = fragmentTree.getNodesFromEdges(subTreeEdgeIndeces2);
nodes2.add(headNode2);
match[0] = new int[0];
match[1] = nodes2.stream().mapToInt(x -> x).toArray();
m = new FeatureMatch(match);
m.calculate(queryNodes,fragmentNodes);
}
else if(headNode2==-1) {
// fragment side empty: only the query nodes take part
Set nodes1 = queryTree.getNodesFromEdges(subTreeEdgeIndeces1);
nodes1.add(headNode1);
match[1] = new int[0];
match[0] = nodes1.stream().mapToInt(x -> x).toArray();
m = new FeatureMatch(match);
m.calculate(queryNodes,fragmentNodes);
}
else {
// regular match of both sub-trees, each including its head node
Set nodes1 = queryTree.getNodesFromEdges(subTreeEdgeIndeces1);
nodes1.add(headNode1);
Set nodes2 = fragmentTree.getNodesFromEdges(subTreeEdgeIndeces2);
nodes2.add(headNode2);
match[0] = nodes1.stream().mapToInt(x -> x).toArray();
match[1] = nodes2.stream().mapToInt(x -> x).toArray();
m = new FeatureMatch(match);
m.calculate(queryNodes,fragmentNodes);
}
return m;
}
/**
 * Builds and calculates the feature match between a set of query nodes and a set of fragment nodes.
 * Passing an empty set for one side yields a null match for the other side.
 *
 * @param nodes1 node indices in the query tree
 * @param nodes2 node indices in the fragment tree
 * @return the calculated feature match
 */
private FeatureMatch getMatch(Set<Integer> nodes1,Set<Integer> nodes2) {
    // row 0: query node indices, row 1: fragment node indices
    int[][] match = new int[][] {
        nodes1.stream().mapToInt(Integer::intValue).toArray(),
        nodes2.stream().mapToInt(Integer::intValue).toArray()
    };
    FeatureMatch m = new FeatureMatch(match);
    m.calculate(queryNodes,fragmentNodes);
    return m;
}
/**
 * Scores a pair of extension cuts as the ALPHA-weighted blend of the similarity of the extension nodes
 * and the similarity of the remaining source nodes. The two tree parameters are not read.
 *
 * @return ALPHA * extension similarity + (1 - ALPHA) * source similarity
 */
private double scoreExtensionMatch(PharmacophoreTree pTree1, PharmacophoreTree pTree2, Set extensionNodes1,
Set extensionNodes2, Set sourceNodes1, Set sourceNodes2) {
    final double extensionSimilarity =
            PharmacophoreNode.getSimilarity(extensionNodes1, extensionNodes2, queryNodes, fragmentNodes);
    final double sourceSimilarity =
            PharmacophoreNode.getSimilarity(sourceNodes1, sourceNodes2, queryNodes, fragmentNodes);
    return ALPHA*extensionSimilarity+(1-ALPHA)*sourceSimilarity;
}
/**
 * Tells whether two sizes are balanced, i.e. whether size1/size2 is neither above MATCH_BALANCE
 * nor below 1/MATCH_BALANCE. A NaN ratio (0/0) fails both comparisons and therefore counts as balanced.
 *
 * @param size1 size of the first node set
 * @param size2 size of the second node set
 * @return true unless the ratio lies outside [1/MATCH_BALANCE, MATCH_BALANCE]
 */
private static boolean isMatchBalanced(double size1,double size2) {
    final double ratio = size1/size2;
    final boolean firstTooLarge = ratio > MATCH_BALANCE;
    final boolean firstTooSmall = ratio < 1.0/MATCH_BALANCE;
    // keep the negated form: it preserves the "balanced" result for a NaN ratio
    return !(firstTooLarge || firstTooSmall);
}
/**
 * Sums the sizes of the given nodes of a pharmacophore tree.
 *
 * @param nodes indices of the nodes to sum over
 * @param pTree tree the indices refer to
 * @return the summed node size, 0 for an empty set
 */
public static double getSizeOfNodeSet(Set<Integer> nodes, PharmacophoreTree pTree) {
    double size = 0;
    // iterate the typed result directly; a raw List cannot be iterated as PharmacophoreNode
    for(PharmacophoreNode node : pTree.getNodes(nodes))
        size += node.getSize();
    return size;
}
// NOTE(review): in this copy of the file getFunctionalitiesOfNodeSet is truncated and runs directly into
// the field declarations of the FragmentMatching class (text between "<" and ">" is missing).
// Visible part: allocates one counter per entry of PharmacophoreNode.FUNCTIONALITY_WEIGHTS and walks the
// functionalities of every node of the given set; presumably it sums them up - confirm.
public static int[] getFunctionalitiesOfNodeSet(Set nodes, PharmacophoreTree pTree) {
int[] functionalities = new int[PharmacophoreNode.FUNCTIONALITY_WEIGHTS.length];
List n = pTree.getNodes(nodes);
for(PharmacophoreNode node : n) {
int[] functionalities2 = node.getFunctionalities();
// NOTE(review): loop truncated; the rest of the method and the start of FragmentMatching are lost in the next line
for(int i=0;i> furtherMatchings;
// similarity of this matching as computed by calculate()
private double sim;
// total sizes of the two sides as computed by calculate(); index 0 and 1 of FeatureMatch.getSizes()
private double size1;
private double size2;
/**
 * Creates an empty matching for the given synthon; the tree matching is set separately.
 *
 * @param synthon synthon this matching belongs to
 * @param synthonID id of the synthon set
 * @param linkerID id of the linker
 */
public FragmentMatching(PharmTreeSynthon synthon, int synthonID, int linkerID) {
    this.synthon = synthon;
    this.fragmentSynthonID = synthonID;
    this.fragmentLinkerID = linkerID;
    // diamond replaces the malformed "new HashMap>()" left behind by stripped type arguments
    this.furtherMatchings = new HashMap<>();
}
/**
 * Merges another fragment matching into this one: its tree matching (if any) is added to this tree
 * matching, and its further matches are merged per linker id. Lists for linker ids already present are
 * extended; lists for new linker ids are taken over as they are.
 *
 * @param fragmentMatching matching to merge into this one
 */
public void addFragmentMatch(FragmentMatching fragmentMatching) {
    TreeMatching otherTreeMatching = fragmentMatching.treeMatching;
    if(otherTreeMatching != null) {
        treeMatching.addMatching(otherTreeMatching);
    }
    if(fragmentMatching.furtherMatchings == null) {
        return;
    }
    fragmentMatching.furtherMatchings.forEach((linkerID, incoming) ->
        furtherMatchings.merge(linkerID, incoming, (existing, added) -> {
            existing.addAll(added);
            return existing;
        }));
}
/**
 * Sets the tree matching of this fragment matching.
 * @param treeMatching tree matching to store
 */
public void setTreeMatching(TreeMatching treeMatching) {
this.treeMatching = treeMatching;
}
/**
 * @return the stored tree matching
 */
public TreeMatching getTreeMatching() {
return treeMatching;
}
/**
 * Appends one matching to the further matches stored for a linker, creating the list on first use.
 *
 * @param linkerID id of the linker the matching belongs to
 * @param fragmentMatching matching to append
 */
public void addFurtherMatch(int linkerID, FragmentMatching fragmentMatching) {
    furtherMatchings.computeIfAbsent(linkerID, unused -> new ArrayList<>()).add(fragmentMatching);
}
/**
 * Appends several matchings to the further matches stored for a linker, creating the list on first use.
 *
 * @param linkerID id of the linker the matchings belong to
 * @param fragmentMatchings matchings to append, in the given order
 */
public void addFurtherMatches(int linkerID, List fragmentMatchings) {
    furtherMatchings.computeIfAbsent(linkerID, unused -> new ArrayList<>()).addAll(fragmentMatchings);
}
/**
 * @return the further matches, keyed by linker id (the live map, not a copy)
 */
public Map> getFurtherMatches() {
return furtherMatchings;
}
/**
 * @return the linker id given to the constructor
 */
public int getFragmentLinkerID() {
return fragmentLinkerID;
}
/**
 * @return the synthon id given to the constructor
 */
public int getFragmentSynthonID() {
return fragmentSynthonID;
}
/**
 * @return the synthon this matching belongs to
 */
public PharmTreeSynthon getFragmentPTree() {
return this.synthon;
}
/**
 * Updates sim, size1 and size2 of this matching.
 * Without further matches the values are taken from the tree matching. Otherwise the feature matches of
 * the tree matching and of the first further match of every linker are summed up (similarity weighted
 * by the sum of both sizes) and normalized by a blend of the larger and the smaller total size,
 * controlled by TreeMatcher.NULL_MATCH_SCALING. If both total sizes are zero the similarity is 0.
 */
public void calculate() {
    if(furtherMatchings.isEmpty()) {
        sim = treeMatching.getSim();
        size1 = treeMatching.getSize1();
        size2 = treeMatching.getSize2();
        return;
    }
    sim = 0.0;
    size1 = 0.0;
    size2 = 0.0;
    accumulateFeatureMatches(treeMatching);
    for(int linkerID : furtherMatchings.keySet()) {
        if(!furtherMatchings.get(linkerID).isEmpty()) {
            // the first entry is used as the highest scored additional match
            FragmentMatching bestFurtherMatch = furtherMatchings.get(linkerID).get(0);
            accumulateFeatureMatches(bestFurtherMatch.treeMatching);
        }
    }
    double larger = Math.max(size1, size2);
    double smaller = Math.min(size1, size2);
    double normalization = (TreeMatcher.NULL_MATCH_SCALING*larger+(1.0-TreeMatcher.NULL_MATCH_SCALING)*smaller);
    sim = (size1==0 && size2==0) ? 0.0 : 0.5*sim/normalization;
}

/** Adds the size-weighted similarities and the sizes of all feature matches of a tree matching to sim, size1 and size2. */
private void accumulateFeatureMatches(TreeMatching matching) {
    for(FeatureMatch match : matching.getMatches()) {
        double[] sizes = match.getSizes();
        double s = match.getSim();
        sim +=(sizes[0]+sizes[1])*s;
        size1+=sizes[0];
        size2+=sizes[1];
    }
}
}
/**
 * List of fragment matchings for one cell of the edge-link table.
 * Matchings added through addResult are kept sorted by descending similarity; a list installed through
 * setResult is stored as given.
 */
public static class SearchResult {
    List<FragmentMatching> results;

    private SearchResult() {
        results = new ArrayList<>();
    }

    /** @return the stored matchings (the live list, not a copy) */
    public List<FragmentMatching> getResults() {
        return results;
    }

    /**
     * Replaces the stored matchings. The list is stored as given and is not re-sorted.
     * @param results new list of matchings
     */
    public void setResult(List<FragmentMatching> results) {
        this.results = results;
    }

    /**
     * @param index position in the stored list
     * @return the matching at that position
     */
    public FragmentMatching getResult(int index) {
        return results.get(index);
    }

    /**
     * Adds several matchings and re-sorts the list by descending similarity.
     * @param results matchings to add
     */
    public void addResult(List<FragmentMatching> results) {
        this.results.addAll(results);
        sortBestFirst();
    }

    /**
     * Adds one matching and re-sorts the list by descending similarity.
     * @param result matching to add
     */
    public void addResult(FragmentMatching result) {
        results.add(result);
        sortBestFirst();
    }

    /** Sorts the stored matchings by descending similarity (reverse order). */
    private void sortBestFirst() {
        results.sort((c1,c2) -> Double.compare(c2.sim,c1.sim));
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy