com.actelion.research.chem.chemicalspaces.ptree.search.FragmentPTreeSearch Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openchemlib Show documentation
Show all versions of openchemlib Show documentation
Open Source Chemistry Library
package com.actelion.research.chem.chemicalspaces.ptree.search;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Set;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.chem.chemicalspaces.ptree.PharmTreeSynthonReactionHelper;
import com.actelion.research.chem.chemicalspaces.ptree.synthon.PharmTreeSynthon;
import com.actelion.research.chem.chemicalspaces.ptree.synthon.PharmTreeSynthonLibrary;
import com.actelion.research.chem.chemicalspaces.synthon.SynthonReactor;
import com.actelion.research.chem.descriptor.DescriptorHandlerSkeletonSpheres;
import com.actelion.research.chem.descriptor.pharmacophoretree.DescriptorHandlerPTree;
import com.actelion.research.chem.descriptor.pharmacophoretree.HungarianAlgorithm;
import com.actelion.research.chem.descriptor.pharmacophoretree.PharmacophoreNode;
import com.actelion.research.chem.descriptor.pharmacophoretree.PharmacophoreTree;
import com.actelion.research.chem.descriptor.pharmacophoretree.PharmacophoreTreeGenerator;
import com.actelion.research.chem.descriptor.pharmacophoretree.TreeMatcher;
import com.actelion.research.chem.descriptor.pharmacophoretree.TreeUtils;
import com.actelion.research.chem.descriptor.pharmacophoretree.TreeMatcher.FeatureMatch;
import com.actelion.research.chem.descriptor.pharmacophoretree.TreeMatcher.TreeMatching;
/**
* Based on: https://doi.org/10.1023/A:1011144622059
* Given a query molecule on one hand and lists of building blocks that can react according to given rules defined
* by a rxnHelper, the FragmentPTreeSearch searches the virtual space defined by the fragments and the reactions for compounds
* that have similar features compared to the query. The first step is the construction of the potential solution space,
* consisting of an edge-link table, which stores for every edge and direction in the query tree a list of matched fragments
* together with the score of the matchings, matched to the subtree of the query tree created by cutting this edge.
* Compatible fragments from the high-scoring solutions are reacted (product enumeration) and the similarity to the query is
* calculated. Solutions with similarities higher than a defined cutoff are stored and returned.
* Two modes: scaffold hopping returns results with high similarity regarding the PharmTree descriptor, while hit expansion
* aims to find hits with high chemical similarity (SkelSpheres).
* In order to make the search more efficient, initially only a chemically diverse set of fragments is matched (cluster centroids).
* Only if the similarity of the cluster centroid is below a given threshold, the remaining members of the cluster are matched.
*
* @author Joel Wahl
*
*/
public class FragmentPTreeSearch {
// synthon id and linker id are merged into one integer, in order to have a fast lookup for compatible synthons
// Packed key layout as decoded in search(): linkerID = key & LINKER_ID_MASK, synthonID = (key & SYNTHON_ID_MASK) >> SYNTHON_SHIFT.
// 7 is binary 000111 and 56 is binary 111000, so each id occupies three bits of the key.
private static final int LINKER_ID_MASK = 7;
private static final int SYNTHON_ID_MASK = 56;
private static final int SYNTHON_SHIFT = 3;
public static final double RECURSION_BREAK_SIMILARITY = 0.5;//if similarity of extension match is below this, the recursion breaks
// NOTE(review): no use of RECURSION_BREAK_SIZE_CUTOFF is visible in the readable part of this file - confirm it is still needed.
public static final double RECURSION_BREAK_SIZE_CUTOFF = 6;
// Subtracted from pTreeSimilarityCutoff in search() so that partial (subtree) matchings are filtered less strictly.
private static final double SUBTREE_MATCHING_BUFFER = 0.3;
private PharmacophoreTree queryTree;
private PharmTreeSynthonLibrary synthonLib;
private PharmTreeSynthonReactionHelper rxnHelper;
// Indexed as [(linkerID-1)*numberOfSynthonSets + synthonID][2*queryEdge + cutDirIndex]; filled in search() and matchCompatibleFragments().
private SearchResult[][] edgeLinkTable;
private double pTreeSimilarityCutoff;
// NOTE(review): the generic type arguments of the next declaration were lost ("Map>"); judging by its use in
// processFragments()/search() it presumably is a Map from Integer to Map of PharmTreeSynthon to int[] - confirm.
private Map> linkerToSynthons; //maps the linkerID+synthonID to the corresponding trees, the index of the linker node and the index of the edge attached to the linker node are stored
// The two cut directions {CUT_LEFT, CUT_RIGHT}; set in the constructor.
private int[] cuts;
// Maximum number of matchings kept per edge-link table entry; set to 2000 in the constructor.
private int nBestSolutions;
/**
 * Creates a search over the virtual space spanned by the given synthon library.
 * The reaction helper is taken from the library, both cut directions are enabled and the
 * number of matchings kept per edge-link table entry is set to 2000.
 *
 * @param queryMol              the query molecule; kept for interface compatibility, not read by this constructor
 * @param queryTree             pharmacophore tree of the query
 * @param synthonLib            synthon library that also supplies the reaction helper
 * @param pTreeSimilarityCutoff similarity threshold applied (minus a buffer) in search() and getHits()
 */
public FragmentPTreeSearch(StereoMolecule queryMol,PharmacophoreTree queryTree, PharmTreeSynthonLibrary synthonLib,
        double pTreeSimilarityCutoff) {
    this.queryTree = queryTree;
    this.synthonLib = synthonLib;
    this.rxnHelper = synthonLib.getReactionHelper();
    this.pTreeSimilarityCutoff = pTreeSimilarityCutoff;
    this.cuts = new int[] {PharmacophoreTree.CUT_LEFT, PharmacophoreTree.CUT_RIGHT};
    this.nBestSolutions = 2000;
    // The diamond operator takes the type arguments from the field declaration.
    this.linkerToSynthons = new HashMap<>();
}
/**
 * Prepares the lookup structures used by search(): determines the highest linker id known to the
 * reaction helper, allocates the edge-link table and registers every synthon in linkerToSynthons
 * together with an int[] {n, linkerEdge}.
 * NOTE(review): the two loop headers below were truncated when this file was extracted (text after
 * "for(int i=0;i" and "for(int j=0;j" is missing), so the computation of id, synthon, n and linkerEdge
 * is not visible here - restore it from the original source before relying on this method.
 */
private void processFragments() {
Set allLinkers = new HashSet();
int highestLinkerID = 0;
// collect all linker ids of all reactants
rxnHelper.getReactantsWithLinkers().values().stream().forEach(e -> allLinkers.addAll(e));
for(int linkerID : allLinkers)
if(linkerID>highestLinkerID)
highestLinkerID = linkerID;
List> allSynthons = synthonLib.getSynthons();
// one row per (linker id, synthon set), one column per (query edge, cut direction)
edgeLinkTable = new SearchResult[highestLinkerID*allSynthons.size()][queryTree.getEdges().size()*2];
for(int i=0;i synthons = allSynthons.get(i);
for(int j=0;j());
Map map = linkerToSynthons.get(id);
map.put(synthon,new int[] {n,linkerEdge});
}
}
}
}
}
/**
 * Runs the search: fills the edge-link table by matching every registered synthon against both sides
 * of every cut of the query tree, then collects the hits via getHits() and returns them ordered by
 * descending value.
 * NOTE(review): the loop headers at "for(int i=0;i" and "for(int cutDirIndex=0;cutDirIndex" were
 * truncated during extraction; the declaration of "result" is part of the missing text.
 *
 * @return the hits as filled by getHits(), sorted by value in reverse natural order
 */
public Map search() {
LinkedHashMap hits = new LinkedHashMap();
processFragments();
for(int i=0;i querySourceTreeEdgeIndeces = new ArrayList();
List queryTargetTreeEdgeIndeces = new ArrayList();
List querySourceTreeEdgeParentIndeces = new ArrayList();
List queryTargetTreeEdgeParentIndeces = new ArrayList();
// cut the query tree at edge i; headNodes[0] is used for the CUT_LEFT side, headNodes[1] for the CUT_RIGHT side
int [] headNodes = queryTree.initialCut(cuts[0],i,querySourceTreeEdgeIndeces, querySourceTreeEdgeParentIndeces,
queryTargetTreeEdgeIndeces , queryTargetTreeEdgeParentIndeces);
for(int l : linkerToSynthons.keySet()) {
// unpack synthon id and linker id from the merged key
int synthonID = (l & SYNTHON_ID_MASK)>>SYNTHON_SHIFT ;
int linkerID = l & LINKER_ID_MASK;
for(int cutDirIndex=0;cutDirIndex querySubTreeEdgeIndeces;
List querySubTreeEdgeParentIndeces;
int queryCutDir;
int querySubTreeHeadNode;
//match to source side of cut query tree
if(cuts[cutDirIndex] == PharmacophoreTree.CUT_LEFT) {
querySubTreeEdgeIndeces = querySourceTreeEdgeIndeces;
querySubTreeEdgeParentIndeces = querySourceTreeEdgeParentIndeces;
queryCutDir = PharmacophoreTree.CUT_LEFT;
querySubTreeHeadNode = headNodes[0];
}
else {
//match to target side of cut query tree
querySubTreeEdgeIndeces = queryTargetTreeEdgeIndeces;
querySubTreeEdgeParentIndeces = queryTargetTreeEdgeParentIndeces;
queryCutDir = PharmacophoreTree.CUT_RIGHT;
querySubTreeHeadNode = headNodes[1];
}
result = new SearchResult();
Map treeToLinkerHead = linkerToSynthons.get(l);
for(PharmTreeSynthon synthon: treeToLinkerHead.keySet()) {
FragmentMatching matching = matchFragmentSubtree(synthon, treeToLinkerHead,synthonID, linkerID, queryCutDir,
querySubTreeHeadNode, i, querySubTreeEdgeIndeces,
querySubTreeEdgeParentIndeces);
if(matching.sim > (pTreeSimilarityCutoff-SUBTREE_MATCHING_BUFFER)) {
// keep only matchings whose similarity exceeds the cutoff relaxed by SUBTREE_MATCHING_BUFFER
result.addResult(matching);
}
}
if(result.getResults().size()> nBestSolutions) { //prune matchings, store only best solutions
List prunedMatchings = result.getResults().subList(0, nBestSolutions);
result.setResult(prunedMatchings);
}
// store the matchings for this (linker, synthon set) row and (edge, cut direction) column
edgeLinkTable[(linkerID-1)*synthonLib.getSynthons().size()+synthonID][2*i+cutDirIndex] = result;
}
}
}
}
getHits(hits);
// re-collect into a LinkedHashMap so that the descending order by value is preserved
hits = hits.entrySet().stream().sorted(Map.Entry.comparingByValue().reversed()).collect(Collectors.toMap(Map.Entry::getKey,Map.Entry::getValue,
(e1, e2) -> e1, LinkedHashMap::new));
return hits;
}
/**
 * Matches the pharmacophore tree of one synthon against one side of a cut query tree.
 * The synthon's linker node and linker edge are read from {@code treeToLinkerHead}, the fragment
 * tree is prepared by processFragmentTree() and the actual matching is delegated to a
 * {@link FragmentMatchSearch}.
 *
 * @param synthon                       the synthon whose tree is matched
 * @param treeToLinkerHead              maps each synthon to {linker node index, linker edge index}
 * @param synthonID                     id of the synthon set the synthon belongs to
 * @param linkerID                      id of the linker the synthon is attached by
 * @param queryCutDir                   cut direction on the query side
 * @param queryTreeHeadNode             head node of the query subtree
 * @param queryCutEdge                  index of the cut edge in the query tree
 * @param querySubTreeEdgeIndeces       edge indices of the query subtree
 * @param querySubTreeEdgeParentIndeces parent edge indices of the query subtree
 * @return the matching produced by the match search
 */
private FragmentMatching matchFragmentSubtree(PharmTreeSynthon synthon, Map<PharmTreeSynthon,int[]> treeToLinkerHead,
        int synthonID, int linkerID, int queryCutDir, int queryTreeHeadNode, int queryCutEdge, List<Integer> querySubTreeEdgeIndeces,
        List<Integer> querySubTreeEdgeParentIndeces){
    // single lookup: element 0 is the linker node, element 1 the edge attached to it
    int[] linkerInfo = treeToLinkerHead.get(synthon);
    int linkerNode = linkerInfo[0];
    int linkerEdge = linkerInfo[1];

    List<Integer> fragmentTreeEdgeIndeces = new ArrayList<>();
    List<Integer> fragmentTreeEdgeParentIndeces = new ArrayList<>();
    PharmacophoreTree fragmentTree = synthon.getPharmTree();
    // fills the two edge lists and yields the head node of the fragment tree
    int fragTreeHeadNode = processFragmentTree(linkerNode, fragmentTree, fragmentTreeEdgeIndeces, fragmentTreeEdgeParentIndeces);

    FragmentMatchSearch matchSearch = new FragmentMatchSearch(this, queryTree, synthon, synthonID, linkerID,
            queryTreeHeadNode, fragTreeHeadNode, queryCutEdge, linkerEdge, queryCutDir,
            querySubTreeEdgeIndeces, fragmentTreeEdgeIndeces,
            querySubTreeEdgeParentIndeces, fragmentTreeEdgeParentIndeces);
    return matchSearch.matchSearch();
}
/**
 * Combines matchings of opposite cut directions from the edge-link table into full solutions,
 * reacts the corresponding synthons and records the products in {@code hits}.
 * The key of a hit is the product's ID code followed by the ids of the used synthons, each
 * terminated by "____".
 * NOTE(review): three lines of this method were truncated during extraction ("for(int i=0;i",
 * "for(int k=0;k" and "if(oldSim"); the loops over the table, the declarations of result,
 * cutDirIndex and compatibleResult, and the code that stores a hit are not visible here.
 *
 * @param hits map that receives the hits
 */
public void getHits(LinkedHashMap hits) {
// solutions are accepted down to pTreeSimilarityCutoff - buffer
final double buffer = 0.05;
for(int i=0;i> solutions = constructSolutions(result);
// inverse of the row index computation used when the table is filled
int linkerID = (i/synthonLib.getSynthons().size()) + 1;
int synthonID = i%synthonLib.getSynthons().size();
// find compatible matches
int compatibleCutDirIndex = cutDirIndex == 0 ? 1 : 0;
for(int k=0;k> compatibleSolutions = constructSolutions(compatibleResult);
for(List solution1 : solutions) {
double bestScore = 0.0;
for(List solution2 : compatibleSolutions) {
List combinedSolution = new ArrayList();
combinedSolution.addAll(solution1);
combinedSolution.addAll(solution2);
double sim = getTotalSimilarity(combinedSolution);
// NOTE(review): the break assumes compatibleSolutions is ordered by decreasing similarity - confirm
if(sim<(pTreeSimilarityCutoff-buffer))
break;
else {
List reactants = combinedSolution.stream().map(r -> r.synthon.getStructure()).collect(Collectors.toList());
// only solutions that use exactly one synthon per synthon set are reacted
if(reactants.size()!=synthonLib.getSynthons().size())
continue;
StereoMolecule product = SynthonReactor.react(reactants);
// accept is always true here, so the following branch is always taken
boolean accept = true;
if(accept) {
StringBuilder resultString = new StringBuilder();
resultString.append(product.getIDCode());
resultString.append("____");
combinedSolution.stream().forEach(r -> {
resultString.append(r.synthon.getId());
resultString.append("____");
});
String rs = resultString.toString();
if(hits.containsKey(rs)) {
double oldSim = hits.get(rs);
if(oldSimbestScore) {
bestScore = sim;
}
}
}
}
}
}
}
}
}
private List> constructSolutions(SearchResult result) {
List> allSolutions = new ArrayList>();
List matchings = result.getResults();
for(FragmentMatching fm : matchings) {
List> solutionSet = new ArrayList>();
List solution = new ArrayList();
solution.add(fm);
solutionSet.add(solution);
if(fm.getFurtherMatches()!=null) {
Map> furtherMatches = fm.furtherMatchings;
for(int key : furtherMatches.keySet()) {
List> toBeDeleted = new ArrayList>();
List> toBeAdded = new ArrayList>();
for(List oneSolution : solutionSet) {
toBeDeleted.add(oneSolution);
for(FragmentMatching ffm : furtherMatches.get(key)) {
List newSolution = new ArrayList(oneSolution);
newSolution.add(ffm);
toBeAdded.add(newSolution);
}
}
solutionSet.removeAll(toBeDeleted);
solutionSet.addAll(toBeAdded);
}
}
allSolutions.addAll(solutionSet);
}
return allSolutions;
}
/**
* Prepares a fragment tree for matching, starting from its linker node.
* NOTE(review): the body of this method and the beginning of the signature of
* matchCompatibleFragments were fused during extraction (the text after "for(int e=0;e" is
* missing), so what is returned and how the lists are filled is not visible here.
* @param linkerNode index of the linker node of the fragment tree
* @param fragmentTree the fragment tree
* @param treeEdges list that presumably receives the edge indices of the fragment tree - confirm
* @param treeEdgeParents list that presumably receives the parent edge indices - confirm
*/
private int processFragmentTree(int linkerNode, PharmacophoreTree fragmentTree, List treeEdges,List treeEdgeParents) {
int cutEdge = -1;
int headNode = -1;
for(int e=0;e matchCompatibleFragments(PharmacophoreTree queryTree, int querySubTreeHeadNode,
int querySubtreeCutEdge, int querySubtreeCutEdgeDir,int linkerID, int fragmentID) {
// From here on: body of matchCompatibleFragments. It matches the query subtree against the fragments of all
// other synthon sets that carry the given linker and caches the matchings in the edge-link table.
List querySubTreeEdgeIndeces = new ArrayList();
List querySubTreeEdgeParentIndeces = new ArrayList();
queryTree.treeWalkBFS(querySubTreeHeadNode, querySubtreeCutEdge, querySubTreeEdgeIndeces, querySubTreeEdgeParentIndeces);
List allCompatibleMatchings = new ArrayList();
int cutDirIndex = querySubtreeCutEdgeDir == cuts[0] ? 0: 1;
// NOTE(review): the loop header and the computation of "index" below were truncated during extraction
for(int i=0;i matchings = new ArrayList();
if(fragmentID==i) //fragments with same ID are not compatible
continue;
int index = (i< compatibleTrees = linkerToSynthons.get(index);
if(compatibleTrees==null)
continue;
SearchResult result = edgeLinkTable[(linkerID-1)*synthonLib.getSynthons().size()+i][2*querySubtreeCutEdge+cutDirIndex];
// reuse matchings already stored in the edge-link table
if(result!=null)
matchings = result.getResults();
else {
for(PharmTreeSynthon synthon : compatibleTrees.keySet()) {
PharmacophoreTree fragTree = synthon.getPharmTree();
List fragmentTreeEdgeIndeces = new ArrayList();
List fragmentTreeEdgeParentIndeces = new ArrayList();
int[] res = compatibleTrees.get(synthon);
int fragmentHeadLinkerNode = res[0];
int fragmentLinkerEdge = res[1];
int[] linkerEdge = fragTree.getEdges().get(fragmentLinkerEdge);
// the head node is the node at the other end of the linker edge
int fragmentHeadNode = linkerEdge[0] == fragmentHeadLinkerNode ? linkerEdge[1] : linkerEdge [0];
fragTree.treeWalkBFS(fragmentHeadNode, fragmentLinkerEdge,
fragmentTreeEdgeIndeces, fragmentTreeEdgeParentIndeces);
FragmentMatchSearch fms = new FragmentMatchSearch(this,queryTree, synthon,i, linkerID, querySubTreeHeadNode,fragmentHeadNode,
querySubtreeCutEdge, fragmentLinkerEdge,querySubtreeCutEdgeDir,querySubTreeEdgeIndeces,fragmentTreeEdgeIndeces,
querySubTreeEdgeParentIndeces,fragmentTreeEdgeParentIndeces);
FragmentMatching matching = fms.matchSearch();
matching.calculate();
matchings.add(matching);
}
matchings.sort((e1,e2) -> {
return Double.compare(e2.sim, e1.sim);}); // reverse order;
// keep only the nBestSolutions highest scored matchings
if(matchings.size()> nBestSolutions)
matchings = matchings.subList(0,nBestSolutions);
SearchResult sr = new SearchResult();
sr.setResult(matchings);
edgeLinkTable[(linkerID-1)*synthonLib.getSynthons().size()+i][2*querySubtreeCutEdge+cutDirIndex] = sr;
}
allCompatibleMatchings.addAll(matchings);
}
return allCompatibleMatchings;
}
/**
 * Calculates the total similarity of a list of matchings.
 * The similarities of all feature matches are weighted by the summed sizes of both matched sides
 * and normalized by a combination of the larger and the smaller total size, weighted with
 * {@code TreeMatcher.NULL_MATCH_SCALING}.
 * If both total sizes are zero the result is 0.0 instead of NaN (0/0); this is the same
 * convention that FragmentMatching.calculate() applies.
 *
 * @param matchings the matchings to combine; their tree matchings must be set
 * @return the total similarity, 0.0 if nothing with a size was matched
 */
private double getTotalSimilarity(List<FragmentMatching> matchings) {
    double weightedSim = 0.0;
    double size1 = 0.0;
    double size2 = 0.0;
    for (FragmentMatching matching : matchings) {
        for (FeatureMatch match : matching.getTreeMatching().getMatches()) {
            double[] sizes = match.getSizes();
            double s = match.getSim();
            weightedSim += (sizes[0]+sizes[1])*s;
            size1 += sizes[0];
            size2 += sizes[1];
        }
    }
    // avoid 0/0: a NaN similarity would pass the cutoff check in getHits() and distort the hit ordering
    if (size1==0 && size2==0)
        return 0.0;
    return 0.5*weightedSim/
            ((TreeMatcher.NULL_MATCH_SCALING*Math.max(size1, size2)+(1.0-TreeMatcher.NULL_MATCH_SCALING)*Math.min(size1, size2)));
}
/**
 * Matches one side of a cut query tree against the pharmacophore tree of a single synthon.
 * Results of subtree matchings are stored in dpMatchMatrix for reuse; when the fragment side
 * is reduced to a link node, the search continues in compatible fragments via the owning
 * FragmentPTreeSearch.
 */
public static class FragmentMatchSearch {
public static final int EXTENSION_MATCHES = 3; //number of explicitly considered extension matches at every recursion step
public static final double ALPHA = 0.8; //weighting of source-tree match vs extension-tree match, takes values from 0 to 1
// NOTE(review): the similarity formulas in this file read TreeMatcher.NULL_MATCH_SCALING, not this constant - confirm which one is intended
public static final double NULL_MATCH_SCALING = 0.3;
public static final double SIMILARITY_SCALING_SPLIT_SCORE = 0.6;
public static final double MATCH_BALANCE = 2.0; //named beta in the original publication
public static final double MATCH_SIZE_LIMIT = 3.0;
public static final int MATCH_NODE_NR_LIMIT = 2;
public static final int EXTENSION_MATCH_NODE_NR_LIMIT = 3;
private PharmacophoreTree queryTree;
// tree of the synthon, obtained from synthon.getPharmTree() in the constructor
private PharmacophoreTree fragmentTree;
private PharmTreeSynthon synthon;
private int fragmentTreeSynthonID;
private int fragmentTreeLinkerID;
private int queryTreeHeadNode;
private int cutEdgeQueryTree;
private int cutEdgeFragmentTree;
private int cutDirQueryTree;
// derived in the constructor from the position of the head node on the cut edge
private int cutDirFragmentTree;
private List querySubTreeEdgeIndeces;
private List querySubTreeEdgeParentIndeces;
private int fragTreeHeadNode;
private List fragTreeEdgeIndeces;
private List fragTreeEdgeParentIndeces;
// memoization table, indexed by [2*queryEdge (+1 if not CUT_LEFT)][2*fragmentEdge (+1 if not CUT_LEFT)]
private TreeMatching[][] dpMatchMatrix;
// owning search, used to look up matchings of compatible fragments
private FragmentPTreeSearch pTreeSearch;
private List queryNodes;
private List fragmentNodes;
public FragmentMatchSearch(FragmentPTreeSearch pTreeSearch, PharmacophoreTree queryTree, PharmTreeSynthon synthon, int fragmentTreeSynthonID, int fragmentTreeLinkerID, int queryTreeHeadNode, int fragTreeHeadNode, int cutEdgeQueryTree,
int cutEdgeFragmentTree, int cutDirQueryTree, List querySubTreeEdgeIndeces, List fragTreeEdgeIndeces,
List querySubTreeEdgeParentIndeces, List fragTreeEdgeParentIndeces) {
this.queryTree = queryTree;
this.synthon = synthon;
this.queryTreeHeadNode = queryTreeHeadNode;
this.cutEdgeQueryTree = cutEdgeQueryTree;
this.cutEdgeFragmentTree = cutEdgeFragmentTree;
this.cutDirQueryTree = cutDirQueryTree;
this.querySubTreeEdgeIndeces = querySubTreeEdgeIndeces;
this.querySubTreeEdgeParentIndeces = querySubTreeEdgeParentIndeces;
this.fragTreeHeadNode = fragTreeHeadNode;
this.fragTreeEdgeIndeces = fragTreeEdgeIndeces;
this.fragTreeEdgeParentIndeces = fragTreeEdgeParentIndeces ;
this.fragmentTreeSynthonID = fragmentTreeSynthonID;
this.fragmentTreeLinkerID = fragmentTreeLinkerID;
this.pTreeSearch = pTreeSearch;
this.queryNodes = queryTree.getNodes();
this.fragmentTree = synthon.getPharmTree();
this.fragmentNodes = fragmentTree.getNodes();
if(fragmentTree.getEdges().get(cutEdgeFragmentTree)[1] == fragTreeHeadNode)
cutDirFragmentTree = PharmacophoreTree.CUT_RIGHT;
else
cutDirFragmentTree = PharmacophoreTree.CUT_LEFT;
dpMatchMatrix = new TreeMatching[2*queryTree.getEdges().size()][2*fragmentTree.getEdges().size()];
}
/**
 * Starts the recursive match search with the head nodes, cut edges, cut directions and edge
 * lists this instance was constructed with.
 *
 * @return the matching produced by the recursive search
 */
public FragmentMatching matchSearch() {
    FragmentMatching matching = recMatchSearch(
            queryTreeHeadNode, fragTreeHeadNode,
            cutEdgeQueryTree, cutEdgeFragmentTree,
            cutDirQueryTree, cutDirFragmentTree,
            querySubTreeEdgeIndeces, fragTreeEdgeIndeces,
            querySubTreeEdgeParentIndeces, fragTreeEdgeParentIndeces);
    return matching;
}
/**
* Recursive part of the match-search algorithm, as described in:
* https://doi.org/10.1023/A:1008068904628
* The algorithm uses a dynamic-programming approach, whereby the results of matching subtrees are stored in a matrix
* and can be reused for increased performance.
* Index 1 always refers to the query tree, index 2 to the fragment tree.
* NOTE(review): several loop headers in this method were truncated during extraction (after "for(int i=0;i",
* "for(int j=0;j" and "for(int a=0;a"); the declarations of cut1/cut2 in the scoring loops, of transpose and
* assignment, and of nullMatch are part of the missing text.
* @param headNode1 head node of the query subtree
* @param headNode2 head node of the fragment subtree
* @param cutEdge1 cut edge in the query tree
* @param cutEdge2 cut edge in the fragment tree
* @param cutDir1 cut direction in the query tree
* @param cutDir2 cut direction in the fragment tree
* @param subTreeEdgeIndeces1 edge indices of the query subtree
* @param subTreeEdgeIndeces2 edge indices of the fragment subtree
* @param subTreeEdgeParentIndeces1 parent edge indices of the query subtree
* @param subTreeEdgeParentIndeces2 parent edge indices of the fragment subtree
* @return the matching of the two subtrees, with calculate() already applied
*/
private FragmentMatching recMatchSearch(int headNode1, int headNode2, int cutEdge1, int cutEdge2, int cutDir1, int cutDir2,List subTreeEdgeIndeces1,
List subTreeEdgeIndeces2,List subTreeEdgeParentIndeces1, List subTreeEdgeParentIndeces2) {
TreeMatching treeMatching = new TreeMatching();
FragmentMatching fragmentMatching = new FragmentMatching(synthon, fragmentTreeSynthonID, fragmentTreeLinkerID );
// positions in the dynamic-programming matrix: two slots per edge, one per cut direction
int index1 = cutDir1 == PharmacophoreTree.CUT_LEFT ? cutEdge1*2 : cutEdge1*2+1;
int index2 = cutDir2 == PharmacophoreTree.CUT_LEFT ? cutEdge2*2 : cutEdge2*2+1;
Set nodes1 = queryTree.getNodesFromEdges(subTreeEdgeIndeces1);
nodes1.add(headNode1);
Set nodes2 = fragmentTree.getNodesFromEdges(subTreeEdgeIndeces2);
nodes2.add(headNode2);
if(fragmentTree.getNodes().get(headNode2).isLinkNode() && nodes2.size()==1) { //node is link node -> needs a new call for a match search
// look for fragments that have compatible links!
int linkerID = fragmentTree.getNodes().get(headNode2).getFunctionalities()[0];
List matchings = pTreeSearch.matchCompatibleFragments(queryTree, headNode1,
cutEdge1, cutDir1,linkerID,fragmentTreeSynthonID);
// the tree matching of this fragment stays empty; the remaining query subtree is covered by the further matches
fragmentMatching.setTreeMatching(treeMatching);
fragmentMatching.addFurtherMatches(linkerID, matchings);
}
else {
if(dpMatchMatrix[index1][index2]!= null) {
//result found in dynamic-programing matrix
treeMatching = dpMatchMatrix[index1][index2];
fragmentMatching.setTreeMatching(treeMatching);
}
else {
//check if match fulfills criteria, if not, create extension match
List matches = assessMatch(nodes1,nodes2);
if(matches!=null) {
treeMatching = new TreeMatching();
for(FeatureMatch fmatch : matches)
treeMatching.addFeatureMatch(fmatch);
treeMatching.calculate();
dpMatchMatrix[index1][index2] = treeMatching;
fragmentMatching.setTreeMatching(treeMatching);
}
else { // create extension match
List cuts1 = queryTree.getExtensionCuts(subTreeEdgeIndeces1,subTreeEdgeParentIndeces1);
List cuts2 = fragmentTree.getExtensionCuts(subTreeEdgeIndeces2,subTreeEdgeParentIndeces2);
// score every pair of extension cuts with the fast enumeration
double[][] scores = new double[cuts1.size()][cuts2.size()];
for(int i=0;i extensionNodes1 = new HashSet();
Set sourceNodes1 = new HashSet();
queryTree.enumerateExtensionCutFast(headNode1,cut1, subTreeEdgeIndeces1,
extensionNodes1, sourceNodes1);
for(int j=0;j extensionNodes2 = new HashSet();
Set sourceNodes2 = new HashSet();
fragmentTree.enumerateExtensionCutFast(headNode2,cut2, subTreeEdgeIndeces2,
extensionNodes2, sourceNodes2);
scores[i][j] = scoreExtensionMatch(queryTree,fragmentTree,extensionNodes1, extensionNodes2,
sourceNodes1, sourceNodes2);
}
}
int[][] bestCuts = new int[cuts1.size()*cuts2.size()][2];
double[] bestScores = new double[cuts1.size()*cuts2.size()];
TreeUtils.retrieveHighestValuesFrom2DArray(scores, bestScores, bestCuts);
double bestScore = -Double.MAX_VALUE;
FragmentMatching bestMatching = null;
int counter = 0;
// fully enumerate best extension cuts and find the one with the best match
for(int[] cut:bestCuts) {
// NOTE(review): with ">" up to EXTENSION_MATCHES+1 cuts are evaluated before the loop breaks - confirm this is intended
if(counter>EXTENSION_MATCHES)
break;
if(cut[0]==-1 || cut[1]==-1)
continue;
int[] cut1 = cuts1.get(cut[0]);
int[] cut2 = cuts2.get(cut[1]);
List> sourceTreeEdgeIndeces1 = new ArrayList>();
List> sourceTreeEdgeParentIndeces1 = new ArrayList>();
List sourceTreeHeadNodes1 = new ArrayList();
Set extensionNodes1 = new HashSet();
List cutEdges1 = new ArrayList();
List cutDirs1 = new ArrayList();
queryTree.enumerateExtensionCutFull(headNode1,cut1, subTreeEdgeIndeces1,
subTreeEdgeParentIndeces1, sourceTreeEdgeIndeces1,sourceTreeEdgeParentIndeces1,
sourceTreeHeadNodes1,extensionNodes1, cutEdges1, cutDirs1);
List> sourceTreeEdgeIndeces2 = new ArrayList>();
List> sourceTreeEdgeParentIndeces2 = new ArrayList>();
List sourceTreeHeadNodes2 = new ArrayList();
Set extensionNodes2 = new HashSet();
List cutEdges2 = new ArrayList();
List cutDirs2 = new ArrayList();
fragmentTree.enumerateExtensionCutFull(headNode2,cut2, subTreeEdgeIndeces2,
subTreeEdgeParentIndeces2, sourceTreeEdgeIndeces2,sourceTreeEdgeParentIndeces2,
sourceTreeHeadNodes2,extensionNodes2, cutEdges2, cutDirs2);
FeatureMatch extensionMatch = assessExtensionMatch(extensionNodes1,extensionNodes2);
// cuts whose extension nodes cannot be matched are skipped and do not count towards the limit
if(extensionMatch==null)
continue;
counter++;
FragmentMatching[][] sourceTreeMatches = new FragmentMatching[sourceTreeHeadNodes1.size()][sourceTreeHeadNodes2.size()];
double[][] sourceTreeScores = new double[sourceTreeHeadNodes1.size()][sourceTreeHeadNodes2.size()];
for(int i=0;i 0 && sourceTreeScores[0].length>0) {
if (sourceTreeScores.length > sourceTreeScores[0].length)
{ //Cols must be >= Rows.
sourceTreeScores = HungarianAlgorithm.transpose(sourceTreeScores);
transpose = true;
}
// NOTE(review): the trailing ';' turns the following if into an empty statement, so the assignment below it runs unconditionally
if(sourceTreeScores.length>0 && sourceTreeScores[0].length>0);
assignment = HungarianAlgorithm.hgAlgorithm(sourceTreeScores, "max");
if(transpose) {
sourceTreeScores = HungarianAlgorithm.transpose(sourceTreeScores);
for(int a=0;a matchedSourceTrees1 = new HashSet();
Set matchedSourceTrees2 = new HashSet();
FragmentMatching extensionMatching = new FragmentMatching(synthon,fragmentTreeSynthonID,fragmentTreeLinkerID);
TreeMatching extensionTreeMatching = new TreeMatching();
extensionMatching.setTreeMatching(extensionTreeMatching);
extensionTreeMatching.addFeatureMatch(extensionMatch);
for(int i=0;i());
extensionTreeMatching.addFeatureMatch(nullMatch);
}
}
for(int j=0;j(),
sourceTreeHeadNodes2.get(j),sourceTreeEdgeIndeces2.get(j));
extensionTreeMatching.addFeatureMatch(nullMatch);
}
}
extensionTreeMatching.calculate();
extensionMatching.calculate();
double extensionScore = extensionMatching.sim;
if(extensionScore>=bestScore) {
bestScore = extensionScore;
bestMatching = extensionMatching;
}
}
// NOTE(review): bestMatching is still null if no cut yielded an extension match; calculate() below would then throw a NullPointerException - confirm this cannot happen
fragmentMatching = bestMatching;
}
}
}
fragmentMatching.calculate();
return fragmentMatching;
}
/**
* Decides whether two node sets can be matched directly.
* accept match if:
* match is a nullMatch
* at least one of the subtrees has a size of less than 3 atoms AND trees are balanced or
* at least one of the subtrees contains only one node AND trees are balanced
* if the number of nodes and size criterion is fulfilled, but the trees are not balanced:
* null-matches are formed!
* If the fragment node set contains a link node, no direct match is formed and null is returned.
* NOTE(review): the acceptance condition starting with "if ((size1" was truncated during extraction.
* @param nodes1 node indices of the query subtree
* @param nodes2 node indices of the fragment subtree
* @return the feature matches (one direct match or two null matches), or null if no match is formed
*/
private List assessMatch(Set nodes1,
Set nodes2) {
List matches = null;
double size1 = getSizeOfNodeSet(nodes1,queryTree);
double size2 = getSizeOfNodeSet(nodes2,fragmentTree);
boolean balanced = isMatchBalanced(size1,size2);
boolean containsLinkNodes = false; //if one of the two collections contain a link nodes, they cannot be matched directly
// only the fragment side (nodes2) is checked for link nodes
for(int n : nodes2) {
if(fragmentTree.getNodes().get(n).isLinkNode()) {
containsLinkNodes = true;
break;
}
}
if(!containsLinkNodes) {
if ((size1();
matches.add(getMatch(nodes1, nodes2));
}
else {
// two null matches: each side is matched against an empty node set
matches = new ArrayList();
matches.add(getMatch(nodes1,new HashSet()));
matches.add(getMatch(new HashSet(),nodes2));
}
}
}
return matches;
}
/**
 * Decides whether the extension nodes of two extension cuts can be matched; the fragment side must
 * not contain a link node and both node sets must be non-empty.
 * NOTE(review): the rest of this method and the beginning of the signature of the following
 * getMatch overload (taking head nodes and edge index lists) were fused during extraction after
 * "if ((size1"; the acceptance condition and the return statement are not visible here.
 */
private FeatureMatch assessExtensionMatch( Set nodes1,
Set nodes2) {
FeatureMatch match = null;
double size1, size2;
size1 = getSizeOfNodeSet(nodes1,queryTree);
size2 = getSizeOfNodeSet(nodes2,fragmentTree);
boolean containsLinkNodes = false; //if one of the two collections contain a link nodes, they cannot be matched directly
for(int n : nodes2) {
if(fragmentTree.getNodes().get(n).isLinkNode()) {
containsLinkNodes = true;
break;
}
}
if(!containsLinkNodes) {
if(nodes1.size()!=0 && nodes2.size()!=0) {
if ((size1 subTreeEdgeIndeces1,
int headNode2,List subTreeEdgeIndeces2) {
// From here on: body of the getMatch overload. A head node of -1 marks the side that is empty (null match).
FeatureMatch m = null;
int[][] match = new int[2][];
if(headNode1==-1) {
// null match: nothing on the query side
Set nodes2 = fragmentTree.getNodesFromEdges(subTreeEdgeIndeces2);
nodes2.add(headNode2);
match[0] = new int[0];
match[1] = nodes2.stream().mapToInt(x -> x).toArray();
m = new FeatureMatch(match);
m.calculate(queryNodes,fragmentNodes);
}
else if(headNode2==-1) {
// null match: nothing on the fragment side
Set nodes1 = queryTree.getNodesFromEdges(subTreeEdgeIndeces1);
nodes1.add(headNode1);
match[1] = new int[0];
match[0] = nodes1.stream().mapToInt(x -> x).toArray();
m = new FeatureMatch(match);
m.calculate(queryNodes,fragmentNodes);
}
else {
// regular match of the two subtrees including their head nodes
Set nodes1 = queryTree.getNodesFromEdges(subTreeEdgeIndeces1);
nodes1.add(headNode1);
Set nodes2 = fragmentTree.getNodesFromEdges(subTreeEdgeIndeces2);
nodes2.add(headNode2);
match[0] = nodes1.stream().mapToInt(x -> x).toArray();
match[1] = nodes2.stream().mapToInt(x -> x).toArray();
m = new FeatureMatch(match);
m.calculate(queryNodes,fragmentNodes);
}
return m;
}
/**
 * Builds a feature match between a set of query nodes and a set of fragment nodes and
 * calculates it against the node lists of both trees. Either set may be empty, which
 * yields a null match for the other side.
 *
 * @param nodes1 node indices in the query tree
 * @param nodes2 node indices in the fragment tree
 * @return the calculated feature match
 */
private FeatureMatch getMatch(Set<Integer> nodes1,Set<Integer> nodes2) {
    int[][] match = new int[2][];
    // row 0 holds the query nodes, row 1 the fragment nodes
    match[0] = nodes1.stream().mapToInt(Integer::intValue).toArray();
    match[1] = nodes2.stream().mapToInt(Integer::intValue).toArray();
    FeatureMatch m = new FeatureMatch(match);
    m.calculate(queryNodes,fragmentNodes);
    return m;
}
/**
 * Scores a pair of extension cuts as the ALPHA-weighted sum of the similarity of the extension
 * nodes and the similarity of the source nodes. The two tree parameters are not read; the node
 * lists of this instance are used instead.
 *
 * @param pTree1          query tree (not read)
 * @param pTree2          fragment tree (not read)
 * @param extensionNodes1 extension nodes in the query tree
 * @param extensionNodes2 extension nodes in the fragment tree
 * @param sourceNodes1    source nodes in the query tree
 * @param sourceNodes2    source nodes in the fragment tree
 * @return the weighted score
 */
private double scoreExtensionMatch(PharmacophoreTree pTree1, PharmacophoreTree pTree2, Set extensionNodes1,
        Set extensionNodes2, Set sourceNodes1, Set sourceNodes2) {
    final double extensionPart = PharmacophoreNode.getSimilarity(extensionNodes1, extensionNodes2, queryNodes, fragmentNodes);
    final double sourcePart = PharmacophoreNode.getSimilarity(sourceNodes1, sourceNodes2, queryNodes, fragmentNodes);
    return ALPHA*extensionPart+(1-ALPHA)*sourcePart;
}
/**
 * Checks whether two sizes are balanced, i.e. whether their ratio lies within
 * [1/MATCH_BALANCE, MATCH_BALANCE]. A ratio that is NaN (0/0) counts as balanced because
 * neither comparison is true for NaN.
 *
 * @param size1 size of the first subtree
 * @param size2 size of the second subtree
 * @return true if the ratio does not violate either bound
 */
private static boolean isMatchBalanced(double size1,double size2) {
    final double ratio = size1/size2;
    final boolean outOfBalance = ratio > MATCH_BALANCE || ratio < 1.0/MATCH_BALANCE;
    return !outOfBalance;
}
/**
 * Sums the sizes of the given nodes of a pharmacophore tree.
 *
 * @param nodes indices of the nodes to consider
 * @param pTree the tree the indices refer to
 * @return the summed node size, 0 for an empty set
 */
public static double getSizeOfNodeSet(Set<Integer> nodes, PharmacophoreTree pTree) {
    double size = 0;
    List<PharmacophoreNode> selected = pTree.getNodes(nodes);
    for (PharmacophoreNode node : selected)
        size += node.getSize();
    return size;
}
/**
 * Collects the functionalities of the given nodes of a pharmacophore tree into an array with one
 * slot per entry of PharmacophoreNode.FUNCTIONALITY_WEIGHTS.
 * NOTE(review): the inner loop, the end of this method, the end of FragmentMatchSearch and the header
 * plus first fields of the FragmentMatching class were lost during extraction (text after
 * "for(int i=0;i" is missing); the remaining lines are fields of FragmentMatching.
 */
public static int[] getFunctionalitiesOfNodeSet(Set nodes, PharmacophoreTree pTree) {
int[] functionalities = new int[PharmacophoreNode.FUNCTIONALITY_WEIGHTS.length];
List n = pTree.getNodes(nodes);
for(PharmacophoreNode node : n) {
int[] functionalities2 = node.getFunctionalities();
for(int i=0;i> furtherMatchings;
// similarity and total sizes of both matched sides, set by FragmentMatching.calculate()
private double sim;
private double size1;
private double size2;
/**
 * Creates a matching for the given synthon with an empty map of further matches.
 * The tree matching is not initialized here and has to be supplied via setTreeMatching().
 *
 * @param synthon   the matched synthon
 * @param synthonID id of the synthon set of the synthon
 * @param linkerID  id of the linker of the synthon
 */
public FragmentMatching(PharmTreeSynthon synthon, int synthonID, int linkerID) {
    this.synthon = synthon;
    this.fragmentSynthonID = synthonID;
    this.fragmentLinkerID = linkerID;
    // The diamond operator takes the type arguments from the field declaration.
    this.furtherMatchings = new HashMap<>();
}
/**
 * Merges another matching into this one: its tree matching is added to this tree matching and its
 * further matches are appended per linker id.
 * The lists of the other matching are copied, never adopted, so that later additions to this
 * matching do not modify {@code fragmentMatching}.
 *
 * @param fragmentMatching the matching to merge into this one
 * @throws NullPointerException if the other matching has a tree matching but this one has none set
 */
public void addFragmentMatch(FragmentMatching fragmentMatching) {
    if (fragmentMatching.treeMatching != null)
        treeMatching.addMatching(fragmentMatching.treeMatching);
    if (fragmentMatching.furtherMatchings != null) {
        fragmentMatching.furtherMatchings.forEach((linker, matchings) ->
                // a fresh list per new key avoids sharing a mutable list between two matchings
                furtherMatchings.computeIfAbsent(linker, k -> new ArrayList<>()).addAll(matchings));
    }
}
/**
 * Sets the tree matching of this fragment matching.
 *
 * @param treeMatching the tree matching to store (stored by reference)
 */
public void setTreeMatching(TreeMatching treeMatching) {
    this.treeMatching = treeMatching;
}
/**
 * @return the tree matching of this fragment matching, or null if none has been set
 */
public TreeMatching getTreeMatching() {
    return this.treeMatching;
}
/**
 * Appends one further matching to the list stored for the given linker id, creating the list
 * if the linker id has no list yet.
 *
 * @param linkerID         id of the linker the further matching is attached to
 * @param fragmentMatching the matching to append
 */
public void addFurtherMatch(int linkerID, FragmentMatching fragmentMatching) {
    furtherMatchings.computeIfAbsent(linkerID, id -> new ArrayList<>()).add(fragmentMatching);
}
/**
 * Appends several further matchings to the list stored for the given linker id, creating the
 * list if the linker id has no list yet.
 *
 * @param linkerID          id of the linker the further matchings are attached to
 * @param fragmentMatchings the matchings to append
 */
public void addFurtherMatches(int linkerID, List fragmentMatchings) {
    furtherMatchings.computeIfAbsent(linkerID, id -> new ArrayList<>()).addAll(fragmentMatchings);
}
public Map> getFurtherMatches() {
return furtherMatchings;
}
/**
 * @return the linker id passed to the constructor
 */
public int getFragmentLinkerID() {
    return this.fragmentLinkerID;
}
/**
 * @return the synthon id passed to the constructor
 */
public int getFragmentSynthonID() {
    return this.fragmentSynthonID;
}
/**
 * Returns the matched synthon (despite the method name, not its pharmacophore tree).
 *
 * @return the synthon passed to the constructor
 */
public PharmTreeSynthon getFragmentPTree() {
    return synthon;
}
/**
 * Updates sim, size1 and size2 of this matching.
 * Without further matches the values are taken over from the tree matching. Otherwise the
 * feature matches of the tree matching and of the first further matching of every linker are
 * accumulated (similarity weighted by the summed sizes of both sides) and normalized by a
 * combination of the larger and the smaller total size weighted with
 * {@code TreeMatcher.NULL_MATCH_SCALING}; if both total sizes are zero the similarity is 0.0.
 */
public void calculate() {
    if (furtherMatchings.isEmpty()) {
        sim = treeMatching.getSim();
        size1 = treeMatching.getSize1();
        size2 = treeMatching.getSize2();
        return;
    }
    sim = 0.0;
    size1 = 0.0;
    size2 = 0.0;
    accumulate(treeMatching);
    for (List<FragmentMatching> candidates : furtherMatchings.values()) {
        if (candidates.isEmpty())
            continue;
        // calculate from highest scored additional match
        accumulate(candidates.get(0).treeMatching);
    }
    final boolean nothingMatched = size1==0 && size2==0;
    sim = 0.5*sim/
            ((TreeMatcher.NULL_MATCH_SCALING*Math.max(size1, size2)+(1.0-TreeMatcher.NULL_MATCH_SCALING)*Math.min(size1, size2)));
    if (nothingMatched)
        sim = 0.0;
}

/** Adds the size-weighted similarities and the sizes of all feature matches of the given tree matching to sim, size1 and size2. */
private void accumulate(TreeMatching matching) {
    for (FeatureMatch match : matching.getMatches()) {
        double[] sizes = match.getSizes();
        double s = match.getSim();
        sim += (sizes[0]+sizes[1])*s;
        size1 += sizes[0];
        size2 += sizes[1];
    }
}
}
/**
 * Holds the matchings stored in one cell of the edge-link table. Matchings added through the
 * addResult methods are kept ordered by decreasing similarity.
 */
public static class SearchResult {
    List<FragmentMatching> results;

    private SearchResult() {
        results = new ArrayList<>();
    }

    /**
     * @return the internal list of matchings (not a copy)
     */
    public List<FragmentMatching> getResults() {
        return results;
    }

    /**
     * Replaces the stored matchings, keeping the order of the given list.
     * The list is copied: callers in this file pass {@code subList} views, and storing such a
     * view would keep the complete unpruned backing list reachable.
     *
     * @param results the matchings to store; null is treated as an empty list
     */
    public void setResult(List<FragmentMatching> results) {
        this.results = results == null ? new ArrayList<>() : new ArrayList<>(results);
    }

    /**
     * @param index position in the stored list
     * @return the matching at the given position
     */
    public FragmentMatching getResult(int index) {
        return results.get(index);
    }

    /**
     * Adds several matchings and re-sorts all stored matchings by decreasing similarity.
     *
     * @param results the matchings to add
     */
    public void addResult(List<FragmentMatching> results) {
        this.results.addAll(results);
        sortBySimilarityDescending();
    }

    /**
     * Adds one matching and re-sorts all stored matchings by decreasing similarity.
     *
     * @param result the matching to add
     */
    public void addResult(FragmentMatching result) {
        results.add(result);
        sortBySimilarityDescending();
    }

    /** Sorts the stored matchings so that the highest similarity comes first. */
    private void sortBySimilarityDescending() {
        results.sort((c1, c2) -> Double.compare(c2.sim, c1.sim)); // reverse order
    }
}
}