All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xmlcml.cml.tools.AtomMatcher Maven / Gradle / Ivy

/**
 *    Copyright 2011 Peter Murray-Rust et. al.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package org.xmlcml.cml.tools;

import java.util.ArrayList;
import java.util.List;

import nu.xom.Elements;

import org.apache.log4j.Logger;
import org.xmlcml.cml.base.AbstractTool;
import org.xmlcml.cml.base.CMLConstants;
import org.xmlcml.cml.base.CMLElement;
import org.xmlcml.cml.element.CMLAtom;
import org.xmlcml.cml.element.CMLAtomSet;
import org.xmlcml.cml.element.CMLLink;
import org.xmlcml.cml.element.CMLMap;
import org.xmlcml.cml.element.CMLMolecule;
import org.xmlcml.cml.element.CMLProductList;
import org.xmlcml.cml.element.CMLReactantList;
import org.xmlcml.cml.element.CMLReaction;
import org.xmlcml.cml.element.CMLMap.Direction;
import org.xmlcml.cml.element.CMLReaction.Component;
import org.xmlcml.euclid.IntMatrix;
import org.xmlcml.euclid.Real2;
import org.xmlcml.euclid.Util;

/**
 * manages information for matching atoms.
 * 
 * excludeElementTypes is list of elementTypes (CML) to exclude (empty => none)
 * includeElementTypes is list of elementTypes (CML) to include (empty => all)
 * 
 * include takes precedence over exclude.
 * useCharge determines whether formal atom charge is used.
 * useLabel determines whether atom label is used.
 * maximumAtomTreeLevel is the maximum level to explore trees to.
 * atomMatchStrategy is one of AtomMatcher.Strategy.MATCH_MORGAN, etc.
 * atomSetExpansionStrategy is one of AtomMatcher.Strategy.MATCH_GEOM, etc.
 */
public abstract class AtomMatcher extends AbstractTool {

	/** log4j logger for this class. */
	static Logger LOG = Logger.getLogger(AtomMatcher.class);

	/**
	 * strategies for matching atoms and expanding atomSets, together with
	 * titles describing how a mapping was obtained.
	 * 
	 * toString() returns the descriptive text given to the constructor.
	 */
	public enum Strategy {

		/**
		 * geometry is to be matched. deprecated in favour of more precise
		 * methods
		 */
		MATCH_GEOM("match geometry"),

		/**
		 * distance matrix should be used
		 */
		MATCH_DISTANCE_MATRIX("match via distance matrix"),

		/**
		 * minimum total distance should be used
		 */
		MATCH_TOTAL_DISTANCE("match via minimum total distance"),

		/**
		 * indicates that the Morgan algorithm is to be used for matching
		 */
		MATCH_MORGAN("match Morgan"),

		/**
		 * existing labels from Morgan algorithm are to be used for matching
		 */
		MATCH_MORGAN_LABEL("match Morgan label"),

		/**
		 * indicates that atomTreeLabelling is to be used for matching
		 */
		MATCH_ATOM_TREE_LABEL("match atomTreeLabel"),

		/**
		 * mapping from overlap of 2D coordinates.
		 */
		FROM_2DOVERLAP("from 2D overlap"),

		/**
		 * mapping from unique matched atoms.
		 */
		FROM_UNIQUE_MATCHED_ATOMS("from unique matched atoms"),

		/**
		 * mapping from distance matrix overlap.
		 */
		FROM_DISTANCE_MATRIX_OVERLAP("from distance matrix overlap"),

		// mapping
		/**
		 * mapping from overlap list.
		 * 
		 * NOTE(review): the text is misspelt ("OVERLAL"); it is left unchanged
		 * because it may already be stored as a title in existing maps.
		 */
		FROM_OVERLAP_LIST("FROM OVERLAL LIST"),
		/** atom sets of different sizes. */
		DIFFERENT_SIZES("ATOM SETS OF DIFFERENT SIZES"),
		/** 2D fit of the remaining atoms. */
		REMAINING2DFIT("REMAINING 2D FIT"),
		/** 2D fit of all atoms. */
		ALLATOM2DFIT("ALLATOM 2D FIT"),
		/** mapping is incomplete. */
		INCOMPLETE_MAPPING("INCOMPLETE MAPPING");

		/** descriptive text; fixed at construction. */
		final String value;

		private Strategy(String s) {
			this.value = s;
		}

		/**
		 * to string.
		 * 
		 * @return the descriptive text of this strategy
		 */
		@Override
		public String toString() {
			return value;
		}
	}

	/**
	 * maximum level for recursion (to avoid infinite regress).
	 */
	public final static int MAX_ATOM_TREE_LEVEL = 10;
	/** default elementTypes to include (empty). */
	public final static String[] DEFAULT_INCLUDE_ELEMENT_TYPES = {};
	/** default elementTypes to exclude (empty). */
	public final static String[] DEFAULT_EXCLUDE_ELEMENT_TYPES = {};
	/** default ligand elementTypes to include (empty). */
	public final static String[] DEFAULT_INCLUDE_LIGAND_ELEMENT_TYPES = {};
	/** default ligand elementTypes to exclude (empty). */
	public final static String[] DEFAULT_EXCLUDE_LIGAND_ELEMENT_TYPES = {};
	/** by default formal charge is not used. */
	public final static boolean DEFAULT_USE_CHARGE = false;
	/** by default atom labels are not used. */
	public final static boolean DEFAULT_USE_LABEL = false;
	/** default maximum tree level; same as MAX_ATOM_TREE_LEVEL. */
	public final static int DEFAULT_MAXIMUM_ATOM_TREE_LEVEL = MAX_ATOM_TREE_LEVEL;

	/**
	 * default for the user-specified level to apply to all atoms.
	 * 
	 * a negative value presumably means "no fixed level" (explore until atoms
	 * are unique or the maximum level is reached) - confirm against the code
	 * that consumes atomTreeLevel.
	 */
	public final static int DEFAULT_ATOM_TREE_LEVEL = -1;
	/** default strategy for matching atoms. */
	public final static AtomMatcher.Strategy DEFAULT_ATOM_MATCH_STRATEGY = AtomMatcher.Strategy.MATCH_ATOM_TREE_LABEL;
	/** default strategy for expanding atomSets. */
	public final static AtomMatcher.Strategy DEFAULT_ATOM_SET_EXPANSION = AtomMatcher.Strategy.MATCH_GEOM;
	
	/** holds all matching settings; the accessors of this class delegate to it. */
	protected AtomMatchObject atomMatchObject = new AtomMatchObject();
	/**
	 * get the object holding the matching settings.
	 * 
	 * @return the atomMatchObject (the live instance, not a copy)
	 */
	public AtomMatchObject getAtomMatchObject() {
		return atomMatchObject;
	}

	/**
	 * replace the object holding the matching settings.
	 * 
	 * no null check is made; the other accessors dereference this field.
	 * 
	 * @param atomMatchObject
	 *            settings to use from now on
	 */
	public void setAtomMatchObject(AtomMatchObject atomMatchObject) {
		this.atomMatchObject = atomMatchObject;
	}

	/*
	 * atomTreeLevel: the level to explore all atoms uniformly. if < 0 then
	 * explores until uniquification or maximumAtomTreeLevel reached.
	 * 
	 * (the fields below are commented out; the corresponding values are read
	 * and written through atomMatchObject. this is a plain comment so that it
	 * is not taken as the Javadoc of RESOLUTION.)
	 */
//	protected int atomTreeLevel;
//	protected AtomMatcher.Strategy atomMatchStrategy;
//	protected AtomMatcher.Strategy atomSetExpansionStrategy;

	// this is to overlap 2D atoms within a tolerance
	final static int RESOLUTION = 2;

	/**
	 * constructor. applies the default settings by calling init().
	 */
	public AtomMatcher() {
		// NOTE(review): init() is not final, so a subclass in this package
		// could override it and see a partially constructed object - confirm
		init();
	}

	/**
	 * sets every setting held by atomMatchObject to the corresponding
	 * DEFAULT_* constant of this class.
	 */
	void init() {
		atomMatchObject.setIncludeElementTypes(DEFAULT_INCLUDE_ELEMENT_TYPES);
		atomMatchObject.setExcludeElementTypes(DEFAULT_EXCLUDE_ELEMENT_TYPES);
		atomMatchObject.setIncludeLigandElementTypes(DEFAULT_INCLUDE_LIGAND_ELEMENT_TYPES);
		atomMatchObject.setExcludeLigandElementTypes(DEFAULT_EXCLUDE_LIGAND_ELEMENT_TYPES);
		atomMatchObject.setUseCharge(DEFAULT_USE_CHARGE);
		atomMatchObject.setUseLabel(DEFAULT_USE_LABEL);
		atomMatchObject.setMaximumAtomTreeLevel(DEFAULT_MAXIMUM_ATOM_TREE_LEVEL);
		atomMatchObject.setAtomTreeLevel(DEFAULT_ATOM_TREE_LEVEL);
		atomMatchObject.setAtomMatchStrategy(DEFAULT_ATOM_MATCH_STRATEGY);
		atomMatchObject.setAtomSetExpansionStrategy(DEFAULT_ATOM_SET_EXPANSION);
	}

	/**
	 * get the included elementTypes. delegates to atomMatchObject.
	 * 
	 * @return the elementTypes to include.
	 */
	public String[] getIncludeElementTypes() {
		return atomMatchObject.getIncludeElementTypes();
	}

	/**
	 * set the included elementTypes. also resets the excluded elementTypes to
	 * an empty array, so only one of the two lists is active after this call.
	 * 
	 * @param elementTypes
	 *            to include.
	 */
	public void setIncludeElementTypes(String[] elementTypes) {
		atomMatchObject.setIncludeElementTypes(elementTypes);
		atomMatchObject.setExcludeElementTypes(new String[] {});
	}

	/**
	 * get the excluded elementTypes. delegates to atomMatchObject.
	 * 
	 * @return the elementTypes to exclude.
	 */
	public String[] getExcludeElementTypes() {
		return atomMatchObject.getExcludeElementTypes();
	}

	/**
	 * set the excluded elementTypes. also resets the included elementTypes to
	 * an empty array, so only one of the two lists is active after this call.
	 * 
	 * @param elementTypes
	 *            to exclude.
	 */
	public void setExcludeElementTypes(String[] elementTypes) {
		atomMatchObject.setExcludeElementTypes(elementTypes);
		atomMatchObject.setIncludeElementTypes(new String[] {});
	}

	/**
	 * get the included ligand elementTypes. delegates to atomMatchObject.
	 * 
	 * @return the ligand elementTypes to include.
	 */
	public String[] getIncludeLigandElementTypes() {
		return atomMatchObject.getIncludeLigandElementTypes();
	}

	/**
	 * set the included ligand elementTypes. also resets the excluded ligand
	 * elementTypes to an empty array.
	 * 
	 * @param elementTypes
	 *            to include.
	 */
	public void setIncludeLigandElementTypes(String[] elementTypes) {
		atomMatchObject.setIncludeLigandElementTypes(elementTypes);
		atomMatchObject.setExcludeLigandElementTypes(new String[] {});
	}

	/**
	 * get the excluded ligand elementTypes. delegates to atomMatchObject.
	 * 
	 * @return the ligand elementTypes to exclude.
	 */
	public String[] getExcludeLigandElementTypes() {
		return atomMatchObject.getExcludeLigandElementTypes();
	}

	/**
	 * set the excluded ligand elementTypes. also resets the included ligand
	 * elementTypes to an empty array.
	 * 
	 * @param elementTypes
	 *            to exclude.
	 */
	public void setExcludeLigandElementTypes(String[] elementTypes) {
		atomMatchObject.setExcludeLigandElementTypes(elementTypes);
		atomMatchObject.setIncludeLigandElementTypes(new String[] {});
	}

	/**
	 * get the atomTreeLevel. delegates to atomMatchObject.
	 * 
	 * @return the atomTreeLevel.
	 */
	public int getAtomTreeLevel() {
		return atomMatchObject.getAtomTreeLevel();
	}

	/**
	 * set the atomTreeLevel. delegates to atomMatchObject.
	 * 
	 * @param atomTreeLevel
	 *            the level to explore all atoms to (the default,
	 *            DEFAULT_ATOM_TREE_LEVEL, is negative)
	 */
	public void setAtomTreeLevel(int atomTreeLevel) {
		atomMatchObject.setAtomTreeLevel(atomTreeLevel);
	}

	/**
	 * get the maximumAtomTreeLevel. delegates to atomMatchObject.
	 * 
	 * @return the maximumAtomTreeLevel.
	 */
	public int getMaximumAtomTreeLevel() {
		return atomMatchObject.getMaximumAtomTreeLevel();
	}

	/**
	 * set the maximumAtomTreeLevel. delegates to atomMatchObject.
	 * 
	 * @param maximumAtomTreeLevel
	 *            the maximum level to explore trees to
	 */
	public void setMaximumAtomTreeLevel(int maximumAtomTreeLevel) {
		atomMatchObject.setMaximumAtomTreeLevel(maximumAtomTreeLevel);
	}

	/**
	 * is charge to be used. delegates to atomMatchObject.
	 * 
	 * @return true if charge is to be used
	 */
	public boolean isUseCharge() {
		return atomMatchObject.isUseCharge();
	}

	/**
	 * set whether charge is to be used. delegates to atomMatchObject.
	 * 
	 * @param useCharge
	 *            true if charge is to be used
	 */
	public void setUseCharge(boolean useCharge) {
		atomMatchObject.setUseCharge(useCharge);
	}

	/**
	 * is label to be used. delegates to atomMatchObject.
	 * 
	 * @return true if label is to be used
	 */
	public boolean isUseLabel() {
		return atomMatchObject.isUseLabel();
	}

	/**
	 * set whether label is to be used. delegates to atomMatchObject.
	 * 
	 * @param useLabel
	 *            true if label is to be used.
	 */
	public void setUseLabel(boolean useLabel) {
		atomMatchObject.setUseLabel(useLabel);
	}

	/**
	 * get the atomMatchStrategy. delegates to atomMatchObject.
	 * 
	 * @return the atomMatchStrategy.
	 */
	public AtomMatcher.Strategy getAtomMatchStrategy() {
		return atomMatchObject.getAtomMatchStrategy();
	}

	/**
	 * set the atomMatchStrategy. delegates to atomMatchObject.
	 * 
	 * @param strategy
	 *            The atomMatchStrategy to set.
	 */
	public void setAtomMatchStrategy(AtomMatcher.Strategy strategy) {
		atomMatchObject.setAtomMatchStrategy(strategy);
	}

	/**
	 * get the atomSetExpansionStrategy. delegates to atomMatchObject.
	 * 
	 * @return the atomSetExpansionStrategy.
	 */
	public AtomMatcher.Strategy getAtomSetExpansionStrategy() {
		return atomMatchObject.getAtomSetExpansionStrategy();
	}

	/**
	 * set the atomSetExpansionStrategy. delegates to atomMatchObject.
	 * 
	 * @param strategy
	 *            The atomSetExpansionStrategy to set.
	 */
	public void setAtomSetExpansionStrategy(AtomMatcher.Strategy strategy) {
		atomMatchObject.setAtomSetExpansionStrategy(strategy);
	}

	/**
	 * skip atom. uses the previously set include and exclude elementTypes
	 * held by atomMatchObject.
	 * 
	 * @param atom
	 *            atom to test
	 * @return true if the atom is to be skipped
	 */
	public boolean skipAtom(CMLAtom atom) {
		return skipAtom(atom, 
				atomMatchObject.getIncludeElementTypes(), 
				atomMatchObject.getExcludeElementTypes());
	}

	/**
	 * skip ligand atom. uses the previously set include and exclude ligand
	 * elementTypes held by atomMatchObject.
	 * 
	 * @param atom
	 *            ligand atom to test
	 * @return true if the atom is to be skipped
	 */
	public boolean skipLigandAtom(CMLAtom atom) {
		return skipAtom(atom, atomMatchObject.getIncludeLigandElementTypes(),
				atomMatchObject.getExcludeLigandElementTypes());
	}

	/**
	 * decides whether an atom is skipped under the given include/exclude
	 * lists.
	 * 
	 * a non-empty exclude list is consulted first: the atom is skipped if its
	 * elementType is listed. otherwise a non-empty include list is consulted:
	 * the atom is skipped unless its elementType is listed (or the list is a
	 * single star). if both lists are empty no filtering was requested and
	 * nothing is skipped (empty exclude => none, empty include => all).
	 * 
	 * @param atom
	 *            atom to test
	 * @param include
	 *            elementTypes to include
	 * @param exclude
	 *            elementTypes to exclude
	 * @return true if the atom is to be skipped
	 */
	private boolean skipAtom(CMLAtom atom, String[] include, String[] exclude) {
		String elementType = atom.getElementType();
		if (exclude.length > 0) {
			return matches(elementType, exclude);
		}
		if (include.length > 0) {
			return !matches(elementType, include);
		}
		// both lists empty (the default): keep every atom. previously this
		// fell through to !matches(..., empty) and skipped every atom
		return false;
	}

	/**
	 * tests an elementType against a list of allowed types.
	 * 
	 * a null elementType or an empty list never matches. a list consisting of
	 * the single entry CMLConstants.S_STAR matches any non-null elementType.
	 * otherwise the elementType must equal one of the entries.
	 * 
	 * @param elementType
	 *            type to look for (may be null)
	 * @param allowedTypes
	 *            types to compare against
	 * @return true if allowed
	 */
	private boolean matches(String elementType, String[] allowedTypes) {
		if (elementType == null || allowedTypes.length == 0) {
			return false;
		}
		// wildcard: exactly one entry and it is the star
		if (allowedTypes.length == 1 && CMLConstants.S_STAR.equals(allowedTypes[0])) {
			return true;
		}
		for (int i = 0; i < allowedTypes.length; i++) {
			if (elementType.equals(allowedTypes[i])) {
				return true;
			}
		}
		return false;
	}

	/**
	 * tests whether two atoms have the same elementType.
	 * 
	 * @param atom1
	 *            first atom (may be null)
	 * @param atom2
	 *            second atom (may be null)
	 * @return true only if both atoms are non-null, atom1 has an elementType
	 *         and it equals the elementType of atom2
	 */
	public boolean matches(CMLAtom atom1, CMLAtom atom2) {
		if (atom1 == null) {
			return false;
		}
		String elementType1 = atom1.getElementType();
		if (elementType1 == null || atom2 == null) {
			return false;
		}
		return elementType1.equals(atom2.getElementType());
	}

	/**
	 * to string. reports only the useCharge and useLabel settings, each
	 * followed by CMLConstants.S_SEMICOLON.
	 * 
	 * @return the string
	 */
	public String toString() {
		StringBuilder text = new StringBuilder(CMLElement.S_EMPTY);
		text.append("useCharge: ");
		text.append(atomMatchObject.isUseCharge());
		text.append(CMLConstants.S_SEMICOLON);
		text.append("useLabel: ");
		text.append(atomMatchObject.isUseLabel());
		text.append(CMLConstants.S_SEMICOLON);
		return text.toString();
	}

	/**
	 * map two atomSets.
	 * 
	 * the work is done by match(), implemented by the concrete subclass; the
	 * string form of the current atomMatchStrategy is passed to it as the
	 * title. the shape of the returned map (e.g. how equivalence classes are
	 * represented) is decided by that implementation.
	 * 
	 * atomSets of different sizes are NOT rejected here (a size check existed
	 * but was disabled; FIXME may manage this later).
	 * 
	 * @param atomSet0
	 *            first atomSet
	 * @param atomSet1
	 *            second atomSet
	 * @throws RuntimeException
	 *             if either atomSet is null
	 * @return the map produced by match()
	 */
	public CMLMap mapAtomSets(CMLAtomSet atomSet0, CMLAtomSet atomSet1) {
		if (atomSet0 == null) {
			throw new RuntimeException("atomSet0 is null");
		}
		if (atomSet1 == null) {
			throw new RuntimeException("atomSet1 is null");
		}
		String title = this.getAtomMatchStrategy().toString();
		LOG.trace("Strategy: "+title);
		return match(atomSet0, atomSet1, title);
	}
	
	/**
	 * creates an empty map whose to- and from-types are both CMLAtom.TAG.
	 * 
	 * @return new atom-to-atom map
	 */
	protected static CMLMap makeMap() {
		final CMLMap atomToAtomMap = new CMLMap();
		final String atomTag = CMLAtom.TAG;
		atomToAtomMap.setToType(atomTag);
		atomToAtomMap.setFromType(atomTag);
		return atomToAtomMap;
	}
	
	/**
	 * match two atomSets. implemented by each concrete matcher.
	 * 
	 * mapAtomSets() calls this with the string form of the current
	 * atomMatchStrategy as title.
	 * 
	 * @param atomSet0
	 *            first atomSet
	 * @param atomSet1
	 *            second atomSet
	 * @param title
	 *            title supplied by the caller (presumably used to label the
	 *            resulting map - confirm in the implementations)
	 * @return map between the atomSets
	 */
	public abstract CMLMap match(
			CMLAtomSet atomSet0,CMLAtomSet atomSet1, String title);

	/**
	 * factory for matchers.
	 * 
	 * MATCH_MORGAN gives a MorganAtomMatcher, MATCH_GEOM an AtomMatcher2D and
	 * MATCH_ATOM_TREE_LABEL an AtomTreeMatcher. no other strategy is
	 * supported.
	 * 
	 * @param strategy
	 *            strategy to create a matcher for
	 * @throws RuntimeException
	 *             if strategy is null or not one of the three above
	 * @return new matcher
	 */
	public static AtomMatcher createAtomMatcher(Strategy strategy) {
		if (strategy == null) {
			throw new RuntimeException("Null strategy");
		}
		switch (strategy) {
		case MATCH_MORGAN:
			return new MorganAtomMatcher();
		case MATCH_GEOM:
			return new AtomMatcher2D();
		case MATCH_ATOM_TREE_LABEL:
			return new AtomTreeMatcher();
		default:
			throw new RuntimeException("Unknown strategy");
		}
	}


	/**
	 * map two molecules. obtains the atomSet of each molecule through its
	 * MoleculeTool and hands both to mapAtomSets().
	 * 
	 * @param molecule0
	 *            first molecule
	 * @param molecule1
	 *            second molecule
	 * @throws RuntimeException
	 *             if either molecule is null, or thrown by mapAtomSets()
	 * @return the map returned by mapAtomSets()
	 */
	public CMLMap mapMolecules(CMLMolecule molecule0, CMLMolecule molecule1)
			throws RuntimeException {
		if (molecule0 == null || molecule1 == null) {
			throw new RuntimeException("Cannot match null molecules");
		}
		CMLAtomSet atoms0 = MoleculeTool.getOrCreateTool(molecule0).getAtomSet();
		CMLAtomSet atoms1 = MoleculeTool.getOrCreateTool(molecule1).getAtomSet();
		return this.mapAtomSets(atoms0, atoms1);
	}

	/**
	 * matches molecules pairwise from two lists. Heuristics currently being
	 * worked out.
	 * 
	 * at present only molecules with the same number of atom children are
	 * matched; every other cell is left null. a list may contain null
	 * elements (this is a way of deleting molecules from the problem); any
	 * pair involving a null element gives a null cell. each non-null map gets
	 * to/from type CMLAtom.TAG and the title FROM_UNIQUE_MATCHED_ATOMS.
	 * matches can be partial; an incomplete match is currently not treated
	 * specially.
	 * 
	 * @param molecule0
	 *            first list of molecules
	 * @param molecule1
	 *            other list (could be a different length)
	 * @throws RuntimeException
	 *             problems in molecule atom-atom matching
	 * @return rectangular array of maps (row = molecule0, cols=molecule1)
	 */
	public CMLMap[][] getMoleculeMatch(List<CMLMolecule> molecule0,
			List<CMLMolecule> molecule1) {
		CMLMap[][] mapMatrix = new CMLMap[molecule0.size()][molecule1.size()];

		for (int i = 0; i < molecule0.size(); i++) {
			CMLMolecule mol0 = molecule0.get(i);
			if (mol0 == null) {
				continue;
			}
			for (int j = 0; j < molecule1.size(); j++) {
				CMLMolecule mol1 = molecule1.get(j);
				if (mol1 == null) {
					continue;
				}
				if (mol0.getCMLChildCount(CMLAtom.TAG) != mol1
						.getCMLChildCount(CMLAtom.TAG)) {
					continue; // molecules different sizes
				}
				CMLMap mapij = this.mapMolecules(mol0, mol1);
				if (mapij != null) {
					mapij.setToType(CMLAtom.TAG); // needed for split
					mapij.setFromType(CMLAtom.TAG);
					mapij.setTitle(AtomMatcher.Strategy.FROM_UNIQUE_MATCHED_ATOMS
							.toString());
					// a map with fewer links than mol0 has atoms is an
					// incomplete match; currently no action is taken
				}
				mapMatrix[i][j] = mapij;
			}
		}
		return mapMatrix;
	}


	/**
	 * coerces an object to an atomSet.
	 * 
	 * null gives a new empty atomSet, a CMLAtomSet is returned as is, and a
	 * CMLAtom is wrapped in a new atomSet containing just that atom.
	 * 
	 * @param object
	 *            null, CMLAtomSet or CMLAtom
	 * @throws RuntimeException
	 *             for any other type
	 * @return atomSet
	 */
	private CMLAtomSet makeAtomSet(Object object) {
		if (object == null) {
			return new CMLAtomSet();
		}
		if (object instanceof CMLAtomSet) {
			return (CMLAtomSet) object;
		}
		if (object instanceof CMLAtom) {
			CMLAtomSet singleton = new CMLAtomSet();
			singleton.addAtom((CMLAtom) object);
			return singleton;
		}
		throw new RuntimeException("Unknown type: "+object);
	}

	/**
	 * finds unique pair but does not remove them from atomSets. uses AtomTree
	 * to distinguish elements at present uses charge and labels but not
	 * hydrogens can be called iteratively until returns null might be useful
	 * later
	 * 
	 * @param reverseAtomMap1
	 * @param uniqueAtomMap2
	 * @param excludeElements
	 * @return atompair
	 */
	// private AtomPair getUniqueMatchedAtoms(Map reverseAtomMap1, Map
	// uniqueAtomMap2, String[] excludeElements) {
	// AtomPair pair = null;
	//
	// // iterate through labelling and try to find first unique equivalence
	// Set keySet1 = reverseAtomMap1.keySet();
	// Iterator it = keySet1.iterator();
	// while (it.hasNext()) {
	// CMLAtom atom1 = (CMLAtom) it.next();
	// String elementType = atom1.getElementType();
	// boolean omit = false;
	// for (String e : excludeElements) {
	// if (e.equals(elementType)) {
	// omit = true;
	// }
	// }
	// if (!omit) {
	// String atomTreeString = (String) reverseAtomMap1.get(atom1);
	// CMLAtom atom2 = (CMLAtom) uniqueAtomMap2.get(atomTreeString);
	// if (atom2 != null) {
	// pair = new AtomPair(atom1, atom2);
	// break;
	// }
	// }
	// }
	// return pair;
	// }
	/**
	 * get map of atoms with same neighbours. can be used recursively.
	 * 
	 * works on copies of both atomSets and repeatedly asks
	 * getAtomsWithSameMappedNeighbours00() for the next pair, adding one link
	 * (titled "sameMappedNeighbours") per pair until no pair is returned.
	 * 
	 * FIXME: as written in this class the helper is not yet implemented and
	 * throws RuntimeException("NYI"), so this method cannot currently
	 * complete unless the helper is overridden.
	 * 
	 * @param atomSet00
	 *            first atomSet
	 * @param atomSet
	 *            target
	 * @param currentMap
	 *            map of atoms already matched
	 * @return map or null if no new atoms
	 * @throws RuntimeException
	 */
	public CMLMap getAtomsWithSameMappedNeighbours(CMLAtomSet atomSet00,
			CMLAtomSet atomSet, CMLMap currentMap) {
		// copy the atomSets so as to avoid corruption of the originals
		CMLAtomSet fromCopy = new CMLAtomSet(atomSet00);
		CMLAtomSet toCopy = new CMLAtomSet(atomSet);
		CMLMap result = null;
		AtomPair pair;
		do {
			pair = this.getAtomsWithSameMappedNeighbours00(fromCopy, toCopy,
					currentMap);
			LOG.info("PAIR " + pair);
			if (pair != null) {
				CMLLink link = new CMLLink();
				link.setFrom(pair.getAtom1().getId());
				link.setTo(pair.getAtom2().getId());
				link.setTitle("sameMappedNeighbours");
				// the map is only created once there is something to put in it
				if (result == null) {
					result = new CMLMap();
				}
				result.addLink(link);
			}
		} while (pair != null);
		return result;
	}

	/**
	 * finds the next pair of atoms with the same mapped neighbours.
	 * 
	 * FIXME not yet implemented: always throws.
	 * 
	 * @param atomSet00
	 *            first atomSet (unused)
	 * @param atomSet
	 *            target atomSet (unused)
	 * @param currentMap
	 *            current map (unused)
	 * @throws RuntimeException
	 *             always ("NYI")
	 * @return never returns normally
	 */
	AtomPair getAtomsWithSameMappedNeighbours00(CMLAtomSet atomSet00,
			CMLAtomSet atomSet, CMLMap currentMap) {
		throw new RuntimeException("NYI");
	}

	/**
	 * split links containing sets of atoms into individual links.
	 * 
	 * iterates through the links returned by map.getElementLinks(CMLAtom.TAG).
	 * for each, builds the "from" and "to" sub-atomSets from the ids in the
	 * link's fromSet and toSet. if atomSetExpansionStrategy is MATCH_GEOM,
	 * MATCH_DISTANCE_MATRIX or MATCH_TOTAL_DISTANCE the two sub-atomSets are
	 * matched with an AtomMatcher2D, the original link is removed and the
	 * resulting map is merged into map. for any other strategy an error is
	 * logged and the link is left alone.
	 * 
	 * the 2D coordinates of both sub-atomSets are saved before matching and
	 * reapplied afterwards. a link whose ids cannot be resolved to atomSets is
	 * logged and skipped.
	 * 
	 * @param map
	 *            context for from links
	 * @param fromAtomSet
	 *            atomSet context for from links
	 * @param toAtomSet
	 *            atomSet context for to links
	 */
	public void splitAndProcessAtomSets(CMLMap map, CMLAtomSet fromAtomSet,
			CMLAtomSet toAtomSet) {

		AtomMatcher.Strategy strategy = this.getAtomSetExpansionStrategy();
		// enum constants can be compared with ==, which also tolerates null
		boolean geometric = strategy == AtomMatcher.Strategy.MATCH_GEOM
				|| strategy == AtomMatcher.Strategy.MATCH_DISTANCE_MATRIX
				|| strategy == AtomMatcher.Strategy.MATCH_TOTAL_DISTANCE;

		// iterate over a snapshot: links are removed from (and merged into)
		// the map inside the loop
		List<CMLLink> atomSetLinks = new ArrayList<CMLLink>(
				map.getElementLinks(CMLAtom.TAG));
		for (CMLLink atomSetLink : atomSetLinks) {
			String fromSet = Util
					.concatenate(atomSetLink.getFromSet(), CMLConstants.S_SPACE);
			String toSet = Util.concatenate(atomSetLink.getToSet(), CMLConstants.S_SPACE);
			if (!geometric) {
				LOG.error("*** No atomSet expansion strategy");
				continue;
			}

			// match geometry - will always come up with a result, even if
			// scientifically unlikely
			CMLAtomSet toSubAtomSet = toAtomSet
					.getAtomSetById(splitIds(toSet));
			CMLAtomSet fromSubAtomSet = fromAtomSet
					.getAtomSetById(splitIds(fromSet));
			if (fromSubAtomSet == null || toSubAtomSet == null) {
				// previously fell through to a NullPointerException
				LOG.error("Cannot resolve atomSets for link; fromSet: "
						+ fromSet + "; toSet: " + toSet);
				continue;
			}
			// save coords, map atoms and restore coords
			List<Real2> toCoords = toSubAtomSet.getVector2D();
			List<Real2> fromCoords = fromSubAtomSet.getVector2D();
			AtomMatcher matcher2d = new AtomMatcher2D();
			// NOTE(review): the "to" set is passed first and the "from" set
			// second, as in the original code - confirm that the direction
			// of the resulting links is the intended one
			CMLMap overlapMap = matcher2d.match(toSubAtomSet, fromSubAtomSet, "2d");
			toSubAtomSet.setVector2D(toCoords);
			fromSubAtomSet.setVector2D(fromCoords);

			map.removeLink(atomSetLink); // remove original link
			// merge maps, without overwriting
			map.mergeMap(overlapMap, Direction.NEITHER);
		}
	}

	/** splits a whitespace-separated string of ids into a list of ids. */
	private List<String> splitIds(String ids) {
		List<String> idList = new ArrayList<String>();
		for (String id : ids.split(S_WHITEREGEX)) {
			idList.add(id);
		}
		return idList;
	}

	/**
	 * creates a map with one link per entry of serials.
	 * 
	 * link i goes from the id of atoms1.get(i) to atomIds2[serials[i] - 1],
	 * i.e. serials holds 1-based positions in atomIds2.
	 * 
	 * @param atoms1
	 *            "from" atoms; must have at least serials.length entries
	 * @param atomIds2
	 *            ids of the "to" atoms
	 * @param serials
	 *            1-based index into atomIds2 for each atom of atoms1
	 * @return new map containing the links
	 */
	protected CMLMap createLinksInMap(List<CMLAtom> atoms1, String[] atomIds2,
			int[] serials) {
		CMLMap map = new CMLMap();
		for (int i = 0; i < serials.length; i++) {
			CMLLink link = new CMLLink();
			link.setFrom(atoms1.get(i).getId());
			link.setTo(atomIds2[serials[i] - 1]);
			map.addLink(link);
		}
		return map;
	}

	/**
	 * split links containing sets of atoms into individual links.
	 * 
	 * convenience form taking molecules: obtains the atomSet of each molecule
	 * through its MoleculeTool and delegates to
	 * splitAndProcessAtomSets(CMLMap, CMLAtomSet, CMLAtomSet).
	 * 
	 * @param map
	 *            with links
	 * @param fromMolecule
	 *            molecule context for from links
	 * @param toMolecule
	 *            molecule context for to links
	 */
	public void splitAndProcessAtomSets(CMLMap map, CMLMolecule fromMolecule,
			CMLMolecule toMolecule) {
		CMLAtomSet fromAtoms = MoleculeTool.getOrCreateTool(fromMolecule)
				.getAtomSet();
		CMLAtomSet toAtoms = MoleculeTool.getOrCreateTool(toMolecule)
				.getAtomSet();
		this.splitAndProcessAtomSets(map, fromAtoms, toAtoms);
	}


	/**
	 * match the products of a reaction to the reactants of another.
	 * 
	 * collects the product (plus product-spectator) molecules of reaction0
	 * and the reactant (plus reactant-spectator) molecules of reaction1,
	 * builds the matrix of pairwise maps with getMoleculeMatch() and merges
	 * selected maps into the result in two passes:
	 * first, pairs that are the only candidate in both their row and their
	 * column; second, pairs whose map covers every atom of both molecules and
	 * is the only such complete match in its row and column.
	 * 
	 * if reaction0 has no productList or reaction1 has no reactantList the
	 * annotated but empty map is returned.
	 * 
	 * @param reaction0
	 *            from reaction
	 * @param reaction1
	 *            to reaction
	 * @return map from product atoms of reaction0 to reactant atoms of
	 *         reaction1
	 * @throws RuntimeException
	 */
	public CMLMap matchProductsToNextReactants(CMLReaction reaction0,
			CMLReaction reaction1) {

		LOG.info("----- mapping products " + reaction0.getId()
				+ " to reactants " + reaction1.getId() + "  -----");
		CMLMap map = new CMLMap();
		// annotate map
		map.setFromType(CMLAtom.TAG);
		map.setFromContext(reaction0.getId(Component.PRODUCT.name));
		map.setToType(CMLAtom.TAG);
		map.setToContext(reaction1.getId(Component.REACTANT.name));
		map.setTitle("Product N to reactant N+1");

		// get all molecules in product of reaction 0

		CMLProductList productList0 = (CMLProductList) reaction0
				.getFirstCMLChild(CMLProductList.TAG);
		if (productList0 == null) {
			return map;
		}
		Elements productMolecules0 = productList0
				.getChildCMLElements(CMLMolecule.TAG);
		List<CMLMolecule> spectatorMolecule0 = reaction0
				.getSpectatorMolecules(Component.PRODUCT.number);
		List<CMLMolecule> molecule0 = concat(productMolecules0,
				spectatorMolecule0);
		// LOG.info("PRODUCTS " + productMolecules0.size()
		// + " [cmlSpectator " + spectatorMolecule0.size() + "] ="
		// + molecule0.size());

		// get all molecules in reactant of reaction 1

		CMLReactantList reactantList1 = (CMLReactantList) reaction1
				.getFirstCMLChild(CMLReactantList.TAG);
		if (reactantList1 == null) {
			return map;
		}
		Elements reactantMolecules1 = reactantList1
				.getChildCMLElements(CMLMolecule.TAG);
		List<CMLMolecule> spectatorMolecule1 = reaction1
				.getSpectatorMolecules(Component.REACTANT.number);
		List<CMLMolecule> molecule1 = concat(reactantMolecules1,
				spectatorMolecule1);
		LOG.info("REACTANTS " + reactantMolecules1.size() + " [cmlSpectator "
				+ spectatorMolecule1.size() + "] =" + molecule1.size());

		// find all maps between equal-atom molecules, including partial ones
		CMLMap[][] mapMatrix = this.getMoleculeMatch(molecule0, molecule1);

		// count how many maps might satisfy each row and column
		int[][] linkCount = new int[molecule0.size()][molecule1.size()];
		int[] rowCount = new int[molecule0.size()];
		int[] colCount = new int[molecule1.size()];
		for (int j = 0; j < molecule1.size(); j++) {
			colCount[j] = 0;
		}

		// select unique matches
		for (int i = 0; i < molecule0.size(); i++) {
			rowCount[i] = 0;
			for (int j = 0; j < molecule1.size(); j++) {
				linkCount[i][j] = (mapMatrix[i][j] == null) ? 0
						: mapMatrix[i][j].getCMLChildCount(CMLLink.TAG);
				LOG.info(linkCount[i][j] + CMLConstants.S_SPACE);
				if (linkCount[i][j] > 0) {
					rowCount[i]++;
					colCount[j]++;
				}
			}
			LOG.info("..." + rowCount[i]);
		}
		LOG.info(S_EMPTY);
		for (int j = 0; j < molecule1.size(); j++) {
			LOG.info(colCount[j] + CMLConstants.S_SPACE);
		}
		LOG.info(S_EMPTY);

		// extract best matches: one pair is taken per sweep and the sweep is
		// restarted until a sweep finds nothing
		boolean change = true;
		while (change) {
			change = false;
			for (int i = 0; i < molecule0.size(); i++) {
				// row/col with exactly one match
				if (rowCount[i] != 1) {
					continue;
				}
				for (int j = 0; j < molecule1.size(); j++) {
					if (colCount[j] != 1) {
						continue;
					}
					if (linkCount[i][j] > 0) {
						map.mergeMap(mapMatrix[i][j], CMLMap.Direction.NEITHER);
						// mark the cell, row and column as used
						mapMatrix[i][j] = null;
						linkCount[i][j] = -1;
						change = true;
						rowCount[i] = -1;
						colCount[j] = -1;
						break;
					}
				}
				if (change) {
					break;
				}
			}
		}
		IntMatrix linkMatrix = new IntMatrix(linkCount);
		LOG.info("LINKS-1 " + linkMatrix);

		// extract next best matches: the counts are reused, so reset them

		for (int i = 0; i < molecule0.size(); i++) {
			rowCount[i] = 0;
		}
		for (int j = 0; j < molecule1.size(); j++) {
			colCount[j] = 0;
		}

		// get counts of complete matches in each row and column
		for (int i = 0; i < molecule0.size(); i++) {
			CMLMolecule mol0 = molecule0.get(i);
			if (mol0 == null) {
				LOG.error("Null molecule: molecule0[" + i + CMLConstants.S_RSQUARE);
				continue;
			}
			for (int j = 0; j < molecule1.size(); j++) {
				CMLMolecule mol1 = molecule1.get(j);
				if (mol1 == null) {
					LOG.error("Null molecule: molecule1[" + j + CMLConstants.S_RSQUARE);
					continue;
				}
				// exact match with molecule?
				if (linkCount[i][j] == mol0.getCMLChildCount(CMLAtom.TAG)
						&& linkCount[i][j] == mol1
								.getCMLChildCount(CMLAtom.TAG)) {
					rowCount[i]++;
					colCount[j]++;
				}
			}
		}

		// now mark the perfect matches and remove the less than perfect matches
		for (int i = 0; i < molecule0.size(); i++) {
			// rows for null molecules have count 0 and are skipped here
			if (rowCount[i] != 1) {
				continue;
			}
			for (int j = 0; j < molecule1.size(); j++) {
				if (colCount[j] != 1) {
					continue;
				}
				if (linkCount[i][j] != molecule0.get(i).getCMLChildCount(
						CMLAtom.TAG)) {
					continue;
				}
				map.mergeMap(mapMatrix[i][j], CMLMap.Direction.NEITHER);
				// set rest of row and column to zero and mark match
				for (int ii = 0; ii < molecule0.size(); ii++) {
					linkCount[ii][j] = 0;
				}
				for (int jj = 0; jj < molecule1.size(); jj++) {
					linkCount[i][jj] = 0;
				}
				linkCount[i][j] = -1;
			}
		}

		linkMatrix = new IntMatrix(linkCount);
		LOG.info("LINKS-2 " + linkMatrix);

		LOG.info("**********************PROD2REACT");
		// CMLElement.debug(mapTool.getMap());
		// map.setDictRef(CMLReaction.MAP_REACTION_ATOM_MAP_COMPLETE);
		// FIXME this may not be true

		// tidyUnmappedAtoms(mapTool, fromAtomSetTool, toAtomSetTool);
		return map;

	}

	/**
	 * combine molecules into a single new list: first every node of
	 * moleculeNodes1 (each cast to CMLMolecule), then every element of m2.
	 * 
	 * @param moleculeNodes1
	 *            molecule elements; every node must be a CMLMolecule
	 * @param m2
	 *            further molecules
	 * @return list of molecules
	 */
	private static List<CMLMolecule> concat(Elements moleculeNodes1,
			List<CMLMolecule> m2) {
		List<CMLMolecule> m = new ArrayList<CMLMolecule>(
				moleculeNodes1.size() + m2.size());
		for (int i = 0; i < moleculeNodes1.size(); i++) {
			m.add((CMLMolecule) moleculeNodes1.get(i));
		}
		m.addAll(m2);
		return m;
	}

//	/**
//	 * moves the molecule children from a reactant+product pair into a
//	 * spectator. the molecules are identified by labelRefs in the spectator,
//	 * e.g. "Gly23". the connection tables in the molecules are expected to be
//	 * identical but might vary if hydrogen decoration was variable. The
//	 * coordinates are matched, so an atomMatcher is necessary
//	 * 
//	 * @param reaction
//	 * @throws RuntimeException
//	 */
//	public void moveReactantProductToSpectator(CMLReaction reaction) {
//		/*
//		 * -- spectators in Macie have two child molecules, a reactant and a
//		 * product
//		 */
//		/*
//		 * --     ...   
//		 *         
//		 *    
//		 *       --
//		 */
//		// do we have any spectators?
//		CMLSpectatorList spectatorList = (CMLSpectatorList) reaction
//				.getFirstCMLChild(CMLSpectatorList.TAG);
//		if (spectatorList == null) {
//			return;
//		}
//		// catches any empty lists
//		CMLReactantList reactantList = (CMLReactantList) reaction
//				.getFirstCMLChild(CMLReactantList.TAG);
//		Elements reactants = reactantList.getChildCMLElements(CMLReactant.TAG);
//		CMLProductList productList = (CMLProductList) reaction
//				.getFirstCMLChild(CMLProductList.TAG);
//		Elements products = productList.getChildCMLElements(CMLProduct.TAG);
//		Elements spectators = spectatorList.getChildCMLElements("spectator");//
//		// iterate through spectators and transfer
//
//		for (int i = 0; i < spectators.size(); i++) {
//			CMLSpectator spectator = (CMLSpectator) spectators.get(i);
//			String ref = ((CMLMolecule) spectator
//					.getFirstCMLChild(CMLMolecule.TAG)).getRef();
//			if (ref != null) {
//				spectator.getFirstCMLChild(CMLMolecule.TAG).detach();
//				spectator.moveLabelledReactantsProducts(reactants, ref);
//				spectator.moveLabelledReactantsProducts(products, ref);
//				if (spectator.getCMLChildCount(CMLMolecule.TAG) < 2) {
//					LOG.error("Cannot find 2 spectators for "
//							+ reaction.getId() + "; only found "
//							+ spectator.getCMLChildCount(CMLMolecule.TAG)
//							+ " cmlSpectator molecules");
//					break;
//				}
//
//				// spectatorList now contains reactant molecule followed by
//				// product molecule
//				// match on connectivity, then split atom sets and append final
//				// map to cmlSpectator
//
//				CMLMolecule reactantMolecule = (CMLMolecule) spectator
//						.getFirstCMLChild(CMLMolecule.TAG);
//				CMLMolecule productMolecule = (CMLMolecule) spectator
//						.getChildCMLElement(CMLMolecule.TAG, 1);
//				// LOG.error("Reactant
//				// (
//				// "+reactantMolecule.getId()+S_SLASH+reactantTool.getAtomCount()+"
//				// )
//				// and product
//				//("+productMolecule.getId()+S_SLASH+productTool.getAtomCount()+"
//				// )
//				// spectators of different lengths");
//				// continue;
//				// }
//				// CMLMap spectatorMap = matchMoleculePair(reactantTool,
//				// productTool, MATCH_MORGAN);
//				// CMLElement.debug(reactantMolecule);
//				// CMLElement.debug(productMolecule);
//				CMLMap spectatorMap = null;
//				try {
//					spectatorMap = this.mapMolecules(productMolecule,
//							reactantMolecule);
//				} catch (RuntimeException cmle) {
//					// molecules of different lengths, create a zero length map
//					LOG.error("Reactant (" + reactantMolecule.getId() + CMLConstants.S_SLASH
//							+ reactantMolecule.getAtomCount()
//							+ ") and product (" + productMolecule.getId()
//							+ CMLConstants.S_SLASH + productMolecule.getAtomCount()
//							+ ") spectators " + cmle);
//					spectatorMap = new CMLMap();
//				}
//				// if (spectatorMap != null) {
//
//				spectatorMap
//						.setTitle(CMLReaction.FROM_SPECTATOR_PRODUCT_TO_REACTANT);
//				CMLMolecule mol1 = (CMLMolecule) ((CMLSpectator) spectators
//						.get(i)).getChildCMLElement(CMLMolecule.TAG, 1);
//				spectatorMap.setFromContext(mol1.getId());
//				CMLMolecule mol0 = (CMLMolecule) ((CMLSpectator) spectators
//						.get(i)).getFirstCMLChild(CMLMolecule.TAG);
//				spectatorMap.setToContext(mol0.getId());
//				spectatorMap.setFromType(CMLAtom.TAG);
//				spectatorMap.setToType(CMLAtom.TAG);
//
//				this.splitAndProcessAtomSets(spectatorMap, productMolecule,
//						reactantMolecule);
//				spectators.get(i).appendChild(spectatorMap);
//
//				if (spectatorMap.getCMLChildCount(CMLLink.TAG) != reactantMolecule
//						.getCMLChildCount(CMLAtom.TAG)) {
//					LOG.error("Unequal cmlSpectator map and molecule");
//					LOG
//							.error("**ERROR**(move reactant to spectator) Unequal cmlSpectator map and molecule***");
//					// CMLElement.debug(spectatorMap);
//					// CMLElement.debug(reactant.getMolecule());
//					CMLAtomSet fromAtomSet = MoleculeTool.getOrCreateTool(
//							productMolecule).getAtomSet();
//					CMLAtomSet toAtomSet = MoleculeTool.getOrCreateTool(
//							reactantMolecule).getAtomSet();
//					fromAtomSet.removeAtoms(spectatorMap, toAtomSet);
//					// have to do an atomOverlap
//					AtomMatcher geomAtomMatcher = new AtomMatcher();
//					geomAtomMatcher
//							.setAtomMatchStrategy(AtomMatcher.Strategy.MATCH_TOTAL_DISTANCE);
//					CMLMap overlapMap = geomAtomMatcher.createMapFrom2DOverlap(
//							fromAtomSet, toAtomSet);
//					spectatorMap.mergeMap(overlapMap, CMLMap.Direction.NEITHER);
//					fromAtomSet.removeAtoms(overlapMap, toAtomSet);
//
//					spectatorMap
//							.setDictRef(CMLReaction.MAP_REACTION_ATOM_MAP_COMPLETE);
//					// FIXME - replace by subroutine
//
//				}
//				// }
//			}
//		}
//
//		// removes empty spectatorLists
//
//		if (spectatorList.getChildElements().size() == 0) {
//			spectatorList.detach();
//		}
//	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy