All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xmlcml.cml.tools.MorganMD5 Maven / Gradle / Ivy

/**
 *    Copyright 2011 Peter Murray-Rust et. al.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package org.xmlcml.cml.tools;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.math.fraction.Fraction;
import org.xmlcml.cml.element.CMLMolecule;
import org.xmlcml.euclid.Util;

/**
 * A counted MD5 hash of a molecule's Morgan equivalence string.
 *
 * <p>An instance pairs a hash string (matching {@link #MD5PATTERN}) with the
 * number of times that hash occurs among the child molecules of a parent.
 * The serialized ("counted") form of one instance is
 * {@code COUNT_START + count + COUNT_END + hash}, e.g. {@code *2*0123...}.
 * Several counted forms may be concatenated into one string and parsed back
 * with {@link #interpretCountedMorganString(String)}.
 *
 * <p>Instances are immutable and are only created through the static
 * factory/parsing methods of this class.
 */
public class MorganMD5 {
	/** Delimiter written before the count in a counted string. */
	public static final String COUNT_START = "*";
	/** Delimiter written after the count in a counted string. */
	public static final String COUNT_END = "*";
	/** Regex source for a bare hash: 25 to 33 lower-case hexadecimal characters. */
	public final static String MD5PATTERN_S = "[0-9a-f]{25,33}";
	/** Compiled form of {@link #MD5PATTERN_S}. */
	public final static Pattern MD5PATTERN = Pattern.compile(MD5PATTERN_S);
	/**
	 * Matches one counted hash: delimiter, decimal count (group 1), delimiter,
	 * hash (group 2). The delimiters are backslash-escaped because they are
	 * regex metacharacters.
	 */
	public final static Pattern COUNTED_MD5PATTERN = Pattern.compile("\\"+COUNT_START+"(\\d+)"+"\\"+COUNT_END+"("+MD5PATTERN_S+")");

	private final Integer count;
	private final String equivalenceString;

	/**
	 * Creates a counted hash.
	 *
	 * @param count number of occurrences; {@code null} is treated as 1
	 * @param equivalenceString bare hash, must fully match {@link #MD5PATTERN}
	 * @throws IllegalArgumentException if {@code equivalenceString} is null or
	 *         does not match {@link #MD5PATTERN}
	 */
	private MorganMD5(Integer count, String equivalenceString) {
		// explicit null test: Pattern.matcher(null) would otherwise throw a bare NullPointerException
		if (equivalenceString == null || !MD5PATTERN.matcher(equivalenceString).matches()) {
			throw new IllegalArgumentException("Cannot match as morganMD5 "+equivalenceString);
		}
		this.count = (count == null) ? Integer.valueOf(1) : count;
		this.equivalenceString = equivalenceString;
	}

	/**
	 * @return the serialized form: count wrapped in the delimiters, followed by the hash
	 */
	public String getCountedString() {
		return prefixMultiplier(count)+equivalenceString;
	}

	/**
	 * @return the occurrence count (never null)
	 */
	public Integer getCount() {
		return count;
	}

	/**
	 * @return the bare hash string, without any count prefix
	 */
	public String getEquivalenceString() {
		return equivalenceString;
	}

	/**
	 * Convenience overload of {@link #getCountedMorganList(CMLMolecule, int)}
	 * with a multiplier of 1.
	 *
	 * @param molecule parent molecule whose child molecules are hashed
	 * @return sorted list of distinct child hashes with their counts
	 */
	public static List<MorganMD5> getCountedMorganList(CMLMolecule molecule) {
		return getCountedMorganList(molecule, 1);
	}

	/**
	 * Hashes every child molecule of {@code molecule}, sorts the hashes and
	 * collapses runs of equal hashes into one {@link MorganMD5} carrying the
	 * run length as its count.
	 *
	 * <p>Each child's string comes from {@link #createMorganMD5(CMLMolecule)}
	 * and must be a bare hash; a child string that does not match
	 * {@link #MD5PATTERN} makes this method throw.
	 *
	 * @param molecule parent molecule
	 * @param multiplier NOTE(review): accepted but not used by this method;
	 *        the returned counts are never scaled by it - confirm whether
	 *        that is intended
	 * @return distinct hashes in ascending string order with their counts;
	 *         empty when there are no child molecules
	 * @throws IllegalArgumentException if a child hash does not match {@link #MD5PATTERN}
	 */
	public static List<MorganMD5> getCountedMorganList(CMLMolecule molecule, int  multiplier) {
		List<CMLMolecule> childMolecules = CMLMolecule.getChildMoleculeList(molecule);
		List<String> morganStringList = new ArrayList<String>();
		for (CMLMolecule childMolecule : childMolecules) {
			morganStringList.add(createMorganMD5(childMolecule));
		}
		// sorting places equal hashes next to each other so they can be run-length counted
		Collections.sort(morganStringList);
		List<MorganMD5> morganList = new ArrayList<MorganMD5>();
		String currentString = null;
		int count = 0;
		for (String childMorganString : morganStringList) {
			if (currentString == null) {
				count = 1;
				currentString = childMorganString;
			} else if (childMorganString.equals(currentString)) {
				count++;
			} else {
				// hash changed: emit the finished run and start a new one
				morganList.add(new MorganMD5(count, currentString));
				count = 1;
				currentString = childMorganString;
			}
		}
		// emit the final run; skipped when there were no children at all
		// (previously this fell through to new MorganMD5(0, null) and failed)
		if (currentString != null) {
			morganList.add(new MorganMD5(count, currentString));
		}
		return morganList;
	}

	/**
	 * Parses both strings with {@link #interpretCountedMorganString(String)}
	 * and compares the resulting lists position by position.
	 *
	 * @param morganString1 bare or concatenated counted hash string
	 * @param morganString2 bare or concatenated counted hash string
	 * @return see {@link #getMultiplierList(List, List)}
	 * @throws IllegalArgumentException if either string cannot be parsed
	 */
	public static List<Fraction> getMultiplierList(String morganString1, String morganString2) {
		List<MorganMD5> morganList1 = MorganMD5.interpretCountedMorganString(morganString1);
		List<MorganMD5> morganList2 = MorganMD5.interpretCountedMorganString(morganString2);
		return getMultiplierList(morganList1, morganList2);
	}

	/**
	 * Computes, for each index, the ratio count1/count2 of the entries at that
	 * index in the two lists.
	 *
	 * @param morganList1 first list
	 * @param morganList2 second list
	 * @return {@code null} if the lists differ in size; otherwise a list of the
	 *         same size whose element is {@code null} wherever the two entries
	 *         have different hashes
	 */
	public static List<Fraction> getMultiplierList(
			List<MorganMD5> morganList1, List<MorganMD5> morganList2) {
		List<Fraction> multiplierList = null;
		if (morganList1.size() == morganList2.size()) {
			multiplierList = new ArrayList<Fraction>();
			int serial = 0;
			for (MorganMD5 morgan1 : morganList1) {
				MorganMD5 morgan2 = morganList2.get(serial++);
				Fraction multiplier = morgan1.getRatioOfCountToCount2(morgan2);
				multiplierList.add(multiplier);
			}
		}

		return multiplierList;
	}

	/**
	 * Ratio of this entry's count to {@code morgan2}'s count.
	 *
	 * @param morgan2 entry to compare against
	 * @return the fraction count/count2, or {@code null} if the two hashes differ
	 */
	public Fraction getRatioOfCountToCount2(MorganMD5 morgan2) {
		Fraction fraction = null;
		if (this.getEquivalenceString().equals(morgan2.getEquivalenceString())) {
			// NOTE(review): a count of 0 (e.g. parsed from "*0*...") becomes the
			// denominator here - confirm how Fraction handles that
			fraction = new Fraction(this.getCount(), morgan2.getCount());
		}
		return fraction;
	}

	/**
	 * Tests whether two counted strings list the same hashes in the same
	 * order, ignoring the counts.
	 *
	 * @param morganString1 bare or concatenated counted hash string
	 * @param morganString2 bare or concatenated counted hash string
	 * @return true if both parse to lists of equal length with pairwise equal hashes
	 * @throws IllegalArgumentException if either string cannot be parsed
	 */
	public static boolean haveIdenticalComponents(String morganString1, String morganString2) {
		List<Fraction> multiplierList = getMultiplierList(morganString1, morganString2);
		if (multiplierList == null) {
			return false;
		}
		for (Fraction fraction : multiplierList) {
			// a null entry marks a position where the hashes differ
			if (fraction == null) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Finds the single ratio by which every count in the first string relates
	 * to the corresponding count in the second.
	 *
	 * @param morganString1 bare or concatenated counted hash string
	 * @param morganString2 bare or concatenated counted hash string
	 * @return the common ratio, or {@code null} if the component lists differ
	 *         in length, any pair of hashes differs, or the ratios are not all equal
	 * @throws IllegalArgumentException if either string cannot be parsed
	 */
	public static Fraction getCommonMultiplier(String morganString1, String morganString2) {
		List<Fraction> multiplierList = getMultiplierList(morganString1, morganString2);

		Fraction commonFraction = null;
		if (multiplierList != null) {
			for (Fraction fraction : multiplierList) {
				if (fraction == null) {
					return null;
				} else if (commonFraction == null) {
					// first ratio seen becomes the reference
					commonFraction = fraction;
				} else if (!fraction.equals(commonFraction)) {
					return null;
				}
			}
		}
		return commonFraction;
	}

	/**
	 * Parses a hash string into its counted components.
	 *
	 * <p>A string that is just a bare hash yields a single entry with count 1.
	 * Otherwise the string must consist entirely of back-to-back counted
	 * hashes ({@link #COUNTED_MD5PATTERN}) with nothing before, between or
	 * after them.
	 *
	 * @param s string to parse; must not be null
	 * @return the components in the order they appear in {@code s}
	 * @throws IllegalArgumentException if {@code s} contains anything other
	 *         than counted hashes
	 */
	public static List<MorganMD5> interpretCountedMorganString(String s) {
		List<MorganMD5> countedList = new ArrayList<MorganMD5>();
		if (MD5PATTERN.matcher(s).matches()) {
			countedList.add(new MorganMD5(1, s));
		} else {
			int start = 0;
			Matcher matcher = COUNTED_MD5PATTERN.matcher(s);
			while (matcher.find(start)) {
				// each match must begin exactly where the previous one ended
				if (start != matcher.start()) {
					throw new IllegalArgumentException("concatenated MD5 contains junk after:"+s.substring(start));
				}
				String countS = matcher.group(1);
				String morganString = matcher.group(2);
				countedList.add(new MorganMD5(Integer.valueOf(countS), morganString));
				start = matcher.end();
			}
			// start never advanced: there was no counted hash at all
			if (start == 0) {
				throw new IllegalArgumentException("concatenated MD5 has leading junk (must start with count): "+s.substring(start));
			}
			if (start != s.length()) {
				throw new IllegalArgumentException("concatenated MD5 has trailing junk: "+s.substring(start));
			}
		}
		return countedList;
	}

	/**
	 * Convenience overload of {@link #createMorganMD5(CMLMolecule, int)} with
	 * a multiplier of 1.
	 *
	 * @param molecule molecule to hash; may be null
	 * @return the hash string, or {@code null} if {@code molecule} is null
	 */
	public static String createMorganMD5(CMLMolecule molecule) {
		return createMorganMD5(molecule, 1);
	}

	/**
	 * Builds the hash string for a molecule.
	 *
	 * <p>If the molecule reports child molecules, the result is the
	 * concatenation of the counted strings from
	 * {@link #getCountedMorganList(CMLMolecule, int)}. Otherwise it is the MD5
	 * of the molecule's Morgan equivalence string, prefixed with the
	 * multiplier only when the multiplier is not 1.
	 *
	 * @param molecule molecule to hash; may be null
	 * @param multiplier count prefix for a molecule without children; in the
	 *        child-molecule branch it is passed on to
	 *        {@link #getCountedMorganList(CMLMolecule, int)}, which does not use it
	 * @return the hash string, or {@code null} if {@code molecule} is null
	 */
	public static String createMorganMD5(CMLMolecule molecule, int multiplier) {
		String morganString = null;
		if (molecule != null) {
			if (molecule.getMoleculeCount() > 0) {
				List<MorganMD5> morganList = MorganMD5.getCountedMorganList(molecule, multiplier);
				morganString = MorganMD5.createMorganString(morganList);
			} else {
				Morgan morgan = new Morgan(molecule);
				morganString = Util.calculateMD5(morgan.getEquivalenceString());
				if (multiplier != 1) {
					morganString = prefixMultiplier(multiplier)+morganString;
				}
			}
		}
		return morganString;
	}

	/** Wraps {@code multiplier} in the count delimiters, e.g. 2 gives {@code *2*}. */
	private static String prefixMultiplier(int multiplier) {
		return COUNT_START+multiplier+COUNT_END;
	}

	/** Concatenates the counted strings of all entries, in list order. */
	private static String createMorganString(List<MorganMD5> morganList) {
		StringBuilder sb = new StringBuilder();
		for (MorganMD5 morganMD5 : morganList) {
			sb.append(morganMD5.getCountedString());
		}
		return sb.toString();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy