All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.reaction.ReactionEncoder Maven / Gradle / Ivy

There is a newer version: 2024.11.2
Show newest version
/*
 * Copyright (c) 1997 - 2016
 * Actelion Pharmaceuticals Ltd.
 * Gewerbestrasse 16
 * CH-4123 Allschwil, Switzerland
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the copyright holder nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @author Thomas Sander
 */

package com.actelion.research.chem.reaction;

import com.actelion.research.chem.*;
import com.actelion.research.util.ArrayUtils;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;

public class ReactionEncoder
{
	/** Separates consecutive molecules within the idcode, mapping and coordinate sequences. */
	public static final char MOLECULE_DELIMITER = ' ';
	/** Written once into the idcode sequence between the last reactant and the first product. */
	public static final char PRODUCT_IDENTIFIER = '!';
	/** Separates consecutive catalysts within the catalyst string. */
	public static final char CATALYST_DELIMITER = '+';	// character must not collide with idcode or coordinate encodings
	/** Separates the sections (reaction code, mapping, coordinates, drawing objects, catalysts) of a one-string encoding. */
	public static final char OBJECT_DELIMITER = '#';

	/** String form of {@link #MOLECULE_DELIMITER}. */
	public static final String MOLECULE_DELIMITER_STRING = " ";
	/** String form of {@link #OBJECT_DELIMITER}. */
	public static final String OBJECT_DELIMITER_STRING = "#";

	// Bit flags of the encoding mode; each one requests one optional section of the one-string encoding.
	public static final int INCLUDE_MAPPING = 1;
	public static final int INCLUDE_COORDS = 2;
	public static final int INCLUDE_DRAWING_OBJECTS = 4;
	public static final int INCLUDE_CATALYSTS = 8;

	/** All four INCLUDE_ flags combined (1 | 2 | 4 | 8). */
	public static final int INCLUDE_ALL = 15;
	/** No optional section at all. */
	public static final int INCLUDE_RXN_CODE_ONLY = 0;
	/** Mapping and coordinates, but neither drawing objects nor catalysts. */
	public static final int INCLUDE_DEFAULT = INCLUDE_MAPPING | INCLUDE_COORDS;

	// First bit above INCLUDE_ALL. NOTE(review): presumably OR-ed into the mode to suppress
	// idcode sorting; the code that evaluates it is not visible in this copy of the file.
	public static final int RETAIN_REACTANT_AND_PRODUCT_ORDER = 16;


	// Not instantiable: the constructor is private and takes no part in any visible code path.
    private ReactionEncoder()
    {}

	/**
	 * Encodes a reaction in canonical form, i.e. with reactant and product
	 * idcodes arranged in sorted order. As a consequence the original order
	 * of reactants and products may not be retained.
	 * This is a shortcut for {@code encode(reaction, keepAbsoluteCoordinates, true)}.
	 * The returned array has five entries: [0] reaction code, [1] mapping,
	 * [2] coordinates, [3] drawing objects, [4] catalysts. Entries 1 to 4
	 * are null if the reaction carries no such information.
	 *
	 * @param reaction the reaction to encode
	 * @param keepAbsoluteCoordinates handed through unchanged to the three-argument variant
	 * @return String[5] with reaction code, mapping, coordinates, drawing objects, catalysts;
	 *         null e.g. if reaction is null or has no reactants or no products
	 */
	public static String[] encode(Reaction reaction, boolean keepAbsoluteCoordinates) {
		final boolean sortByIDCode = true;	// the canonical variant always sorts
		return encode(reaction, keepAbsoluteCoordinates, sortByIDCode);
	}

	/**
	 * Creates a canonical or non-canonical String containing a reaction
	 * code by creating idcodes of every reactant and product and
	 * concatenating them in original or canonical order.
	 * If mapping information is available this will be encoded
	 * in a 2nd string. Otherwise, this will be null.
	 * Coordinates, if available, will be encoded in a 3rd string.
	 * If there are drawing objects assigned to this reaction
	 * then these are encoded in a 4th string.
	 * If the reaction contains catalysts, they are encoded as 5th string.
	 * The 3rd, 4th and 5th string are null as well, if there is nothing to encode.
	 *
	 * @param reaction the reaction to encode
	 * @param keepAbsoluteCoordinates passed on to the catalyst encoding
	 *        (NOTE(review): its use for reactants and products is not visible in this copy)
	 * @param sortByIDCode whether to sort reactant and product idcodes to produce a canonical reaction code
	 * @return String[5] with reaction code, mapping, coordinates, drawing objects, catalysts;
	 *         null if reaction is null or has no reactants or no products
	 */
	public static String[] encode(Reaction reaction, boolean keepAbsoluteCoordinates, boolean sortByIDCode) {
		if (reaction == null
			|| reaction.getReactants() == 0
			|| reaction.getProducts() == 0) {
			return null;
		}

		// one slot per molecule; reactants come first, products follow
		String[] idcode = new String[reaction.getMolecules()];
		String[] mapping = new String[reaction.getMolecules()];
		String[] coords = new String[reaction.getMolecules()];

		for (int i = 0; i < reaction.getMolecules(); i++) {
			StereoMolecule mol = reaction.getMolecule(i);

			// NOTE(review): the 'for (int atom=0; ...' line below is truncated in this copy of the file.
			// Its loop header is incomplete and idcodeSequence, mappingSequence, coordsSequence and
			// index are used further down without a visible declaration. The missing statements
			// need to be restored from the upstream source before this method can compile.

			// reactants may not use cAtomQFRxnParityHint
			if (mol.isFragment() && i < reaction.getReactants())
				for (int atom=0; atom 0) {
				idcodeSequence.append(MOLECULE_DELIMITER);
				mappingSequence.append(MOLECULE_DELIMITER);
				coordsSequence.append(MOLECULE_DELIMITER);
			}
			idcodeSequence.append(idcode[index]);
			mappingSequence.append(mapping[index]);
			coordsSequence.append(coords[index]);
			idcode[index] = "";
		}

		// Only the idcode sequence marks the reactant/product border with a dedicated character;
		// mapping and coordinates continue with an ordinary molecule delimiter.
		idcodeSequence.append(PRODUCT_IDENTIFIER);
		mappingSequence.append(MOLECULE_DELIMITER);
		coordsSequence.append(MOLECULE_DELIMITER);

		for (int i = reaction.getReactants(); i < reaction.getMolecules(); i++) {
			int index = i;
			if (sortByIDCode) {
				// pick the lexically largest product idcode that was not emitted yet
				// NOTE(review): index stays -1 if all remaining product idcodes are empty strings
				String maxString = "";
				index = -1;
				for (int j = reaction.getReactants(); j < reaction.getMolecules(); j++) {
					if (maxString.compareTo(idcode[j]) < 0) {
						maxString = idcode[j];
						index = j;
					}
				}
			}
			if (i > reaction.getReactants()) {
				idcodeSequence.append(MOLECULE_DELIMITER);
				mappingSequence.append(MOLECULE_DELIMITER);
				coordsSequence.append(MOLECULE_DELIMITER);
			}
			idcodeSequence.append(idcode[index]);
			mappingSequence.append(mapping[index]);
			coordsSequence.append(coords[index]);
			// an empty string never wins the maximum search above, so this entry is not selected again
			idcode[index] = "";
		}

		String[] result = new String[5];
		result[0] = idcodeSequence.toString();
		// mapping and coordinates are reported only if the sequence is longer than its delimiters alone
		if (mappingSequence.length() > reaction.getMolecules() - 1)   // delimiters only
		{
			result[1] = mappingSequence.toString();
		}
		if (coordsSequence.length() > reaction.getMolecules() - 1)   // delimiters only
		{
			result[2] = coordsSequence.toString();
		}
		if (reaction.getDrawingObjects() != null) {
			result[3] = reaction.getDrawingObjects().toString();
		}
		if (reaction.getCatalysts() != 0) {
			result[4] = encodeCatalysts(reaction, keepAbsoluteCoordinates);
		}

		return result;
	}

	// NOTE(review): this span is damaged in this copy of the file. The 'for (int i=0; ...' line
	// is truncated, and everything after it works on mode, buf and result, none of which is
	// declared by encodeCatalysts(). The remainder therefore looks like the tail of a different
	// method that assembles a one-string encoding from a String[] result and an int mode.
	// Restore the missing text from the upstream source; the comments below describe the visible
	// statements only.
	private static String encodeCatalysts(Reaction reaction, boolean keepAbsoluteCoordinates) {
		StringBuilder sb = new StringBuilder();
		for (int i=0; i 1
				&& result[1] != null) {
				buf.append(result[1]);
			}
		}

		// From here on the same pattern repeats for every section: the flag of the section just
		// handled is cleared, and if any flag is left, a delimiter is written, because a later
		// section is still requested. The section content itself is appended only if its own
		// flag is set and the result array provides a non-null entry for it.
		mode &= ~INCLUDE_MAPPING;
		if (mode != 0) {
			buf.append(OBJECT_DELIMITER);
			if ((mode & INCLUDE_COORDS) != 0
				&& result.length > 2
				&& result[2] != null) {
				buf.append(result[2]);
			}
		}

		mode &= ~INCLUDE_COORDS;
		if (mode != 0) {
			buf.append(OBJECT_DELIMITER);
			if ((mode & INCLUDE_DRAWING_OBJECTS) != 0
				&& result.length > 3
				&& result[3] != null) {
				buf.append(result[3]);
			}
		}

		mode &= ~INCLUDE_DRAWING_OBJECTS;
		if (mode != 0) {
			buf.append(OBJECT_DELIMITER);
			if ((mode & INCLUDE_CATALYSTS) != 0
				&& result.length > 4
				&& result[4] != null) {
				buf.append(result[4]);
			}
		}

		return buf.toString();
	}

	/**
	 * Rebuilds a Reaction from the individual strings produced by this class:
	 * reaction code, mapping, coordinates, drawing objects and catalysts.
	 * The reaction code is split at MOLECULE_DELIMITERs; molecules located before
	 * the PRODUCT_IDENTIFIER become reactants, all others become products.
	 * Mapping and coordinates are consumed segment by segment in parallel. Once
	 * no further delimiter is found in one of them, its remaining tail is used
	 * for the current and for any following molecule.
	 *
	 * @param rxnCode reactant and product idcodes; must contain a PRODUCT_IDENTIFIER
	 * @param rxnMapping encoded atom mapping, one segment per molecule; may be null or empty
	 * @param rxnCoords encoded coordinates, one segment per molecule; may be null or empty
	 * @param rxnObjects encoded drawing objects; may be null or empty
	 * @param rxnCatalysts catalyst idcodes separated by CATALYST_DELIMITER; may be null or empty
	 * @param ensureCoordinates passed to every IDCodeParser created by this method
	 * @param rxn reaction to be cleared and re-used, or null to create a new one
	 * @return the decoded Reaction; null if rxnCode is null, empty or lacks a PRODUCT_IDENTIFIER
	 */
	public static Reaction decode(String rxnCode, String rxnMapping, String rxnCoords,
								  String rxnObjects, String rxnCatalysts, boolean ensureCoordinates, Reaction rxn) {
		if (rxnCode == null || rxnCode.isEmpty())
			return null;

		final int arrowPos = rxnCode.indexOf(PRODUCT_IDENTIFIER);
		if (arrowPos == -1)
			return null;

		final Reaction reaction;
		if (rxn != null) {
			rxn.clear();
			reaction = rxn;
		}
		else {
			reaction = new Reaction();
		}

		final boolean hasMapping = rxnMapping != null && !rxnMapping.isEmpty();
		final boolean hasCoords = rxnCoords != null && !rxnCoords.isEmpty();

		boolean inProducts = false;
		int codePos = 0;
		int mappingPos = 0;
		int coordsPos = 0;

		do {
			// once the cursor has passed the product identifier, everything is a product
			if (codePos > arrowPos)
				inProducts = true;

			// a reactant ends at the product identifier at the latest
			int codeEnd = rxnCode.indexOf(MOLECULE_DELIMITER, codePos);
			if (!inProducts && (codeEnd == -1 || codeEnd > arrowPos))
				codeEnd = arrowPos;

			final String idcode;
			if (codeEnd != -1) {
				idcode = rxnCode.substring(codePos, codeEnd);
				codePos = codeEnd + 1;
			}
			else {
				idcode = rxnCode.substring(codePos);
				codePos = -1;	// last molecule reached
			}

			String mapping = null;
			if (hasMapping) {
				int mappingEnd = rxnMapping.indexOf(MOLECULE_DELIMITER, mappingPos);
				if (mappingEnd != -1) {
					mapping = rxnMapping.substring(mappingPos, mappingEnd);
					mappingPos = mappingEnd + 1;
				}
				else {
					mapping = rxnMapping.substring(mappingPos);
				}
			}

			String coords = null;
			if (hasCoords) {
				int coordsEnd = rxnCoords.indexOf(MOLECULE_DELIMITER, coordsPos);
				if (coordsEnd != -1) {
					coords = rxnCoords.substring(coordsPos, coordsEnd);
					coordsPos = coordsEnd + 1;
				}
				else {
					coords = rxnCoords.substring(coordsPos);
				}
			}

			// the mapping is parsed with the same parser instance that created the molecule
			IDCodeParser moleculeParser = new IDCodeParser(ensureCoordinates);
			StereoMolecule mol = moleculeParser.getCompactMolecule(idcode, coords);
			if (mapping != null)
				moleculeParser.parseMapping(mapping.getBytes(StandardCharsets.UTF_8));

			if (inProducts)
				reaction.addProduct(mol);
			else
				reaction.addReactant(mol);
		} while (codePos != -1);

		if (rxnObjects != null && !rxnObjects.isEmpty())
			reaction.setDrawingObjects(new DrawingObjectList(rxnObjects));

		if (rxnCatalysts != null && !rxnCatalysts.isEmpty()) {
			IDCodeParser catalystParser = new IDCodeParser(ensureCoordinates);
			int start = 0;
			for (int end = rxnCatalysts.indexOf(CATALYST_DELIMITER);
				 end != -1;
				 end = rxnCatalysts.indexOf(CATALYST_DELIMITER, start)) {
				reaction.addCatalyst(catalystParser.getCompactMolecule(rxnCatalysts.substring(start, end)));
				start = end + 1;
			}
			// whatever follows the last delimiter is the final catalyst
			reaction.addCatalyst(catalystParser.getCompactMolecule(rxnCatalysts.substring(start)));
		}

		return reaction;
	}

	/**
	 * Rebuilds a Reaction from byte arrays holding the reaction code, mapping,
	 * coordinates and catalysts that were earlier created by this class, plus
	 * an optional drawing object string.
	 * Other than the String based variant this method does not copy segments.
	 * It passes the complete arrays together with start positions to the parser.
	 * Molecules located before the PRODUCT_IDENTIFIER become reactants, all
	 * others become products. A mapping segment that starts directly with a
	 * MOLECULE_DELIMITER is considered empty and is not parsed.
	 *
	 * @param rxnCode reactant and product idcodes; must contain a PRODUCT_IDENTIFIER
	 * @param rxnMapping encoded atom mapping, one segment per molecule; may be null
	 * @param rxnCoords encoded coordinates, one segment per molecule; may be null or empty
	 * @param rxnObjects encoded drawing objects; may be null or empty
	 * @param rxnCatalysts catalyst idcodes separated by CATALYST_DELIMITER; may be null or empty
	 * @param ensureCoordinates passed to every IDCodeParser created by this method
	 * @return the decoded Reaction; null if rxnCode is null, empty or lacks a PRODUCT_IDENTIFIER
	 */
	public static Reaction decode(byte[] rxnCode, byte[] rxnMapping, byte[] rxnCoords,
								  String rxnObjects, byte[] rxnCatalysts, boolean ensureCoordinates) {
		if (rxnCode == null || rxnCode.length == 0)
			return null;

		final int arrowPos = indexOf(rxnCode, PRODUCT_IDENTIFIER);
		if (arrowPos == -1)
			return null;

		final Reaction reaction = new Reaction();
		final boolean hasCoords = rxnCoords != null && rxnCoords.length != 0;

		boolean inProducts = false;
		int codePos = 0;
		int mappingPos = 0;
		int coordsPos = 0;

		do {
			// once the cursor has passed the product identifier, everything is a product
			if (codePos > arrowPos)
				inProducts = true;

			// a reactant ends at the product identifier at the latest
			int codeEnd = indexOf(rxnCode, MOLECULE_DELIMITER, codePos);
			if (!inProducts && (codeEnd == -1 || codeEnd > arrowPos))
				codeEnd = arrowPos;

			final int codeStart = codePos;
			if (codeEnd == -1)
				codePos = -1;	// last molecule reached
			else
				codePos = codeEnd + 1;

			int mappingStart = -1;
			if (rxnMapping != null && mappingPos < rxnMapping.length) {
				// a segment beginning with the delimiter is empty and must not be parsed
				if (rxnMapping[mappingPos] != MOLECULE_DELIMITER)
					mappingStart = mappingPos;

				int mappingEnd = indexOf(rxnMapping, MOLECULE_DELIMITER, mappingPos);
				if (mappingEnd != -1)
					mappingPos = mappingEnd + 1;
			}

			int coordsStart = -1;
			if (hasCoords) {
				coordsStart = coordsPos;

				int coordsEnd = indexOf(rxnCoords, MOLECULE_DELIMITER, coordsPos);
				if (coordsEnd != -1)
					coordsPos = coordsEnd + 1;
			}

			// the mapping is parsed with the same parser instance that created the molecule
			IDCodeParser moleculeParser = new IDCodeParser(ensureCoordinates);
			moleculeParser.neglectSpaceDelimitedCoordinates();
			StereoMolecule mol = moleculeParser.getCompactMolecule(rxnCode, rxnCoords, codeStart, coordsStart);
			if (mappingStart != -1)
				moleculeParser.parseMapping(rxnMapping, mappingStart);

			if (inProducts)
				reaction.addProduct(mol);
			else
				reaction.addReactant(mol);
		} while (codePos != -1);

		if (rxnObjects != null && !rxnObjects.isEmpty())
			reaction.setDrawingObjects(new DrawingObjectList(rxnObjects));

		if (rxnCatalysts != null && rxnCatalysts.length != 0) {
			IDCodeParser catalystParser = new IDCodeParser(ensureCoordinates);
			int start = 0;
			for (int end = indexOf(rxnCatalysts, CATALYST_DELIMITER);
				 end != -1;
				 end = indexOf(rxnCatalysts, CATALYST_DELIMITER, start)) {
				reaction.addCatalyst(catalystParser.getCompactMolecule(rxnCatalysts, start));
				start = end + 1;
			}
			// whatever follows the last delimiter is the final catalyst
			reaction.addCatalyst(catalystParser.getCompactMolecule(rxnCatalysts, start));
		}

		return reaction;
	}

	// NOTE(review): this span is damaged in this copy of the file. The 'for (int i=0; ...' line
	// is truncated, and everything after it works on rxnBytes, coords, mapping, includeReactants,
	// includeProducts, reactantEnd, productIndex, productEnd, coordsIndex and mappingIndex, none
	// of which is declared by indexOf(). The remainder therefore looks like the tail of a
	// different method that collects reactant and/or product molecules into a StereoMolecule
	// array. Restore the missing text from the upstream source; the comments below describe the
	// visible statements only.
	private static int indexOf(byte[] bytes, char ch) {
		for (int i=0; i moleculeList = new ArrayList<>();
		if (includeReactants) {
			int reactantIndex = 0;
			do {
				// every molecule is parsed by its own parser, which afterwards also parses the mapping
				IDCodeParser parser = new IDCodeParser();
				parser.neglectSpaceDelimitedCoordinates();
				StereoMolecule reactant = parser.getCompactMolecule(rxnBytes, coords, reactantIndex, coordsIndex);
				// molecules without any atom are not reported
				if (reactant.getAllAtoms() != 0)
					moleculeList.add(reactant);

				// The cursors advance to the byte behind the next delimiter, regardless of whether the
				// molecule was kept. A cursor value of 0 means that no delimiter was found,
				// provided ArrayUtils.indexOf() returns -1 in that case.
				reactantIndex = 1+ArrayUtils.indexOf(rxnBytes, (byte)ReactionEncoder.MOLECULE_DELIMITER, reactantIndex);
				if (coords != null)
					coordsIndex = 1+ArrayUtils.indexOf(coords, (byte)ReactionEncoder.MOLECULE_DELIMITER, coordsIndex);
				if (mapping != null) {
					parser.parseMapping(mapping, mappingIndex);
					mappingIndex = 1+ArrayUtils.indexOf(mapping, (byte)ReactionEncoder.MOLECULE_DELIMITER, mappingIndex);
				}
			} while (reactantIndex != 0 && reactantIndex < reactantEnd);
		}
		if (includeProducts) {
			// same procedure as for the reactants; coordsIndex and mappingIndex continue from where they are
			do {
				IDCodeParser parser = new IDCodeParser();
				parser.neglectSpaceDelimitedCoordinates();
				StereoMolecule product = parser.getCompactMolecule(rxnBytes, coords, productIndex, coordsIndex);
				if (product.getAllAtoms() != 0)
					moleculeList.add(product);

				productIndex = 1+ArrayUtils.indexOf(rxnBytes, (byte)ReactionEncoder.MOLECULE_DELIMITER, productIndex);
				if (coords != null)
					coordsIndex = 1+ArrayUtils.indexOf(coords, (byte)ReactionEncoder.MOLECULE_DELIMITER, coordsIndex);
				if (mapping != null) {
					parser.parseMapping(mapping, mappingIndex);
					mappingIndex = 1+ArrayUtils.indexOf(mapping, (byte)ReactionEncoder.MOLECULE_DELIMITER, mappingIndex);
				}
			} while (productIndex != 0 && productIndex < productEnd);
		}

		// an empty result is reported as null rather than as an empty array
		return moleculeList.size() == 0 ? null : moleculeList.toArray(new StereoMolecule[0]);
	}

	/**
	 * Generates an array of all catalysts of the encoded reaction string as bytes.
	 * The catalyst section is expected behind the fourth OBJECT_DELIMITER of rxnBytes.
	 * Within that section catalysts are separated by CATALYST_DELIMITER. If a
	 * catalyst's idcode is followed by a space before the next catalyst delimiter,
	 * then the bytes behind the space are passed to the parser as atom coordinates.
	 * Catalysts without any atom are skipped.
	 * @param rxnBytes one-string reaction encoding; may be null
	 * @return null (if rxnBytes is null or empty, has less than four OBJECT_DELIMITERs,
	 *         or contains no catalyst with atoms) or StereoMolecule array with at least one molecule
	 */
	public static StereoMolecule[] decodeCatalysts(byte[] rxnBytes) {
		if (rxnBytes == null || rxnBytes.length == 0)
			return null;

		// skip reaction code, mapping, coordinates and drawing objects
		int index = 0;
		for (int i=0; i<4; i++) {
			index = 1 + ArrayUtils.indexOf(rxnBytes, (byte)ReactionEncoder.OBJECT_DELIMITER, index);
			if (index == 0)	// delimiter not found (-1 + 1)
				return null;
		}

		// fourth delimiter is the last byte: catalyst section is empty
		if (index == rxnBytes.length)
			return null;

		// Typed list: with a raw ArrayList toArray() would yield Object[], which is not a StereoMolecule[]
		ArrayList<StereoMolecule> catalystList = new ArrayList<>();
		while (index != 0 && index < rxnBytes.length) {
			// both are positions behind the respective delimiter, or 0 if there is none
			int nextIndex = 1+ArrayUtils.indexOf(rxnBytes, (byte)ReactionEncoder.CATALYST_DELIMITER, index);
			int coordsIndex = 1+ArrayUtils.indexOf(rxnBytes, (byte)' ', index);

			// coordinates belong to this catalyst only if the space precedes the next catalyst delimiter
			StereoMolecule catalyst = (coordsIndex != 0 && (nextIndex == 0 || nextIndex > coordsIndex)) ?
					  new IDCodeParser().getCompactMolecule(rxnBytes, rxnBytes, index, coordsIndex)
					: new IDCodeParser().getCompactMolecule(rxnBytes, null, index, -1);
			if (catalyst.getAllAtoms() != 0)
				catalystList.add(catalyst);

			index = nextIndex;
		}

		return catalystList.isEmpty() ? null : catalystList.toArray(new StereoMolecule[0]);
	}

	/**
	 * Extracts the idcodes of all reactants and/or products from an encoded reaction
	 * without parsing them into molecules.
	 * Reactant idcodes are taken from the bytes before the PRODUCT_IDENTIFIER.
	 * Product idcodes are taken from the bytes between the PRODUCT_IDENTIFIER and
	 * the first OBJECT_DELIMITER, or the end of rxnBytes if there is none.
	 * Within both ranges idcodes are separated by MOLECULE_DELIMITER. Sections that
	 * follow the first OBJECT_DELIMITER (mapping, coordinates, etc.) are never touched.
	 * At least one of includeReactants and includeProducts must be true.
	 * @param rxnBytes reaction code, optionally followed by further OBJECT_DELIMITER separated sections
	 * @param includeReactants whether reactant idcodes shall be part of the result
	 * @param includeProducts whether product idcodes shall be part of the result
	 * @return null (if rxnBytes is null or empty, if reactants or products are missing,
	 *         or if neither reactants nor products are requested) or an array with one
	 *         byte[] per idcode, reactants first
	 */
	public static byte[][] getMoleculeIDCodes(byte[] rxnBytes, boolean includeReactants, boolean includeProducts) {
		if (rxnBytes == null || rxnBytes.length == 0)
			return null;

		// no product identifier (-1) or no reactant in front of it (0)
		int reactantEnd = ArrayUtils.indexOf(rxnBytes, (byte)ReactionEncoder.PRODUCT_IDENTIFIER);
		if (reactantEnd <= 0)
			return null;

		int productIndex = reactantEnd + 1;
		int productEnd = ArrayUtils.indexOf(rxnBytes, (byte)ReactionEncoder.OBJECT_DELIMITER, productIndex);
		if (productEnd == -1)
			productEnd = rxnBytes.length;

		if (productIndex == productEnd)
			return null;

		// Typed list: with a raw ArrayList toArray() would yield Object[], which is not a byte[][]
		ArrayList<byte[]> moleculeList = new ArrayList<>();
		if (includeReactants)
			addIDCodes(rxnBytes, 0, reactantEnd, moleculeList);
		if (includeProducts)
			addIDCodes(rxnBytes, productIndex, productEnd, moleculeList);

		return moleculeList.isEmpty() ? null : moleculeList.toArray(new byte[0][]);
	}

	/**
	 * Copies every MOLECULE_DELIMITER separated idcode found in rxnBytes[start...end-1] into idcodeList.
	 */
	private static void addIDCodes(byte[] rxnBytes, int start, int end, ArrayList<byte[]> idcodeList) {
		while (start < end) {
			int delimiter = ArrayUtils.indexOf(rxnBytes, (byte)ReactionEncoder.MOLECULE_DELIMITER, start);
			// A delimiter beyond the range belongs to a later part of the encoding (products,
			// mapping, coordinates). The last idcode of the range must stop at the range end
			// instead of swallowing the separator and the bytes that follow it.
			if (delimiter == -1 || delimiter > end)
				delimiter = end;
			idcodeList.add(Arrays.copyOfRange(rxnBytes, start, delimiter));
			start = delimiter + 1;
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy