All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.IDCodeParserWithoutCoordinateInvention Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of the the copyright holder nor the
*    names of its contributors may be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

package com.actelion.research.chem;

import com.actelion.research.util.DoubleFormat;

import java.nio.charset.StandardCharsets;

/**
 * Typically you should use IDCodeParser instead of this class. You may instantiate this class
 * if you need to avoid a dependency to the CoordinateInventor and if you pass encoded coordinates
 * together with any idcode for parsing.
 * We needed to introduce this class to avoid a cyclic dependency between the IDCodeParser and
 * the CoordinateInventor: If encoded atom coords are not given, then the IDCodeParser needs
 * to invent them in order to assign proper up-/down-bonds. The CoordinateInventor needs the
 * IDCodeParser to unpack its default template list.
 */
public class IDCodeParserWithoutCoordinateInvention {
	private StereoMolecule	mMol;
	private byte[]			mDecodingBytes;
	private	int				mIDCodeBitsAvail,mIDCodeTempData,mIDCodeBufferIndex;
	private boolean         mNeglectSpaceDelimitedCoordinates;

	/**
	 * Tells the parser whether a parsed molecule is required to carry 2D-coordinates.
	 * This implementation always returns false. The parse method of this class consults
	 * the returned value: if it is true, encoded 3D-coordinates are discarded, and the
	 * stereo bond handling intended for 2D-coordinates is applied even if no 2D-coordinates
	 * were passed.
	 * NOTE(review): being protected, this is presumably a hook to be overridden by a
	 * subclass that can invent coordinates (e.g. IDCodeParser) - confirm.
	 * @return false in this class
	 */
	protected boolean ensure2DCoordinates() {
		return false;
		}

	/**
	 * IDCodeParsers allow passing idcode and coordinates as one String with a space
	 * as separator in between. If an idcode is followed by a space and more, and if
	 * the text following the space shall not be interpreted as encoded coordinates,
	 * then call this method after instantiation.
	 * The method merely sets the internal flag mNeglectSpaceDelimitedCoordinates to true.
	 */
	public void neglectSpaceDelimitedCoordinates() {
		mNeglectSpaceDelimitedCoordinates = true;
		}

	/**
	 * Builds a molecule from an idcode that is passed as a String. The molecule's atom
	 * and bond arrays are meant to be just as large as needed to hold the molecule, which
	 * conserves memory provided that no atoms or bonds are added afterwards.
	 * The String is converted to UTF-8 bytes and handed, without a separate coordinate
	 * array, to the byte array based variant of this method.
	 * @param idcode null or idcode, which may contain coordinates separated by a space character
	 * @return the decoded molecule; null if idcode is null or empty
	 */
	public StereoMolecule getCompactMolecule(String idcode) {
		if (idcode == null || idcode.isEmpty())
			return null;

		byte[] idcodeBytes = idcode.getBytes(StandardCharsets.UTF_8);
		return getCompactMolecule(idcodeBytes, null);
		}

	/**
	 * Creates and returns a molecule from the idcode with its atom and bond arrays being
	 * just as large as needed to hold the molecule. Use this to conserve memory if no
	 * atoms or bonds are added to the molecule afterwards.
	 * @param idcode may be null
	 * @return
	 */
	public StereoMolecule getCompactMolecule(byte[] idcode) {
		if (idcode == null || idcode.length == 0)
			return null;

		for (int i=2; i 8)	// abits is the version number
			abits = bbits;

		int allAtoms = decodeBits(abits);
		int allBonds = decodeBits(bbits);

		StereoMolecule mol = new StereoMolecule(allAtoms, allBonds);
		parse(mol, idcode, coordinates, idcodeStart, coordsStart);
		return mol;
		}

	/**
	 * Populates the given molecule from an idcode String, which may carry encoded atom
	 * coordinates after a space character.
	 * The String is split at its first space, if that space is neither the first nor the
	 * last character; the part before the space is used as idcode and the part behind it
	 * as encoded coordinates. Otherwise the entire String is taken as idcode without
	 * separate coordinates. A null or empty String is delegated as null idcode.
	 * @param mol molecule object to be filled with the idcode content
	 * @param idcode null or idcode, which may contain coordinates separated by a space character
	 */
	public void parse(StereoMolecule mol, String idcode) {
		if (idcode == null || idcode.isEmpty()) {
			parse(mol, (byte[])null, (byte[])null);
			return;
			}

		int spaceIndex = idcode.indexOf(' ');
		boolean hasCoordinatePart = (spaceIndex > 0 && spaceIndex < idcode.length()-1);
		if (!hasCoordinatePart) {
			parse(mol, idcode.getBytes(StandardCharsets.UTF_8), null);
			return;
			}

		byte[] idcodeBytes = idcode.substring(0, spaceIndex).getBytes(StandardCharsets.UTF_8);
		byte[] coordinateBytes = idcode.substring(spaceIndex+1).getBytes(StandardCharsets.UTF_8);
		parse(mol, idcodeBytes, coordinateBytes);
		}

	/**
	 * Populates the given molecule from an idcode passed as byte array.
	 * No separate coordinate array is supplied; the call is forwarded to the variant
	 * taking idcode and coordinates with null as coordinates.
	 * @param mol molecule object to be filled with the idcode content
	 * @param idcode null or valid idcode optionally concatenated with SPACE and encoded coordinates
	 */
	public void parse(StereoMolecule mol, byte[] idcode) {
		parse(mol, idcode, (byte[])null);
		}

	/**
	 * Populates the given molecule from idcode and encoded coordinates, which are both
	 * passed as Strings. Every non-null String is converted into UTF-8 bytes before the
	 * byte array based variant is called; null Strings are passed on as null arrays.
	 * @param mol molecule object to be filled with the idcode content
	 * @param idcode may be null
	 * @param coordinates may be null
	 */
	public void parse(StereoMolecule mol, String idcode, String coordinates) {
		byte[] idcodeBytes = null;
		if (idcode != null)
			idcodeBytes = idcode.getBytes(StandardCharsets.UTF_8);

		byte[] coordinateBytes = null;
		if (coordinates != null)
			coordinateBytes = coordinates.getBytes(StandardCharsets.UTF_8);

		parse(mol, idcodeBytes, coordinateBytes);
		}

	/**
	 * Populates the given molecule from idcode and encoded coordinates, which are both
	 * passed as byte arrays. If the idcode is null or empty, then the molecule is just
	 * cleared. Otherwise parsing starts at index 0 of both arrays.
	 * @param mol molecule object to be filled with the idcode content
	 * @param idcode may be null
	 * @param coordinates may be null
	 */
	public void parse(StereoMolecule mol, byte[] idcode, byte[] coordinates) {
		boolean idcodeAvailable = (idcode != null && idcode.length != 0);
		if (idcodeAvailable)
			parse(mol, idcode, coordinates, 0, 0);
		else
			mol.clear();
		}

	/**
	 * Populates the given molecule from an idcode that starts at the given position
	 * within a byte array. No separate coordinate array is passed on; the coordinate
	 * start offset is given as -1.
	 * @param mol molecule object to be filled with the idcode content
	 * @param idcode may be null
	 * @param idcodeStart offset in idcode array to first idcode byte
	 */
	public void parse(StereoMolecule mol, byte[] idcode, int idcodeStart) {
		final byte[] noCoordinates = null;
		final int noCoordsStart = -1;
		parse(mol, idcode, noCoordinates, idcodeStart, noCoordsStart);
		}

	/**
	 * Parses the idcode and populates the given molecule to represent the passed idcode.
	 * @param mol molecule object to be filled with the idcode content
	 * @param idcode may be null
	 * @param coordinates may be null
	 * @param idcodeStart offset in idcode array to first idcode byte
	 * @param coordsStart offset in coordinates array to first coords byte
	 */
	public void parse(StereoMolecule mol, byte[] idcode, byte[] coordinates, int idcodeStart, int coordsStart) {
		mol.clear();

		if (idcode==null || idcodeStart < 0 || idcodeStart >= idcode.length)
			return;

		mMol = mol;
		int version = Canonizer.cIDCodeVersion2;

		if (coordinates != null && (coordsStart < 0 || coordsStart >= coordinates.length))
			coordinates = null;

		decodeBitsStart(idcode, idcodeStart);
		int abits = decodeBits(4);
		int bbits = decodeBits(4);

		if (abits > 8) {	// abits is the version number
			version = abits;
			abits = bbits;
			}

		if (abits == 0) {
			mMol.setFragment(decodeBits(1) == 1);
			return;
			}

		int allAtoms = decodeBits(abits);
		int allBonds = decodeBits(bbits);
		int nitrogens = decodeBits(abits);
		int oxygens = decodeBits(abits);
		int otherAtoms = decodeBits(abits);
		int chargedAtoms = decodeBits(abits);
		for (int atom=0; atom= '\'');
		double targetAVBL = 0.0;
		double xOffset = 0.0;
		double yOffset = 0.0;
		double zOffset = 0.0;
		boolean coordsAre3D = false;
		boolean coordsAreAbsolute = false;

		if (decodeOldCoordinates) {	// old coordinate encoding
			if ((coordinates.length > 2*allAtoms-2 && coordinates[2*allAtoms-2] == '\'')
			 || (coordinates.length > 3*allAtoms-3 && coordinates[3*allAtoms-3] == '\'')) {	// old faulty encoding
				coordsAreAbsolute = true;
				coordsAre3D = (coordinates.length == 3*allAtoms-3+9);
				int index = coordsAre3D ? 3*allAtoms-3 : 2*allAtoms-2;
				int avblInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
				targetAVBL = Math.pow(10.0, avblInt/2000.0-1.0);
				index += 2;
				int xInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
				xOffset = Math.pow(10.0, xInt/1500.0-1.0);
				index += 2;
				int yInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
				yOffset = Math.pow(10.0, yInt/1500.0-1.0);
				if (coordsAre3D) {
					index += 2;
					int zInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
					zOffset = Math.pow(10.0, zInt/1500.0-1.0);
					}
				}
			else {
				coordsAre3D = (coordinates.length == 3*allAtoms-3);
				}
			}

		// don't use 3D coordinates, if we need 2D
		if (ensure2DCoordinates() && coordsAre3D) {
			coordinates = null;
			decodeOldCoordinates = false;
			}

		for (int i=1; i mIDCodeBufferIndex+1
		 && (idcode[mIDCodeBufferIndex+1] == ' ' || idcode[mIDCodeBufferIndex+1] == '\t')) {
			coordinates = idcode;
			coordsStart = mIDCodeBufferIndex+2;
			}

		if (coordinates != null) {
			try {
				if (coordinates[coordsStart] == '!' || coordinates[coordsStart] == '#') {    // new coordinate format
					decodeBitsStart(coordinates, coordsStart + 1);
					coordsAre3D = (decodeBits(1) == 1);
					coordsAreAbsolute = (decodeBits(1) == 1);
					int resolutionBits = 2 * decodeBits(4);
					int binCount = (1 << resolutionBits);

					double factor;
					int from = 0;
					int bond = 0;
					for (int atom = 1; atom < allAtoms; atom++) {
						if (bond < allBonds && mMol.getBondAtom(1, bond) == atom) {
							from = mMol.getBondAtom(0, bond++);
							factor = 1.0;
							}
						else {
							from = 0;
							factor = 8.0;
							}
						mMol.setAtomX(atom, mMol.getAtomX(from) + factor * (decodeBits(resolutionBits) - binCount / 2));
						mMol.setAtomY(atom, mMol.getAtomY(from) + factor * (decodeBits(resolutionBits) - binCount / 2));
						if (coordsAre3D)
							mMol.setAtomZ(atom, mMol.getAtomZ(from) + factor * (decodeBits(resolutionBits) - binCount / 2));
						}

					if (coordinates[coordsStart] == '#') {    // we have 3D-coordinates that include implicit hydrogen coordinates
						int hydrogenCount = 0;

						// we need to cache hCount, because otherwise getImplicitHydrogens() would create helper arrays with every call
						int[] hCount = new int[allAtoms];
						for (int atom = 0; atom < allAtoms; atom++)
							hydrogenCount += (hCount[atom] = mMol.getImplicitHydrogens(atom));

						for (int atom = 0; atom < allAtoms; atom++) {
							for (int i = 0; i < hCount[atom]; i++) {
								int hydrogen = mMol.addAtom(1);
								mMol.addBond(atom, hydrogen, Molecule.cBondTypeSingle);

								mMol.setAtomX(hydrogen, mMol.getAtomX(atom) + (decodeBits(resolutionBits) - binCount / 2));
								mMol.setAtomY(hydrogen, mMol.getAtomY(atom) + (decodeBits(resolutionBits) - binCount / 2));
								if (coordsAre3D)
									mMol.setAtomZ(hydrogen, mMol.getAtomZ(atom) + (decodeBits(resolutionBits) - binCount / 2));
								}
							}

						allAtoms += hydrogenCount;
						allBonds += hydrogenCount;
						}

					double avblDefault = coordsAre3D ? 1.5 : Molecule.getDefaultAverageBondLength();
					double avbl = mMol.getAverageBondLength(allAtoms, allBonds, avblDefault);

					if (coordsAreAbsolute) {
						targetAVBL = decodeAVBL(decodeBits(resolutionBits), binCount);
						xOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
						yOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
						if (coordsAre3D)
							zOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);

						factor = targetAVBL / avbl;
						for (int atom = 0; atom < allAtoms; atom++) {
							mMol.setAtomX(atom, xOffset + factor * mMol.getAtomX(atom));
							mMol.setAtomY(atom, yOffset + factor * mMol.getAtomY(atom));
							if (coordsAre3D)
								mMol.setAtomZ(atom, zOffset + factor * mMol.getAtomZ(atom));
							}
						}
					else {    // with new format 2D and 3D coordinates are scaled to average bond lengths of 1.5 Angstrom
						targetAVBL = 1.5;
						factor = targetAVBL / avbl;
						for (int atom = 0; atom < allAtoms; atom++) {
							mMol.setAtomX(atom, factor * mMol.getAtomX(atom));
							mMol.setAtomY(atom, factor * mMol.getAtomY(atom));
							if (coordsAre3D)
								mMol.setAtomZ(atom, factor * mMol.getAtomZ(atom));
							}
						}
					}
				else {    // old coordinate format
					if (coordsAre3D && !coordsAreAbsolute && targetAVBL == 0.0) // if no scaling factor is given, then scale to mean bond length = 1.5
						targetAVBL = 1.5;

					if (targetAVBL != 0.0 && mMol.getAllBonds() != 0) {
						double avbl = 0.0;
						for (int bond = 0; bond < mMol.getAllBonds(); bond++) {
							double dx = mMol.getAtomX(mMol.getBondAtom(0, bond)) - mMol.getAtomX(mMol.getBondAtom(1, bond));
							double dy = mMol.getAtomY(mMol.getBondAtom(0, bond)) - mMol.getAtomY(mMol.getBondAtom(1, bond));
							double dz = coordsAre3D ? mMol.getAtomZ(mMol.getBondAtom(0, bond)) - mMol.getAtomZ(mMol.getBondAtom(1, bond)) : 0.0f;
							avbl += Math.sqrt(dx * dx + dy * dy + dz * dz);
							}
						avbl /= mMol.getAllBonds();
						double f = targetAVBL / avbl;
						for (int atom = 0; atom < mMol.getAllAtoms(); atom++) {
							mMol.setAtomX(atom, mMol.getAtomX(atom) * f + xOffset);
							mMol.setAtomY(atom, mMol.getAtomY(atom) * f + yOffset);
							if (coordsAre3D)
								mMol.setAtomZ(atom, mMol.getAtomZ(atom) * f + zOffset);
							}
						}
					}
				}
			catch (Exception e) {
				e.printStackTrace();
				System.err.println("Faulty id-coordinates:"+e+" "+new String(idcode, StandardCharsets.UTF_8)+" "+new String(coordinates, StandardCharsets.UTF_8));
				coordinates = null;
				coordsAre3D = false;
				}
			}

		boolean coords2DAvailable = (coordinates != null && !coordsAre3D);

		// If we have or create 2D-coordinates, then we need to set all double bonds to a cross bond, which
		// - have distinguishable substituents on both ends, i.e. is a stereo double bond
		// - are not in a small ring
		// Here we don't know, whether a double bond without E/Z parity is a stereo bond with unknown
		// configuration or not a stereo bond. Therefore, we need to set a flag, that causes the Canonizer
		// during the next stereo recognition with atom coordinates to assign an unknown configuration rather
		// than E or Z based on created or given coordinates.
		// In a next step these double bonds are converted into cross bonds by
		if (coords2DAvailable || ensure2DCoordinates()) {
			mMol.ensureHelperArrays(Molecule.cHelperRings);
			for (int bond=0; bond= coordsStart+3*allAtoms-3
				 && coordinates[coordsStart+2*allAtoms-2] != '\'');
			}
		}

	/**
	 * String based convenience variant: converts the encoded coordinates into UTF-8 bytes
	 * and forwards them to the byte array based variant.
	 * @param coordinates encoded coordinates; may be null
	 * @return false if coordinates is null; otherwise the result of the byte array based variant
	 */
	public boolean coordinatesAreAbsolute(String coordinates) {
		if (coordinates == null)
			return false;

		byte[] coordinateBytes = coordinates.getBytes(StandardCharsets.UTF_8);
		return coordinatesAreAbsolute(coordinateBytes);
		}

	/**
	 * Convenience variant for encoded coordinates that start at index 0 of the array.
	 * @param coordinates encoded coordinates
	 * @return the result of the offset based variant called with offset 0
	 */
	public boolean coordinatesAreAbsolute(byte[] coordinates) {
		final int coordStart = 0;
		return coordinatesAreAbsolute(coordinates, coordStart);
		}

	public boolean coordinatesAreAbsolute(byte[] coordinates, int coordStart) {
		if (coordinates == null || coordinates.length <= coordStart)
			return false;

		if (coordinates[coordStart] >= '\'') {	// old format uses ACSII 39 and higher
			for (int i=coordStart; i 8)	// abits is the version number
			version = abits;

		return version;
		}

	/**
	 * Determines the atom count that is encoded in the given idcode String.
	 * The String is converted into UTF-8 bytes and decoded from index 0.
	 * @param idcode may be null
	 * @return 0 if idcode is null or empty; otherwise the decoded atom count
	 */
	public int getAtomCount(String idcode) {
		boolean nothingToDecode = (idcode == null || idcode.isEmpty());
		return nothingToDecode ? 0 : getAtomCount(idcode.getBytes(StandardCharsets.UTF_8), 0);
		}

	/**
	 * Determines the atom count that is encoded in an idcode starting at the given
	 * position of a byte array. Only the idcode header is decoded: two 4-bit values
	 * followed by the atom count. If the first 4-bit value exceeds 8, then it is the
	 * idcode version number and the second value defines the bit width of the atom count.
	 * Calling this method resets the internal bit decoding state of this parser.
	 * @param idcode may be null
	 * @param offset index of the first idcode byte; negative values are not checked
	 * @return 0 if idcode is null, if offset is not below the array length, or if the
	 *         encoded bit width is 0; otherwise the decoded atom count
	 */
	public int getAtomCount(byte[] idcode, int offset) {
		if (idcode == null || offset >= idcode.length)
			return 0;

		decodeBitsStart(idcode, offset);
		int firstNibble = decodeBits(4);
		int secondNibble = decodeBits(4);	// always consumed to keep the bit stream position

		// a first value above 8 is the version number; then the second one is the bit width
		int atomBits = (firstNibble > 8) ? secondNibble : firstNibble;

		return (atomBits == 0) ? 0 : decodeBits(atomBits);
		}

	/**
	 * Determines atom and bond counts of the given idcode String. The String is
	 * converted into UTF-8 bytes and handed to the byte array based variant with offset 0.
	 * @param idcode may be null
	 * @param count null or int[2], which is filled and returned
	 * @return null if idcode is null or empty; otherwise int[] with atom and bond count
	 *         as first and second values
	 */
	public int[] getAtomAndBondCounts(String idcode, int[] count) {
		if (idcode == null || idcode.isEmpty())
			return null;

		byte[] idcodeBytes = idcode.getBytes(StandardCharsets.UTF_8);
		return getAtomAndBondCounts(idcodeBytes, 0, count);
		}

	/**
	 * Determines atom and bond counts of an idcode that starts at the given position
	 * of a byte array. Only the idcode header is decoded: two 4-bit values followed by
	 * atom count and bond count. If the first 4-bit value exceeds 8, then it is the
	 * idcode version number and the second value is used as bit width for both counts.
	 * Calling this method resets the internal bit decoding state of this parser.
	 * @param idcode may be null
	 * @param offset index of the first idcode byte within the idcode array
	 * @param count null or int[2], which is filled and returned
	 * @return null if idcode is null or if offset is outside of the array; otherwise
	 *         int[] with atom and bond count as first and second values
	 */
	public int[] getAtomAndBondCounts(byte[] idcode, int offset, int[] count) {
		// the offset must address an existing byte (this also covers an empty array)
		if (idcode == null || offset < 0 || idcode.length <= offset)
			return null;

		// start decoding at the given offset rather than always at index 0
		decodeBitsStart(idcode, offset);
		int abits = decodeBits(4);
		int bbits = decodeBits(4);

		if (abits > 8)   // abits is the version number
			abits = bbits;

		if (count == null)
			count = new int[2];

		if (abits == 0) {
			count[0] = 0;
			count[1] = 0;
			}
		else {
			count[0] = decodeBits(abits);
			count[1] = decodeBits(bbits);
			}

		return count;
		}

	/**
	 * Initializes the bit decoder on the given byte array. The lower 6 bits of the byte
	 * at the given offset are loaded into the temporary buffer such that the first of
	 * them sits at bit 16, from where decodeBits() takes one bit at a time.
	 * @param bytes encoded data; must contain a byte at index offset
	 * @param offset index of the first byte to be decoded
	 */
	private void decodeBitsStart(byte[] bytes, int offset) {
		mDecodingBytes = bytes;
		mIDCodeBufferIndex = offset;
		mIDCodeBitsAvail = 6;
		mIDCodeTempData = (bytes[offset] & 0x3F) << 11;
		}

	/**
	 * Reads the requested number of bits from the current decoding position and advances
	 * that position accordingly. Every source byte contributes its lower 6 bits; when
	 * these are used up, the next byte is loaded. The bit read first becomes the lowest
	 * bit of the returned value.
	 * @param bits number of bits to read; 0 returns 0 without touching the decoder state
	 * @return the value assembled from the bits read
	 */
	private int decodeBits(int bits) {
		int value = 0;
		for (int remaining = bits; remaining != 0; remaining--) {
			if (mIDCodeBitsAvail == 0) {
				// current byte is exhausted: move on to the next one and load its 6 bits
				mIDCodeBufferIndex++;
				mIDCodeTempData = (mDecodingBytes[mIDCodeBufferIndex] & 0x3F) << 11;
				mIDCodeBitsAvail = 6;
				}

			// bit 16 holds the next unread bit; move it to its position within the result
			int nextBit = mIDCodeTempData & 0x00010000;
			value |= (nextBit >> (16 - bits + remaining));

			mIDCodeTempData <<= 1;
			mIDCodeBitsAvail--;
			}
		return value;
		}

	/**
	 * Converts a decoded bin index into a length on a logarithmic scale:
	 * index 0 yields 0.1 and index binCount-1 yields 200.
	 * @param value decoded bin index
	 * @param binCount number of bins
	 * @return 10 to the power of (log10(2000) * value / (binCount-1) - 1)
	 */
	private double decodeAVBL(int value, int binCount) {
		double logRange = Math.log10(200/0.1);
		double exponent = logRange * value / (binCount - 1) - 1;
		return Math.pow(10, exponent);
		}

	/**
	 * Converts a decoded bin index into a signed shift value. Indexes in the lower half
	 * of the bins describe non-negative shifts; indexes from binCount/2 upwards describe
	 * negated shifts, whose magnitude is derived from the index reduced by binCount/2.
	 * The magnitude is steepness * m / (binCount/2 - m) with m being the (reduced) index.
	 * Note that steepness is binCount/32 evaluated as integer division, i.e. it is
	 * truncated before being used as double.
	 * NOTE(review): presumably the encoder computes steepness the same way - confirm
	 * before changing this.
	 * @param value decoded bin index
	 * @param binCount number of bins
	 * @return the decoded shift
	 */
	private double decodeShift(int value, int binCount) {
		int halfBinCount = binCount / 2;
		double steepness = binCount/32;

		if (value >= halfBinCount) {
			int magnitude = value - halfBinCount;
			double doubleValue = steepness * magnitude / (halfBinCount - magnitude);
			return -doubleValue;
			}

		return steepness * value / (halfBinCount - value);
		}

	public void printContent(byte[] idcode, byte[] coordinates) {
		try {
			int version = Canonizer.cIDCodeVersion2;

			if (idcode == null || idcode.length == 0)
				return;

			if (coordinates != null && coordinates.length == 0)
				coordinates = null;

			System.out.println("idcode: " + new String(idcode, StandardCharsets.UTF_8));
			if (coordinates != null)
				System.out.println("coords: " + new String(coordinates, StandardCharsets.UTF_8));

			decodeBitsStart(idcode, 0);
			int abits = decodeBits(4);
			int bbits = decodeBits(4);

			if (abits > 8) {    // abits is the version number
				version = abits;
				abits = bbits;
			}

			System.out.println("version:" + version);

			int allAtoms = decodeBits(abits);
			if (allAtoms == 0)
				return;

			int allBonds = decodeBits(bbits);
			int nitrogens = decodeBits(abits);
			int oxygens = decodeBits(abits);
			int otherAtoms = decodeBits(abits);
			int chargedAtoms = decodeBits(abits);

			System.out.println("allAtoms:" + allAtoms + " allBonds:" + allBonds);
			if (nitrogens != 0) {
				System.out.print("nitrogens:");
				for (int i = 0; i < nitrogens; i++)
					System.out.print(" " + decodeBits(abits));
				System.out.println();
			}
			if (oxygens != 0) {
				System.out.print("oxygens:");
				for (int i = 0; i < oxygens; i++)
					System.out.print(" " + decodeBits(abits));
				System.out.println();
			}
			if (otherAtoms != 0) {
				System.out.print("otherAtoms:");
				for (int i = 0; i < otherAtoms; i++)
					System.out.print(" " + decodeBits(abits) + ":" + decodeBits(8));
				System.out.println();
			}
			if (chargedAtoms != 0) {
				System.out.print("chargedAtoms:");
				for (int i = 0; i < chargedAtoms; i++)
					System.out.print(" " + decodeBits(abits) + ":" + (decodeBits(4) - 8));
				System.out.println();
			}

			int closureBonds = 1 + allBonds - allAtoms;
			int dbits = decodeBits(4);
			int base = 0;

			int[][] bondAtom = new int[2][allBonds];
			int bondCount = 0;
			for (int i = 1; i < allAtoms; i++) {
				int dif = decodeBits(dbits);
				if (dif == 0) {
					closureBonds++;
					continue;
				}
				base += dif - 1;
				bondAtom[0][bondCount] = base;
				bondAtom[1][bondCount++] = i;
			}

			for (int i = 0; i < closureBonds; i++) {
				bondAtom[0][bondCount] = decodeBits(abits);
				bondAtom[1][bondCount++] = decodeBits(abits);
			}

			int[] bondOrder = new int[allBonds];
			System.out.print("bonds:");
			for (int bond = 0; bond < allBonds; bond++) {
				System.out.print(" " + bondAtom[0][bond]);
				bondOrder[bond] = decodeBits(2);
				System.out.print(bondOrder[bond] == 0 ? "." : bondOrder[bond] == 1 ? "-" : bondOrder[bond] == 2 ? "=" : "#");
				System.out.print("" + bondAtom[1][bond]);
			}
			System.out.println();

			int THCount = decodeBits(abits);
			if (THCount != 0) {
				System.out.print("parities:");
				for (int i = 0; i < THCount; i++) {
					int atom = decodeBits(abits);
					if (version == Canonizer.cIDCodeVersion2) {
						int parity = decodeBits(2);
						if (parity == 3) {
							// this was the old discontinued Molecule.cAtomParityMix
							// version2 idcodes had never more than one center with parityMix
							System.out.print(" " + atom + ":1&0");
						} else {
							System.out.print(" " + atom + ":" + parity);
						}
					} else {
						int parity = decodeBits(3);
						switch (parity) {
							case Canonizer.cParity1And:
								System.out.print(" " + atom + ":1&" + decodeBits(3));
								break;
							case Canonizer.cParity2And:
								System.out.print(" " + atom + ":2&" + decodeBits(3));
								break;
							case Canonizer.cParity1Or:
								System.out.print(" " + atom + ":1|" + decodeBits(3));
								break;
							case Canonizer.cParity2Or:
								System.out.print(" " + atom + ":2|" + decodeBits(3));
								break;
							default:
								System.out.print(" " + atom + ":" + parity);
						}
					}
				}
				System.out.println();
			}

			if (version == Canonizer.cIDCodeVersion2)
				if ((decodeBits(1) == 0))   // translate chiral flag
					System.out.println("isRacemate");

			int EZCount = decodeBits(bbits);
			if (EZCount != 0) {
				System.out.print("EZ:");
				for (int i = 0; i < EZCount; i++) {
					int bond = decodeBits(bbits);
					if (bondOrder[bond] == 1) {    // BINAP type of axial chirality
						int parity = decodeBits(3);
						switch (parity) {
							case Canonizer.cParity1And:
								System.out.print(" " + bond + ":1&" + decodeBits(3));
								break;
							case Canonizer.cParity2And:
								System.out.print(" " + bond + ":2&" + decodeBits(3));
								break;
							case Canonizer.cParity1Or:
								System.out.print(" " + bond + ":1|" + decodeBits(3));
								break;
							case Canonizer.cParity2Or:
								System.out.print(" " + bond + ":2|" + decodeBits(3));
								break;
							default:
								System.out.print(" " + bond + ":" + parity);
						}
					} else
						System.out.print(" " + bond + ":" + decodeBits(2));
				}
				System.out.println();
			}

			if (decodeBits(1) == 1)
				System.out.println("isFragment = true");

			int offset = 0;
			while (decodeBits(1) == 1) {
				int dataType = offset + decodeBits(4);
				switch (dataType) {
					case 0: //  datatype 'AtomQFNoMoreNeighbours'
						int no = decodeBits(abits);
						System.out.print("noMoreNeighbours:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits));
						System.out.println();
						break;
					case 1: //  datatype 'isotop'
						no = decodeBits(abits);
						System.out.print("mass:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(8));
						System.out.println();
						break;
					case 2: //  datatype 'bond defined to be delocalized'
						no = decodeBits(bbits);
						System.out.print("delocalizedBonds (outdated, redundant and wrong):");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(bbits));
						System.out.println();
						break;
					case 3: //  datatype 'AtomQFMoreNeighbours'
						no = decodeBits(abits);
						System.out.print("moreNeighbours:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits));
						System.out.println();
						break;
					case 4: //  datatype 'AtomQFRingState'
						no = decodeBits(abits);
						System.out.print("atomRingState:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFRingStateBits));
						System.out.println();
						break;
					case 5: //  datatype 'AtomQFAromState'
						no = decodeBits(abits);
						System.out.print("atomAromState:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFAromStateBits));
						System.out.println();
						break;
					case 6: //  datatype 'AtomQFAny'
						no = decodeBits(abits);
						System.out.print("atomAny:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits));
						System.out.println();
						break;
					case 7: //  datatype 'AtomQFHydrogen'
						no = decodeBits(abits);
						System.out.print("atomHydrogen:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFHydrogenBits));
						System.out.println();
						break;
					case 8: //  datatype 'AtomList'
						no = decodeBits(abits);
						System.out.print("atomList:");
						for (int i = 0; i < no; i++) {
							int atom = decodeBits(abits);
							int atoms = decodeBits(4);
							System.out.print(" " + atom);
							for (int j = 0; j < atoms; j++) {
								System.out.print(j == 0 ? ":" : ",");
								System.out.print("" + decodeBits(8));
							}
						}
						System.out.println();
						break;
					case 9: //  datatype 'BondQFRingState'
						no = decodeBits(bbits);
						System.out.print("bondRingState:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFRingStateBits));
						System.out.println();
						break;
					case 10://  datatype 'BondQFBondTypes'
						no = decodeBits(bbits);
						System.out.print("bondTypes:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFBondTypesBits));
						System.out.println();
						break;
					case 11:    //  datatype 'AtomQFMatchStereo'
						no = decodeBits(abits);
						System.out.print("atomMatchStereo:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits));
						System.out.println();
						break;
					case 12:    //  datatype 'bond defined to be a bridge from n1 to n2 atoms'
						no = decodeBits(bbits);
						for (int i = 0; i < no; i++) {
							System.out.print("bridgeBond:" + decodeBits(bbits));
							int min = decodeBits(Molecule.cBondQFBridgeMinBits);
							int max = min + decodeBits(Molecule.cBondQFBridgeSpanBits);
							System.out.println("(" + min + "-" + max + ")");
						}
						break;
					case 13: //  datatype 'AtomQFPiElectrons'
						no = decodeBits(abits);
						System.out.print("atomPiElectrons:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFPiElectronBits));
						System.out.println();
						break;
					case 14: //  datatype 'AtomQFNeighbours'
						no = decodeBits(abits);
						System.out.print("AtomQFNeighbours:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFNeighbourBits));
						System.out.println();
						break;
					case 15: //  datatype 'start second feature set'
					case 31:
						offset += 16;
						System.out.println("");
						break;
					case 16: //  datatype 'AtomQFSmallRingSize'
						no = decodeBits(abits);
						System.out.print("AtomQFSmallRingSize:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFSmallRingSizeBits));
						System.out.println();
						break;
					case 17: //  datatype 'AtomAbnormalValence'
						no = decodeBits(abits);
						System.out.print("AtomAbnormalValence:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(4));
						System.out.println();
						break;
					case 18: //  datatype 'AtomCustomLabel'
						no = decodeBits(abits);
						System.out.print("AtomCustomLabel:");
						int lbits = decodeBits(4);
						for (int i = 0; i < no; i++) {
							int atom = decodeBits(abits);
							int count = decodeBits(lbits);
							byte[] label = new byte[count];
							for (int j = 0; j < count; j++)
								label[j] = (byte) decodeBits(7);
							System.out.print(" " + atom + ":" + new String(label, StandardCharsets.UTF_8));
						}
						System.out.println();
						break;
					case 19: //  datatype 'AtomQFCharge'
						no = decodeBits(abits);
						System.out.print("AtomQFCharge:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFChargeBits));
						System.out.println();
						break;
					case 20: //  datatype 'BondQFRingSize'
						no = decodeBits(bbits);
						System.out.print("BondQFRingSize:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFRingSizeBits));
						System.out.println();
						break;
					case 21: //  datatype 'AtomRadicalState'
						no = decodeBits(abits);
						System.out.print("AtomRadicalState:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(2));
						System.out.println();
						break;
					case 22:    //	datatype 'flat nitrogen'
						no = decodeBits(abits);
						System.out.print("AtomQFFlatNitrogen:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits));
						System.out.println();
						break;
					case 23:    //	datatype 'cBondQFMatchStereo'
						no = decodeBits(bbits);
						System.out.print("cBondQFMatchStereo:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits));
						System.out.println();
						break;
					case 24:    //	datatype 'cBondQFAromatic'
						no = decodeBits(bbits);
						System.out.print("BondQFAromState:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFAromStateBits));
						System.out.println();
						break;
					case 25:    //	datatype 'atom selection'
						System.out.print("AtomSelection:");
						for (int i = 0; i < allAtoms; i++)
							if (decodeBits(1) == 1)
								System.out.print(" " + i);
						System.out.println();
						break;
					case 26:    //	datatype 'delocalized high order bond'
						System.out.print("DelocalizedHigherOrderBonds:");
						no = decodeBits(bbits);
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(bbits));
						break;
					case 27:    //	datatype 'part of an exclude group'
						no = decodeBits(abits);
						System.out.print("AtomQFExcludeGroup:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits));
						System.out.println();
						break;
					case 28:    //	datatype 'coordinate bond'
						no = decodeBits(bbits);
						System.out.print("Coordinate Bonds:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(bbits));
						System.out.println();
						break;
					case 29:
						no = decodeBits(abits);
						System.out.print("ReactionParityHint:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFRxnParityBits));
						System.out.println();
						break;
					case 30: //  datatype 'AtomQFNewRingSize'
						no = decodeBits(abits);
						System.out.print("AtomQFNewRingSize:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFNewRingSizeBits));
						System.out.println();
						break;
					case 32: //  datatype 'AtomQFStereoStateBits'
						no = decodeBits(abits);
						System.out.print("AtomQFStereoState:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFStereoStateBits));
						System.out.println();
						break;
					case 33: //  datatype 'AtomQFENeighbours'
						no = decodeBits(abits);
						System.out.print("AtomQFENeighbours:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFENeighbourBits));
						System.out.println();
						break;
					case 34:    //	datatype 'in hetero aromatic ring'
						no = decodeBits(abits);
						System.out.print("AtomQFHeteroAromatic:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits));
						System.out.println();
						break;
					case 35:    //	datatype 'cBondQFMatchFormalOrder'
						no = decodeBits(bbits);
						System.out.print("BondQFMatchFormalOrder:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(abits));
						System.out.println();
						break;
					case 36:    //	datatype 'cBondQFRareBondType'
						no = decodeBits(bbits);
						System.out.print("BondQFRareBondType:");
						for (int i = 0; i < no; i++)
							System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFRareBondTypesBits));
						System.out.println();
						break;
					case 37:    // datatype 'rare bond type'
						no = decodeBits(bbits);
						System.out.print("Rare Bond Type:");
						for (int i=0; i 0 && distance < lowDistance)
										lowDistance = distance;
								}
							}
							avbl = (lowDistance == Double.MAX_VALUE) ? defaultAVBL : lowDistance;
						}
					}

					if (coordinates[0] == '#') {    // we have 3D-coordinates that include implicit hydrogen coordinates
						System.out.print("hydrogen coords (" + hydrogenCount + " expected): ");
						int hydrogen = allAtoms;
						for (int atom = 0; atom < allAtoms; atom++) {
							if (hCount[atom] != 0)
								System.out.print(atom);
							for (int i = 0; i < hCount[atom]; i++) {
								System.out.print(" (");
								coords[0][hydrogen] = coords[0][atom] + (decodeBits(resolutionBits) - binCount / 2);
								System.out.print((int) coords[0][hydrogen] + ",");
								coords[1][hydrogen] = coords[1][atom] + (decodeBits(resolutionBits) - binCount / 2);
								System.out.print((int) coords[1][hydrogen]);
								if (coordsAre3D) {
									coords[2][hydrogen] = coords[2][atom] + (decodeBits(resolutionBits) - binCount / 2);
									System.out.print("," + (int) coords[2][hydrogen]);
								}
								System.out.print("), ");
								hydrogen++;
							}
						}
						System.out.println();
					}

					System.out.print(coordsAreAbsolute ? "absolute coords:" : "relative coords:");
					if (hydrogenCount != 0)
						System.out.println("Coordinates contain " + hydrogenCount + " hydrogen atoms!");

					if (coordsAreAbsolute) {
						double targetAVBL = decodeAVBL(decodeBits(resolutionBits), binCount);
						double xOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
						double yOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
						double zOffset = 0;
						if (coordsAre3D)
							zOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
						System.out.println("Abs-coord transformation: targetAVBL:" + targetAVBL + " xOffset:" + xOffset + " yOffset:" + yOffset + " zOffset:" + zOffset);

						factor = targetAVBL / avbl;
						for (int atom = 0; atom < allAtoms; atom++) {
							coords[0][atom] = xOffset + factor * coords[0][atom];
							coords[1][atom] = xOffset + factor * coords[1][atom];
							if (coordsAre3D)
								coords[2][atom] = xOffset + factor * coords[2][atom];
						}
					} else {
						double targetAVBL = 1.5;
						factor = targetAVBL / avbl;
						for (int atom = 0; atom < allAtoms; atom++) {
							System.out.print(atom + " (");
							coords[0][atom] = coords[0][atom] * factor;
							System.out.print(DoubleFormat.toString(coords[0][atom]) + ",");
							coords[1][atom] = coords[1][atom] * factor;
							System.out.print(DoubleFormat.toString(coords[1][atom]));
							if (coordsAre3D) {
								coords[2][atom] = coords[2][atom] * factor;
								System.out.print("," + DoubleFormat.toString(coords[2][atom]));
							}
							System.out.print("), ");
							if ((atom & 3) == 3 || atom == allAtoms - 1)
								System.out.println();
						}
					}
				}
			}
			System.out.println();
		}
		catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Calculates the Euclidean distance between two atoms of a coordinate table.
	 *
	 * @param coords coordinate table indexed as coords[axis][atom]; row 0 holds x, row 1 holds y, row 2 holds z
	 * @param atom1 column index of the first atom
	 * @param atom2 column index of the second atom
	 * @param coordsAre3D if true, the z row contributes to the distance; if false, row 2 is never read
	 * @return the distance between both atoms
	 */
	private double getDistance(double[][] coords, int atom1, int atom2, boolean coordsAre3D) {
		// Only the first two axes contribute unless the coordinates are flagged as 3D.
		int axisCount = coordsAre3D ? 3 : 2;

		double squareSum = 0.0;
		for (int axis = 0; axis < axisCount; axis++) {
			double delta = coords[axis][atom1] - coords[axis][atom2];
			squareSum += delta * delta;
		}

		return Math.sqrt(squareSum);
	}
	}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy