com.actelion.research.chem.IDCodeParserWithoutCoordinateInvention Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openchemlib Show documentation
Show all versions of openchemlib Show documentation
Open Source Chemistry Library
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of the the copyright holder nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
package com.actelion.research.chem;
import com.actelion.research.util.DoubleFormat;
import java.nio.charset.StandardCharsets;
/**
* Typically you should use IDCodeParser instead of this class. You may instantiate this class
* if you need to avoid a dependency to the CoordinateInventor and if you pass encoded coordinates
* together with any idcode for parsing.
* We needed to introduce this class to avoid a cyclic dependency between the IDCodeParser and
* the CoordinateInventor: If encoded atom coords are not given, then the IDCodeParser needs
* to invent them in order to assign proper up-/down-bonds. The CoordinateInventor needs the
* IDCodeParser to unpack its default template list.
*/
public class IDCodeParserWithoutCoordinateInvention {
private StereoMolecule mMol;
private byte[] mDecodingBytes;
private int mIDCodeBitsAvail,mIDCodeTempData,mIDCodeBufferIndex;
private boolean mNeglectSpaceDelimitedCoordinates;
/**
 * Tells the parsing code whether the parsed molecule is required to carry 2D-coordinates.
 * This implementation always returns <code>false</code>. The parse logic of this class
 * consults the method twice: if it returns <code>true</code>, passed 3D-coordinates are
 * dropped, and the double bond handling for 2D-coordinates is applied even if no
 * 2D-coordinates were passed.
 * NOTE(review): presumably meant to be overridden by a subclass that is able to
 * invent coordinates (e.g. IDCodeParser) - confirm.
 * @return false in this class
 */
protected boolean ensure2DCoordinates() {
return false;
}
/**
 * IDCodeParsers allow passing idcode and coordinates as one String with a space
 * as separator in between. If an idcode is followed by a space and more, and if
 * the text following the space shall not be interpreted as encoded coordinates,
 * then call this method after instantiation.
 * The method only sets the internal flag mNeglectSpaceDelimitedCoordinates to true.
 */
public void neglectSpaceDelimitedCoordinates() {
mNeglectSpaceDelimitedCoordinates = true;
}
/**
 * Builds a molecule from an idcode given as String. The String is converted into
 * UTF-8 bytes and handed over to {@link #getCompactMolecule(byte[])}, which creates
 * a molecule whose atom and bond arrays are just as large as needed. Use this to
 * conserve memory if no atoms or bonds are added to the molecule afterwards.
 * @param idcode null or idcode, which (according to the original documentation) may
 *               contain coordinates separated by a space character
 * @return the molecule, or null if idcode is null or empty
 */
public StereoMolecule getCompactMolecule(String idcode) {
	if (idcode == null || idcode.isEmpty())
		return null;

	byte[] encoded = idcode.getBytes(StandardCharsets.UTF_8);
	return getCompactMolecule(encoded);
}
/**
* Creates and returns a molecule from the idcode with its atom and bond arrays being
* just as large as needed to hold the molecule. Use this to conserve memory if no
* atoms or bonds are added to the molecule afterwards.
* @param idcode may be null
* @return
*/
public StereoMolecule getCompactMolecule(byte[] idcode) {
if (idcode == null || idcode.length == 0)
return null;
for (int i=2; i 8) // abits is the version number
abits = bbits;
int allAtoms = decodeBits(abits);
int allBonds = decodeBits(bbits);
StereoMolecule mol = new StereoMolecule(allAtoms, allBonds);
parse(mol, idcode, coordinates, idcodeStart, coordsStart);
return mol;
}
/**
 * Parses the idcode and populates the given molecule to represent the passed idcode.
 * This version of the method accepts idcode and atom coordinates in one String object:
 * if the String contains a space that is neither its first nor its last character, then
 * the text before the first space is taken as idcode and the text after it as encoded
 * coordinates. Otherwise the entire String is passed on as idcode without coordinates.
 * All text is converted to bytes using UTF-8.
 * @param mol molecule object to be filled with the idcode content
 * @param idcode null or idcode, which may contain coordinates separated by a space character
 */
public void parse(StereoMolecule mol, String idcode) {
	if (idcode == null || idcode.isEmpty()) {
		parse(mol, (byte[])null, (byte[])null);
		return;
	}

	int separator = idcode.indexOf(' ');
	boolean hasCoordinatePart = (separator > 0 && separator < idcode.length()-1);
	if (!hasCoordinatePart) {
		parse(mol, idcode.getBytes(StandardCharsets.UTF_8), (byte[])null);
		return;
	}

	byte[] idcodeBytes = idcode.substring(0, separator).getBytes(StandardCharsets.UTF_8);
	byte[] coordinateBytes = idcode.substring(separator+1).getBytes(StandardCharsets.UTF_8);
	parse(mol, idcodeBytes, coordinateBytes);
}
/**
 * Parses the idcode and populates the given molecule to represent the passed idcode.
 * Delegates to {@link #parse(StereoMolecule, byte[], byte[])} without a separate
 * coordinates array.
 * @param mol molecule object to be filled with the idcode content
 * @param idcode null or valid idcode, which (according to the original documentation)
 *               may be followed by a SPACE and encoded coordinates
 */
public void parse(StereoMolecule mol, byte[] idcode) {
	final byte[] noCoordinates = null;
	parse(mol, idcode, noCoordinates);
}
/**
 * Parses the idcode and populates the given molecule to represent the passed idcode.
 * Both Strings are converted into UTF-8 bytes (null stays null) before they are
 * passed to {@link #parse(StereoMolecule, byte[], byte[])}.
 * @param mol molecule object to be filled with the idcode content
 * @param idcode may be null
 * @param coordinates may be null
 */
public void parse(StereoMolecule mol, String idcode, String coordinates) {
	byte[] idcodeBytes = null;
	if (idcode != null)
		idcodeBytes = idcode.getBytes(StandardCharsets.UTF_8);

	byte[] coordinateBytes = null;
	if (coordinates != null)
		coordinateBytes = coordinates.getBytes(StandardCharsets.UTF_8);

	parse(mol, idcodeBytes, coordinateBytes);
}
/**
 * Parses the idcode and populates the given molecule to represent the passed idcode.
 * If idcode is null or empty, then the molecule is just cleared. Otherwise parsing
 * starts at the first byte of both, the idcode and the coordinates array.
 * @param mol molecule object to be filled with the idcode content
 * @param idcode may be null
 * @param coordinates may be null
 */
public void parse(StereoMolecule mol, byte[] idcode, byte[] coordinates) {
	boolean nothingToParse = (idcode == null || idcode.length == 0);
	if (nothingToParse)
		mol.clear();
	else
		parse(mol, idcode, coordinates, 0, 0);
}
/**
 * Parses the idcode and populates the given molecule to represent the passed idcode.
 * No separate coordinates array is passed on: the call is forwarded to the five
 * parameter parse() method with null as coordinates and -1 as coordinate offset.
 * @param mol molecule object to be filled with the idcode content
 * @param idcode may be null
 * @param idcodeStart offset in idcode array to first idcode byte
 */
public void parse(StereoMolecule mol, byte[] idcode, int idcodeStart) {
	final byte[] noCoordinates = null;
	final int noCoordsStart = -1;
	parse(mol, idcode, noCoordinates, idcodeStart, noCoordsStart);
}
/**
* Parses the idcode and populates the given molecule to represent the passed idcode.
* @param mol molecule object to be filled with the idcode content
* @param idcode may be null
* @param coordinates may be null
* @param idcodeStart offset in idcode array to first idcode byte
* @param coordsStart offset in coordinates array to first coords byte
*/
public void parse(StereoMolecule mol, byte[] idcode, byte[] coordinates, int idcodeStart, int coordsStart) {
mol.clear();
if (idcode==null || idcodeStart < 0 || idcodeStart >= idcode.length)
return;
mMol = mol;
int version = Canonizer.cIDCodeVersion2;
if (coordinates != null && (coordsStart < 0 || coordsStart >= coordinates.length))
coordinates = null;
decodeBitsStart(idcode, idcodeStart);
int abits = decodeBits(4);
int bbits = decodeBits(4);
if (abits > 8) { // abits is the version number
version = abits;
abits = bbits;
}
if (abits == 0) {
mMol.setFragment(decodeBits(1) == 1);
return;
}
int allAtoms = decodeBits(abits);
int allBonds = decodeBits(bbits);
int nitrogens = decodeBits(abits);
int oxygens = decodeBits(abits);
int otherAtoms = decodeBits(abits);
int chargedAtoms = decodeBits(abits);
for (int atom=0; atom= '\'');
double targetAVBL = 0.0;
double xOffset = 0.0;
double yOffset = 0.0;
double zOffset = 0.0;
boolean coordsAre3D = false;
boolean coordsAreAbsolute = false;
if (decodeOldCoordinates) { // old coordinate encoding
if ((coordinates.length > 2*allAtoms-2 && coordinates[2*allAtoms-2] == '\'')
|| (coordinates.length > 3*allAtoms-3 && coordinates[3*allAtoms-3] == '\'')) { // old faulty encoding
coordsAreAbsolute = true;
coordsAre3D = (coordinates.length == 3*allAtoms-3+9);
int index = coordsAre3D ? 3*allAtoms-3 : 2*allAtoms-2;
int avblInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
targetAVBL = Math.pow(10.0, avblInt/2000.0-1.0);
index += 2;
int xInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
xOffset = Math.pow(10.0, xInt/1500.0-1.0);
index += 2;
int yInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
yOffset = Math.pow(10.0, yInt/1500.0-1.0);
if (coordsAre3D) {
index += 2;
int zInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
zOffset = Math.pow(10.0, zInt/1500.0-1.0);
}
}
else {
coordsAre3D = (coordinates.length == 3*allAtoms-3);
}
}
// don't use 3D coordinates, if we need 2D
if (ensure2DCoordinates() && coordsAre3D) {
coordinates = null;
decodeOldCoordinates = false;
}
for (int i=1; i mIDCodeBufferIndex+1
&& (idcode[mIDCodeBufferIndex+1] == ' ' || idcode[mIDCodeBufferIndex+1] == '\t')) {
coordinates = idcode;
coordsStart = mIDCodeBufferIndex+2;
}
if (coordinates != null) {
try {
if (coordinates[coordsStart] == '!' || coordinates[coordsStart] == '#') { // new coordinate format
decodeBitsStart(coordinates, coordsStart + 1);
coordsAre3D = (decodeBits(1) == 1);
coordsAreAbsolute = (decodeBits(1) == 1);
int resolutionBits = 2 * decodeBits(4);
int binCount = (1 << resolutionBits);
double factor;
int from = 0;
int bond = 0;
for (int atom = 1; atom < allAtoms; atom++) {
if (bond < allBonds && mMol.getBondAtom(1, bond) == atom) {
from = mMol.getBondAtom(0, bond++);
factor = 1.0;
}
else {
from = 0;
factor = 8.0;
}
mMol.setAtomX(atom, mMol.getAtomX(from) + factor * (decodeBits(resolutionBits) - binCount / 2.0));
mMol.setAtomY(atom, mMol.getAtomY(from) + factor * (decodeBits(resolutionBits) - binCount / 2.0));
if (coordsAre3D)
mMol.setAtomZ(atom, mMol.getAtomZ(from) + factor * (decodeBits(resolutionBits) - binCount / 2.0));
}
if (coordinates[coordsStart] == '#') { // we have 3D-coordinates that include implicit hydrogen coordinates
int hydrogenCount = 0;
// we need to cache hCount, because otherwise getImplicitHydrogens() would create helper arrays with every call
int[] hCount = new int[allAtoms];
for (int atom = 0; atom < allAtoms; atom++)
hydrogenCount += (hCount[atom] = mMol.getImplicitHydrogens(atom));
for (int atom = 0; atom < allAtoms; atom++) {
for (int i = 0; i < hCount[atom]; i++) {
int hydrogen = mMol.addAtom(1);
mMol.addBond(atom, hydrogen, Molecule.cBondTypeSingle);
mMol.setAtomX(hydrogen, mMol.getAtomX(atom) + (decodeBits(resolutionBits) - binCount / 2.0));
mMol.setAtomY(hydrogen, mMol.getAtomY(atom) + (decodeBits(resolutionBits) - binCount / 2.0));
if (coordsAre3D)
mMol.setAtomZ(hydrogen, mMol.getAtomZ(atom) + (decodeBits(resolutionBits) - binCount / 2.0));
if (mMol.isSelectedAtom(atom))
mMol.setAtomSelection(hydrogen, true);
}
}
allAtoms += hydrogenCount;
allBonds += hydrogenCount;
}
double avblDefault = coordsAre3D ? 1.5 : Molecule.getDefaultAverageBondLength();
double avbl = mMol.getAverageBondLength(allAtoms, allBonds, avblDefault);
if (coordsAreAbsolute) {
targetAVBL = decodeAVBL(decodeBits(resolutionBits), binCount);
xOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
yOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
if (coordsAre3D)
zOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
factor = targetAVBL / avbl;
for (int atom = 0; atom < allAtoms; atom++) {
mMol.setAtomX(atom, xOffset + factor * mMol.getAtomX(atom));
mMol.setAtomY(atom, yOffset + factor * mMol.getAtomY(atom));
if (coordsAre3D)
mMol.setAtomZ(atom, zOffset + factor * mMol.getAtomZ(atom));
}
}
else { // with new format 2D and 3D coordinates are scaled to average bond lengths of 1.5 Angstrom
targetAVBL = 1.5;
factor = targetAVBL / avbl;
for (int atom = 0; atom < allAtoms; atom++) {
mMol.setAtomX(atom, factor * mMol.getAtomX(atom));
mMol.setAtomY(atom, factor * mMol.getAtomY(atom));
if (coordsAre3D)
mMol.setAtomZ(atom, factor * mMol.getAtomZ(atom));
}
}
}
else { // old coordinate format
if (coordsAre3D && !coordsAreAbsolute && targetAVBL == 0.0) // if no scaling factor is given, then scale to mean bond length = 1.5
targetAVBL = 1.5;
if (targetAVBL != 0.0 && mMol.getAllBonds() != 0) {
double avbl = 0.0;
for (int bond = 0; bond < mMol.getAllBonds(); bond++) {
double dx = mMol.getAtomX(mMol.getBondAtom(0, bond)) - mMol.getAtomX(mMol.getBondAtom(1, bond));
double dy = mMol.getAtomY(mMol.getBondAtom(0, bond)) - mMol.getAtomY(mMol.getBondAtom(1, bond));
double dz = coordsAre3D ? mMol.getAtomZ(mMol.getBondAtom(0, bond)) - mMol.getAtomZ(mMol.getBondAtom(1, bond)) : 0.0f;
avbl += Math.sqrt(dx * dx + dy * dy + dz * dz);
}
avbl /= mMol.getAllBonds();
double f = targetAVBL / avbl;
for (int atom = 0; atom < mMol.getAllAtoms(); atom++) {
mMol.setAtomX(atom, mMol.getAtomX(atom) * f + xOffset);
mMol.setAtomY(atom, mMol.getAtomY(atom) * f + yOffset);
if (coordsAre3D)
mMol.setAtomZ(atom, mMol.getAtomZ(atom) * f + zOffset);
}
}
}
}
catch (Exception e) {
e.printStackTrace();
System.err.println("Faulty id-coordinates:"+e+" "+new String(idcode, StandardCharsets.UTF_8)+" "+new String(coordinates, StandardCharsets.UTF_8));
coordinates = null;
coordsAre3D = false;
}
}
boolean coords2DAvailable = (coordinates != null && !coordsAre3D);
// If we have or create 2D-coordinates, then we need to set all double bonds to a cross bond, which
// - have distinguishable substituents on both ends, i.e. is a stereo double bond
// - are not in a small ring
// Here we don't know, whether a double bond without E/Z parity is a stereo bond with unknown
// configuration or not a stereo bond. Therefore, we need to set a flag, that causes the Canonizer
// during the next stereo recognition with atom coordinates to assign an unknown configuration rather
// than E or Z based on created or given coordinates.
// In a next step these double bonds are converted into cross bonds by
if (coords2DAvailable || ensure2DCoordinates()) {
mMol.ensureHelperArrays(Molecule.cHelperRings);
for (int bond=0; bond= coordsStart+3*allAtoms-3
&& coordinates[coordsStart+2*allAtoms-2] != '\'');
}
}
/**
 * String variant of {@link #coordinatesAreAbsolute(byte[])}: converts the encoded
 * coordinates into UTF-8 bytes and delegates.
 * @param coordinates encoded coordinates; may be null
 * @return false if coordinates is null; otherwise the result of the byte[] variant
 */
public boolean coordinatesAreAbsolute(String coordinates) {
	if (coordinates == null)
		return false;

	return coordinatesAreAbsolute(coordinates.getBytes(StandardCharsets.UTF_8));
}
/**
 * Convenience variant of {@link #coordinatesAreAbsolute(byte[], int)} that starts
 * evaluating the encoded coordinates at the first byte of the array.
 * @param coordinates encoded coordinates
 * @return result of the offset variant called with offset 0
 */
public boolean coordinatesAreAbsolute(byte[] coordinates) {
	final int firstByte = 0;
	return coordinatesAreAbsolute(coordinates, firstByte);
}
public boolean coordinatesAreAbsolute(byte[] coordinates, int coordStart) {
if (coordinates == null || coordinates.length <= coordStart)
return false;
if (coordinates[coordStart] >= '\'') { // old format uses ACSII 39 and higher
for (int i=coordStart; i 8) // abits is the version number
version = abits;
return version;
}
/**
 * Determines the atom count that is encoded in the given idcode String.
 * The String is converted into UTF-8 bytes and evaluated from its first byte.
 * @param idcode may be null
 * @return 0 if idcode is null or empty; otherwise the decoded atom count
 */
public int getAtomCount(String idcode) {
	boolean nothingToDecode = (idcode == null || idcode.isEmpty());
	return nothingToDecode ? 0 : getAtomCount(idcode.getBytes(StandardCharsets.UTF_8), 0);
}
/**
 * Determines the atom count that is encoded in the given idcode bytes.
 * Two 4-bit values are read first. If the first one exceeds 8, then it is a
 * version number and the second value is used as the bit count of atom indexes.
 * The atom count itself is the next value of that many bits.
 * @param idcode may be null
 * @param offset index of the first idcode byte within the array
 * @return 0 if idcode is null, if offset is not smaller than the array length,
 *         or if the atom bit count is 0; otherwise the decoded atom count
 */
public int getAtomCount(byte[] idcode, int offset) {
	if (idcode == null || offset >= idcode.length)
		return 0;

	decodeBitsStart(idcode, offset);
	int firstNibble = decodeBits(4);
	int secondNibble = decodeBits(4);

	// a first value above 8 is the version number; then the second one holds the atom bits
	int atomBits = (firstNibble > 8) ? secondNibble : firstNibble;

	return (atomBits == 0) ? 0 : decodeBits(atomBits);
}
/**
 * Determines atom and bond counts of the given idcode String. The String is
 * converted into UTF-8 bytes and evaluated from its first byte.
 * @param idcode may be null
 * @param count null or int[2], which is filled and returned
 * @return null if idcode is null or empty; otherwise int[] with atom and bond count
 *         as first and second values
 */
public int[] getAtomAndBondCounts(String idcode, int[] count) {
	boolean nothingToDecode = (idcode == null || idcode.isEmpty());
	if (nothingToDecode)
		return null;

	byte[] encoded = idcode.getBytes(StandardCharsets.UTF_8);
	return getAtomAndBondCounts(encoded, 0, count);
}
/**
 * Determines atom and bond counts of the idcode that starts at the given offset
 * within the passed byte array.
 * Two 4-bit values are read first. If the first one exceeds 8, then it is a version
 * number and the second value is used as bit count for both, atom and bond count.
 * Otherwise the first value is the atom bit count and the second the bond bit count.
 * An atom bit count of 0 denotes an empty molecule.
 * @param idcode may be null
 * @param offset index of the first idcode byte within the array
 * @param count null or int[2], which is filled and returned
 * @return null if idcode is null or if offset is outside of the array; otherwise
 *         int[] with atom and bond count as first and second values
 */
public int[] getAtomAndBondCounts(byte[] idcode, int offset, int[] count) {
	// Also covers the empty array, because no offset is valid then.
	if (idcode == null || offset < 0 || offset >= idcode.length)
		return null;

	// Start decoding at the caller supplied offset rather than always at index 0,
	// which is consistent with getAtomCount(byte[], int).
	decodeBitsStart(idcode, offset);
	int abits = decodeBits(4);
	int bbits = decodeBits(4);

	if (abits > 8)	// abits is the version number
		abits = bbits;

	if (count == null)
		count = new int[2];

	if (abits == 0) {
		count[0] = 0;
		count[1] = 0;
	}
	else {
		count[0] = decodeBits(abits);
		count[1] = decodeBits(bbits);
	}

	return count;
}
/**
 * Prepares bit-wise decoding of the given byte array beginning at the given offset.
 * The array and the offset are remembered, and the lower six bits of the byte at
 * offset are loaded into the temporary buffer, such that the highest of these six
 * bits is located at bit 16, which is the bit that decodeBits() reads from.
 * @param bytes encoded bytes to be decoded by following decodeBits() calls
 * @param offset index of the first byte to decode
 */
private void decodeBitsStart(byte[] bytes, int offset) {
	mDecodingBytes = bytes;
	mIDCodeBufferIndex = offset;

	// every encoded byte contributes six bits
	int sixBits = bytes[offset] & 0x3F;
	mIDCodeTempData = sixBits << 11;
	mIDCodeBitsAvail = 6;
}
/**
 * Reads the given number of bits from the current decoding position and returns
 * them as one int value. The first bit read becomes the lowest bit of the result,
 * every following bit is put one position higher. Whenever the six bits of the
 * current byte are used up, the next byte of the decoding array is loaded.
 * @param bits number of bits to read
 * @return value composed of the bits read
 */
private int decodeBits(int bits) {
	final int requestedBits = bits;
	int value = 0;

	for (int remaining = requestedBits; remaining != 0; remaining--) {
		if (mIDCodeBitsAvail == 0) {
			// current byte is exhausted: advance and load the next six bits
			mIDCodeBufferIndex++;
			mIDCodeTempData = (mDecodingBytes[mIDCodeBufferIndex] & 0x3F) << 11;
			mIDCodeBitsAvail = 6;
		}

		// bit 16 holds the next unread bit; move it to its target position in the result
		int nextBit = mIDCodeTempData & 0x00010000;
		value |= (nextBit >> (16 - requestedBits + remaining));

		mIDCodeTempData <<= 1;
		mIDCodeBitsAvail--;
	}

	return value;
}
/**
 * Translates an encoded average bond length into its real value using a
 * logarithmic scale: value 0 yields 0.1 and value binCount-1 yields 200.
 * @param value encoded bin index
 * @param binCount number of available bins
 * @return decoded average bond length
 */
private double decodeAVBL(int value, int binCount) {
	double exponent = Math.log10(200/0.1) * value / (binCount - 1) - 1;
	return Math.pow(10, exponent);
}
/**
 * Translates an encoded coordinate shift into its real, signed value.
 * Values in the upper half of the bin range represent negative shifts. Within its
 * half the magnitude m is translated non-linearly to (binCount/32) * m / (binCount/2 - m).
 * @param value encoded bin index
 * @param binCount number of available bins
 * @return decoded shift
 */
private double decodeShift(int value, int binCount) {
	final int half = binCount / 2;
	final boolean negative = (value >= half);
	final int magnitude = negative ? value - half : value;

	final double steepness = binCount/32.0;
	final double shift = steepness * magnitude / (half - magnitude);

	if (negative)
		return -shift;

	return shift;
}
public void printContent(byte[] idcode, byte[] coordinates) {
try {
int version = Canonizer.cIDCodeVersion2;
if (idcode == null || idcode.length == 0)
return;
if (coordinates != null && coordinates.length == 0)
coordinates = null;
System.out.println("idcode: " + new String(idcode, StandardCharsets.UTF_8));
if (coordinates != null)
System.out.println("coords: " + new String(coordinates, StandardCharsets.UTF_8));
decodeBitsStart(idcode, 0);
int abits = decodeBits(4);
int bbits = decodeBits(4);
if (abits > 8) { // abits is the version number
version = abits;
abits = bbits;
}
System.out.println("version:" + version);
int allAtoms = decodeBits(abits);
if (allAtoms == 0)
return;
int allBonds = decodeBits(bbits);
int nitrogens = decodeBits(abits);
int oxygens = decodeBits(abits);
int otherAtoms = decodeBits(abits);
int chargedAtoms = decodeBits(abits);
System.out.println("allAtoms:" + allAtoms + " allBonds:" + allBonds);
if (nitrogens != 0) {
System.out.print("nitrogens:");
for (int i = 0; i < nitrogens; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
}
if (oxygens != 0) {
System.out.print("oxygens:");
for (int i = 0; i < oxygens; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
}
if (otherAtoms != 0) {
System.out.print("otherAtoms:");
for (int i = 0; i < otherAtoms; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(8));
System.out.println();
}
if (chargedAtoms != 0) {
System.out.print("chargedAtoms:");
for (int i = 0; i < chargedAtoms; i++)
System.out.print(" " + decodeBits(abits) + ":" + (decodeBits(4) - 8));
System.out.println();
}
int closureBonds = 1 + allBonds - allAtoms;
int dbits = decodeBits(4);
int base = 0;
int[][] bondAtom = new int[2][allBonds];
int bondCount = 0;
for (int i = 1; i < allAtoms; i++) {
int dif = decodeBits(dbits);
if (dif == 0) {
closureBonds++;
continue;
}
base += dif - 1;
bondAtom[0][bondCount] = base;
bondAtom[1][bondCount++] = i;
}
for (int i = 0; i < closureBonds; i++) {
bondAtom[0][bondCount] = decodeBits(abits);
bondAtom[1][bondCount++] = decodeBits(abits);
}
int[] bondOrder = new int[allBonds];
System.out.print("bonds:");
for (int bond = 0; bond < allBonds; bond++) {
System.out.print(" " + bondAtom[0][bond]);
bondOrder[bond] = decodeBits(2);
System.out.print(bondOrder[bond] == 0 ? "." : bondOrder[bond] == 1 ? "-" : bondOrder[bond] == 2 ? "=" : "#");
System.out.print("" + bondAtom[1][bond]);
}
System.out.println();
int THCount = decodeBits(abits);
if (THCount != 0) {
System.out.print("parities:");
for (int i = 0; i < THCount; i++) {
int atom = decodeBits(abits);
if (version == Canonizer.cIDCodeVersion2) {
int parity = decodeBits(2);
if (parity == 3) {
// this was the old discontinued Molecule.cAtomParityMix
// version2 idcodes had never more than one center with parityMix
System.out.print(" " + atom + ":1&0");
} else {
System.out.print(" " + atom + ":" + parity);
}
} else {
int parity = decodeBits(3);
switch (parity) {
case Canonizer.cParity1And:
System.out.print(" " + atom + ":1&" + decodeBits(3));
break;
case Canonizer.cParity2And:
System.out.print(" " + atom + ":2&" + decodeBits(3));
break;
case Canonizer.cParity1Or:
System.out.print(" " + atom + ":1|" + decodeBits(3));
break;
case Canonizer.cParity2Or:
System.out.print(" " + atom + ":2|" + decodeBits(3));
break;
default:
System.out.print(" " + atom + ":" + parity);
}
}
}
System.out.println();
}
if (version == Canonizer.cIDCodeVersion2)
if ((decodeBits(1) == 0)) // translate chiral flag
System.out.println("isRacemate");
int EZCount = decodeBits(bbits);
if (EZCount != 0) {
System.out.print("EZ:");
for (int i = 0; i < EZCount; i++) {
int bond = decodeBits(bbits);
if (bondOrder[bond] == 1) { // BINAP type of axial chirality
int parity = decodeBits(3);
switch (parity) {
case Canonizer.cParity1And:
System.out.print(" " + bond + ":1&" + decodeBits(3));
break;
case Canonizer.cParity2And:
System.out.print(" " + bond + ":2&" + decodeBits(3));
break;
case Canonizer.cParity1Or:
System.out.print(" " + bond + ":1|" + decodeBits(3));
break;
case Canonizer.cParity2Or:
System.out.print(" " + bond + ":2|" + decodeBits(3));
break;
default:
System.out.print(" " + bond + ":" + parity);
}
} else
System.out.print(" " + bond + ":" + decodeBits(2));
}
System.out.println();
}
if (decodeBits(1) == 1)
System.out.println("isFragment = true");
int offset = 0;
while (decodeBits(1) == 1) {
int dataType = offset + decodeBits(4);
int no;
switch (dataType) {
case 0: // datatype 'AtomQFNoMoreNeighbours'
no = decodeBits(abits);
System.out.print("noMoreNeighbours:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
break;
case 1: // datatype 'isotop'
no = decodeBits(abits);
System.out.print("mass:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(8));
System.out.println();
break;
case 2: // datatype 'bond defined to be delocalized'
no = decodeBits(bbits);
System.out.print("delocalizedBonds (outdated, redundant and wrong):");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(bbits));
System.out.println();
break;
case 3: // datatype 'AtomQFMoreNeighbours'
no = decodeBits(abits);
System.out.print("moreNeighbours:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
break;
case 4: // datatype 'AtomQFRingState'
no = decodeBits(abits);
System.out.print("atomRingState:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFRingStateBits));
System.out.println();
break;
case 5: // datatype 'AtomQFAromState'
no = decodeBits(abits);
System.out.print("atomAromState:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFAromStateBits));
System.out.println();
break;
case 6: // datatype 'AtomQFAny'
no = decodeBits(abits);
System.out.print("atomAny:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
break;
case 7: // datatype 'AtomQFHydrogen'
no = decodeBits(abits);
System.out.print("atomHydrogen:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFHydrogenBits));
System.out.println();
break;
case 8: // datatype 'AtomList'
no = decodeBits(abits);
System.out.print("atomList:");
for (int i = 0; i < no; i++) {
int atom = decodeBits(abits);
int atoms = decodeBits(4);
System.out.print(" " + atom);
for (int j = 0; j < atoms; j++) {
System.out.print(j == 0 ? ":" : ",");
System.out.print("" + decodeBits(8));
}
}
System.out.println();
break;
case 9: // datatype 'BondQFRingState'
no = decodeBits(bbits);
System.out.print("bondRingState:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFRingStateBits));
System.out.println();
break;
case 10:// datatype 'BondQFBondTypes'
no = decodeBits(bbits);
System.out.print("bondTypes:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFBondTypesBits));
System.out.println();
break;
case 11: // datatype 'AtomQFMatchStereo'
no = decodeBits(abits);
System.out.print("atomMatchStereo:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
break;
case 12: // datatype 'bond defined to be a bridge from n1 to n2 atoms'
no = decodeBits(bbits);
for (int i = 0; i < no; i++) {
System.out.print("bridgeBond:" + decodeBits(bbits));
int min = decodeBits(Molecule.cBondQFBridgeMinBits);
int max = min + decodeBits(Molecule.cBondQFBridgeSpanBits);
System.out.println("(" + min + "-" + max + ")");
}
break;
case 13: // datatype 'AtomQFPiElectrons'
no = decodeBits(abits);
System.out.print("atomPiElectrons:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFPiElectronBits));
System.out.println();
break;
case 14: // datatype 'AtomQFNeighbours'
no = decodeBits(abits);
System.out.print("AtomQFNeighbours:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFNeighbourBits));
System.out.println();
break;
case 15: // datatype 'start second feature set'
case 31:
offset += 16;
System.out.println("");
break;
case 16: // datatype 'AtomQFSmallRingSize'
no = decodeBits(abits);
System.out.print("AtomQFSmallRingSize:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFSmallRingSizeBits));
System.out.println();
break;
case 17: // datatype 'AtomAbnormalValence'
no = decodeBits(abits);
System.out.print("AtomAbnormalValence:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(4));
System.out.println();
break;
case 18: // datatype 'AtomCustomLabel'
no = decodeBits(abits);
System.out.print("AtomCustomLabel:");
int lbits = decodeBits(4);
for (int i = 0; i < no; i++) {
int atom = decodeBits(abits);
int count = decodeBits(lbits);
byte[] label = new byte[count];
for (int j = 0; j < count; j++)
label[j] = (byte) decodeBits(7);
System.out.print(" " + atom + ":" + new String(label, StandardCharsets.UTF_8));
}
System.out.println();
break;
case 19: // datatype 'AtomQFCharge'
no = decodeBits(abits);
System.out.print("AtomQFCharge:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFChargeBits));
System.out.println();
break;
case 20: // datatype 'BondQFRingSize'
no = decodeBits(bbits);
System.out.print("BondQFRingSize:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFRingSizeBits));
System.out.println();
break;
case 21: // datatype 'AtomRadicalState'
no = decodeBits(abits);
System.out.print("AtomRadicalState:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(2));
System.out.println();
break;
case 22: // datatype 'flat nitrogen'
no = decodeBits(abits);
System.out.print("AtomQFFlatNitrogen:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
break;
case 23: // datatype 'cBondQFMatchStereo'
no = decodeBits(bbits);
System.out.print("cBondQFMatchStereo:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
break;
case 24: // datatype 'cBondQFAromatic'
no = decodeBits(bbits);
System.out.print("BondQFAromState:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFAromStateBits));
System.out.println();
break;
case 25: // datatype 'atom selection'
System.out.print("AtomSelection:");
for (int i = 0; i < allAtoms; i++)
if (decodeBits(1) == 1)
System.out.print(" " + i);
System.out.println();
break;
case 26: // datatype 'delocalized high order bond'
System.out.print("DelocalizedHigherOrderBonds:");
no = decodeBits(bbits);
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(bbits));
break;
case 27: // datatype 'part of an exclude group'
no = decodeBits(abits);
System.out.print("AtomQFExcludeGroup:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
break;
case 28: // datatype 'coordinate bond'
no = decodeBits(bbits);
System.out.print("Coordinate Bonds:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(bbits));
System.out.println();
break;
case 29:
no = decodeBits(abits);
System.out.print("ReactionParityHint:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFRxnParityBits));
System.out.println();
break;
case 30: // datatype 'AtomQFNewRingSize'
no = decodeBits(abits);
System.out.print("AtomQFNewRingSize:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFNewRingSizeBits));
System.out.println();
break;
case 32: // datatype 'AtomQFStereoStateBits'
no = decodeBits(abits);
System.out.print("AtomQFStereoState:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFStereoStateBits));
System.out.println();
break;
case 33: // datatype 'AtomQFENeighbours'
no = decodeBits(abits);
System.out.print("AtomQFENeighbours:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits) + ":" + decodeBits(Molecule.cAtomQFENeighbourBits));
System.out.println();
break;
case 34: // datatype 'in hetero aromatic ring'
no = decodeBits(abits);
System.out.print("AtomQFHeteroAromatic:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
break;
case 35: // datatype 'cBondQFMatchFormalOrder'
no = decodeBits(bbits);
System.out.print("BondQFMatchFormalOrder:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(abits));
System.out.println();
break;
case 36: // datatype 'cBondQFRareBondType'
no = decodeBits(bbits);
System.out.print("BondQFRareBondType:");
for (int i = 0; i < no; i++)
System.out.print(" " + decodeBits(bbits) + ":" + decodeBits(Molecule.cBondQFRareBondTypesBits));
System.out.println();
break;
case 37: // datatype 'rare bond type'
no = decodeBits(bbits);
System.out.print("Rare Bond Type:");
for (int i=0; i 0 && distance < lowDistance)
lowDistance = distance;
}
}
avbl = (lowDistance == Double.MAX_VALUE) ? defaultAVBL : lowDistance;
}
}
if (coordinates[0] == '#') { // we have 3D-coordinates that include implicit hydrogen coordinates
System.out.print("hydrogen coords (" + hydrogenCount + " expected): ");
int hydrogen = allAtoms;
for (int atom = 0; atom < allAtoms; atom++) {
if (hCount[atom] != 0)
System.out.print(atom);
for (int i = 0; i < hCount[atom]; i++) {
System.out.print(" (");
coords[0][hydrogen] = coords[0][atom] + (decodeBits(resolutionBits) - binCount / 2.0);
System.out.print((int) coords[0][hydrogen] + ",");
coords[1][hydrogen] = coords[1][atom] + (decodeBits(resolutionBits) - binCount / 2.0);
System.out.print((int) coords[1][hydrogen]);
if (coordsAre3D) {
coords[2][hydrogen] = coords[2][atom] + (decodeBits(resolutionBits) - binCount / 2.0);
System.out.print("," + (int) coords[2][hydrogen]);
}
System.out.print("), ");
hydrogen++;
}
}
System.out.println();
}
System.out.print(coordsAreAbsolute ? "absolute coords:" : "relative coords:");
if (hydrogenCount != 0)
System.out.println("Coordinates contain " + hydrogenCount + " hydrogen atoms!");
if (coordsAreAbsolute) {
double targetAVBL = decodeAVBL(decodeBits(resolutionBits), binCount);
double xOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
double yOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
double zOffset = 0;
if (coordsAre3D)
zOffset = targetAVBL * decodeShift(decodeBits(resolutionBits), binCount);
System.out.println("Abs-coord transformation: targetAVBL:" + targetAVBL + " xOffset:" + xOffset + " yOffset:" + yOffset + " zOffset:" + zOffset);
factor = targetAVBL / avbl;
for (int atom = 0; atom < allAtoms; atom++) {
coords[0][atom] = xOffset + factor * coords[0][atom];
coords[1][atom] = xOffset + factor * coords[1][atom];
if (coordsAre3D)
coords[2][atom] = xOffset + factor * coords[2][atom];
}
} else {
double targetAVBL = 1.5;
factor = targetAVBL / avbl;
for (int atom = 0; atom < allAtoms; atom++) {
System.out.print(atom + " (");
coords[0][atom] = coords[0][atom] * factor;
System.out.print(DoubleFormat.toString(coords[0][atom]) + ",");
coords[1][atom] = coords[1][atom] * factor;
System.out.print(DoubleFormat.toString(coords[1][atom]));
if (coordsAre3D) {
coords[2][atom] = coords[2][atom] * factor;
System.out.print("," + DoubleFormat.toString(coords[2][atom]));
}
System.out.print("), ");
if ((atom & 3) == 3 || atom == allAtoms - 1)
System.out.println();
}
}
}
}
System.out.println();
}
catch (Exception e) {
e.printStackTrace();
}
}
/**
 * Calculates the euclidean distance between two atoms from a coordinate table.
 * @param coords coordinates indexed as coords[dimension][atom] with x,y,z as dimensions 0,1,2
 * @param atom1 index of first atom
 * @param atom2 index of second atom
 * @param coordsAre3D if false, then the z-dimension is neither read nor considered
 * @return distance between atom1 and atom2
 */
private double getDistance(double[][] coords, int atom1, int atom2, boolean coordsAre3D) {
	int dimensions = coordsAre3D ? 3 : 2;
	double sumOfSquares = 0.0;
	for (int d = 0; d < dimensions; d++) {
		double delta = coords[d][atom1] - coords[d][atom2];
		sumOfSquares += delta * delta;
	}
	return Math.sqrt(sumOfSquares);
}
}