com.actelion.research.chem.IDCodeParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openchemlib Show documentation
Show all versions of openchemlib Show documentation
Open Source Chemistry Library
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
package com.actelion.research.chem;

import java.nio.charset.StandardCharsets;
public class IDCodeParser {
// Molecule currently being populated; assigned at the start of parse().
private StereoMolecule mMol;
// Byte array the bit decoder reads from; assigned by decodeBitsStart().
private byte[] mDecodingBytes;
// Bit decoder state: number of unread bits left in the current byte (six per byte),
// the current byte's value pre-shifted so that its next bit sits at mask 0x00010000,
// and the index of the current byte within mDecodingBytes.
private int mIDCodeBitsAvail,mIDCodeTempData,mIDCodeBufferIndex;
// Constructor flag; when set, parse() discards supplied coordinates that are 3D.
private boolean mEnsure2DCoordinates;
/**
 * Creates a parser that guarantees 2D atom coordinates on the molecules it produces.
 * Equivalent to {@code new IDCodeParser(true)}: if the idcode comes without coordinates,
 * or if the supplied coordinates are 3D, new 2D coordinates are created on the fly.
 */
public IDCodeParser() {
this(true);
}
/**
 * Creates a parser with an explicit policy for missing or 3D atom coordinates.
 *
 * @param ensure2DCoordinates if {@code true} and no coordinates are passed with the idcode,
 * the parser generates atom coordinates and assigns up/down bonds reflecting the given atom
 * parities. Generating coordinates is potentially error prone, so passing the original
 * coordinates, where available, is the preferred option.
 * <p>
 * WARNING: if {@code false}, stereo parities are taken directly from the idcode. Explicitly
 * 'unknown' parities are then missing, because they are not part of the idcode, and without
 * atom coordinates no up/down bonds can be assigned. If further processing relies on up/down
 * bond stereo information or needs to distinguish parity 'none' from 'unknown' (e.g. idcode
 * creation, checking for stereo centers, calculating the skeletonSpheres descriptor), or if
 * you are not sure which to choose, use {@code IDCodeParser(true)}.
 * If you supply encoded 3D coordinates, use {@code IDCodeParser(false)}.
 */
public IDCodeParser(boolean ensure2DCoordinates) {
    this.mEnsure2DCoordinates = ensure2DCoordinates;
}
/**
 * Creates and returns a molecule from the idcode with its atom and bond arrays being
 * just as large as needed to hold the molecule. Use this to conserve memory if no
 * atoms or bonds are added to the molecule afterwards. This version of the method
 * accepts idcode and atom coordinates in one String, separated by a space character.
 * @param idcode null or idcode, which may contain coordinates separated by a space character
 * @return the decoded molecule, or null if idcode is null or empty
 */
public StereoMolecule getCompactMolecule(String idcode) {
    if (idcode == null || idcode.length() == 0)
        return null;

    // The decoder consumes raw byte values (six bits per byte, value minus 64), so the
    // String-to-byte conversion must not depend on the platform default charset.
    int separator = idcode.indexOf(' ');

    // A space at the very start or the very end does not separate two parts;
    // in that case the whole string is handed over as idcode.
    if (separator > 0 && separator < idcode.length()-1)
        return getCompactMolecule(
                idcode.substring(0, separator).getBytes(StandardCharsets.UTF_8),
                idcode.substring(separator+1).getBytes(StandardCharsets.UTF_8));

    return getCompactMolecule(idcode.getBytes(StandardCharsets.UTF_8), null);
}
/**
 * Builds a molecule from an idcode that carries no separate coordinates. The atom and
 * bond arrays of the returned molecule are sized to exactly fit the decoded structure,
 * which saves memory when no atoms or bonds are added afterwards.
 * @param idcode may be null
 * @return whatever the two-argument byte[] variant returns when called without coordinates
 */
public StereoMolecule getCompactMolecule(byte[] idcode) {
    return getCompactMolecule(idcode, (byte[])null);
}
/**
 * Creates and returns a molecule from the idcode with its atom and bond arrays being
 * just as large as needed to hold the molecule. Use this to conserve memory if no
 * atoms or bonds are added to the molecule afterwards.
 * @param idcode may be null or empty
 * @param coordinates may be null
 * @return the decoded molecule, or null if idcode is null or empty
 */
public StereoMolecule getCompactMolecule(String idcode, String coordinates) {
    // An empty idcode has no header byte to decode; treat it like null, as the
    // single-String variant of this method does, instead of failing on index 0.
    if (idcode == null || idcode.length() == 0)
        return null;

    // The decoder works on raw byte values, so the conversion uses a fixed charset
    // rather than the platform default.
    byte[] coordinateBytes = (coordinates == null) ? null : coordinates.getBytes(StandardCharsets.UTF_8);
    return getCompactMolecule(idcode.getBytes(StandardCharsets.UTF_8), coordinateBytes);
}
/**
 * Creates and returns a molecule from the idcode with its atom and bond arrays being
 * just as large as needed to hold the molecule. Use this to conserve memory if no
 * atoms or bonds are added to the molecule afterwards.
 * @param idcode may be null or empty
 * @param coordinates may be null
 * @return the decoded molecule, or null if idcode is null or empty
 */
public StereoMolecule getCompactMolecule(byte[] idcode, byte[] coordinates) {
    // A zero-length array has no header byte; decodeBitsStart() would fail on index 0.
    if (idcode == null || idcode.length == 0)
        return null;

    // Read just enough of the header to learn the atom and bond counts,
    // so that the molecule can be allocated with exactly fitting arrays.
    decodeBitsStart(idcode, 0);
    int abits = decodeBits(4);
    int bbits = decodeBits(4);

    if (abits > 8)  // abits is the version number; bbits then serves as width for both counts
        abits = bbits;

    int allAtoms = decodeBits(abits);
    int allBonds = decodeBits(bbits);

    StereoMolecule mol = new StereoMolecule(allAtoms, allBonds);

    // parse() restarts decoding from the first byte and fills the molecule.
    parse(mol, idcode, coordinates);
    return mol;
}
/**
 * Parses the idcode and populates the given molecule to represent the passed idcode.
 * This version of the method accepts idcode and atom coordinates in one String,
 * separated by a space character. A null or empty idcode is forwarded as null.
 * @param mol molecule object to be filled with the idcode content
 * @param idcode null or idcode, which may contain coordinates separated by a space character
 */
public void parse(StereoMolecule mol, String idcode) {
    if (idcode == null || idcode.length() == 0) {
        parse(mol, (byte[])null, (byte[])null);
        return;
    }

    // The decoder consumes raw byte values, so the String-to-byte conversion
    // must not depend on the platform default charset.
    int separator = idcode.indexOf(' ');

    // A space at the very start or the very end does not separate two parts;
    // in that case the whole string is handed over as idcode.
    if (separator > 0 && separator < idcode.length()-1)
        parse(mol,
              idcode.substring(0, separator).getBytes(StandardCharsets.UTF_8),
              idcode.substring(separator+1).getBytes(StandardCharsets.UTF_8));
    else
        parse(mol, idcode.getBytes(StandardCharsets.UTF_8), null);
}
/**
 * Fills the given molecule from an idcode that carries no separate coordinates.
 * Simply forwards to the three-argument byte[] variant with null coordinates.
 * @param mol molecule object to be filled with the idcode content
 * @param idcode may be null
 */
public void parse(StereoMolecule mol, byte[] idcode) {
    parse(mol, idcode, (byte[])null);
}
/**
 * Parses the idcode and populates the given molecule to represent the passed idcode.
 * Both strings are converted to bytes and forwarded to the byte[] variant of this method.
 * @param mol molecule object to be filled with the idcode content
 * @param idcode may be null
 * @param coordinates may be null
 */
public void parse(StereoMolecule mol, String idcode, String coordinates) {
    // The decoder consumes raw byte values, so the conversion uses a fixed charset
    // rather than the platform default.
    byte[] idcodeBytes = (idcode == null) ? null : idcode.getBytes(StandardCharsets.UTF_8);
    byte[] coordinateBytes = (coordinates == null) ? null : coordinates.getBytes(StandardCharsets.UTF_8);
    parse(mol, idcodeBytes, coordinateBytes);
}
/**
* Parses the idcode and populates the given molecule to represent the passed idcode.
* @param mol molecule object to be filled with the idcode content
* @param idcode may be null
* @param coordinates may be null
* @return
*/
public void parse(StereoMolecule mol, byte[] idcode, byte[] coordinates) {
int version = Canonizer.cIDCodeVersion2;
mMol = mol;
mMol.deleteMolecule();
if (idcode==null || idcode.length==0)
return;
if (coordinates != null && coordinates.length == 0)
coordinates = null;
decodeBitsStart(idcode, 0);
int abits = decodeBits(4);
int bbits = decodeBits(4);
if (abits > 8) { // abits is the version number
version = abits;
abits = bbits;
}
if (abits == 0) {
mMol.setFragment((decodeBits(1) == 1) ? true : false);
return;
}
int allAtoms = decodeBits(abits);
int allBonds = decodeBits(bbits);
int nitrogens = decodeBits(abits);
int oxygens = decodeBits(abits);
int otherAtoms = decodeBits(abits);
int chargedAtoms = decodeBits(abits);
for (int atom=0; atom= '\'');
float targetAVBL = 0.0f;
float xOffset = 0.0f;
float yOffset = 0.0f;
float zOffset = 0.0f;
boolean coordsAre3D = false;
boolean coordsAreAbsolute = false;
if (decodeOldCoordinates) { // old coordinate encoding
if ((coordinates.length > 2*allAtoms-2 && coordinates[2*allAtoms-2] == '\'')
|| (coordinates.length > 3*allAtoms-3 && coordinates[3*allAtoms-3] == '\'')) { // old faulty encoding
coordsAreAbsolute = true;
coordsAre3D = (coordinates.length == 3*allAtoms-3+9);
int index = coordsAre3D ? 3*allAtoms-3 : 2*allAtoms-2;
int avblInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
targetAVBL = (float)Math.pow(10.0, avblInt/2000.0-1.0);
index += 2;
int xInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
xOffset = (float)Math.pow(10.0, xInt/1500.0-1.0);
index += 2;
int yInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
yOffset = (float)Math.pow(10.0, yInt/1500.0-1.0);
if (coordsAre3D) {
index += 2;
int zInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40;
zOffset = (float)Math.pow(10.0, zInt/1500.0-1.0);
}
}
else {
coordsAre3D = (coordinates.length == 3*allAtoms-3);
}
}
// don't use 3D coordinates, if we need 2D
if (mEnsure2DCoordinates && coordsAre3D) {
coordinates = null;
decodeOldCoordinates = false;
}
for (int i=1; i= 3*allAtoms-3
&& coordinates[2*allAtoms-2] != '\'');
}
}
/**
 * String convenience variant of {@code coordinatesAreAbsolute(byte[])}.
 * @param coordinates encoded coordinates, may be null
 * @return false if coordinates is null; otherwise the result of the byte[] variant
 */
public boolean coordinatesAreAbsolute(String coordinates) {
    // The byte[] variant inspects raw byte values, so the conversion uses a fixed
    // charset rather than the platform default.
    return coordinates != null
        && coordinatesAreAbsolute(coordinates.getBytes(StandardCharsets.UTF_8));
}
public boolean coordinatesAreAbsolute(byte[] coordinates) {
if (coordinates == null || coordinates.length == 0)
return false;
if (coordinates[0] >= '\'') { // old format uses ACSII 39 and higher
for (int i=0; i 8) // abits is the version number
version = abits;
return version;
}
/**
 * Determines the atom count encoded in the header of the given idcode.
 * @param idcode may be null or empty
 * @return 0 if idcode is null or empty; otherwise the result of the byte[] variant
 *         called with offset 0
 */
public int getAtomCount(String idcode) {
    if (idcode == null || idcode.length() == 0)
        return 0;

    // The decoder consumes raw byte values, so the conversion uses a fixed charset
    // rather than the platform default.
    return getAtomCount(idcode.getBytes(StandardCharsets.UTF_8), 0);
}
/**
 * Reads the atom count from the header of an idcode that starts at the given
 * position of the byte array.
 * @param idcode may be null
 * @param offset index of the first idcode byte within the array
 * @return the atom count; 0 if idcode is null, if offset lies at or beyond the end
 *         of the array, or if the header declares a bit width of 0
 */
public int getAtomCount(byte[] idcode, int offset) {
    boolean nothingToDecode = (idcode == null) || (offset >= idcode.length);
    if (nothingToDecode)
        return 0;

    decodeBitsStart(idcode, offset);
    int firstField = decodeBits(4);
    int secondField = decodeBits(4);

    // A first field above 8 is the version number; the second field is then the bit width.
    int atomBits = (firstField > 8) ? secondField : firstField;

    return (atomBits == 0) ? 0 : decodeBits(atomBits);
}
/**
 * Determines atom and bond counts of the given idcode.
 * @param idcode may be null or empty
 * @param count null or int[2], which is filled and returned
 * @return null if idcode is null or empty (count is left untouched then); otherwise
 *         an int[] with atom and bond count as first and second values
 */
public int[] getAtomAndBondCounts(String idcode, int[] count) {
    if (idcode == null || idcode.length() == 0)
        return null;

    // The decoder consumes raw byte values, so the conversion uses a fixed charset
    // rather than the platform default.
    return getAtomAndBondCounts(idcode.getBytes(StandardCharsets.UTF_8), 0, count);
}
/**
 * Determines atom and bond counts of the idcode that starts at the given position
 * of the byte array.
 * @param idcode may be null
 * @param offset index of the first idcode byte within the array
 * @param count null or int[2], which is filled and returned
 * @return null if idcode is null or offset lies at or beyond the end of the array
 *         (count is left untouched then); otherwise an int[] with atom and bond count
 *         as first and second values
 */
public int[] getAtomAndBondCounts(byte[] idcode, int offset, int[] count) {
    // Same bounds rule as getAtomCount(byte[], int): there must be a byte at offset.
    if (idcode == null || idcode.length <= offset)
        return null;

    // Start decoding at the caller-supplied position rather than at index 0.
    decodeBitsStart(idcode, offset);
    int abits = decodeBits(4);
    int bbits = decodeBits(4);

    if (abits > 8)  // abits is the version number; bbits then serves as width for both counts
        abits = bbits;

    if (count == null)
        count = new int[2];

    if (abits == 0) {
        // A bit width of 0 means no counts follow in the header.
        count[0] = 0;
        count[1] = 0;
    }
    else {
        count[0] = decodeBits(abits);
        count[1] = decodeBits(bbits);
    }

    return count;
}
/**
 * Resets the bit decoder so that the next decodeBits() call starts with the
 * first bit of bytes[offset].
 * @param bytes encoded data to read from
 * @param offset index of the first byte to decode
 */
private void decodeBitsStart(byte[] bytes, int offset) {
    mDecodingBytes = bytes;
    mIDCodeBufferIndex = offset;
    mIDCodeBitsAvail = 6;   // every encoded byte contributes six bits

    // Subtract the base value 64 and shift by 11, which puts the highest of the
    // six bits at mask 0x00010000, where decodeBits() picks it up.
    mIDCodeTempData = (bytes[offset] - 64) << 11;
}
/**
 * Reads the requested number of bits from the current decoding position and
 * advances the position accordingly. The first bit read becomes bit 0 of the
 * result, the second one bit 1, and so on.
 * @param bits number of bits to read
 * @return the assembled value
 */
private int decodeBits(int bits) {
    int value = 0;
    int remaining = bits;

    while (remaining != 0) {
        if (mIDCodeBitsAvail == 0) {
            // current byte is used up: load the next one, aligned like in decodeBitsStart()
            mIDCodeBufferIndex++;
            mIDCodeTempData = (mDecodingBytes[mIDCodeBufferIndex] - 64) << 11;
            mIDCodeBitsAvail = 6;
        }

        // move the bit found at mask 0x00010000 to the position equal to the
        // number of bits already consumed by this call
        int consumed = bits - remaining;
        value |= (mIDCodeTempData & 0x00010000) >> (16 - consumed);

        mIDCodeTempData <<= 1;
        mIDCodeBitsAvail--;
        remaining--;
    }

    return value;
}
/**
 * Converts a bin index into a value on a logarithmic scale: index 0 yields 0.1 and
 * index binCount-1 yields about 200. (AVBL presumably stands for average bond length;
 * confirm against the encoder.)
 * @param value bin index
 * @param binCount total number of bins
 * @return the decoded value
 */
private float decodeAVBL(int value, int binCount) {
    double exponent = Math.log10(200/0.1) * value / (binCount - 1) - 1f;
    return (float)Math.pow(10, exponent);
}
/**
 * Converts a bin index into a signed shift value. Indexes in the upper half of the
 * bin range (value >= binCount/2) denote negative shifts. The magnitude grows
 * non-linearly as the index within its half approaches the last bin of that half.
 * @param value bin index
 * @param binCount total number of bins
 * @return the decoded shift
 */
private float decodeShift(int value, int binCount) {
    int half = binCount / 2;
    boolean negative = (value >= half);
    int magnitudeIndex = negative ? value - half : value;

    float steepness = (float)binCount/100f;
    float magnitude = steepness * magnitudeIndex / ((float)half - 1 - magnitudeIndex);

    if (negative)
        return -magnitude;
    return magnitude;
}
public void printContent(byte[] idcode, byte[] coordinates) {
int version = Canonizer.cIDCodeVersion2;
System.out.println("IDCode: "+new String(idcode));
if (idcode==null || idcode.length==0)
return;
decodeBitsStart(idcode, 0);
int abits = decodeBits(4);
int bbits = decodeBits(4);
if (abits > 8) { // abits is the version number
version = abits;
abits = bbits;
}
System.out.println("version:"+version);
int allAtoms = decodeBits(abits);
if (allAtoms == 0)
return;
int allBonds = decodeBits(bbits);
int nitrogens = decodeBits(abits);
int oxygens = decodeBits(abits);
int otherAtoms = decodeBits(abits);
int chargedAtoms = decodeBits(abits);
System.out.println("allAtoms:"+allAtoms+" allBonds:"+allBonds);
if (nitrogens != 0) {
System.out.print("nitrogens:");
for (int i=0; i");
break;
case 16: // datatype 'AtomQFRingSize'
no = decodeBits(abits);
System.out.print("AtomQFRingSize:");
for (int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy