All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.IDCodeParser Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of the the copyright holder nor the
*    names of its contributors may be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

package com.actelion.research.chem;

public class IDCodeParser {
	// Molecule currently being populated; assigned (and cleared via deleteMolecule()) at the start of parse().
	private StereoMolecule mMol;
	// Byte array the bit decoder reads from; set by decodeBitsStart().
	private byte[]		mDecodingBytes;
	// Bit decoder state:
	//  mIDCodeBitsAvail   - number of not yet consumed bits of the current byte (reset to 6 for every new byte)
	//  mIDCodeTempData    - (current byte - 64) << 11, shifted left by one for every consumed bit,
	//                       such that the next bit to read is always found at mask 0x00010000
	//  mIDCodeBufferIndex - index into mDecodingBytes of the byte currently being consumed
	private	int			mIDCodeBitsAvail,mIDCodeTempData,mIDCodeBufferIndex;
	// Set by the constructor; if true, parse() discards supplied coordinates that turn out to be 3D.
	private boolean		mEnsure2DCoordinates;

	/**
	 * Creates a parser whose molecules always carry 2D-atom-coordinates.
	 * Whenever an idcode comes without coordinates, or with 3D-coordinates,
	 * a fresh set of 2D-coordinates is generated during parsing.
	 * Equivalent to calling IDCodeParser(true).
	 */
	public IDCodeParser() {
		this(true);
		}

	/**
	 * 
	 * @param ensure2DCoordinates If TRUE and no coordinates are passed with the idcode, then
	 * the parser generates atom coordinates of any molecule and assigns up/down bonds reflecting
	 * given atom parities. Generating coordinates is potentially error prone, such that providing
	 * original coordinates, where available, should be the preferred option.
	 * 
WARNING: If FALSE: In this case stereo parities are taken directly from the idcode, * missing explicitly 'unknown' parities, because they are not part of the idcode. * Without atom coordinates up/down bonds cannot be assigned. If further processing relies * on up/down bond stereo information or needs to distinguish parities 'none' from 'unknown', * (e.g. idcode creation, checking for stereo centers, calculating the skeletonSpheres descriptor), * or if you are not exactly sure, what to do, then use the constructor IDCodeParser(true). * If you supply encoded 3D-coordinates, then use IDCodeParser(false). */ public IDCodeParser(boolean ensure2DCoordinates) { mEnsure2DCoordinates = ensure2DCoordinates; } /** * Creates and returns a molecule from the idcode with its atom and bond arrays being * just as large as needed to hold the molecule. Use this to conserve memory if no * atoms or bonds are added to the molecule afterwards. This version of the method * allows to pass idcode and atom coordinates in one String object. * @param idcode null or idcode, which may contain coordinates separated by a space character * @return */ public StereoMolecule getCompactMolecule(String idcode) { if (idcode == null || idcode.length() == 0) return null; int index = idcode.indexOf(' '); if (index > 0 && index < idcode.length()-1) return getCompactMolecule(idcode.substring(0, index).getBytes(), idcode.substring(index+1).getBytes()); else return getCompactMolecule(idcode.getBytes(), null); } /** * Creates and returns a molecule from the idcode with its atom and bond arrays being * just as large as needed to hold the molecule. Use this to conserve memory if no * atoms or bonds are added to the molecule afterwards. * @param idcode may be null * @return */ public StereoMolecule getCompactMolecule(byte[] idcode) { return getCompactMolecule(idcode, null); } /** * Creates and returns a molecule from the idcode with its atom and bond arrays being * just as large as needed to hold the molecule. 
Use this to conserve memory if no * atoms or bonds are added to the molecule afterwards. * @param idcode may be null * @param coordinates may be null * @return */ public StereoMolecule getCompactMolecule(String idcode, String coordinates) { return (idcode == null) ? null : getCompactMolecule(idcode.getBytes(), (coordinates == null) ? null : coordinates.getBytes()); } /** * Creates and returns a molecule from the idcode with its atom and bond arrays being * just as large as needed to hold the molecule. Use this to conserve memory if no * atoms or bonds are added to the molecule afterwards. * @param idcode may be null * @param coordinates may be null * @return */ public StereoMolecule getCompactMolecule(byte[] idcode, byte[] coordinates) { if (idcode == null) return null; decodeBitsStart(idcode, 0); int abits = decodeBits(4); int bbits = decodeBits(4); if (abits > 8) // abits is the version number abits = bbits; int allAtoms = decodeBits(abits); int allBonds = decodeBits(bbits); StereoMolecule mol = new StereoMolecule(allAtoms, allBonds); parse(mol, idcode, coordinates); return mol; } /** * Parses the idcode and populates the given molecule to represent the passed idcode. * This version of the method allows to pass idcode and atom coordinates in one String object. * @param mol molecule object to be filled with the idcode content * @param idcode null or idcode, which may contain coordinates separated by a space character * @return */ public void parse(StereoMolecule mol, String idcode) { if (idcode == null || idcode.length() == 0) { parse(mol, (byte[])null, (byte[])null); return; } int index = idcode.indexOf(' '); if (index > 0 && index < idcode.length()-1) parse(mol, idcode.substring(0, index).getBytes(), idcode.substring(index+1).getBytes()); else parse(mol, idcode.getBytes(), null); } /** * Parses the idcode and populates the given molecule to represent the passed idcode. 
* @param mol molecule object to be filled with the idcode content * @param idcode may be null * @return */ public void parse(StereoMolecule mol, byte[] idcode) { parse(mol, idcode, null); } /** * Parses the idcode and populates the given molecule to represent the passed idcode. * @param mol molecule object to be filled with the idcode content * @param idcode may be null * @param coordinates may be null * @return */ public void parse(StereoMolecule mol, String idcode, String coordinates) { byte[] idcodeBytes = (idcode == null) ? null : idcode.getBytes(); byte[] coordinateBytes = (coordinates == null) ? null : coordinates.getBytes(); parse(mol, idcodeBytes, coordinateBytes); } /** * Parses the idcode and populates the given molecule to represent the passed idcode. * @param mol molecule object to be filled with the idcode content * @param idcode may be null * @param coordinates may be null * @return */ public void parse(StereoMolecule mol, byte[] idcode, byte[] coordinates) { int version = Canonizer.cIDCodeVersion2; mMol = mol; mMol.deleteMolecule(); if (idcode==null || idcode.length==0) return; if (coordinates != null && coordinates.length == 0) coordinates = null; decodeBitsStart(idcode, 0); int abits = decodeBits(4); int bbits = decodeBits(4); if (abits > 8) { // abits is the version number version = abits; abits = bbits; } if (abits == 0) { mMol.setFragment((decodeBits(1) == 1) ? 
true : false); return; } int allAtoms = decodeBits(abits); int allBonds = decodeBits(bbits); int nitrogens = decodeBits(abits); int oxygens = decodeBits(abits); int otherAtoms = decodeBits(abits); int chargedAtoms = decodeBits(abits); for (int atom=0; atom= '\''); double targetAVBL = 0.0; double xOffset = 0.0; double yOffset = 0.0; double zOffset = 0.0; boolean coordsAre3D = false; boolean coordsAreAbsolute = false; if (decodeOldCoordinates) { // old coordinate encoding if ((coordinates.length > 2*allAtoms-2 && coordinates[2*allAtoms-2] == '\'') || (coordinates.length > 3*allAtoms-3 && coordinates[3*allAtoms-3] == '\'')) { // old faulty encoding coordsAreAbsolute = true; coordsAre3D = (coordinates.length == 3*allAtoms-3+9); int index = coordsAre3D ? 3*allAtoms-3 : 2*allAtoms-2; int avblInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40; targetAVBL = Math.pow(10.0, avblInt/2000.0-1.0); index += 2; int xInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40; xOffset = Math.pow(10.0, xInt/1500.0-1.0); index += 2; int yInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40; yOffset = Math.pow(10.0, yInt/1500.0-1.0); if (coordsAre3D) { index += 2; int zInt = 86*((int)coordinates[index+1]-40)+(int)coordinates[index+2]-40; zOffset = Math.pow(10.0, zInt/1500.0-1.0); } } else { coordsAre3D = (coordinates.length == 3*allAtoms-3); } } // don't use 3D coordinates, if we need 2D if (mEnsure2DCoordinates && coordsAre3D) { coordinates = null; decodeOldCoordinates = false; } for (int i=1; i= 3*allAtoms-3 && coordinates[2*allAtoms-2] != '\''); } } public boolean coordinatesAreAbsolute(String coordinates) { return (coordinates == null) ? 
false : coordinatesAreAbsolute(coordinates.getBytes()); } public boolean coordinatesAreAbsolute(byte[] coordinates) { if (coordinates == null || coordinates.length == 0) return false; if (coordinates[0] >= '\'') { // old format uses ACSII 39 and higher for (int i=0; i 8) // abits is the version number version = abits; return version; } public int getAtomCount(String idcode) { if (idcode == null || idcode.length() == 0) return 0; return getAtomCount(idcode.getBytes(), 0); } public int getAtomCount(byte[] idcode, int offset) { if (idcode == null || idcode.length <= offset) return 0; decodeBitsStart(idcode, offset); int abits = decodeBits(4); int bbits = decodeBits(4); if (abits > 8) // abits is the version number abits = bbits; if (abits == 0) return 0; return decodeBits(abits); } /** * Determines atom and bond counts of the given idcode * @param idcode * @param count null or int[2], which is filled and returned * @return int[] with atom and bond count as first and second values */ public int[] getAtomAndBondCounts(String idcode, int[] count) { if (idcode == null || idcode.length() == 0) return null; return getAtomAndBondCounts(idcode.getBytes(), 0, count); } /** * Determines atom and bond counts of the given idcode * @param idcode * @param offset * @param count null or int[2], which is filled and returned * @return int[] with atom and bond count as first and second values */ public int[] getAtomAndBondCounts(byte[] idcode, int offset, int[] count) { if (idcode == null || idcode.length == 0) return null; decodeBitsStart(idcode, 0); int abits = decodeBits(4); int bbits = decodeBits(4); if (abits > 8) // abits is the version number abits = bbits; if (count == null) count = new int[2]; if (abits == 0) { count[0] = 0; count[1] = 0; } else { count[0] = decodeBits(abits); count[1] = decodeBits(bbits); } return count; } private void decodeBitsStart(byte[] bytes, int offset) { mIDCodeBitsAvail = 6; mIDCodeBufferIndex = offset; mDecodingBytes = bytes; mIDCodeTempData = 
(bytes[mIDCodeBufferIndex] - 64) << 11; } private int decodeBits(int bits) { int allBits = bits; int data = 0; while (bits != 0) { if (mIDCodeBitsAvail == 0) { mIDCodeTempData = (mDecodingBytes[++mIDCodeBufferIndex] - 64) << 11; mIDCodeBitsAvail = 6; } data |= ((0x00010000 & mIDCodeTempData) >> (16 - allBits + bits)); mIDCodeTempData <<= 1; bits--; mIDCodeBitsAvail--; } return data; } private double decodeAVBL(int value, int binCount) { return Math.pow(10, Math.log10(200/0.1) * value / (binCount - 1) - 1); } private double decodeShift(int value, int binCount) { int halfBinCount = binCount / 2; boolean isNegative = (value >= halfBinCount); if (isNegative) value -= halfBinCount; double steepness = binCount/32; double doubleValue = steepness * value / (halfBinCount - value); return isNegative ? -doubleValue : doubleValue; } public void printContent(byte[] idcode, byte[] coordinates) { int version = Canonizer.cIDCodeVersion2; System.out.println("IDCode: "+new String(idcode)); if (idcode==null || idcode.length==0) return; decodeBitsStart(idcode, 0); int abits = decodeBits(4); int bbits = decodeBits(4); if (abits > 8) { // abits is the version number version = abits; abits = bbits; } System.out.println("version:"+version); int allAtoms = decodeBits(abits); if (allAtoms == 0) return; int allBonds = decodeBits(bbits); int nitrogens = decodeBits(abits); int oxygens = decodeBits(abits); int otherAtoms = decodeBits(abits); int chargedAtoms = decodeBits(abits); System.out.println("allAtoms:"+allAtoms+" allBonds:"+allBonds); if (nitrogens != 0) { System.out.print("nitrogens:"); for (int i=0; i"); break; case 16: // datatype 'AtomQFRingSize' no = decodeBits(abits); System.out.print("AtomQFRingSize:"); for (int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy