All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.MolfileV3Creator Maven / Gradle / Ivy

There is a newer version: 2024.11.2
Show newest version
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the
*    names of its contributors may be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/


package com.actelion.research.chem;

import java.io.IOException;
import java.io.Writer;

/**
 * This class generates an MDL molfile version 3.0 from a StereoMolecule
 * as described by MDL in 'CTFile Formats June 2005'.
 * Since the MDL enhanced stereo recognition concept doesn't include
 * support for axial chirality as bond property, we added object type 'BONDS'
 * to the internal collection types STEABS, STERAC and STEREL, in order to
 * properly encode ESR assignments of axial stereo bonds, e.g. BINAP-type
 * stereo bonds.
 * @author sandert
 *
 */
public class MolfileV3Creator
{
    // Buffer that receives the generated molfile text; either created by this
    // class or supplied by the caller through one of the constructors.
    private StringBuilder mMolfile;
    // Numerator used when computing an automatic scaling factor; the constructor
    // documentation describes 1.5 as the targeted average bond length.
    private static final double TARGET_AVBL = 1.5;
    // Coordinates are multiplied by this value, cast to int and divided by it
    // again when written, i.e. they are truncated to four decimal places.
    private static final double PRECISION_FACTOR = 10000;

    // Multiplier applied to every atom coordinate on output; 1.0 leaves
    // coordinates unscaled.
    private double mScalingFactor = 1.0;

    /**
     * Builds a version 3 molfile for the given molecule using the defaults:
     * automatic coordinate scaling is allowed, no explicit scaling factor is
     * imposed and the text is collected in a freshly allocated buffer.
     * With automatic scaling allowed, coordinates may be rescaled towards an
     * average bond length of 1.5 when the molecule's bond lengths lie outside
     * the accepted range.
     * @param mol molecule to be written
     */
    public MolfileV3Creator(StereoMolecule mol)
    {
        this(mol, true, 0.0, new StringBuilder(32768));
    }

    /**
     * Builds a version 3 molfile for the given molecule into a freshly
     * allocated buffer, without imposing an explicit scaling factor.
     * If allowScaling is true, coordinates may be rescaled automatically
     * towards an average bond length of 1.5 when the molecule's bond lengths
     * lie outside the accepted range.
     * @param mol molecule to be written
     * @param allowScaling whether automatic coordinate scaling is permitted
     */
    public MolfileV3Creator(StereoMolecule mol, boolean allowScaling)
    {
        this(mol, allowScaling, new StringBuilder(32768));
    }

	/**
	 * Builds a version 3 molfile for the given molecule without imposing an
	 * explicit scaling factor (a factor of zero is passed on, which means
	 * "none given").
	 * If allowScaling is true, coordinates may be rescaled automatically
	 * towards an average bond length of 1.5 when the molecule's bond lengths
	 * lie outside the accepted range.
	 * The molfile text is appended to the given builder, if there is one.
	 * @param mol molecule to be written
	 * @param allowScaling whether automatic coordinate scaling is permitted
	 * @param builder null or StringBuilder to append to
	 */
	public MolfileV3Creator(StereoMolecule mol, boolean allowScaling, StringBuilder builder)
	{
		this(mol, allowScaling, 0.0d, builder);
	}

    /**
     * Builds a version 3 molfile for the given molecule: a header block (molecule
     * name, program line, empty comment line, V3000 counts line), the CTAB body
     * and the closing "M  END" line. Lines are terminated with the platform
     * line separator.
     * <p>
     * Coordinate scaling is only considered when the molecule is found to have
     * coordinates. In that case a non-zero scalingFactor is used as given;
     * otherwise, if allowScaling is true, a factor is calculated automatically
     * (aiming at an average bond length of 1.5). In all other cases coordinates
     * are written unscaled.
     * <p>
     * If a StringBuilder is given, then the molfile will be appended to that.
     * @param mol molecule to be written; its helper arrays are updated up to parity level
     * @param allowScaling whether automatic coordinate scaling is permitted
     * @param scalingFactor explicit factor to multiply coordinates with, or 0 for none
     * @param builder null or StringBuilder to append to
     */
    public MolfileV3Creator(StereoMolecule mol, boolean allowScaling, double scalingFactor, StringBuilder builder)
    {
        mol.ensureHelperArrays(Molecule.cHelperParities);
        final String lineBreak = System.lineSeparator();

        // Write into the caller's buffer when there is one, else into a new one.
        if (builder != null) {
            mMolfile = builder;
        }
        else {
            mMolfile = new StringBuilder();
        }

        // Header block: name line, program line, blank comment line, counts line.
        final String title = mol.getName();
        mMolfile.append(title == null ? "" : title).append(lineBreak);
        mMolfile.append("Actelion Java MolfileCreator 2.0").append(lineBreak).append(lineBreak);
        mMolfile.append("  0  0  0  0  0  0              0 V3000").append(lineBreak);

        final boolean coordinatesPresent = hasCoordinates(mol);

        // An explicit factor takes precedence over automatic scaling; without
        // coordinates there is nothing to scale.
        double factor = 1.0;
        if (coordinatesPresent) {
            if (scalingFactor != 0) {
                factor = scalingFactor;
            }
            else if (allowScaling) {
                factor = calculateScalingFactor(mol);
            }
        }
        mScalingFactor = factor;

        writeBody(mol, coordinatesPresent);
        mMolfile.append("M  END").append(lineBreak);
    }

	private static boolean hasCoordinates(StereoMolecule mol) {
		if (mol.getAllAtoms() == 1)
			return true;	// we can only assume

		for(int atom=1; atom 3.0)
				scalingFactor = TARGET_AVBL / avbl;
			}
		else { // make the minimum distance between any two atoms twice as long as TARGET_AVBL
			double minDistance = Float.MAX_VALUE;
			for (int atom1=1; atom1 distance)
						minDistance = distance;
					}
				}
			scalingFactor = 2.0 * TARGET_AVBL / Math.max(TARGET_AVBL / 2, minDistance);
			}

		return scalingFactor;
    	}

	/**
	 * Creates only the CTAB portion (no header block, no "M  END" line) of a
	 * version 3 molfile. Coordinates are always written and are multiplied
	 * with the given factor.
	 * @param mol molecule to be written; its helper arrays are updated up to parity level
	 * @param scalingFactor factor to multiply all atom coordinates with
	 * @return a CTAB V3 with scaled atom coordinates
	 */
	public static String writeCTAB(StereoMolecule mol, double scalingFactor)
	{
		mol.ensureHelperArrays(Molecule.cHelperParities);

		final MolfileV3Creator creator = new MolfileV3Creator();
		creator.mScalingFactor = scalingFactor;
		creator.writeBody(mol, true);
		return creator.getMolfile();
	}

	/**
	 * Creates only the CTAB portion (no header block, no "M  END" line) of a
	 * version 3 molfile. The scaling factor of the internally used creator is
	 * left at its initial value of 1.0, so coordinates are not scaled.
	 * @param mol molecule to be written; its helper arrays are updated up to parity level
	 * @param hasCoordinates whether atom coordinates shall be written; passed on to writeBody
	 * @return a CTAB V3 without any coordinate scaling
	 */
	private static String writeCTAB(StereoMolecule mol, boolean hasCoordinates)
	{
		mol.ensureHelperArrays(Molecule.cHelperParities);

		final MolfileV3Creator creator = new MolfileV3Creator();
		creator.writeBody(mol, hasCoordinates);
		return creator.getMolfile();
	}

    private MolfileV3Creator() {
        mMolfile = new StringBuilder(32768);
    	}

    private void writeBody(StereoMolecule mol, boolean hasCoordinates) {
		final String nl = System.lineSeparator();
        mMolfile.append("M  V30 BEGIN CTAB"+nl);
        mMolfile.append("M  V30 COUNTS " + mol.getAllAtoms() + " " + mol.getAllBonds() + " 0 0 0"+nl);
        mMolfile.append("M  V30 BEGIN ATOM"+nl);

        for (int atom=0; atom 0) {
                        mMolfile.append(",");
                    	}
                    String label = Molecule.cAtomLabel[atomList[i]];
                    switch (label.length()) {
                        case 1:
                            mMolfile.append(label);
                            break;
                        case 2:
                            mMolfile.append(label);
                            break;
                        case 3:
                            mMolfile.append(label);
                            break;
                        default:
                            mMolfile.append("?");
                            break;
                    	}
                	}
                mMolfile.append("]");
            	}
            else if((mol.getAtomQueryFeatures(atom) & Molecule.cAtomQFAny) != 0) {
                mMolfile.append(" A");
            	}
            else if((mol.getAtomicNo(atom) >= 129 && mol.getAtomicNo(atom) <= 144) || mol.getAtomicNo(atom) == 154) {
	            mMolfile.append(" R#");
	            }
            else {
                mMolfile.append(" " + mol.getAtomLabel(atom));
            	}

            if (hasCoordinates) {
                mMolfile.append(" " + ((double)((int)(PRECISION_FACTOR * mScalingFactor * mol.getAtomX(atom))) / PRECISION_FACTOR));
                mMolfile.append(" " + ((double)((int)(PRECISION_FACTOR * mScalingFactor * -mol.getAtomY(atom))) / PRECISION_FACTOR));
                mMolfile.append(" " + ((double)((int)(PRECISION_FACTOR * mScalingFactor * -mol.getAtomZ(atom))) / PRECISION_FACTOR));
            	}
            else {
                mMolfile.append(" 0 0 0");
            	}

            mMolfile.append(" " + mol.getAtomMapNo(atom));

            if (mol.getAtomCharge(atom) != 0) {
                mMolfile.append(" CHG=" + mol.getAtomCharge(atom));
            	}

            if (mol.getAtomRadical(atom) != 0) {
                mMolfile.append(" RAD=");
                switch (mol.getAtomRadical(atom)) {
                    case Molecule.cAtomRadicalStateS:
                        mMolfile.append("1");
                        break;
                    case Molecule.cAtomRadicalStateD:
                        mMolfile.append("2");
                        break;
                    case Molecule.cAtomRadicalStateT:
                        mMolfile.append("3");
                        break;
                	}
            	}

            if (mol.getAtomParity(atom) == Molecule.cAtomParity1
             || mol.getAtomParity(atom) == Molecule.cAtomParity2) {
                mMolfile.append(" CFG=");
                if (mol.getAtomParity(atom) == Molecule.cAtomParity1) {
                    mMolfile.append("1");
                	}
                else {
                    mMolfile.append("2");
                	}
            	}

            if (mol.getAtomMass(atom) != 0) {
                mMolfile.append(" MASS=" + mol.getAtomMass(atom));
            	}

            int valence = mol.getAtomAbnormalValence(atom);
            if (valence != -1) {
                mMolfile.append(" VAL=" + ((valence == 0) ? "-1" : valence));
            	}

	        int atomicNo = mol.getAtomicNo(atom);
	        if ((atomicNo >= 129 && atomicNo <= 144) || atomicNo == 154) {
		        mMolfile.append(" RGROUPS=(1 " + (atomicNo == 154 ? 0 : atomicNo >= 142 ? atomicNo - 141 : atomicNo - 125) +")");
	            }

	        long hydrogenFlags = Molecule.cAtomQFHydrogen & mol.getAtomQueryFeatures(atom);
            if (hydrogenFlags == (Molecule.cAtomQFNot0Hydrogen | Molecule.cAtomQFNot1Hydrogen)) {
                mMolfile.append(" HCOUNT=2"); // at least 2 hydrogens
            	}
            else if(hydrogenFlags == Molecule.cAtomQFNot0Hydrogen) {
                mMolfile.append(" HCOUNT=1"); // at least 1 hydrogens
            	}
            else if(hydrogenFlags == (Molecule.cAtomQFNot1Hydrogen | Molecule.cAtomQFNot2Hydrogen | Molecule.cAtomQFNot3Hydrogen)) {
                mMolfile.append(" HCOUNT=-1"); // no hydrogens
            	}
            else if(hydrogenFlags == (Molecule.cAtomQFNot0Hydrogen | Molecule.cAtomQFNot2Hydrogen | Molecule.cAtomQFNot3Hydrogen)) {
                mMolfile.append(" HCOUNT=1"); // use at least 1 hydrogens as closest match for exactly one
            	}

            long substitution = mol.getAtomQueryFeatures(atom) & (Molecule.cAtomQFMoreNeighbours | Molecule.cAtomQFNoMoreNeighbours);
            if (substitution != 0) {
                if ((substitution & Molecule.cAtomQFMoreNeighbours) != 0) {
                    mMolfile.append(" SUBST=" + (mol.getAllConnAtoms(atom) + 1));
                	}
                else {
                    mMolfile.append(" SUBST=-1");
                	}
            	}

	        long ringFeatures = mol.getAtomQueryFeatures(atom) & Molecule.cAtomQFRingState;
            if (ringFeatures != 0) {
                if (ringFeatures == (Molecule.cAtomQFNot2RingBonds | Molecule.cAtomQFNot3RingBonds | Molecule.cAtomQFNot4RingBonds))
                    mMolfile.append(" RBCNT=-1");
	            else if (ringFeatures == Molecule.cAtomQFNotChain)
                    mMolfile.append(" RBCNT=2"); // any ring atom; there is no MDL equivalent
                else if (ringFeatures == (Molecule.cAtomQFNotChain | Molecule.cAtomQFNot3RingBonds | Molecule.cAtomQFNot4RingBonds))
                    mMolfile.append(" RBCNT=2");
	            else if (ringFeatures == (Molecule.cAtomQFNotChain | Molecule.cAtomQFNot2RingBonds | Molecule.cAtomQFNot4RingBonds))
                    mMolfile.append(" RBCNT=3");
	            else if (ringFeatures == (Molecule.cAtomQFNotChain | Molecule.cAtomQFNot2RingBonds | Molecule.cAtomQFNot3RingBonds))
                    mMolfile.append(" RBCNT=4");
            	}

            mMolfile.append(nl);
        	}

        mMolfile.append("M  V30 END ATOM"+nl);
        mMolfile.append("M  V30 BEGIN BOND"+nl);

        for (int bond=0; bond




© 2015 - 2024 Weber Informatics LLC | Privacy Policy