All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.io.NativeMDLReactionReader Maven / Gradle / Ivy

There is a newer version: 2024.11.2
Show newest version
/*
 * Copyright (c) 1997 - 2016
 * Actelion Pharmaceuticals Ltd.
 * Gewerbestrasse 16
 * CH-4123 Allschwil, Switzerland
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the copyright holder nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @author Thomas Sander
 */

package com.actelion.research.chem.io;

import com.actelion.research.chem.AromaticityResolver;
import com.actelion.research.chem.ExtendedMolecule;
import com.actelion.research.chem.Molecule;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.chem.reaction.Reaction;
import com.actelion.research.util.DoubleFormat;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;

public class NativeMDLReactionReader {
	// Capacity (in ints) of the scratch arrays that hold raw data blocks; the
	// block reader validates block lengths against this value.
	private static final int BUFFER_SIZE = 512;

	// Status codes returned by the text extraction methods; 0 means success,
	// negative values name the reason why no text was produced.
	private static final int kErrNoError = 0;
	private static final int kErrGetBranchNoData = -1;
	private static final int kErrGetMolInfoNoParent = -2;
	private static final int kErrGetMolInfoNoData = -3;
	private static final int kErrVariationUnavailable = -4;

	// Capacities of the registry number arrays allocated in the constructor.
	private static final int kMaxReactants = 16;
	private static final int kMaxSolvents = 40;
	private static final int kMaxCatalysts = 40;

	// Database directory path including the trailing file separator.
	private String	mDirectory;
	// Data type and sub-field directories; presumably filled by readDTP()/readSBF() -- confirm.
	private DTP[]	mDTPDir;
	private SBF[]	mSBFDir;
	private DTP		mRootDTP;
	private int		mReactionCount;
	// Raw int buffer plus the word index and bit mask used when reading bit fields from it.
	private int[]	mBuffer;
	private int		mBufferIndex;
	private int		mBitmask;
	// Yield of the current reaction as set by extractFloatYield()/extractIntYield().
	private double	mYield;
	private Reaction mReaction;
	// Text collected per role; the getXxxData() accessors return null while a buffer is empty.
	private StringBuffer mReactantData,mProductData,mSolventData,mCatalystData;
	// mPointerErrors counts pointer reads that failed with an EOF or I/O error.
	private int		mSolventCount,mCatalystCount,mPointerErrors;
	private int		mFieldCount;
	// Registry numbers, sized by kMaxReactants, kMaxSolvents and kMaxCatalysts.
	private int[]	mMolRegNo,mSolventRegNo,mCatalystRegNo;
	private ArrayList mSolvents,mCatalysts;

//	private long[][] pointerStatistics;


	/**
	 * Creates a reader for the reaction database stored in the given directory.
	 * Stores the directory path with a trailing file separator, passes the two
	 * directory files "DTPDIR.DAT" and "SBFDIR.DAT" to readDTP()/readSBF() and
	 * allocates the catalyst list and the registry number arrays.
	 *
	 * @param directory path of the database directory without trailing separator
	 * @throws IOException declared for readDTP()/readSBF(), whose bodies are not visible here
	 */
	public NativeMDLReactionReader(String directory) throws IOException {
		mDirectory = directory + File.separator;
		readDTP("DTPDIR.DAT");
		readSBF("SBFDIR.DAT");

//		pointerStatistics = new long[mDTPDir.length+1][4];
//		for (int i=0; i();
		// NOTE(review): the commented line above looks truncated in this copy of
		// the file; no visible statement initialises mSolvents or the four text
		// buffers that the accessors read -- confirm against the upstream source.
		mCatalysts = new ArrayList();
		mMolRegNo = new int[kMaxReactants];
		mSolventRegNo = new int[kMaxSolvents];
		mCatalystRegNo = new int[kMaxCatalysts];

		// no pointer read has failed yet
		mPointerErrors = 0;
		}

//	public void printPointerStatistics() {
//		System.out.println("\tmin\tmax\tmean\tcount\tentries\tdrsize");
//		for (int i=0; i= blocks)
					throw new IOException("invalid variation");
				variationPointer = mBuffer[variation];
				break;
				}
			}

		if (variationPointer == -1)
			throw new IOException("no VARIATION data type");

		for (int i=0; i getCatalysts() {
		return mCatalysts;
		}

	/**
	 * Gives access to the solvent list of this reader.
	 *
	 * @return the internal list object itself, not a copy
	 */
	public ArrayList getSolvents() {
		return this.mSolvents;
		}

	/**
	 * Returns the text collected in the reactant data buffer.
	 *
	 * @return the buffer content, or null if the buffer is empty
	 */
	public String getReactantData() {
		if (mReactantData.length() != 0)
			return mReactantData.toString();

		return null;
		}

	/**
	 * Returns the text collected in the product data buffer.
	 *
	 * @return the buffer content, or null if the buffer is empty
	 */
	public String getProductData() {
		if (mProductData.length() != 0)
			return mProductData.toString();

		return null;
		}

	/**
	 * Returns the text collected in the solvent data buffer.
	 *
	 * @return the buffer content, or null if the buffer is empty
	 */
	public String getSolventData() {
		if (mSolventData.length() != 0)
			return mSolventData.toString();

		return null;
		}

	/**
	 * Returns the text collected in the catalyst data buffer.
	 *
	 * @return the buffer content, or null if the buffer is empty
	 */
	public String getCatalystData() {
		if (mCatalystData.length() != 0)
			return mCatalystData.toString();

		return null;
		}

	public String[] getFieldNames() {
		mFieldCount = 0;
		for (int i=0; i= blocks)
				return kErrVariationUnavailable;

			data[0] = data[variation];
			blocks = 1;
			}

		int indentation = 0;
		for (int i=1; i<4; i++)
			if (dtp.rootID[i] != 0)
				indentation++;

		if (dtp.isparent != 0) {
			for (int eintrag=0; eintrag data[offset])
							break;
					int datapoin = offset+mSBFDir[sbf].begin-(dtp.length == 0 ? 0 : 1);
					if (mSBFDir[sbf].type == 1) {	  // float-float range
						if (mSBFDir[sbf].format2.length() == 0 || data[datapoin] == 0x20202020)
							continue;

						String text = filterText(formatedString(data, datapoin, mSBFDir[sbf]));
						appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
						}
					else if (mSBFDir[sbf].type == 2) { // fixed length text
						if (data[datapoin] == 0x80808080)
							continue;

						StringBuffer buf = new StringBuffer();
						int v = 0;
						for (int i=0; i> 2)] : v >>> 8;
							buf.append((char)(v & 0x000000FF));
							}

						String text = filterText(buf.toString());
						appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
						}
					else if (mSBFDir[sbf].type == 4) {  // integer
						if (data[datapoin] == 0x20202020)
							continue;

						String text = ""+data[datapoin];
						appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
						}
					else if (mSBFDir[sbf].type == 5) {  // variable length text
						if (data[datapoin] == 0 || data[datapoin] == 0x80808080)
							continue;

						int length = 4*(data[offset]+1-mSBFDir[sbf].begin);
						StringBuffer buf = new StringBuffer();
						int v = 0;
						for (int i=0; i> 2)] : v >>> 8;
							buf.append((char)(v & 0x000000FF));
							}
						String text = filterText(buf.toString());
						appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
						}
					}
				offset += (dtp.length != 0) ? dtp.length+1 : data[offset]+2;
				}

/*		  if ((mReactionTextP->getArrowLines() == 0) && !strncmp(mDTPDir[dtp].dtpnam,"RXNTEXT",7)) {
				offset = 0;
				for (block=0; block 0)
						length--;
					mReactionTextP->addArrowText( tptr, length );
					offset += *(data+offset)+2;
					}
				}*/
			}
		return kErrNoError;
		}

	/**
	 * Stores a text value in the slot of the given field. The first value of a
	 * field is stored as is; every further value is appended on a new line.
	 *
	 * @param fieldData array of field values, one slot per field
	 * @param index slot to write to
	 * @param text value to store or append
	 */
	private void appendFieldData(String[] fieldData, int index, String text) {
		final String existing = fieldData[index];
		fieldData[index] = (existing == null) ? text : existing + '\n' + text;
		}

	private int putMolText(int entry, StringBuffer text, int mol, DTP dtp) {
		int[] data = new int[BUFFER_SIZE];

		if (dtp.isparent == 1)
			return kErrGetMolInfoNoParent;
		
		int blocks = 0;
		try {
			blocks = getData(entry, data, dtp);
			}
		catch (IOException e) {}

		if (blocks == 0)
			return kErrGetMolInfoNoData;
		
		if (dtp.typno == 0) {
			int offset = 0;
			for (int block=0; block data[offset])
							break;

					text1 = mSBFDir[sbf].name;
					int datapoin = offset+mSBFDir[sbf].begin-(dtp.length == 0 ? 0 : 1);

					if (mSBFDir[sbf].type == 1) {	  // float-float range
						if (mSBFDir[sbf].format2.length() == 0 || data[datapoin] == 0x20202020) {
							text2 = "";
							continue;
							}
						text2 = filterText(formatedString(data, datapoin, mSBFDir[sbf]));
						}
					else if (mSBFDir[sbf].type == 2) { // fixed length text
						if (data[datapoin] == 0x80808080) {
							text2 = "";
							continue;
							}
						StringBuffer buf = new StringBuffer();
						int v = 0;
						for (int i=0; i> 2)] : v >>> 8;
							buf.append((char)(v & 0x000000FF));
							}
						text2 = filterText(buf.toString());
						}
					else if (mSBFDir[sbf].type == 4) {  // integer
						if (data[datapoin] == 0x20202020) {
							text2 = "";
							continue;
							}
						text2 = ""+data[datapoin];
						}
					else if (mSBFDir[sbf].type == 5) {  // variable length text
						if (data[datapoin] == 0 || data[datapoin] == 0x80808080) {
							text2 = "";
							continue;
							}
						int length = 4*(data[offset]+1-mSBFDir[sbf].begin);
						StringBuffer buf = new StringBuffer();
						int v = 0;
						for (int i=0; i> 2)] : v >>> 8;
							buf.append((char)(v & 0x000000FF));
							}
						text2 = filterText(buf.toString());
						}
					if (text2 != null) {
						if (text.length() != 0)
							text.append('\n');
						text.append(""+(mol+1)+") "+text1);
						if (!text1.endsWith(":"))
							text.append(":");
						text.append(text2);
						}
					}
				offset += (dtp.length != 0) ? dtp.length+1 : data[offset]+2;
				}
			}
		
		return kErrNoError;
		}
		
	/**
	 * Renders a float or float-range field as text according to the sub-field's
	 * format2 pattern. Every 'R' in the pattern is replaced by the next float
	 * value taken from data (at most two values); text enclosed in single
	 * quotes is copied literally, with '-' expanded to " - ". If both values of
	 * a range are equal, everything written after the first value is dropped,
	 * so that a single number is shown.
	 *
	 * @param data raw data block
	 * @param datapoin index of the first float value within data
	 * @param sbf sub-field descriptor providing the format2 pattern
	 * @return the formatted text
	 */
	private String formatedString(int[] data, int datapoin, SBF sbf) {
		final String format = sbf.format2;
		double[] range = new double[2];
		StringBuffer string = new StringBuffer();
		int dataCount = 0;
		int formatpoin = 0;
		int lengthAfterR1 = 0;
		while (formatpoin < format.length()) {
			if (format.charAt(formatpoin) == 'R') {
				if (dataCount > 1)
					return string.toString();
				if (dataCount == 1 && string.length() != 0) {
					// Separate the second value from the first if the pattern put
					// no literal text between them. The upper bound has to be the
					// character '9'; comparing against the number 9 never matched.
					char previous = string.charAt(string.length()-1);
					if (previous >= '0' && previous <= '9')
						string.append(' ');
					}
				range[dataCount] = convertFloat(data[datapoin]);
				if (dataCount != 0) {   // second float value
					if (range[0] == range[1]) {
						// degenerate range: keep only the first value
						string.setLength(lengthAfterR1);
						formatpoin++;
						continue;
						}
					}
				// the factor slightly above 1 presumably compensates for binary rounding before formatting -- confirm
				string.append(DoubleFormat.toString(1.00000001*range[dataCount]));
				if (dataCount == 0)
					lengthAfterR1 = string.length();
				dataCount++;
				datapoin++;
				}

			if (format.charAt(formatpoin) == '\'') {
				formatpoin++;
				// Check the bounds before reading the character, so that a pattern
				// without closing quote ends the text instead of throwing.
				while (formatpoin < format.length()
					&& formatpoin < 20
					&& format.charAt(formatpoin) != '\'') {
					if (format.charAt(formatpoin) == '-')
						string.append(" - ");
					else
						string.append(format.charAt(formatpoin));
					formatpoin++;
					}
				}

			formatpoin++;
			}
		return string.toString();
		}

	/**
	 * Is applied to every text value extracted from a data block before the
	 * value is stored. Currently returns its argument unchanged.
	 *
	 * @param s extracted text
	 * @return s itself
	 */
	private String filterText(String s) {
		return s;
		}
	
	private void getDeepCatalysts(int entry, DTP dtp) throws IOException {
		//get info about catalysts/solvents if datatypes are two levels down VARIATION

		int[] data = new int[50];
		int blocks = getData(entry, data, mDTPDir[dtp.rootID[1]-1]);

		for (int block=0; block= size) {
			dis.close();
			throw new IOException("pointer >= filesize");
			}
		dis.skipBytes(4*pointer);

		int offset = 0;
		int blocks = 0;
		switch (dtp.length) {
		case 0:						 // typ: n,data,[x,n,data ...],0
			do {
				data[offset] = readInt(dis);
				if (data[offset] < 0 || data[offset] >= BUFFER_SIZE-offset-2) {
					dis.close();
					throw new IOException("getData() unexpected value");
					}
				for (int i=0; i<=data[offset]; i++)
					data[offset+i+1] = readInt(dis);
				offset += data[offset]+2;
				blocks++;
				} while ((dtp.access2 == 'M') && (data[offset-1] == pointer+offset));
			break;
		default:						// typ: fixed length,[x,fixed length, ...],0
			do {
				if ((dtp.length < 0) || (offset+dtp.length > BUFFER_SIZE-2)) {
					dis.close();
					throw new IOException("getData() unexpected value");
					}
				for (int i=0; i<=dtp.length; i++)
					data[offset+i] = readInt(dis);
				offset += dtp.length+1;
				blocks++;
				} while ((dtp.access2 == 'M') && (data[offset-1] == pointer+offset));
			break;
			}

		dis.close();
	
		return blocks;
		}

	/**
	 * Reads the molecule list of a reaction entry and builds mReaction from it.
	 * The data block starts with the number of registry numbers followed by the
	 * numbers themselves; a negative number denotes a reactant, a positive one
	 * a product. The absolute registry numbers are kept in mMolRegNo.
	 *
	 * @param entry index of the reaction entry
	 * @param dtp data type descriptor of the molecule list
	 * @throws IOException if the entry does not consist of exactly one data block,
	 *         if it lists more molecules than mMolRegNo can hold, or if reading a
	 *         molecule fails
	 */
	private void getReaction(int entry, DTP dtp) throws IOException {
		int[] data = new int[20];

		if (getData(entry, data, dtp) != 1)
			throw new IOException("getReaction() no molecules");

		// Reject oversized records with a checked exception rather than running
		// past the end of mMolRegNo with an ArrayIndexOutOfBoundsException.
		int moleculeCount = data[0];
		if (moleculeCount > mMolRegNo.length)
			throw new IOException("getReaction() too many molecules: "+moleculeCount);

		mReaction = new Reaction();
		for (int i=1; i<=moleculeCount; i++) {
			int regNo = data[i];
			mMolRegNo[i-1] = Math.abs(regNo);
			if (regNo < 0)
				mReaction.addReactant(getMolecule(-regNo));
			else
				mReaction.addProduct(getMolecule(regNo));
			}
		}

	private StereoMolecule getMolecule(int regNo) throws IOException {
		StereoMolecule mol = new StereoMolecule();

		for(int i=0; i mol.getMaxAtoms()
		 || mol.getAllBonds() > mol.getMaxBonds())
			throw new IOException("getSema() max atoms or bonds exceeded");
	
		int fragments = readBits(entryLen);
	
		if (mol.getAllBonds() < mol.getAllAtoms()-fragments)
			throw new IOException("getSema() unexpected few bonds");
	
		int bnd = 0;
		for (int i=0; i bond in ring
			if (order == 0)
				continue;

			order &= 3;
			mol.setBondType(bnd, order == 0 ? Molecule.cBondTypeDelocalized :
								 order == 1 ? Molecule.cBondTypeSingle :
								 order == 2 ? Molecule.cBondTypeDouble
											: Molecule.cBondTypeTriple);
			bnd++;
			}
		mol.setAllBonds(bnd);
	
		int unknowns = readBits(entryLen);   // trash E/Z bond info
		for (int i=0; i> 1;
		int datalen2 = (1 + readBits(8)) >> 1;
		readBits(12);
		int entryLen = readBits(4);

		mBufferIndex = 2+datalen1;
		int[] atms = new int[mReaction.getMolecules()];
		for (int mol=0; mol entry)
//	pointerStatistics[dtp.lnum][0] = entry;
//if (pointerStatistics[dtp.lnum][1] < entry)
//	pointerStatistics[dtp.lnum][1] = entry;
//pointerStatistics[dtp.lnum][2] += entry;
//pointerStatistics[dtp.lnum][3] ++;


		DataInputStream dis = getDataInputStream(pointerfile(dtp.drpoin));
		dis.skipBytes(4+(1+entry)*dtp.drsize*4);
		try {
			int pointer = readInt(dis);
			dis.close();
			return pointer;
			}
		catch (EOFException e) {
			File f = getFile(pointerfile(dtp.drpoin));
//			System.out.println("getPointer(entry:"+Integer.toHexString(entry)+","+dtp.lnum+") skip:"+Integer.toHexString(4+(1+entry)*dtp.drsize*4)+" EOF filesize:"+Long.toHexString(f.length())+" "+f.getName());
			dis.close();
			mPointerErrors++;
			return 0;
			}
		catch (IOException e) {
//			System.out.println("getPointer(entry:"+Integer.toHexString(entry)+","+dtp.lnum+") skip:"+Integer.toHexString(4+(1+entry)*dtp.drsize*4)+" "+e.getMessage());
			dis.close();
			mPointerErrors++;
			return 0;
			}
		}

	/**
	 * Reads the leading int of a data file and derives the file size from it.
	 *
	 * @param dis stream positioned at the start of the file
	 * @return the negated leading value if it is negative, otherwise the number
	 *         of bytes the stream reports as still available
	 * @throws IOException if reading from the stream fails
	 */
	private int readFileSize(DataInputStream dis) throws IOException {
		final int header = readInt(dis);
		if (header >= 0)
			return dis.available();	// ISIS format

		return -header;				// REACCS format
		}

	/**
	 * Reads the next four bytes of the stream as an int with reversed byte order.
	 *
	 * @param dis stream to read from
	 * @return the byte-swapped value
	 * @throws IOException if fewer than four bytes are left or reading fails
	 */
	private int readInt(DataInputStream dis) throws IOException {
		final int raw = dis.readInt();
		return invertInt(raw);
		}

	/**
	 * Reverses the order of the four bytes of an int.
	 *
	 * @param i value to convert
	 * @return i with its bytes in opposite order
	 */
	private int invertInt(int i) {
		return Integer.reverseBytes(i);
		}

	/**
	 * Takes the yield from the first two buffer words interpreted as floats.
	 * Nothing happens if the first word is 0x20202020 or if the first value is
	 * outside 0...100.1. If the second value is within that range as well, the
	 * mean of both is used. The result is rounded to a whole number.
	 */
	private void extractFloatYield() {
		if (mBuffer[0] == 0x20202020)
			return;

		final double lower = convertFloat(mBuffer[0]);
		final double upper = convertFloat(mBuffer[1]);
		if (!(lower >= 0 && lower <= 100.1))
			return;

		double yield = lower;
		if (upper >= 0 && upper <= 100.1)
			yield = (lower + upper) / 2;

		mYield = (int)(yield + 0.5);
		}

	/**
	 * Takes the yield from the first buffer word if that word is an integer
	 * within 0...100; otherwise mYield stays as it is.
	 */
	private void extractIntYield() {
		final int value = mBuffer[0];
		if (value < 0 || value > 100)
			return;

		mYield = value;
		}

	/**
	 * Changes a VAX float, given as raw int, to a Java double.
	 * Bit 15 is taken as sign and bits 7-14 as exponent with an offset of 128.
	 * Bits 0-6 and 16-31 form the mantissa below an implicit leading one.
	 *
	 * @param i raw 32 bit pattern
	 * @return the decoded value; 0 if all bits are zero
	 */
	private double convertFloat(int i) {
		if (i == 0)
			return 0;

		final int exponent = (i >> 7) & 0xFF;
		final int mantissa = 0x00800000 | ((i & 0x0000007F) << 16) | (i >>> 16);
		final double magnitude = (double)mantissa/(double)0x01000000 * Math.pow(2, exponent-128);

		if ((i & 0x00008000) != 0)
			return -magnitude;

		return magnitude;
		}

	private int readBits(int count) {
		int retval = 0;
		for(int i=0; i>>= 1;
			if (mBitmask == 0) {
				mBitmask = 0x80000000;
				mBufferIndex++;
				}
			}
		return retval;
		}

	/**
	 * Locates a database file within the database directory. If no file with
	 * the given name exists, the lower-case variant of the name is used.
	 *
	 * @param filename file name relative to the database directory
	 * @return the file with the given name if it exists, otherwise the file
	 *         with the lower-case name (which may not exist either)
	 * @throws IOException never thrown by this implementation; kept for callers
	 */
	private File getFile(String filename) throws IOException {
		File file = new File(mDirectory+filename);
		if (!file.exists()) {
			// Locale.ROOT keeps the mapping independent of the default locale;
			// with a Turkish locale 'I' would become a dotless 'ı'.
			file = new File(mDirectory+filename.toLowerCase(java.util.Locale.ROOT));
			}

		return file;
		}

	/**
	 * Opens a database file for reading. If no file with the given name exists
	 * in the database directory, the lower-case variant of the name is opened.
	 * The caller is responsible for closing the returned stream.
	 *
	 * @param filename file name relative to the database directory
	 * @return an open stream on the file
	 * @throws IOException if neither variant of the file can be opened
	 */
	private DataInputStream getDataInputStream(String filename) throws IOException {
		File file = new File(mDirectory+filename);
		if (!file.exists()) {
			// Locale.ROOT keeps the mapping independent of the default locale;
			// with a Turkish locale 'I' would become a dotless 'ı'.
			file = new File(mDirectory+filename.toLowerCase(java.util.Locale.ROOT));
			}

		return new DataInputStream(new FileInputStream(file));
		}
	}

/**
 * One data type record of the data type directory.
 * The listed fields add up to SIZE bytes: 12 ints, the 20 byte name, 4 single
 * bytes, isparent, 20+4 ints for depdata and rootID and 4 trailing ints.
 * Fields named unk... have no visible use in the reader.
 */
class DTP {
	static final int SIZE = 188;	// record size in bytes
	int lnum;
	int drpoin;		// passed to pointerfile() to select the pointer file of this data type
	int parentID;
	int length;		// 0: blocks carry their own length word; otherwise fixed block length in ints
	int typno;		// molecule text is only extracted if this is 0
	int security;
	int sbfpoin;
	int sbfnum;
	int hash;
	int unk5;
	int unk6;
	int unk7;
	String dtpnam;  // 20 bytes
	byte access1;
	byte access2;	// 'M': the block reader continues with chained follow-up blocks
	byte[] empty;   // 2 bytes
	int isparent;	// non-zero for parent data types
	int[] depdata;  // 20 ints
	int[] rootID;   // 4 ints; rootID[1]-1 is used as index into the data type directory
	int unk9;
	int unk10;
	int drsize;		// pointer file record size in ints, used to compute the offset of an entry
	int unk11;
	}

/**
 * One sub-field record of the sub-field directory; it describes a single
 * field within the data blocks of a data type.
 * SIZE matches four 20 byte strings plus the 20 ints from lnum to a17, so
 * fieldNo is presumably assigned by the reader rather than read from the
 * file -- confirm against readSBF().
 */
class SBF {
	static final int SIZE = 160;	// record size in bytes
	String shortnam;	// 20 bytes
	String format1;	 // 20 bytes
	String format2;	 // 20 bytes; pattern evaluated by formatedString(): 'R' = float value, quoted text = literal
	String name;		// 20 bytes; label put in front of the value in molecule text
	int lnum;
	int type;		// 1: float or float range, 2: fixed length text, 4: integer, 5: variable length text
	int a2;
	int a3;
	int datalen;
	int datatyp;
	int a5;
	int dtppoin;
	int a7;
	int begin;		// position of the field relative to the start of its data block
	int a8;
	int a9;
	int a10;
	int a11;
	int a12;
	int a13;
	int a14;
	int a15;
	int a16;
	int a17;
	int fieldNo;	// slot of this field in the field data array passed to appendFieldData()
	}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy