com.actelion.research.chem.io.NativeMDLReactionReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openchemlib Show documentation
Show all versions of openchemlib Show documentation
Open Source Chemistry Library
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @author Thomas Sander
*/
package com.actelion.research.chem.io;
import com.actelion.research.chem.AromaticityResolver;
import com.actelion.research.chem.ExtendedMolecule;
import com.actelion.research.chem.Molecule;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.chem.reaction.Reaction;
import com.actelion.research.util.DoubleFormat;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
public class NativeMDLReactionReader {
// Size (in ints) used for record scratch buffers; getData() bounds-checks record lengths against this value.
private static final int BUFFER_SIZE = 512;
// Status codes returned by the field/text extraction helpers: 0 means success, negative values name the failure.
private static final int kErrNoError = 0;
// NOTE(review): no use of this code is visible in this part of the file — confirm it is still needed.
private static final int kErrGetBranchNoData = -1;
private static final int kErrGetMolInfoNoParent = -2;
private static final int kErrGetMolInfoNoData = -3;
private static final int kErrVariationUnavailable = -4;
// Capacities of the registry-number arrays (mMolRegNo, mSolventRegNo, mCatalystRegNo) allocated in the constructor.
private static final int kMaxReactants = 16;
private static final int kMaxSolvents = 40;
private static final int kMaxCatalysts = 40;
// Database directory including a trailing File.separator; prepended to every file name that is opened.
private String mDirectory;
// Data-type and sub-field directory tables; presumably populated by readDTP()/readSBF() in the constructor — confirm.
private DTP[] mDTPDir;
private SBF[] mSBFDir;
private DTP mRootDTP;
private int mReactionCount;
// Raw record words; read by the yield extraction methods.
private int[] mBuffer;
// Bit-reader cursor: readBits() increments mBufferIndex and resets mBitmask to the top bit whenever mBitmask reaches 0.
private int mBufferIndex;
private int mBitmask;
// Set by extractFloatYield()/extractIntYield() when the stored value lies in the accepted 0..100 range.
private double mYield;
// Replaced with a fresh Reaction by getReaction().
private Reaction mReaction;
// Text collected per role; the public get...Data() accessors return null while a buffer is empty.
private StringBuffer mReactantData,mProductData,mSolventData,mCatalystData;
// mPointerErrors is zeroed in the constructor and incremented each time getPointer() fails to read a pointer.
private int mSolventCount,mCatalystCount,mPointerErrors;
// Reset to 0 at the start of getFieldNames().
private int mFieldCount;
// mMolRegNo receives the absolute registry number of every molecule read by getReaction().
private int[] mMolRegNo,mSolventRegNo,mCatalystRegNo;
// Lists returned as-is by getSolvents()/getCatalysts().
private ArrayList mSolvents,mCatalysts;
// private long[][] pointerStatistics;
public NativeMDLReactionReader(String directory) throws IOException {
mDirectory = directory + File.separator;
readDTP("DTPDIR.DAT");
readSBF("SBFDIR.DAT");
// pointerStatistics = new long[mDTPDir.length+1][4];
// for (int i=0; i();
mCatalysts = new ArrayList();
mMolRegNo = new int[kMaxReactants];
mSolventRegNo = new int[kMaxSolvents];
mCatalystRegNo = new int[kMaxCatalysts];
mPointerErrors = 0;
}
// public void printPointerStatistics() {
// System.out.println("\tmin\tmax\tmean\tcount\tentries\tdrsize");
// for (int i=0; i= blocks)
throw new IOException("invalid variation");
variationPointer = mBuffer[variation];
break;
}
}
if (variationPointer == -1)
throw new IOException("no VARIATION data type");
for (int i=0; i getCatalysts() {
return mCatalysts;
}
/**
 * Gives access to the solvent list held by this reader.
 * The reader's own list instance is handed out, not a copy.
 *
 * @return the list stored in {@code mSolvents}
 */
public ArrayList getSolvents() {
    return this.mSolvents;
}
/**
 * Returns the text collected for the reactants.
 *
 * @return the content of the reactant buffer, or {@code null} if the buffer is empty
 */
public String getReactantData() {
    if (mReactantData.length() == 0) {
        return null;
    }
    return mReactantData.toString();
}
/**
 * Returns the text collected for the products.
 *
 * @return the content of the product buffer, or {@code null} if the buffer is empty
 */
public String getProductData() {
    if (mProductData.length() == 0) {
        return null;
    }
    return mProductData.toString();
}
/**
 * Returns the text collected for the solvents.
 *
 * @return the content of the solvent buffer, or {@code null} if the buffer is empty
 */
public String getSolventData() {
    if (mSolventData.length() == 0) {
        return null;
    }
    return mSolventData.toString();
}
/**
 * Returns the text collected for the catalysts.
 *
 * @return the content of the catalyst buffer, or {@code null} if the buffer is empty
 */
public String getCatalystData() {
    if (mCatalystData.length() == 0) {
        return null;
    }
    return mCatalystData.toString();
}
public String[] getFieldNames() {
mFieldCount = 0;
for (int i=0; i= blocks)
return kErrVariationUnavailable;
data[0] = data[variation];
blocks = 1;
}
int indentation = 0;
for (int i=1; i<4; i++)
if (dtp.rootID[i] != 0)
indentation++;
if (dtp.isparent != 0) {
for (int eintrag=0; eintrag data[offset])
break;
int datapoin = offset+mSBFDir[sbf].begin-(dtp.length == 0 ? 0 : 1);
if (mSBFDir[sbf].type == 1) { // float-float range
if (mSBFDir[sbf].format2.length() == 0 || data[datapoin] == 0x20202020)
continue;
String text = filterText(formatedString(data, datapoin, mSBFDir[sbf]));
appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
}
else if (mSBFDir[sbf].type == 2) { // fixed length text
if (data[datapoin] == 0x80808080)
continue;
StringBuffer buf = new StringBuffer();
int v = 0;
for (int i=0; i> 2)] : v >>> 8;
buf.append((char)(v & 0x000000FF));
}
String text = filterText(buf.toString());
appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
}
else if (mSBFDir[sbf].type == 4) { // integer
if (data[datapoin] == 0x20202020)
continue;
String text = ""+data[datapoin];
appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
}
else if (mSBFDir[sbf].type == 5) { // variable length text
if (data[datapoin] == 0 || data[datapoin] == 0x80808080)
continue;
int length = 4*(data[offset]+1-mSBFDir[sbf].begin);
StringBuffer buf = new StringBuffer();
int v = 0;
for (int i=0; i> 2)] : v >>> 8;
buf.append((char)(v & 0x000000FF));
}
String text = filterText(buf.toString());
appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
}
}
offset += (dtp.length != 0) ? dtp.length+1 : data[offset]+2;
}
/* if ((mReactionTextP->getArrowLines() == 0) && !strncmp(mDTPDir[dtp].dtpnam,"RXNTEXT",7)) {
offset = 0;
for (block=0; block 0)
length--;
mReactionTextP->addArrowText( tptr, length );
offset += *(data+offset)+2;
}
}*/
}
return kErrNoError;
}
/**
 * Stores {@code text} in slot {@code index} of {@code fieldData}.
 * An empty (null) slot simply takes the text; an occupied slot keeps its
 * content and gets the new text appended on a new line.
 *
 * @param fieldData array of per-field texts that is modified in place
 * @param index     slot to write to
 * @param text      text to store or append
 */
private void appendFieldData(String[] fieldData, int index, String text) {
    String existing = fieldData[index];
    fieldData[index] = (existing == null) ? text : existing + '\n' + text;
}
private int putMolText(int entry, StringBuffer text, int mol, DTP dtp) {
int[] data = new int[BUFFER_SIZE];
if (dtp.isparent == 1)
return kErrGetMolInfoNoParent;
int blocks = 0;
try {
blocks = getData(entry, data, dtp);
}
catch (IOException e) {}
if (blocks == 0)
return kErrGetMolInfoNoData;
if (dtp.typno == 0) {
int offset = 0;
for (int block=0; block data[offset])
break;
text1 = mSBFDir[sbf].name;
int datapoin = offset+mSBFDir[sbf].begin-(dtp.length == 0 ? 0 : 1);
if (mSBFDir[sbf].type == 1) { // float-float range
if (mSBFDir[sbf].format2.length() == 0 || data[datapoin] == 0x20202020) {
text2 = "";
continue;
}
text2 = filterText(formatedString(data, datapoin, mSBFDir[sbf]));
}
else if (mSBFDir[sbf].type == 2) { // fixed length text
if (data[datapoin] == 0x80808080) {
text2 = "";
continue;
}
StringBuffer buf = new StringBuffer();
int v = 0;
for (int i=0; i> 2)] : v >>> 8;
buf.append((char)(v & 0x000000FF));
}
text2 = filterText(buf.toString());
}
else if (mSBFDir[sbf].type == 4) { // integer
if (data[datapoin] == 0x20202020) {
text2 = "";
continue;
}
text2 = ""+data[datapoin];
}
else if (mSBFDir[sbf].type == 5) { // variable length text
if (data[datapoin] == 0 || data[datapoin] == 0x80808080) {
text2 = "";
continue;
}
int length = 4*(data[offset]+1-mSBFDir[sbf].begin);
StringBuffer buf = new StringBuffer();
int v = 0;
for (int i=0; i> 2)] : v >>> 8;
buf.append((char)(v & 0x000000FF));
}
text2 = filterText(buf.toString());
}
if (text2 != null) {
if (text.length() != 0)
text.append('\n');
text.append(""+(mol+1)+") "+text1);
if (!text1.endsWith(":"))
text.append(":");
text.append(text2);
}
}
offset += (dtp.length != 0) ? dtp.length+1 : data[offset]+2;
}
}
return kErrNoError;
}
/**
 * Renders a float or float-float range field as text, driven by the
 * {@code format2} pattern of the given sub-field descriptor.
 * <p>
 * Pattern handling as implemented here:
 * <ul>
 * <li>{@code R} consumes the next word of {@code data}, converts it with
 *     {@link #convertFloat(int)} and appends the number. At most two values are
 *     rendered; a third {@code R} ends formatting.</li>
 * <li>If the second value equals the first, everything appended after the first
 *     number is dropped, so a degenerate range prints as a single value.</li>
 * <li>Text between single quotes is copied literally, except that {@code -}
 *     is widened to {@code " - "}.</li>
 * </ul>
 *
 * @param data     record words containing the value(s)
 * @param datapoin index of the first value within {@code data}
 * @param sbf      descriptor whose {@code format2} pattern is applied
 * @return the formatted text
 */
private String formatedString(int[] data, int datapoin, SBF sbf) {
    final String format = sbf.format2;
    double[] range = new double[2];
    StringBuilder string = new StringBuilder();
    int dataCount = 0;
    int formatpoin = 0;
    int lengthAfterR1 = 0;
    while (formatpoin < format.length()) {
        if (format.charAt(formatpoin) == 'R') {
            if (dataCount > 1)
                return string.toString();
            if (dataCount == 1 && string.length() != 0) {
                // Keep two numbers apart if nothing but a digit precedes the second one.
                // (The upper bound must be the character '9'; comparing against the
                // int 9 made this test always false.)
                char previous = string.charAt(string.length()-1);
                if (previous >= '0' && previous <= '9')
                    string.append(' ');
            }
            range[dataCount] = convertFloat(data[datapoin]);
            if (dataCount != 0) {   // second float value
                if (range[0] == range[1]) {
                    // identical bounds: fall back to the text as it was right after the first number
                    string.setLength(lengthAfterR1);
                    formatpoin++;
                    continue;
                }
            }
            string.append(DoubleFormat.toString(1.00000001*range[dataCount]));
            if (dataCount == 0)
                lengthAfterR1 = string.length();
            dataCount++;
            datapoin++;
        }
        if (format.charAt(formatpoin) == '\'') {
            formatpoin++;
            // Check the bounds before reading the character, so that an unterminated
            // quote ends the literal instead of throwing StringIndexOutOfBoundsException.
            // The limit of 20 is kept from the original loop condition.
            while (formatpoin < format.length()
                && formatpoin < 20
                && format.charAt(formatpoin) != '\'') {
                if (format.charAt(formatpoin) == '-')
                    string.append(" - ");
                else
                    string.append(format.charAt(formatpoin));
                formatpoin++;
            }
        }
        formatpoin++;
    }
    return string.toString();
}
/**
 * Hook for cleaning up extracted field text. Currently a no-op.
 *
 * @param s text as extracted from the record
 * @return {@code s}, unchanged
 */
private String filterText(String s) {
    final String unfiltered = s;
    return unfiltered;
}
private void getDeepCatalysts(int entry, DTP dtp) throws IOException {
//get info about catalysts/solvents if datatypes are two levels down VARIATION
int[] data = new int[50];
int blocks = getData(entry, data, mDTPDir[dtp.rootID[1]-1]);
for (int block=0; block= size) {
dis.close();
throw new IOException("pointer >= filesize");
}
dis.skipBytes(4*pointer);
int offset = 0;
int blocks = 0;
switch (dtp.length) {
case 0: // typ: n,data,[x,n,data ...],0
do {
data[offset] = readInt(dis);
if (data[offset] < 0 || data[offset] >= BUFFER_SIZE-offset-2) {
dis.close();
throw new IOException("getData() unexpected value");
}
for (int i=0; i<=data[offset]; i++)
data[offset+i+1] = readInt(dis);
offset += data[offset]+2;
blocks++;
} while ((dtp.access2 == 'M') && (data[offset-1] == pointer+offset));
break;
default: // typ: fixed length,[x,fixed length, ...],0
do {
if ((dtp.length < 0) || (offset+dtp.length > BUFFER_SIZE-2)) {
dis.close();
throw new IOException("getData() unexpected value");
}
for (int i=0; i<=dtp.length; i++)
data[offset+i] = readInt(dis);
offset += dtp.length+1;
blocks++;
} while ((dtp.access2 == 'M') && (data[offset-1] == pointer+offset));
break;
}
dis.close();
return blocks;
}
/**
 * Reads the molecule list of a reaction entry and builds {@code mReaction} from it.
 * <p>
 * The record is expected as a single block: word 0 holds the molecule count,
 * followed by one signed registry number per molecule. Negative numbers are
 * added as reactants, positive numbers as products; the absolute registry
 * numbers are stored in {@code mMolRegNo}.
 *
 * @param entry entry whose molecule record is read via {@code getData()}
 * @param dtp   data type descriptor of the molecule record
 * @throws IOException if the record does not consist of exactly one block,
 *                     lists more molecules than {@code kMaxReactants},
 *                     or reading fails
 */
private void getReaction(int entry, DTP dtp) throws IOException {
    // getData() validates record lengths against BUFFER_SIZE, so the scratch array
    // has to be that large; a smaller one could be overrun by an oversized record.
    int[] data = new int[BUFFER_SIZE];
    if (getData(entry, data, dtp) != 1)
        throw new IOException("getReaction() no molecules");

    int moleculeCount = data[0];
    // mMolRegNo holds kMaxReactants entries; report oversized records as an
    // IOException instead of failing with an ArrayIndexOutOfBoundsException.
    if (moleculeCount > kMaxReactants)
        throw new IOException("getReaction() too many molecules: "+moleculeCount);

    mReaction = new Reaction();
    for (int i=1; i<=moleculeCount; i++) {
        int regNo = data[i];
        mMolRegNo[i-1] = Math.abs(regNo);
        if (regNo < 0)
            mReaction.addReactant(getMolecule(-regNo));
        else
            mReaction.addProduct(getMolecule(regNo));
    }
}
private StereoMolecule getMolecule(int regNo) throws IOException {
StereoMolecule mol = new StereoMolecule();
for(int i=0; i mol.getMaxAtoms()
|| mol.getAllBonds() > mol.getMaxBonds())
throw new IOException("getSema() max atoms or bonds exceeded");
int fragments = readBits(entryLen);
if (mol.getAllBonds() < mol.getAllAtoms()-fragments)
throw new IOException("getSema() unexpected few bonds");
int bnd = 0;
for (int i=0; i bond in ring
if (order == 0)
continue;
order &= 3;
mol.setBondType(bnd, order == 0 ? Molecule.cBondTypeDelocalized :
order == 1 ? Molecule.cBondTypeSingle :
order == 2 ? Molecule.cBondTypeDouble
: Molecule.cBondTypeTriple);
bnd++;
}
mol.setAllBonds(bnd);
int unknowns = readBits(entryLen); // trash E/Z bond info
for (int i=0; i> 1;
int datalen2 = (1 + readBits(8)) >> 1;
readBits(12);
int entryLen = readBits(4);
mBufferIndex = 2+datalen1;
int[] atms = new int[mReaction.getMolecules()];
for (int mol=0; mol entry)
// pointerStatistics[dtp.lnum][0] = entry;
//if (pointerStatistics[dtp.lnum][1] < entry)
// pointerStatistics[dtp.lnum][1] = entry;
//pointerStatistics[dtp.lnum][2] += entry;
//pointerStatistics[dtp.lnum][3] ++;
DataInputStream dis = getDataInputStream(pointerfile(dtp.drpoin));
dis.skipBytes(4+(1+entry)*dtp.drsize*4);
try {
int pointer = readInt(dis);
dis.close();
return pointer;
}
catch (EOFException e) {
File f = getFile(pointerfile(dtp.drpoin));
// System.out.println("getPointer(entry:"+Integer.toHexString(entry)+","+dtp.lnum+") skip:"+Integer.toHexString(4+(1+entry)*dtp.drsize*4)+" EOF filesize:"+Long.toHexString(f.length())+" "+f.getName());
dis.close();
mPointerErrors++;
return 0;
}
catch (IOException e) {
// System.out.println("getPointer(entry:"+Integer.toHexString(entry)+","+dtp.lnum+") skip:"+Integer.toHexString(4+(1+entry)*dtp.drsize*4)+" "+e.getMessage());
dis.close();
mPointerErrors++;
return 0;
}
}
/**
 * Reads the leading word of a data file and derives the size value from it.
 *
 * @param dis stream positioned at the start of the file
 * @return the negated header word if that word is negative (REACCS format),
 *         otherwise whatever {@code dis.available()} reports after the header
 *         has been consumed (ISIS format)
 * @throws IOException if reading from the stream fails
 */
private int readFileSize(DataInputStream dis) throws IOException {
    int header = readInt(dis);
    if (header < 0) {
        return -header;         // REACCS format
    }
    return dis.available();     // ISIS format
}
/**
 * Reads one 32-bit word from the stream and returns it with its byte order reversed.
 *
 * @param dis stream to read from
 * @return the byte-swapped word
 * @throws IOException if fewer than four bytes are available or reading fails
 */
private int readInt(DataInputStream dis) throws IOException {
    int raw = dis.readInt();
    return invertInt(raw);
}
/**
 * Reverses the order of the four bytes of {@code i}
 * (byte 0 becomes byte 3, byte 1 becomes byte 2, and vice versa).
 *
 * @param i value to convert
 * @return {@code i} with its bytes in reverse order
 */
private int invertInt(int i) {
    return Integer.reverseBytes(i);
}
/**
 * Derives {@code mYield} from one or two float values at the start of {@code mBuffer}.
 * <p>
 * Nothing happens if the first word is the blank marker {@code 0x20202020} or the
 * first value lies outside 0..100.1. If the second value is also within that
 * range, the mean of both is used. The result is rounded to the nearest integer.
 */
private void extractFloatYield() {
    if (mBuffer[0] == 0x20202020) {
        return;
    }

    double lower = convertFloat(mBuffer[0]);
    double upper = convertFloat(mBuffer[1]);

    boolean lowerValid = (lower >= 0 && lower <= 100.1);
    if (!lowerValid) {
        return;
    }

    boolean upperValid = (upper >= 0 && upper <= 100.1);
    if (upperValid) {
        lower = (lower + upper) / 2;
    }
    mYield = (int)(lower + 0.5);
}
/**
 * Takes the first word of {@code mBuffer} as yield if it lies within 0..100;
 * otherwise {@code mYield} is left untouched.
 */
private void extractIntYield() {
    int candidate = mBuffer[0];
    if (candidate < 0 || candidate > 100) {
        return;
    }
    mYield = candidate;
}
/**
 * Converts a 32-bit VAX float, given as an int, into a Java double.
 * <p>
 * Bit usage as decoded here: bit 15 is the sign, bits 7-14 the exponent
 * (biased by 128), bits 0-6 the upper and bits 16-31 the lower part of the
 * mantissa. An implicit leading mantissa bit is added. A word of 0 yields 0.
 *
 * @param i raw float word
 * @return the decoded value
 */
private double convertFloat(int i) {   // changes VAX float to Java double
    if (i == 0) {
        return 0;
    }

    int exponent = (i >> 7) & 0xFF;
    int mantissa = 0x00800000               // implicit leading bit
                 | ((i & 0x0000007F) << 16) // upper 7 mantissa bits
                 | (i >>> 16);              // lower 16 mantissa bits

    double magnitude = (double)mantissa/(double)0x01000000 * Math.pow(2, exponent-128);

    boolean negative = (i & 0x00008000) != 0;
    return negative ? -magnitude : magnitude;
}
private int readBits(int count) {
int retval = 0;
for(int i=0; i>>= 1;
if (mBitmask == 0) {
mBitmask = 0x80000000;
mBufferIndex++;
}
}
return retval;
}
/**
 * Resolves a file name inside the database directory.
 * The name is tried as given first; if no such file exists, the lower-case
 * variant of the name is used (without checking that it exists).
 *
 * @param filename file name relative to the database directory
 * @return the file with the name as given if it exists, otherwise the lower-case variant
 * @throws IOException declared for callers; not thrown by this implementation
 */
private File getFile(String filename) throws IOException {
    File exact = new File(mDirectory+filename);
    if (exact.exists()) {
        return exact;
    }
    return new File(mDirectory+filename.toLowerCase());
}
/**
 * Opens a file of the database directory for binary reading.
 * The name is tried as given first; if no such file exists, the lower-case
 * variant of the name is opened instead.
 *
 * @param filename file name relative to the database directory
 * @return a new stream on the file; the caller is responsible for closing it
 * @throws IOException if the chosen file cannot be opened
 */
private DataInputStream getDataInputStream(String filename) throws IOException {
    String path = mDirectory+filename;
    if (!new File(path).exists()) {
        path = mDirectory+filename.toLowerCase();
    }
    return new DataInputStream(new FileInputStream(path));
}
}
/**
 * Plain holder for one data-type descriptor record.
 * <p>
 * The byte widths noted on the fields (4 per int, 1 per byte) add up to
 * {@link #SIZE}. Meanings of the {@code unk*} fields are not known from this file.
 */
class DTP {
    static final int SIZE = 188;

    // 12 ints
    int lnum, drpoin, parentID, length;
    int typno, security, sbfpoin, sbfnum;
    int hash, unk5, unk6, unk7;

    String dtpnam;          // 20 bytes
    byte access1, access2;  // 1 byte each
    byte[] empty;           // 2 bytes
    int isparent;
    int[] depdata;          // 20 ints
    int[] rootID;           // 4 ints

    // 4 ints
    int unk9, unk10, drsize, unk11;
}
/**
 * Plain holder for one sub-field descriptor record.
 * <p>
 * The four 20-byte strings plus the 20 ints from {@code lnum} to {@code a17}
 * add up to {@link #SIZE}. {@code fieldNo} is not part of that sum; it is used
 * as an index into the field data array — presumably assigned at runtime, confirm.
 * Meanings of the {@code a*} fields are not known from this file.
 */
class SBF {
    static final int SIZE = 160;

    String shortnam;    // 20 bytes
    String format1;     // 20 bytes
    String format2;     // 20 bytes
    String name;        // 20 bytes

    // 20 ints
    int lnum, type, a2, a3;
    int datalen, datatyp, a5, dtppoin;
    int a7, begin, a8, a9;
    int a10, a11, a12, a13;
    int a14, a15, a16, a17;

    int fieldNo;
}