com.actelion.research.chem.io.NativeMDLReactionReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openchemlib Show documentation
Show all versions of openchemlib Show documentation
Open Source Chemistry Library
/*
* Copyright 2017 Idorsia Pharmaceuticals Ltd., Hegenheimermattweg 91, CH-4123 Allschwil, Switzerland
*
* This file is part of DataWarrior.
*
* DataWarrior is free software: you can redistribute it and/or modify it under the terms of the
* GNU General Public License as published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* DataWarrior is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with DataWarrior.
* If not, see http://www.gnu.org/licenses/.
*
* @author Thomas Sander
*/
package com.actelion.research.chem.io;
import com.actelion.research.chem.AromaticityResolver;
import com.actelion.research.chem.ExtendedMolecule;
import com.actelion.research.chem.Molecule;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.chem.reaction.Reaction;
import com.actelion.research.util.DoubleFormat;
import java.io.*;
import java.util.ArrayList;
public class NativeMDLReactionReader {
// Capacity (in ints) of the scratch buffers handed to getData(); getData()
// validates block lengths against this value.
private static final int BUFFER_SIZE = 512;
// Status codes returned by the text/field extraction helpers.
private static final int kErrNoError = 0;
private static final int kErrGetBranchNoData = -1;
private static final int kErrGetMolInfoNoParent = -2;
private static final int kErrGetMolInfoNoData = -3;
private static final int kErrVariationUnavailable = -4;
// Capacities of the registry-number arrays allocated in the constructor.
private static final int kMaxReactants = 16;
private static final int kMaxSolvents = 40;
private static final int kMaxCatalysts = 40;
// Database directory, stored with a trailing File.separator.
private String mDirectory;
// Directory tables; presumably filled by readDTP()/readSBF() (not visible here - confirm).
private DTP[] mDTPDir;
private SBF[] mSBFDir;
private DTP mRootDTP;
private int mReactionCount;
// Working buffer plus the cursor state (int index and bit mask) advanced by readBits().
private int[] mBuffer;
private int mBufferIndex;
private int mBitmask;
// Yield as assigned by extractFloatYield()/extractIntYield().
private double mYield;
// Reaction assembled by getReaction().
private Reaction mReaction;
// Text accumulated per category; exposed as String (or null when empty) by the get...Data() methods.
private StringBuffer mReactantData,mProductData,mSolventData,mCatalystData;
// mPointerErrors counts pointer reads that failed in the pointer lookup.
private int mSolventCount,mCatalystCount,mPointerErrors;
private int mFieldCount;
// Registry numbers; mMolRegNo is filled by getReaction() with absolute values.
private int[] mMolRegNo,mSolventRegNo,mCatalystRegNo;
// NOTE(review): raw ArrayList - any generic type argument appears to have been lost; confirm the element type.
private ArrayList mSolvents,mCatalysts;
// private long[][] pointerStatistics;
/**
 * Creates a reader for the database files located in the given directory.
 * The directory name is stored with a trailing {@link File#separator}, then
 * the two directory files "DTPDIR.DAT" and "SBFDIR.DAT" are read through
 * readDTP()/readSBF(). Afterwards the catalyst list and the registry-number
 * arrays are allocated and the pointer error counter is reset.
 *
 * NOTE(review): the commented-out loop line below looks truncated, and no
 * initialization of mSolvents or of the m...Data StringBuffers is visible in
 * this constructor - confirm against the upstream source that nothing was lost.
 *
 * @param directory path of the directory containing the database files (without trailing separator)
 * @throws IOException declared for failures while reading the directory files
 */
public NativeMDLReactionReader(String directory) throws IOException {
mDirectory = directory + File.separator;
readDTP("DTPDIR.DAT");
readSBF("SBFDIR.DAT");
// pointerStatistics = new long[mDTPDir.length+1][4];
// for (int i=0; i();
mCatalysts = new ArrayList();
// fixed-capacity registry-number tables, sized by the kMax... constants
mMolRegNo = new int[kMaxReactants];
mSolventRegNo = new int[kMaxSolvents];
mCatalystRegNo = new int[kMaxCatalysts];
mPointerErrors = 0;
}
// public void printPointerStatistics() {
// System.out.println("\tmin\tmax\tmean\tcount\tentries\tdrsize");
// for (int i=0; i= blocks)
throw new IOException("invalid variation");
variationPointer = mBuffer[variation];
break;
}
}
if (variationPointer == -1)
throw new IOException("no VARIATION data type");
for (int i=0; i getCatalysts() {
return mCatalysts;
}
/**
 * Gives access to the solvent list held by this reader.
 *
 * @return the reader's own solvent list instance (not a copy)
 */
public ArrayList getSolvents() {
	return this.mSolvents;
}
/**
 * Returns the text collected for the reactants.
 *
 * @return the accumulated reactant text, or null when nothing was collected
 */
public String getReactantData() {
	if (mReactantData.length() == 0)
		return null;
	return mReactantData.toString();
}
/**
 * Returns the text collected for the products.
 *
 * @return the accumulated product text, or null when nothing was collected
 */
public String getProductData() {
	if (mProductData.length() == 0)
		return null;
	return mProductData.toString();
}
/**
 * Returns the text collected for the solvents.
 *
 * @return the accumulated solvent text, or null when nothing was collected
 */
public String getSolventData() {
	if (mSolventData.length() == 0)
		return null;
	return mSolventData.toString();
}
/**
 * Returns the text collected for the catalysts.
 *
 * @return the accumulated catalyst text, or null when nothing was collected
 */
public String getCatalystData() {
	if (mCatalystData.length() == 0)
		return null;
	return mCatalystData.toString();
}
public String[] getFieldNames() {
mFieldCount = 0;
for (int i=0; i= blocks)
return kErrVariationUnavailable;
data[0] = data[variation];
blocks = 1;
}
int indentation = 0;
for (int i=1; i<4; i++)
if (dtp.rootID[i] != 0)
indentation++;
if (dtp.isparent != 0) {
for (int eintrag=0; eintrag data[offset])
break;
int datapoin = offset+mSBFDir[sbf].begin-(dtp.length == 0 ? 0 : 1);
if (mSBFDir[sbf].type == 1) { // float-float range
if (mSBFDir[sbf].format2.length() == 0 || data[datapoin] == 0x20202020)
continue;
String text = filterText(formatedString(data, datapoin, mSBFDir[sbf]));
appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
}
else if (mSBFDir[sbf].type == 2) { // fixed length text
if (data[datapoin] == 0x80808080)
continue;
StringBuffer buf = new StringBuffer();
int v = 0;
for (int i=0; i> 2)] : v >>> 8;
buf.append((char)(v & 0x000000FF));
}
String text = filterText(buf.toString());
appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
}
else if (mSBFDir[sbf].type == 4) { // integer
if (data[datapoin] == 0x20202020)
continue;
String text = ""+data[datapoin];
appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
}
else if (mSBFDir[sbf].type == 5) { // variable length text
if (data[datapoin] == 0 || data[datapoin] == 0x80808080)
continue;
int length = 4*(data[offset]+1-mSBFDir[sbf].begin);
StringBuffer buf = new StringBuffer();
int v = 0;
for (int i=0; i> 2)] : v >>> 8;
buf.append((char)(v & 0x000000FF));
}
String text = filterText(buf.toString());
appendFieldData(fieldData, mSBFDir[sbf].fieldNo, text);
}
}
offset += (dtp.length != 0) ? dtp.length+1 : data[offset]+2;
}
/* if ((mReactionTextP->getArrowLines() == 0) && !strncmp(mDTPDir[dtp].dtpnam,"RXNTEXT",7)) {
offset = 0;
for (block=0; block 0)
length--;
mReactionTextP->addArrowText( tptr, length );
offset += *(data+offset)+2;
}
}*/
}
return kErrNoError;
}
/**
 * Stores a text value in the given slot of the field array. An empty (null)
 * slot simply receives the text; an occupied slot gets the text appended on
 * a new line.
 *
 * @param fieldData array of field values, updated in place
 * @param index     slot to write to
 * @param text      value to store or append
 */
private void appendFieldData(String[] fieldData, int index, String text) {
	final String existing = fieldData[index];
	fieldData[index] = (existing == null) ? text : existing + '\n' + text;
}
private int putMolText(int entry, StringBuffer text, int mol, DTP dtp) {
int[] data = new int[BUFFER_SIZE];
if (dtp.isparent == 1)
return kErrGetMolInfoNoParent;
int blocks = 0;
try {
blocks = getData(entry, data, dtp);
}
catch (IOException e) {}
if (blocks == 0)
return kErrGetMolInfoNoData;
if (dtp.typno == 0) {
int offset = 0;
for (int block=0; block data[offset])
break;
text1 = mSBFDir[sbf].name;
int datapoin = offset+mSBFDir[sbf].begin-(dtp.length == 0 ? 0 : 1);
if (mSBFDir[sbf].type == 1) { // float-float range
if (mSBFDir[sbf].format2.length() == 0 || data[datapoin] == 0x20202020) {
text2 = "";
continue;
}
text2 = filterText(formatedString(data, datapoin, mSBFDir[sbf]));
}
else if (mSBFDir[sbf].type == 2) { // fixed length text
if (data[datapoin] == 0x80808080) {
text2 = "";
continue;
}
StringBuffer buf = new StringBuffer();
int v = 0;
for (int i=0; i> 2)] : v >>> 8;
buf.append((char)(v & 0x000000FF));
}
text2 = filterText(buf.toString());
}
else if (mSBFDir[sbf].type == 4) { // integer
if (data[datapoin] == 0x20202020) {
text2 = "";
continue;
}
text2 = ""+data[datapoin];
}
else if (mSBFDir[sbf].type == 5) { // variable length text
if (data[datapoin] == 0 || data[datapoin] == 0x80808080) {
text2 = "";
continue;
}
int length = 4*(data[offset]+1-mSBFDir[sbf].begin);
StringBuffer buf = new StringBuffer();
int v = 0;
for (int i=0; i> 2)] : v >>> 8;
buf.append((char)(v & 0x000000FF));
}
text2 = filterText(buf.toString());
}
if (text2 != null) {
if (text.length() != 0)
text.append('\n');
text.append(""+(mol+1)+") "+text1);
if (!text1.endsWith(":"))
text.append(":");
text.append(text2);
}
}
offset += (dtp.length != 0) ? dtp.length+1 : data[offset]+2;
}
}
return kErrNoError;
}
/**
 * Renders a float or float-range field according to the field's format2 template.
 * <p>
 * The template is scanned character by character:
 * <ul>
 * <li>'R' consumes the next VAX float from <code>data</code> (starting at
 *     <code>datapoin</code>) and appends its formatted value. At most two values are
 *     rendered; a third 'R' ends the scan. If the second value equals the first,
 *     everything appended after the first value is dropped, so a degenerate range
 *     prints as a single number.</li>
 * <li>Text enclosed in single quotes is copied literally, with '-' expanded to " - ".</li>
 * </ul>
 * A blank is inserted before the second value when the text so far ends in a digit,
 * so that the two numbers do not run together.
 *
 * @param data     buffer holding the raw field data
 * @param datapoin index of the first float value within data
 * @param sbf      field descriptor supplying the format2 template
 * @return the formatted text
 */
private String formatedString(int[] data, int datapoin, SBF sbf) {
	final String format = sbf.format2;
	final int formatLength = format.length();
	double[] range = new double[2];
	StringBuilder string = new StringBuilder();
	int dataCount = 0;
	int formatpoin = 0;
	int lengthAfterR1 = 0;
	while (formatpoin < formatLength) {
		if (format.charAt(formatpoin) == 'R') {
			if (dataCount > 1)
				return string.toString();
			if (dataCount == 1 && string.length() > 0) {
				char previous = string.charAt(string.length()-1);
				// compare against the character '9'; comparing against the number 9 never matched
				if (previous >= '0' && previous <= '9')
					string.append(' ');
			}
			range[dataCount] = convertFloat(data[datapoin]);
			if (dataCount != 0) { // second float value
				if (range[0] == range[1]) {
					// identical bounds: keep only the first value
					string.setLength(lengthAfterR1);
					formatpoin++;
					continue;
				}
			}
			string.append(DoubleFormat.toString(1.00000001*range[dataCount]));
			if (dataCount == 0)
				lengthAfterR1 = string.length();
			dataCount++;
			datapoin++;
		}
		if (format.charAt(formatpoin) == '\'') {
			formatpoin++;
			// bounds are tested before charAt() so that an unterminated quote
			// ends the literal instead of raising StringIndexOutOfBoundsException
			while (formatpoin < 20
				&& formatpoin < formatLength
				&& format.charAt(formatpoin) != '\'') {
				if (format.charAt(formatpoin) == '-')
					string.append(" - ");
				else
					string.append(format.charAt(formatpoin));
				formatpoin++;
			}
		}
		formatpoin++;
	}
	return string.toString();
}
/**
 * Hook applied to every extracted text value before it is stored.
 * Currently the text is passed through untouched.
 *
 * @param s extracted text
 * @return the very same string
 */
private String filterText(String s) {
	final String unchanged = s;
	return unchanged;
}
private void getDeepCatalysts(int entry, DTP dtp) throws IOException {
//get info about catalysts/solvents if datatypes are two levels down VARIATION
int[] data = new int[50];
int blocks = getData(entry, data, mDTPDir[dtp.rootID[1]-1]);
for (int block=0; block= size) {
dis.close();
throw new IOException("pointer >= filesize");
}
dis.skipBytes(4*pointer);
int offset = 0;
int blocks = 0;
switch (dtp.length) {
case 0: // typ: n,data,[x,n,data ...],0
do {
data[offset] = readInt(dis);
if (data[offset] < 0 || data[offset] >= BUFFER_SIZE-offset-2) {
dis.close();
throw new IOException("getData() unexpected value");
}
for (int i=0; i<=data[offset]; i++)
data[offset+i+1] = readInt(dis);
offset += data[offset]+2;
blocks++;
} while ((dtp.access2 == 'M') && (data[offset-1] == pointer+offset));
break;
default: // typ: fixed length,[x,fixed length, ...],0
do {
if ((dtp.length < 0) || (offset+dtp.length > BUFFER_SIZE-2)) {
dis.close();
throw new IOException("getData() unexpected value");
}
for (int i=0; i<=dtp.length; i++)
data[offset+i] = readInt(dis);
offset += dtp.length+1;
blocks++;
} while ((dtp.access2 == 'M') && (data[offset-1] == pointer+offset));
break;
}
dis.close();
return blocks;
}
/**
 * Loads the molecules of one reaction entry and builds {@link Reaction} mReaction.
 * <p>
 * The data block is expected to consist of a molecule count in data[0] followed by
 * that many signed registry numbers: negative numbers denote reactants, positive
 * numbers products. The absolute registry numbers are recorded in mMolRegNo.
 *
 * @param entry entry number to read
 * @param dtp   data type descriptor used for the lookup
 * @throws IOException if not exactly one data block is found, if the block lists
 *                     more molecules than mMolRegNo can hold, or if reading fails
 */
private void getReaction(int entry, DTP dtp) throws IOException {
	// getData() validates block lengths against BUFFER_SIZE, so the buffer must be that large
	int[] data = new int[BUFFER_SIZE];
	if (getData(entry, data, dtp) != 1)
		throw new IOException("getReaction() no molecules");
	final int molCount = data[0];
	// report corrupt counts as IOException rather than overrunning mMolRegNo
	if (molCount > mMolRegNo.length)
		throw new IOException("getReaction() too many molecules: "+molCount);
	mReaction = new Reaction();
	for (int i=1; i<=molCount; i++) {
		mMolRegNo[i-1] = Math.abs(data[i]);
		if (data[i] < 0)
			mReaction.addReactant(getMolecule(-data[i]));
		else
			mReaction.addProduct(getMolecule(data[i]));
	}
}
private StereoMolecule getMolecule(int regNo) throws IOException {
StereoMolecule mol = new StereoMolecule();
for(int i=0; i mol.getMaxAtoms()
|| mol.getAllBonds() > mol.getMaxBonds())
throw new IOException("getSema() max atoms or bonds exceeded");
int fragments = readBits(entryLen);
if (mol.getAllBonds() < mol.getAllAtoms()-fragments)
throw new IOException("getSema() unexpected few bonds");
int bnd = 0;
for (int i=0; i bond in ring
if (order == 0)
continue;
order &= 3;
mol.setBondType(bnd, order == 0 ? Molecule.cBondTypeDelocalized :
order == 1 ? Molecule.cBondTypeSingle :
order == 2 ? Molecule.cBondTypeDouble
: Molecule.cBondTypeTriple);
bnd++;
}
mol.setAllBonds(bnd);
int unknowns = readBits(entryLen); // trash E/Z bond info
for (int i=0; i> 1;
int datalen2 = (1 + readBits(8)) >> 1;
readBits(12);
int entryLen = readBits(4);
mBufferIndex = 2+datalen1;
int[] atms = new int[mReaction.getMolecules()];
for (int mol=0; mol entry)
// pointerStatistics[dtp.lnum][0] = entry;
//if (pointerStatistics[dtp.lnum][1] < entry)
// pointerStatistics[dtp.lnum][1] = entry;
//pointerStatistics[dtp.lnum][2] += entry;
//pointerStatistics[dtp.lnum][3] ++;
DataInputStream dis = getDataInputStream(pointerfile(dtp.drpoin));
dis.skipBytes(4+(1+entry)*dtp.drsize*4);
try {
int pointer = readInt(dis);
dis.close();
return pointer;
}
catch (EOFException e) {
File f = getFile(pointerfile(dtp.drpoin));
// System.out.println("getPointer(entry:"+Integer.toHexString(entry)+","+dtp.lnum+") skip:"+Integer.toHexString(4+(1+entry)*dtp.drsize*4)+" EOF filesize:"+Long.toHexString(f.length())+" "+f.getName());
dis.close();
mPointerErrors++;
return 0;
}
catch (IOException e) {
// System.out.println("getPointer(entry:"+Integer.toHexString(entry)+","+dtp.lnum+") skip:"+Integer.toHexString(4+(1+entry)*dtp.drsize*4)+" "+e.getMessage());
dis.close();
mPointerErrors++;
return 0;
}
}
/**
 * Determines the file size from the leading int of the stream.
 * A negative leading value carries the size itself (negated, REACCS format);
 * otherwise the number of bytes reported as available by the stream is used
 * (ISIS format).
 *
 * @param dis stream positioned at the start of the file
 * @return the size value as described above
 * @throws IOException if reading from the stream fails
 */
private int readFileSize(DataInputStream dis) throws IOException {
	final int header = readInt(dis);
	if (header < 0)
		return -header;		// REACCS format
	return dis.available();	// ISIS format
}
/**
 * Reads one 32-bit integer from the stream and returns it with its byte order reversed.
 *
 * @param dis stream to read from
 * @return the byte-swapped integer
 * @throws IOException if reading from the stream fails
 */
private int readInt(DataInputStream dis) throws IOException {
	final int raw = dis.readInt();
	return invertInt(raw);
}
/**
 * Reverses the order of the four bytes of an int.
 *
 * @param i value to convert
 * @return the value with bytes 0..3 swapped to 3..0
 */
private int invertInt(int i) {
	return Integer.reverseBytes(i);
}
/**
 * Derives the yield from two VAX floats at the start of mBuffer.
 * Nothing happens when the first int is 0x20202020 or when the first value is
 * outside 0..100.1. If the second value is within that range as well, the mean
 * of both is used. The result is rounded to the nearest integer and stored in mYield.
 */
private void extractFloatYield() {
	if (mBuffer[0] == 0x20202020)
		return;
	double first = convertFloat(mBuffer[0]);
	final double second = convertFloat(mBuffer[1]);
	final boolean firstInRange = first >= 0 && first <= 100.1;
	if (!firstInRange)
		return;
	final boolean secondInRange = second >= 0 && second <= 100.1;
	if (secondInRange)
		first = (first + second) / 2;
	mYield = (int)(first + 0.5);
}
/**
 * Takes the first int of mBuffer as yield, provided it lies within 0..100;
 * otherwise mYield is left unchanged.
 */
private void extractIntYield() {
	final int candidate = mBuffer[0];
	if (candidate < 0 || candidate > 100)
		return;
	mYield = candidate;
}
/**
 * Converts a VAX float, given as raw int, into a Java double.
 * The exponent is taken from bits 7..14, the sign from bit 15; the mantissa is
 * assembled from the low 7 bits and the upper 16 bits with the implicit leading
 * bit (0x00800000) added. A raw value of 0 yields 0.
 *
 * @param i raw VAX float bits
 * @return the corresponding double value
 */
private double convertFloat(int i) {
	if (i == 0)
		return 0;
	final int exponent = (i & 0x00007F80) >> 7;
	final int mantissa = 0x00800000 | ((i & 0x0000007F) << 16) | ((i & 0xFFFF0000) >>> 16);
	final double magnitude = (double)mantissa/(double)0x01000000 * Math.pow(2, exponent-128);
	final boolean negative = (i & 0x00008000) != 0;
	return negative ? -magnitude : magnitude;
}
private int readBits(int count) {
int retval = 0;
for(int i=0; i>>= 1;
if (mBitmask == 0) {
mBitmask = 0x80000000;
mBufferIndex++;
}
}
return retval;
}
/**
 * Resolves a database file inside the reader's directory. The name is tried as
 * given first; if no such file exists, the lower-case variant of the name is used
 * (without checking that it exists).
 *
 * @param filename file name relative to the database directory
 * @return the resolved file
 * @throws IOException declared for callers; not thrown by the lookup itself
 */
private File getFile(String filename) throws IOException {
	final File exact = new File(mDirectory+filename);
	return exact.exists() ? exact : new File(mDirectory+filename.toLowerCase());
}
/**
 * Opens a database file inside the reader's directory for binary reading.
 * The name is used as given if such a file exists, otherwise its lower-case
 * variant is opened.
 *
 * @param filename file name relative to the database directory
 * @return a new stream on the file; the caller is responsible for closing it
 * @throws IOException if the file cannot be opened
 */
private DataInputStream getDataInputStream(String filename) throws IOException {
	final boolean exactExists = new File(mDirectory+filename).exists();
	final String path = exactExists ? mDirectory+filename
									: mDirectory+filename.toLowerCase();
	return new DataInputStream(new FileInputStream(path));
}
}
/**
 * Plain field holder for one data-type descriptor (an element of the reader's
 * DTP directory). SIZE equals the summed byte widths of the fields below
 * (ints as 4 bytes, byte counts as annotated) - presumably the length of one
 * record in the directory file; confirm against the reading code.
 */
class DTP {
	static final int SIZE = 188;

	int lnum, drpoin, parentID;
	int length, typno, security;
	int sbfpoin, sbfnum;
	int hash;
	int unk5, unk6, unk7;		// meaning unknown
	String dtpnam;				// 20 bytes
	byte access1, access2;
	byte[] empty;				// 2 bytes
	int isparent;
	int[] depdata;				// 20 ints
	int[] rootID;				// 4 ints
	int unk9, unk10;			// meaning unknown
	int drsize;
	int unk11;					// meaning unknown
}
/**
 * Plain field holder for one field descriptor (an element of the reader's SBF
 * directory). SIZE equals four 20-byte strings plus twenty ints; fieldNo is
 * presumably not part of the stored record but assigned by the reader - confirm
 * against the reading code.
 */
class SBF {
	static final int SIZE = 160;

	String shortnam, format1, format2, name;	// 20 bytes each
	int lnum, type;
	int a2, a3;
	int datalen, datatyp;
	int a5;
	int dtppoin;
	int a7;
	int begin;
	int a8, a9, a10, a11, a12;
	int a13, a14, a15, a16, a17;
	int fieldNo;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy