// org.openscience.cdk.io.random.RandomAccessReader Maven / Gradle / Ivy
/* Copyright (C) 2005-2008 Nina Jeliazkova
* 2009 Egon Willighagen
*
* Contact: [email protected]
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
* All we ask is that proper credit is given for our work, which includes
* - but is not limited to - adding the above copyright notice to the beginning
* of your source code files, and to any copyright notice that you may distribute
* with programs based on this work.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.io.random;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.StringReader;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.io.IChemObjectReader;
import org.openscience.cdk.io.ISimpleChemObjectReader;
import org.openscience.cdk.io.ReaderEvent;
import org.openscience.cdk.io.listener.IChemObjectIOListener;
import org.openscience.cdk.io.listener.IReaderListener;
import org.openscience.cdk.io.setting.IOSetting;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
/**
* Random access to text files of compounds.
* Reads the file as text and builds an index file if one does not already exist.
* The index stores offset, length and a third field reserved for future use.
* Subsequent access for a record N uses this index to seek the record and return the molecule.
* Useful for very big files.
*
* @author Nina Jeliazkova
* @cdk.module io
* @cdk.githash
*/
public abstract class RandomAccessReader extends DefaultRandomAccessChemObjectReader implements
IRandomAccessChemObjectReader {
/** Logger used by this class for debug, warning and error output. */
protected static ILoggingTool logger = LoggingToolFactory.createLoggingTool(RandomAccessReader.class);
/** The indexed data file; opened read-only ("r") by the constructor. */
protected RandomAccessFile raFile;
/** Not assigned or read anywhere in this class. */
protected IOSetting[] headerOptions = null;
/** Absolute path of the indexed file, captured by the constructor. */
private final String filename;
/** Reader that turns the text of one record into a chem object. */
protected ISimpleChemObjectReader chemObjectReader;
/** Version number written as the first line of the index file and checked when an index is loaded. */
protected int indexVersion = 1;
/*
 * One row per record:
 * index[record][0] - record offset in the file
 * index[record][1] - record length in bytes
 * index[record][2] - number of atoms (if available); makeIndex() stores -1 here
 */
protected long[][] index = null;
/** Number of valid rows in {@link #index}. */
protected int records;
/** Zero-based number of the record most recently requested through readRecord. */
protected int currentRecord = 0;
/** Buffer the record bytes are read into; allocated with the length of the longest indexed record. */
protected byte[] b;
/** Builder used to create the IChemFile instance handed to the chem object reader. */
protected IChemObjectBuilder builder;
/** Flag toggled by setIndexCreated(boolean) while the index is being loaded or built. */
protected boolean indexCreated = false;
/**
 * Reads the file and builds an index file, if the index file doesn't already exist.
 * Delegates to the three-argument constructor with a {@code null} listener.
 *
 * @param file the file object containing the molecules to be indexed
 * @param builder a chem object builder
 * @throws IOException if there is an error during reading
 */
public RandomAccessReader(File file, IChemObjectBuilder builder) throws IOException {
this(file, builder, null);
}
/**
 * Opens {@code file} read-only and loads or builds its index.
 *
 * @param file     the file object containing the molecules to be indexed
 * @param builder  a chem object builder
 * @param listener listener for read events; ignored when {@code null}
 * @throws IOException if there is an error during reading
 */
public RandomAccessReader(File file, IChemObjectBuilder builder, IReaderListener listener) throws IOException {
    this.filename = file.getAbsolutePath();
    this.builder = builder;
    // The delegate reader is installed before the listener is registered, so that
    // addChemObjectIOListener can forward the listener to it.
    setChemObjectReader(createChemObjectReader());
    if (listener != null) {
        addChemObjectIOListener(listener);
    }
    this.raFile = new RandomAccessFile(file, "r");
    this.records = 0;
    setIndexCreated(false);
    indexTheFile();
}
/**
 * Closes the underlying file when the reader is finalized.
 * A failure to close is logged at debug level together with its cause and is
 * otherwise ignored; {@code super.finalize()} is always invoked.
 *
 * @throws Throwable if {@code super.finalize()} fails
 */
@Override
protected void finalize() throws Throwable {
    try {
        close();
    } catch (Exception x) {
        // Nothing useful can be done here, but keep the cause visible in the log.
        logger.debug("Error during finalize: ", x);
    } finally {
        super.finalize();
    }
}
/**
 * Returns the object stored at the given record number.
 *
 * <p>Record numbers are zero-based!
 *
 * @param record zero-based record number
 * @return the chem object parsed from the record text
 * @throws Exception if the record cannot be read or parsed, or if no chem object reader is set
 */
@Override
public synchronized IChemObject readRecord(int record) throws Exception {
    // The raw text is fetched first, so an invalid record number is reported
    // before a missing reader is.
    final String content = readContent(record);
    if (chemObjectReader == null) {
        throw new CDKException("No chemobject reader!");
    }
    chemObjectReader.setReader(new StringReader(content));
    currentRecord = record;
    return processContent();
}
/**
 * Reads the record text content into a String.
 *
 * <p>The record's offset and length are taken from the index; the bytes are read
 * into the shared buffer {@code b} and decoded with the platform default charset.
 *
 * @param record The record number (zero-based)
 * @return A String representation of the record
 * @throws java.io.IOException if error occurs during reading, including reaching the end of
 *         the file before the whole record was read
 * @throws org.openscience.cdk.exception.CDKException if the record number is invalid
 */
protected String readContent(int record) throws IOException, CDKException {
    logger.debug("Current record ", record);
    if ((record < 0) || (record >= records)) {
        throw new CDKException("No such record " + record);
    }
    //fireFrameRead();
    raFile.seek(index[record][0]);
    int length = (int) index[record][1];
    // read() is allowed to return fewer bytes than requested, which would leave stale
    // bytes from a previous record in the buffer; readFully() reads exactly 'length'
    // bytes or fails with an EOFException.
    raFile.readFully(b, 0, length);
    // NOTE(review): decoding uses the platform default charset - confirm this is intended.
    return new String(b, 0, length);
}
/**
 * Parses the record that the chem object reader has already been pointed at.
 * A new {@code IChemFile} is created with the builder and handed to the reader.
 *
 * @return the read IChemObject
 * @throws CDKException an error occurred whilst reading the file
 */
protected IChemObject processContent() throws CDKException {
    final IChemFile container = builder.newInstance(IChemFile.class);
    return chemObjectReader.read(container);
}
/**
 * Returns a copy of an index table with a different number of rows.
 *
 * <p>Rows present in both tables are copied; additional rows are zero-filled.
 * When {@code newLength} is smaller than the current table the trailing rows are dropped.
 *
 * @param index     the table to copy; every row must have at least three columns
 * @param newLength number of rows of the returned table
 * @return a new {@code newLength x 3} table
 */
protected long[][] resize(long[][] index, int newLength) {
    long[][] newIndex = new long[newLength][3];
    // Copy only the rows that exist in both tables, so shrinking cannot overrun newIndex.
    int rowsToCopy = Math.min(index.length, newLength);
    for (int i = 0; i < rowsToCopy; i++) {
        System.arraycopy(index[i], 0, newIndex[i], 0, 3);
    }
    return newIndex;
}
/**
 * Decides whether a line terminates a record. makeIndex() calls this for every
 * line read from the file while building the index.
 *
 * @param line a line of the indexed file, without its line terminator
 * @return {@code true} if the line marks the end of a record
 */
protected abstract boolean isRecordEnd(String line);
/**
 * Writes the in-memory index to a text file.
 *
 * <p>Layout, one item per line: index version, indexed file name, indexed file length,
 * number of records, one tab-separated line (offset, length, third field) per record,
 * then the number of records and the file name again as a trailer.
 * When there are no records the index file is deleted instead.
 *
 * @param file the index file to write
 * @throws Exception if the index cannot be written
 */
protected synchronized void saveIndex(File file) throws Exception {
    if (records == 0) {
        file.delete();
        return;
    }
    FileWriter out = new FileWriter(file);
    try {
        // header
        out.write(Integer.toString(indexVersion));
        out.write('\n');
        out.write(filename);
        out.write('\n');
        out.write(Long.toString(raFile.length()));
        out.write('\n');
        out.write(Integer.toString(records));
        out.write('\n');
        // one line per record
        for (int i = 0; i < records; i++) {
            out.write(Long.toString(index[i][0]));
            out.write('\t');
            out.write(Long.toString(index[i][1]));
            out.write('\t');
            out.write(Long.toString(index[i][2]));
            out.write('\n');
        }
        // trailer, verified by loadIndex
        out.write(Integer.toString(records));
        out.write('\n');
        out.write(filename);
        out.write('\n');
    } finally {
        // Release the file handle even when a write fails.
        out.close();
    }
}
/**
 * Loads a previously saved index from a text file written by saveIndex.
 *
 * <p>The index is accepted only if its version, the indexed file name, the indexed file
 * length and the trailer all match. The fields {@code index}, {@code records} and
 * {@code b} are replaced only after the whole file has been validated, and the reader is
 * closed on every path.
 *
 * @param file the index file to read
 * @throws Exception if the index file is unreadable, malformed, or belongs to a different
 *         file or file size
 */
protected synchronized void loadIndex(File file) throws Exception {
    BufferedReader in = new BufferedReader(new FileReader(file));
    try {
        // --- header ---
        String version = in.readLine();
        int parsedVersion;
        try {
            parsedVersion = Integer.parseInt(version);
        } catch (NumberFormatException x) {
            throw new Exception("Invalid index version " + version, x);
        }
        if (parsedVersion != indexVersion) {
            throw new Exception("Expected index version " + indexVersion + " instead of " + version);
        }
        String fileIndexed = in.readLine();
        if (!filename.equals(fileIndexed)) {
            throw new Exception("Index for " + fileIndexed + " found instead of " + filename + ". Creating new index.");
        }
        // saveIndex writes the length as a long, so it must be parsed as one
        // (files larger than 2 GB would otherwise never match).
        long fileLength = Long.parseLong(in.readLine());
        if (fileLength != raFile.length()) {
            throw new Exception("Index for file of size " + fileLength + " found instead of " + raFile.length());
        }
        int indexLength = Integer.parseInt(in.readLine());
        if (indexLength <= 0) {
            throw new Exception("Index of zero lenght! " + file.getAbsolutePath());
        }

        // --- records ---
        long[][] loaded = new long[indexLength][3];
        int maxRecordLength = 0;
        for (int i = 0; i < indexLength; i++) {
            String line = in.readLine();
            if (line == null) {
                throw new Exception("Error reading index! Unexpected end of index after " + i + " records");
            }
            String[] fields = line.split("\t");
            if (fields.length < 3) {
                throw new Exception("Error reading index! " + line);
            }
            for (int j = 0; j < 3; j++) {
                try {
                    loaded[i][j] = Long.parseLong(fields[j]);
                } catch (NumberFormatException x) {
                    throw new Exception("Error reading index! " + fields[j], x);
                }
            }
            if (maxRecordLength < loaded[i][1]) {
                maxRecordLength = (int) loaded[i][1];
            }
        }

        // --- trailer ---
        int indexLength2 = Integer.parseInt(in.readLine());
        if (indexLength2 <= 0) {
            throw new Exception("Index of zero lenght!");
        }
        if (indexLength2 != indexLength) {
            throw new Exception("Wrong index length!");
        }
        String trailer = in.readLine();
        if (!filename.equals(trailer)) {
            throw new Exception("Index for " + trailer + " found instead of " + filename);
        }

        // Everything checked out: publish the loaded index.
        index = loaded;
        records = indexLength;
        b = new byte[maxRecordLength];
        //fireFrameRead();
    } finally {
        in.close();
    }
}
/**
 * Loads the index file {@link #getIndexFile(String)} if it exists and is valid;
 * otherwise scans the data file line by line, builds a new index and saves it.
 *
 * <p>While scanning, a record starts right after the previous record-end line and its
 * length extends four bytes into its own record-end line (see the comment at
 * {@code end += 4}). Text after the last record-end line is not indexed.
 * A failure to save the freshly built index is logged and otherwise ignored.
 *
 * @throws Exception if the data file cannot be read
 */
protected synchronized void makeIndex() throws Exception {
    File indexFile = getIndexFile(filename);
    if (indexFile.exists()) {
        try {
            loadIndex(indexFile);
            setIndexCreated(true);
            return;
        } catch (Exception x) {
            // A stale or corrupt index is not fatal: report it and rebuild below.
            logger.warn(x.getMessage());
        }
    }
    indexCreated = false;
    long now = System.currentTimeMillis();

    // Initial capacity guess: one record per 1000 bytes. The division is done in long
    // arithmetic (the file may exceed 2 GB) and the guess is clamped, because the table
    // grows on demand anyway.
    final int assumedRecordLength = 1000;
    final long maxInitialCapacity = 1L << 20;
    long estimate = raFile.length() / assumedRecordLength;
    int capacity = (int) Math.max(1L, Math.min(estimate, maxInitialCapacity));
    index = new long[capacity][3];

    int maxRecordLength = 0;
    long start = 0;
    long end = 0;
    raFile.seek(0);
    records = 0;
    String s;
    while ((s = raFile.readLine()) != null) {
        if (isRecordEnd(s)) {
            //fireFrameRead();
            if (records >= index.length) {
                // Grow by half of the current size; this always yields a strictly larger table.
                long grown = (long) index.length + Math.max(1, index.length / 2);
                index = resize(index, (int) Math.min(grown, (long) Integer.MAX_VALUE - 8));
            }
            // 'end' is the offset where the record-end line begins; the record is made to
            // include the first four bytes of that line.
            // NOTE(review): presumably the length of a "$$$$"-style terminator - confirm
            // against the isRecordEnd implementations.
            end += 4;
            index[records][0] = start;
            index[records][1] = end - start;
            index[records][2] = -1;
            if (maxRecordLength < index[records][1]) {
                maxRecordLength = (int) index[records][1];
            }
            records++;
            start = raFile.getFilePointer();
        } else {
            end = raFile.getFilePointer();
        }
    }
    b = new byte[maxRecordLength];
    //fireFrameRead();
    logger.info("Index created in " + (System.currentTimeMillis() - now) + " ms.");
    try {
        saveIndex(indexFile);
    } catch (Exception x) {
        // The in-memory index is still usable; only persisting it failed.
        logger.error(x);
    }
}
/**
 * Locates the index file for a data file: {@code <name>_cdk.index} inside the directory
 * named by the "java.io.tmpdir" system property, where {@code <name>} is the last path
 * element of {@code filename}.
 *
 * @param filename the name of the file for which the index was generated
 * @return a file object representing the index file
 */
public static File getIndexFile(String filename) {
    final String indexDirectory = System.getProperty("java.io.tmpdir");
    final String indexName = new File(filename).getName() + "_cdk.index";
    return new File(indexDirectory, indexName);
}
/**
 * Closes the underlying random access file.
 * Does nothing if the file was never opened, e.g. when the constructor failed before
 * the file handle was created and the instance is later finalized.
 *
 * @throws IOException if closing the file fails
 * @see java.io.Closeable#close()
 */
public void close() throws IOException {
    if (raFile != null) {
        raFile.close();
    }
    //TODO
    //removeChemObjectIOListener(listener)
}
/**
 * Returns the reader used to parse record text.
 *
 * @return the current chem object reader; {@code null} if none has been set
 */
public synchronized IChemObjectReader getChemObjectReader() {
return chemObjectReader;
}
/**
 * Creates the reader used to parse record text. The constructor calls this method
 * and installs the result via {@link #setChemObjectReader(ISimpleChemObjectReader)}.
 *
 * @return a new chem object reader
 */
public abstract ISimpleChemObjectReader createChemObjectReader();
/**
 * Replaces the reader used to parse record text.
 *
 * @param chemObjectReader the reader to use from now on
 */
public synchronized void setChemObjectReader(ISimpleChemObjectReader chemObjectReader) {
this.chemObjectReader = chemObjectReader;
}
/**
 * Reports whether a record exists after the current one.
 *
 * @return {@code true} if the current record number is smaller than the last record number
 * @see java.util.Iterator#hasNext()
 */
@Override
public boolean hasNext() {
return currentRecord < (records - 1);
}
/**
 * Reports whether a record exists before the current one.
 *
 * @return {@code true} if the current record number is greater than zero
 */
@Override
public boolean hasPrevious() {
return currentRecord > 0;
}
/**
 * Reads record 0.
 *
 * @return the first record, or {@code null} if it could not be read
 */
@Override
public IChemObject first() {
    return readRecordOrNull(0);
}

/**
 * Reads the record with the highest record number.
 *
 * @return the last record, or {@code null} if it could not be read
 */
@Override
public IChemObject last() {
    return readRecordOrNull(records - 1);
}

/**
 * Reads the record following the current one.
 *
 * @return the next record, or {@code null} if it could not be read
 * @see java.util.Iterator#next()
 */
@Override
public IChemObject next() {
    return readRecordOrNull(currentRecord + 1);
}

/**
 * Reads the record preceding the current one.
 *
 * @return the previous record, or {@code null} if it could not be read
 */
@Override
public IChemObject previous() {
    return readRecordOrNull(currentRecord - 1);
}

/**
 * Reads a record, logging any failure as an error instead of propagating it.
 *
 * @param record zero-based record number
 * @return the record, or {@code null} if reading failed
 */
private IChemObject readRecordOrNull(int record) {
    try {
        return readRecord(record);
    } catch (Exception x) {
        logger.error(x);
        return null;
    }
}
/**
 * Does nothing: the method body is empty and the argument is ignored.
 *
 * @param arg0 ignored
 */
@Override
public void set(IChemObject arg0) {
}
/**
 * Does nothing: the method body is empty and the argument is ignored.
 *
 * @param arg0 ignored
 */
@Override
public void add(IChemObject arg0) {
}
/**
 * Returns the record number preceding the current one, without range checking.
 *
 * @return the current record number minus one
 */
@Override
public int previousIndex() {
return currentRecord - 1;
}
/**
 * Returns the record number following the current one, without range checking.
 *
 * @return the current record number plus one
 */
@Override
public int nextIndex() {
return currentRecord + 1;
}
/**
 * Returns the number of indexed records.
 *
 * @return the record count
 */
@Override
public int size() {
return records;
}
/**
 * Registers a listener with this reader and, when a chem object reader is set,
 * with that reader as well.
 *
 * @param listener the listener to add
 */
@Override
public void addChemObjectIOListener(IChemObjectIOListener listener) {
    super.addChemObjectIOListener(listener);
    if (chemObjectReader == null) {
        return;
    }
    chemObjectReader.addChemObjectIOListener(listener);
}

/**
 * Unregisters a listener from this reader and, when a chem object reader is set,
 * from that reader as well.
 *
 * @param listener the listener to remove
 */
@Override
public void removeChemObjectIOListener(IChemObjectIOListener listener) {
    super.removeChemObjectIOListener(listener);
    if (chemObjectReader == null) {
        return;
    }
    chemObjectReader.removeChemObjectIOListener(listener);
}
/**
 * Returns the zero-based number of the current record.
 *
 * @return the current record number
 */
public synchronized int getCurrentRecord() {
return currentRecord;
}
/**
 * Returns the value of the index-created flag.
 *
 * @return the flag last set through {@link #setIndexCreated(boolean)} or by makeIndex()
 */
public synchronized boolean isIndexCreated() {
return indexCreated;
}
/**
 * Sets the index-created flag and wakes up all threads waiting on this object's monitor.
 *
 * @param indexCreated the new flag value
 */
public synchronized void setIndexCreated(boolean indexCreated) {
this.indexCreated = indexCreated;
notifyAll();
}
/**
 * Loads or builds the index and positions the file at the first record.
 *
 * <p>Indexing is best effort: a failure does not propagate to the caller, but it is
 * logged as an error. The index-created flag is set to {@code true} when this method
 * returns, whether or not indexing succeeded, and setIndexCreated notifies any
 * waiting threads.
 */
private void indexTheFile() {
    try {
        setIndexCreated(false);
        makeIndex();
        currentRecord = 0;
        raFile.seek(index[0][0]);
    } catch (Exception x) {
        // Do not let the failure vanish silently: without an index, reads will fail later.
        logger.error("Could not index " + filename);
        logger.error(x);
    } finally {
        setIndexCreated(true);
    }
}
/**
 * Returns the absolute path of the indexed file.
 *
 * @return the file name captured by the constructor
 */
@Override
public String toString() {
return filename;
}
}
/**
 * Reader event that additionally carries a record number.
 */
class RecordReaderEvent extends ReaderEvent {
/**
 * Serialization version identifier.
 */
private static final long serialVersionUID = 572155905623474487L;
/** Record number passed to the constructor. */
protected int record = 0;
/**
 * Creates an event for the given source and record number.
 *
 * @param source the object passed on to the ReaderEvent constructor
 * @param record the record number to carry
 */
public RecordReaderEvent(Object source, int record) {
super(source);
this.record = record;
}
/**
 * Returns the record number carried by this event.
 *
 * @return the record number
 */
public synchronized int getRecord() {
return record;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy