All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openscience.cdk.io.random.RandomAccessReader Maven / Gradle / Ivy

There is a newer version: 2.10
Show newest version
/* Copyright (C) 2005-2008   Nina Jeliazkova 
 *                    2009   Egon Willighagen 
 *
 * Contact: [email protected]
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 * All we ask is that proper credit is given for our work, which includes
 * - but is not limited to - adding the above copyright notice to the beginning
 * of your source code files, and to any copyright notice that you may distribute
 * with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package org.openscience.cdk.io.random;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.StringReader;

import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.io.IChemObjectReader;
import org.openscience.cdk.io.ISimpleChemObjectReader;
import org.openscience.cdk.io.ReaderEvent;
import org.openscience.cdk.io.listener.IChemObjectIOListener;
import org.openscience.cdk.io.listener.IReaderListener;
import org.openscience.cdk.io.setting.IOSetting;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;

/**
 * Random access to text files of compounds.
 * Reads the file as a text and builds an index file, if the index file doesn't already exist.
 * The index stores offset, length and a third field reserved for future use.
 * Subsequent access for a record N uses this index to seek the record and return the molecule.
 * Useful for very big files.
 *
 * @author     Nina Jeliazkova 
 * @cdk.module io
 * @cdk.githash
 */
public abstract class RandomAccessReader extends DefaultRandomAccessChemObjectReader implements
        IRandomAccessChemObjectReader {

    protected static ILoggingTool     logger        = LoggingToolFactory.createLoggingTool(RandomAccessReader.class);
    protected RandomAccessFile        raFile;
    protected IOSetting[]             headerOptions = null;
    private final String              filename;
    protected ISimpleChemObjectReader chemObjectReader;
    protected int                     indexVersion  = 1;
    /*
     * index[record][0] - record offset in file index[record][1] - record length
     * index[record][2] - number of atoms (if available)
     */
    protected long[][]                index         = null;
    protected int                     records;
    protected int                     currentRecord = 0;
    protected byte[]                  b;
    protected IChemObjectBuilder      builder;
    protected boolean                 indexCreated  = false;

    /**
     * Reads the file and builds an index file, if the index file doesn't already exist.
     *
     * @param file the file object containg the molecules to be indexed
     * @param builder a chem object builder
     * @throws IOException if there is an error during reading
     */
    public RandomAccessReader(File file, IChemObjectBuilder builder) throws IOException {
        this(file, builder, null);
    }

    /**
    * Reads the file and builds an index file, if the index file doesn't already exist.
     *
     * @param file file the file object containg the molecules to be indexed
     * @param builder builder a chem object builder
     * @param listener listen for read event
     * @throws IOException if there is an error during reading
     */
    public RandomAccessReader(File file, IChemObjectBuilder builder, IReaderListener listener) throws IOException {
        super();
        this.filename = file.getAbsolutePath();
        this.builder = builder;
        setChemObjectReader(createChemObjectReader());
        if (listener != null) addChemObjectIOListener(listener);
        raFile = new RandomAccessFile(file, "r");
        records = 0;
        setIndexCreated(false);
        indexTheFile();

    }

    @Override
    protected void finalize() throws Throwable {
        try {
            close();
        } catch (Exception x) {
            logger.debug("Error during finalize");
        }
        super.finalize();
    }

    /**
     * Returns the object at given record No.
     *
     * Record numbers are zero-based!
     */
    @Override
    public synchronized IChemObject readRecord(int record) throws Exception {
        String buffer = readContent(record);
        if (chemObjectReader == null)
            throw new CDKException("No chemobject reader!");
        else {
            chemObjectReader.setReader(new StringReader(buffer));
            currentRecord = record;
            return processContent();
        }
    }

    /**
     * Reads the record text content into a String.
     *
     * @param record The record number
     * @return  A String representation of the record
     * @throws java.io.IOException if error occurs during reading
     * @throws org.openscience.cdk.exception.CDKException if the record number is invalid
     */
    protected String readContent(int record) throws IOException, CDKException {
        logger.debug("Current record ", record);

        if ((record < 0) || (record >= records)) {
            throw new CDKException("No such record " + record);
        }
        //fireFrameRead();

        raFile.seek(index[record][0]);
        int length = (int) index[record][1];
        raFile.read(b, 0, length);
        return new String(b, 0, length);
    }

    /**
     * The reader is already set to read the record buffer.
     * @return the read IChemObject
     * @throws CDKException an error occurred whilst reading the file
     */
    protected IChemObject processContent() throws CDKException {
        return chemObjectReader.read(builder.newInstance(IChemFile.class));
    }

    protected long[][] resize(long[][] index, int newLength) {
        long[][] newIndex = new long[newLength][3];
        for (int i = 0; i < index.length; i++) {
            newIndex[i][0] = index[i][0];
            newIndex[i][1] = index[i][1];
            newIndex[i][2] = index[i][2];
        }
        return newIndex;

    }

    protected abstract boolean isRecordEnd(String line);

    protected synchronized void saveIndex(File file) throws Exception {
        if (records == 0) {
            file.delete();
            return;
        }
        FileWriter out = new FileWriter(file);
        out.write(Integer.toString(indexVersion));
        out.write('\n');
        out.write(filename);
        out.write('\n');
        out.write(Long.toString(raFile.length()));
        out.write('\n');
        out.write(Integer.toString(records));
        out.write('\n');
        for (int i = 0; i < records; i++) {
            out.write(Long.toString(index[i][0]));
            out.write('\t');
            out.write(Long.toString(index[i][1]));
            out.write('\t');
            out.write(Long.toString(index[i][2]));
            out.write('\n');
        }
        out.write(Integer.toString(records));
        out.write('\n');
        out.write(filename);
        out.write('\n');
        out.close();
    }

    protected synchronized void loadIndex(File file) throws Exception {
        BufferedReader in = new BufferedReader(new FileReader(file));
        String version = in.readLine();
        try {
            if (Integer.parseInt(version) != indexVersion) {
                in.close();
                throw new Exception("Expected index version " + indexVersion + " instead of " + version);
            }
        } catch (Exception x) {
            in.close();
            throw new Exception("Invalid index version " + version);
        }
        String fileIndexed = in.readLine();
        if (!filename.equals(fileIndexed)) {
            in.close();
            throw new Exception("Index for " + fileIndexed + " found instead of " + filename + ". Creating new index.");
        }
        String line = in.readLine();
        int fileLength = Integer.parseInt(line);
        if (fileLength != raFile.length()) {
            in.close();
            throw new Exception("Index for file of size " + fileLength + " found instead of " + raFile.length());
        }
        line = in.readLine();
        int indexLength = Integer.parseInt(line);
        if (indexLength <= 0) {
            in.close();
            throw new Exception("Index of zero lenght! " + file.getAbsolutePath());
        }
        index = new long[indexLength][3];
        records = 0;
        int maxRecordLength = 0;
        for (int i = 0; i < index.length; i++) {
            line = in.readLine();
            String[] result = line.split("\t");
            for (int j = 0; j < 3; j++)
                try {
                    index[i][j] = Long.parseLong(result[j]);

                } catch (Exception x) {
                    in.close();
                    throw new Exception("Error reading index! " + result[j], x);
                }

            if (maxRecordLength < index[records][1]) maxRecordLength = (int) index[records][1];
            records++;
        }

        line = in.readLine();
        int indexLength2 = Integer.parseInt(line);
        if (indexLength2 <= 0) {
            in.close();
            throw new Exception("Index of zero lenght!");
        }
        if (indexLength2 != indexLength) {
            in.close();
            throw new Exception("Wrong index length!");
        }
        line = in.readLine();
        if (!line.equals(filename)) {
            in.close();
            throw new Exception("Index for " + line + " found instead of " + filename);
        }
        in.close();

        b = new byte[maxRecordLength];
        //fireFrameRead();
    }

    /**
     * The index file {@link #getIndexFile(String)} is loaded, if already exists, or created a new.
     * @throws Exception
     */
    protected synchronized void makeIndex() throws Exception {
        File indexFile = getIndexFile(filename);
        if (indexFile.exists()) try {
            loadIndex(indexFile);
            setIndexCreated(true);
            return;
        } catch (Exception x) {
            logger.warn(x.getMessage());
        }
        indexCreated = false;
        long now = System.currentTimeMillis();
        int recordLength = 1000;
        int maxRecords = 1;
        int maxRecordLength = 0;
        maxRecords = (int) raFile.length() / recordLength;
        if (maxRecords == 0) maxRecords = 1;
        index = new long[maxRecords][3];

        String s = null;
        long start = 0;
        long end = 0;
        raFile.seek(0);
        records = 0;
        recordLength = 0;
        while ((s = raFile.readLine()) != null) {
            if (start == -1) start = raFile.getFilePointer();
            if (isRecordEnd(s)) {
                //fireFrameRead();
                if (records >= maxRecords) {
                    index = resize(index,
                            records
                                    + (int) (records + (raFile.length() - records * raFile.getFilePointer())
                                            / recordLength));
                }
                end += 4;
                index[records][0] = start;
                index[records][1] = end - start;
                index[records][2] = -1;
                if (maxRecordLength < index[records][1]) maxRecordLength = (int) index[records][1];
                records++;
                recordLength += end - start;

                start = raFile.getFilePointer();
            } else {
                end = raFile.getFilePointer();
            }

        }
        b = new byte[maxRecordLength];
        //fireFrameRead();
        logger.info("Index created in " + (System.currentTimeMillis() - now) + " ms.");
        try {
            saveIndex(indexFile);
        } catch (Exception x) {
            logger.error(x);
        }
    }

    /**
     * Opens the file index _cdk.index in a temporary folder, as specified by "java.io.tmpdir" property.
     *
     * @param filename the name of the file for which the index was generated
     * @return a file object representing the index file
     */
    public static File getIndexFile(String filename) {
        String tmpDir = System.getProperty("java.io.tmpdir");
        File f = new File(filename);
        File indexFile = new File(tmpDir, f.getName() + "_cdk.index");
        f = null;
        return indexFile;
    }

    /*
     * (non-Javadoc)
     * @see java.io.Closeable#close()
     */
    public void close() throws IOException {
        raFile.close();
        //TODO
        //removeChemObjectIOListener(listener)

    }

    public synchronized IChemObjectReader getChemObjectReader() {
        return chemObjectReader;
    }

    public abstract ISimpleChemObjectReader createChemObjectReader();

    public synchronized void setChemObjectReader(ISimpleChemObjectReader chemObjectReader) {
        this.chemObjectReader = chemObjectReader;
    }

    /*
     * (non-Javadoc)
     * @see java.util.Iterator#hasNext()
     */
    @Override
    public boolean hasNext() {
        return currentRecord < (records - 1);
    }

    @Override
    public boolean hasPrevious() {
        return currentRecord > 0;
    }

    @Override
    public IChemObject first() {
        try {
            return readRecord(0);
        } catch (Exception x) {
            logger.error(x);
            return null;
        }
    }

    @Override
    public IChemObject last() {
        try {
            return readRecord(records - 1);
        } catch (Exception x) {
            logger.error(x);
            return null;
        }
    }

    /*
     * (non-Javadoc)
     * @see java.util.Iterator#next()
     */
    @Override
    public IChemObject next() {
        try {
            return readRecord(currentRecord + 1);
        } catch (Exception x) {
            logger.error(x);
            return null;
        }
    }

    /*
     * (non-Javadoc)
     */
    @Override
    public IChemObject previous() {
        try {
            return readRecord(currentRecord - 1);
        } catch (Exception x) {
            logger.error(x);
            return null;
        }
    }

    @Override
    public void set(IChemObject arg0) {

    }

    @Override
    public void add(IChemObject arg0) {

    }

    @Override
    public int previousIndex() {
        return currentRecord - 1;
    }

    @Override
    public int nextIndex() {
        return currentRecord + 1;
    }

    @Override
    public int size() {
        return records;
    }

    @Override
    public void addChemObjectIOListener(IChemObjectIOListener listener) {
        super.addChemObjectIOListener(listener);
        if (chemObjectReader != null) chemObjectReader.addChemObjectIOListener(listener);
    }

    @Override
    public void removeChemObjectIOListener(IChemObjectIOListener listener) {
        super.removeChemObjectIOListener(listener);
        if (chemObjectReader != null) chemObjectReader.removeChemObjectIOListener(listener);
    }

    public synchronized int getCurrentRecord() {
        return currentRecord;
    }

    public synchronized boolean isIndexCreated() {
        return indexCreated;
    }

    public synchronized void setIndexCreated(boolean indexCreated) {
        this.indexCreated = indexCreated;
        notifyAll();
    }

    private void indexTheFile() {
        try {
            setIndexCreated(false);
            makeIndex();
            currentRecord = 0;
            raFile.seek(index[0][0]);
            setIndexCreated(true);
        } catch (Exception x) {
            setIndexCreated(true);
        }
    }

    @Override
    public String toString() {
        return filename;
    }

}

class RecordReaderEvent extends ReaderEvent {

    /**
     *
     */
    private static final long serialVersionUID = 572155905623474487L;
    protected int             record           = 0;

    public RecordReaderEvent(Object source, int record) {
        super(source);
        this.record = record;
    }

    public synchronized int getRecord() {
        return record;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy