All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.bio.seq.format.BufferedSeqReader Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq.format;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import com.hfg.bio.seq.BioSequence;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.io.GZIP;

//------------------------------------------------------------------------------
/**
 Buffered sequence reader.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class BufferedSeqReader { private ReadableSeqFormat mSeqFormatObj; private BufferedReader mBufferedReader; private boolean mEndOfContentReached; private String mRecordStartLine; private int mNumRecordsParsed; private StringBuilderPlus mUncompressedRecord = new StringBuilderPlus().setDelimiter("\n"); private List mCompressedRecordChunks; private int mCurrentRecordLength = 0; // How long the record should be before compression is used. 
private static int sCompressionThreshold = 8 * 1024; //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public BufferedSeqReader(BufferedReader inReader, ReadableSeqFormat inSeqFormatObj) { mBufferedReader = inReader; mSeqFormatObj = inSeqFormatObj; } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- public void close() throws IOException { mBufferedReader.close(); } //--------------------------------------------------------------------------- public ReadableSeqFormat getSeqFormat() { return mSeqFormatObj; } //--------------------------------------------------------------------------- public synchronized boolean hasNext() { boolean result = false; if (! 
endOfContentReached()) { if (0 == mCurrentRecordLength) { readNextRecord(); } result = mCurrentRecordLength > 0; } return result; } //--------------------------------------------------------------------------- public synchronized T next() { T nextSeq = null; if (0 == mCurrentRecordLength) { readNextRecord(); } if (mCurrentRecordLength > 0) { nextSeq = mSeqFormatObj.readRecord(getBufferedRecordReader()); // mNextRecord.setLength(0); // Clear the raw record mCurrentRecordLength = 0; } return nextSeq; } //--------------------------------------------------------------------------- public List readAll() { List seqs = new ArrayList(); while (hasNext()) { seqs.add(next()); } return seqs; } //--------------------------------------------------------------------------- protected boolean endOfContentReached() { return mEndOfContentReached; } //--------------------------------------------------------------------------- private synchronized void readNextRecord() { if (! endOfContentReached()) { // Start w/ a fresh record // mNextRecord.setLength(0); mUncompressedRecord = new StringBuilderPlus().setDelimiter("\n"); mCompressedRecordChunks = null; mCurrentRecordLength = 0; if (mRecordStartLine != null) { // mNextRecord.appendln(mRecordStartLine); mUncompressedRecord.appendln(mRecordStartLine); } try { String line; while ((line = mBufferedReader.readLine()) != null) { if (line.length() > 0) { if (mSeqFormatObj.isEndOfRecord(line)) { if (mSeqFormatObj.hasJanusDelimiter()) { if (0 == mNumRecordsParsed && 0 == mCurrentRecordLength) { appendLineToCurrentRecord(line); // mNextRecord.appendln(line); } else { mRecordStartLine = line; break; } } else { appendLineToCurrentRecord(line); // mNextRecord.appendln(line); break; } } else { appendLineToCurrentRecord(line); // mNextRecord.appendln(line); } } } if (null == line) { mEndOfContentReached = true; } } catch (IOException e) { throw new SeqIOException(e); } } // if (mNextRecord.length() > 0) if (mCurrentRecordLength > 0) { 
mNumRecordsParsed++; } } //-------------------------------------------------------------------------- // Note: inLine will not have a return at the end private void appendLineToCurrentRecord(String inLine) throws SeqIOException { mCurrentRecordLength += inLine.length() + 1; mUncompressedRecord.appendln(inLine); if (mUncompressedRecord.length() > sCompressionThreshold) { if (null == mCompressedRecordChunks) { mCompressedRecordChunks = new ArrayList<>(); } mCompressedRecordChunks.add(GZIP.compress(mUncompressedRecord.toString())); mUncompressedRecord.setLength(0); } } //-------------------------------------------------------------------------- private BufferedReader getBufferedRecordReader() { InputStream seqStream = null; if (CollectionUtil.hasValues(mCompressedRecordChunks)) { if (mUncompressedRecord.length() > 0) { mCompressedRecordChunks.add(GZIP.compress(mUncompressedRecord.toString())); } seqStream = new RecordStreamer(); } else if (mUncompressedRecord.length() > 0) { seqStream = new ByteArrayInputStream(mUncompressedRecord.toString().getBytes()); } return new BufferedReader(new InputStreamReader(seqStream)); } //########################################################################## // INNER CLASSES //########################################################################## private class RecordStreamer extends InputStream { private String mCurrentChunk; private int mCurrentChunkIndex; private int mCharIndex; private boolean mDone = false; //----------------------------------------------------------------------- public RecordStreamer() { mCurrentChunkIndex = 0; } //----------------------------------------------------------------------- public int read() { return (mDone ? 
-1 : getNextChar()); } //----------------------------------------------------------------------- private char getNextChar() { if (null == mCurrentChunk) { mCurrentChunk = GZIP.uncompressToString(mCompressedRecordChunks.get(mCurrentChunkIndex)); mCharIndex = 0; } char nextChar = mCurrentChunk.charAt(mCharIndex++); if (mCharIndex >= mCurrentChunk.length()) { // This is the last char in this chunk. mCurrentChunk = null; mCurrentChunkIndex++; if (mCurrentChunkIndex < 0 || mCurrentChunkIndex == mCompressedRecordChunks.size()) { // This was the last chunk. mDone = true; } } return nextChar; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy