// com.hfg.bio.seq.format.BufferedSeqReader
// Part of com_hfg: the com.hfg XML, HTML, SVG, and bioinformatics utility library.
package com.hfg.bio.seq.format;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import com.hfg.bio.seq.BioSequence;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.io.GZIP;
//------------------------------------------------------------------------------
/**
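Buffered sequence reader. Reads the raw text of one sequence record at a time
from a BufferedReader and passes it to a ReadableSeqFormat for parsing. Record
text is buffered in memory; once the buffered text exceeds the compression
threshold it is stored as GZIP-compressed chunks.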
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class BufferedSeqReader<T extends BioSequence>
{
   // Parses the raw text of a single record into a sequence object.
   private final ReadableSeqFormat<T> mSeqFormatObj;
   // Source of the raw, line-oriented sequence data.
   private final BufferedReader mBufferedReader;
   // Set once readLine() on the source has returned null.
   private boolean mEndOfContentReached;
   // For formats with a "Janus" delimiter: the line that ended the previous
   // record and which must also be the first line of the next record.
   private String mRecordStartLine;
   private int mNumRecordsParsed;

   // Text of the current record that has not (yet) been compressed.
   private StringBuilderPlus mUncompressedRecord = new StringBuilderPlus().setDelimiter("\n");
   // Earlier portions of the current record, compressed, in order. Null if the
   // record has not exceeded the compression threshold.
   private List<byte[]> mCompressedRecordChunks;
   // Number of chars (including one per line terminator) buffered for the
   // current record. Zero means "no record is buffered".
   private long mCurrentRecordLength = 0;

   // How long the record should be before compression is used.
   private static final int sCompressionThreshold = 8 * 1024;

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    @param inReader       the source of raw sequence data
    @param inSeqFormatObj the format object used to detect record boundaries and to parse each record
    */
   public BufferedSeqReader(BufferedReader inReader, ReadableSeqFormat<T> inSeqFormatObj)
   {
      mBufferedReader = inReader;
      mSeqFormatObj   = inSeqFormatObj;
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Closes the underlying reader.
    @throws IOException if closing the underlying reader fails
    */
   public void close()
      throws IOException
   {
      mBufferedReader.close();
   }

   //---------------------------------------------------------------------------
   /**
    @return the format object this reader uses to parse records
    */
   public ReadableSeqFormat<T> getSeqFormat()
   {
      return mSeqFormatObj;
   }

   //---------------------------------------------------------------------------
   /**
    Reports whether another record is available, buffering it if necessary.
    Calling this repeatedly without an intervening next() keeps returning the
    same answer, including for the final record of the input.
    @return true if a record is buffered and ready to be returned by next()
    */
   public synchronized boolean hasNext()
   {
      // Only read ahead when nothing is buffered. The end-of-content flag is
      // raised while the last record is being buffered, so it must not be
      // consulted while a record is still pending.
      if (0 == mCurrentRecordLength
          && ! endOfContentReached())
      {
         readNextRecord();
      }

      return mCurrentRecordLength > 0;
   }

   //---------------------------------------------------------------------------
   /**
    Parses and returns the next record.
    @return the next sequence or null if there are no more records
    */
   public synchronized T next()
   {
      if (0 == mCurrentRecordLength)
      {
         readNextRecord();
      }

      T nextSeq = null;
      if (mCurrentRecordLength > 0)
      {
         try
         {
            nextSeq = mSeqFormatObj.readRecord(getBufferedRecordReader());
         }
         finally
         {
            // The record is consumed whether or not parsing succeeded, so a
            // caller that catches the failure moves on to the following record
            // instead of re-parsing the same text.
            resetRecordBuffer();
         }
      }

      return nextSeq;
   }

   //---------------------------------------------------------------------------
   /**
    Reads every remaining record.
    @return the remaining sequences in input order (empty if there are none)
    */
   public List<T> readAll()
   {
      List<T> seqs = new ArrayList<>();
      while (hasNext())
      {
         seqs.add(next());
      }

      return seqs;
   }

   //---------------------------------------------------------------------------
   /**
    @return true once the end of the underlying reader has been encountered
    */
   protected boolean endOfContentReached()
   {
      return mEndOfContentReached;
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   // Buffers the raw text of the next record. Blank lines are skipped. Leaves
   // mCurrentRecordLength at zero only if no further record text was found.
   private synchronized void readNextRecord()
   {
      if (endOfContentReached())
      {
         return;
      }

      // Start w/ a fresh record
      resetRecordBuffer();

      if (mRecordStartLine != null)
      {
         // Count the carried-over start line as record content so that a
         // record consisting of only that line is not mistaken for "no record".
         appendLineToCurrentRecord(mRecordStartLine);
         mRecordStartLine = null;
      }

      try
      {
         String line;
         while ((line = mBufferedReader.readLine()) != null)
         {
            if (line.isEmpty())
            {
               continue;
            }

            if (! mSeqFormatObj.isEndOfRecord(line))
            {
               appendLineToCurrentRecord(line);
            }
            else if (! mSeqFormatObj.hasJanusDelimiter())
            {
               // The delimiter line belongs to the record that it terminates.
               appendLineToCurrentRecord(line);
               break;
            }
            else if (0 == mNumRecordsParsed
                     && 0 == mCurrentRecordLength)
            {
               // Very first line of the input: the delimiter opens the first record.
               appendLineToCurrentRecord(line);
            }
            else
            {
               // The delimiter ends this record and begins the next one.
               mRecordStartLine = line;
               break;
            }
         }

         if (null == line)
         {
            mEndOfContentReached = true;
         }
      }
      catch (IOException e)
      {
         throw new SeqIOException(e);
      }

      if (mCurrentRecordLength > 0)
      {
         mNumRecordsParsed++;
      }
   }

   //--------------------------------------------------------------------------
   // Discards any buffered record text.
   private void resetRecordBuffer()
   {
      mUncompressedRecord     = new StringBuilderPlus().setDelimiter("\n");
      mCompressedRecordChunks = null;
      mCurrentRecordLength    = 0;
   }

   //--------------------------------------------------------------------------
   // Note: inLine will not have a return at the end
   private void appendLineToCurrentRecord(String inLine)
      throws SeqIOException
   {
      mCurrentRecordLength += inLine.length() + 1;

      mUncompressedRecord.appendln(inLine);
      if (mUncompressedRecord.length() > sCompressionThreshold)
      {
         compressBufferedText();
      }
   }

   //--------------------------------------------------------------------------
   // Moves the uncompressed text into a new compressed chunk.
   private void compressBufferedText()
   {
      if (null == mCompressedRecordChunks)
      {
         mCompressedRecordChunks = new ArrayList<>();
      }

      mCompressedRecordChunks.add(GZIP.compress(mUncompressedRecord.toString()));
      mUncompressedRecord.setLength(0);
   }

   //--------------------------------------------------------------------------
   // Returns a reader over the text of the buffered record. Character readers
   // are used throughout so that the text is never pushed through a
   // char -> byte -> char conversion.
   private BufferedReader getBufferedRecordReader()
   {
      Reader recordReader;
      if (CollectionUtil.hasValues(mCompressedRecordChunks))
      {
         if (mUncompressedRecord.length() > 0)
         {
            // Fold the tail of the record into the chunk list (and clear it so
            // that it cannot be added a second time).
            compressBufferedText();
         }

         recordReader = new RecordStreamer(mCompressedRecordChunks);
      }
      else
      {
         recordReader = new StringReader(mUncompressedRecord.toString());
      }

      return new BufferedReader(recordReader);
   }

   //##########################################################################
   // INNER CLASSES
   //##########################################################################

   // Streams the text of a record from its compressed chunks, uncompressing one
   // chunk at a time. Holds its own reference to the chunk list so that it is
   // unaffected when the enclosing reader moves on to another record.
   private static class RecordStreamer extends Reader
   {
      private final List<byte[]> mChunks;
      private String mCurrentChunk;
      private int    mNextChunkIndex = 0;
      private int    mCharIndex;

      //-----------------------------------------------------------------------
      public RecordStreamer(List<byte[]> inChunks)
      {
         mChunks = inChunks;
      }

      //-----------------------------------------------------------------------
      // Copies up to inLength chars from the current chunk into inBuffer.
      // Returns the number of chars copied or -1 once all chunks are exhausted.
      @Override
      public int read(char[] inBuffer, int inOffset, int inLength)
      {
         if (0 == inLength)
         {
            return 0;
         }

         // Advance to the next chunk that still has unread chars.
         while (null == mCurrentChunk
                || mCharIndex >= mCurrentChunk.length())
         {
            if (mNextChunkIndex >= mChunks.size())
            {
               mCurrentChunk = null;
               return -1;
            }

            mCurrentChunk = GZIP.uncompressToString(mChunks.get(mNextChunkIndex++));
            mCharIndex = 0;
         }

         int numChars = Math.min(inLength, mCurrentChunk.length() - mCharIndex);
         mCurrentChunk.getChars(mCharIndex, mCharIndex + numChars, inBuffer, inOffset);
         mCharIndex += numChars;

         return numChars;
      }

      //-----------------------------------------------------------------------
      @Override
      public void close()
      {
         // Nothing to release beyond the currently uncompressed chunk.
         mCurrentChunk = null;
      }
   }
}