marytts.unitselection.data.TimelineReader Maven / Gradle / Ivy
The newest version!
/**
* Portions Copyright 2006 DFKI GmbH.
* Portions Copyright 2001 Sun Microsystems, Inc.
* Portions Copyright 1999-2001 Language Technologies Institute,
* Carnegie Mellon University.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
package marytts.unitselection.data;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.UTFDataFormatException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Vector;
import marytts.exceptions.MaryConfigurationException;
import marytts.util.MaryUtils;
import marytts.util.Pair;
import marytts.util.data.Datagram;
import marytts.util.data.MaryHeader;
import marytts.util.io.StreamUtils;
/**
* The TimelineReader class provides an interface to read regularly or variably spaced datagrams from a Timeline data file in Mary
* format.
*
* @author sacha, marc
*
*/
public class TimelineReader {
protected MaryHeader maryHdr = null; // The standard Mary header; assigned by load()
protected ProcHeader procHdr = null; // The processing info header; assigned by load()
protected Index idx = null; // A global time index for the variable-sized datagrams; assigned by load()
/* Some specific header fields: */
protected int sampleRate = 0; // timeline sample rate read from the header; load() rejects values <= 0
protected long numDatagrams = 0; // number of datagrams read from the header; load() rejects negative values
/**
 * The total duration of the timeline data, in samples. This is only computed upon request; the value -1 means
 * "not computed yet" (see getTotalDuration()).
 */
protected long totalDuration = -1;
protected int datagramsBytePos = 0; // byte offset in the file at which the datagram zone starts
protected int timeIdxBytePos = 0; // byte offset in the file at which the time index starts; load() requires it not to precede datagramsBytePos
// exactly one of the two following variables will be non-null after load():
private MappedByteBuffer mappedBB = null; // read-only memory map of the datagram zone, when mapping was requested and succeeded
private FileChannel fileChannel = null; // channel kept open for piecewise reading when no memory map is used
/****************/
/* CONSTRUCTORS */
/****************/
/**
 * Construct a timeline from the given file name, attempting memory mapping first.
 *
 * This is a convenience constructor equivalent to {@code TimelineReader(fileName, true)}.
 *
 * Aiming for the fundamental guarantee: If an instance of this class is created, it is usable.
 *
 * @param fileName
 *            The file to read the timeline from. Must be non-null and point to a valid timeline file.
 * @throws NullPointerException
 *             if null argument is given
 * @throws MaryConfigurationException
 *             if no timeline reader can be instantiated from fileName
 */
public TimelineReader(String fileName) throws MaryConfigurationException {
this(fileName, true);
}
/**
 * Create a reader for the timeline stored in the named file.
 *
 * The design goal is the fundamental guarantee that any successfully constructed instance is usable: every problem
 * encountered while loading is reported as a {@link MaryConfigurationException} carrying the original cause.
 *
 * @param fileName
 *            path of the timeline file; must be non-null and refer to a valid timeline file.
 * @param tryMemoryMapping
 *            when true, the datagram zone is accessed through a memory map if possible, with piecewise reading as the
 *            fallback; when false, piecewise reading through the open file is used right away.
 * @throws NullPointerException
 *             if fileName is null
 * @throws MaryConfigurationException
 *             if the timeline cannot be loaded from fileName
 */
public TimelineReader(String fileName, boolean tryMemoryMapping) throws MaryConfigurationException {
    if (fileName == null) {
        throw new NullPointerException("Filename is null");
    }
    try {
        load(fileName, tryMemoryMapping);
    } catch (Exception cause) {
        // any failure during loading, checked or unchecked, means no usable reader can be built
        throw new MaryConfigurationException("Cannot load timeline file from " + fileName, cause);
    }
}
/**
 * Constructor that performs no loading at all. Only subclasses can instantiate a TimelineReader object that doesn't call
 * {@link #load(String)}. It is their responsibility then to ensure the fundamental guarantee (a created instance is
 * usable), since all fields keep their default values here.
 */
protected TimelineReader() {
}
/**
 * Load a timeline from a file, attempting memory mapping first. Equivalent to {@code load(fileName, true)}.
 *
 * @param fileName
 *            The file to read the timeline from. Must be non-null and point to a valid timeline file.
 *
 * @throws IOException
 *             if a problem occurs during reading
 * @throws BufferUnderflowException
 *             if a problem occurs during reading
 * @throws MaryConfigurationException
 *             if fileName does not point to a valid timeline file
 * @throws NullPointerException
 *             declared by the signature; presumably raised for a null fileName -- verify against the two-argument
 *             load(), which only asserts non-nullness
 */
protected void load(String fileName) throws IOException, BufferUnderflowException, MaryConfigurationException,
NullPointerException {
load(fileName, true);
}
/**
 * Load a timeline from a file.
 *
 * Reads the Mary header, the processing header, the timeline dimensions and the time index, and then sets up access to
 * the datagram zone either through a read-only memory map or through the open file channel. If loading fails at any
 * point, the underlying file is closed before the exception is propagated.
 *
 * @param fileName
 *            The file to read the timeline from. Must be non-null and point to a valid timeline file.
 * @param tryMemoryMapping
 *            if true, try to memory-map the datagram zone and fall back to piecewise reading if mapping fails; if false,
 *            use piecewise reading directly and keep the file open.
 * @throws IOException
 *             if a problem occurs during reading
 * @throws BufferUnderflowException
 *             if the header or index data is shorter than expected
 * @throws MaryConfigurationException
 *             if fileName does not point to a valid timeline file, or if its byte offsets are out of the supported range
 * @throws NullPointerException
 *             declared for callers; a null fileName is only checked by assertion here
 */
protected void load(String fileName, boolean tryMemoryMapping) throws IOException, BufferUnderflowException,
        MaryConfigurationException, NullPointerException {
    assert fileName != null : "filename is null";
    RandomAccessFile file = new RandomAccessFile(fileName, "r");
    boolean loaded = false;
    try {
        FileChannel fc = file.getChannel();
        // Expect header to be no bigger than 64k bytes
        ByteBuffer headerBB = ByteBuffer.allocate(0x10000);
        // A single read() is allowed to deliver fewer bytes than requested, so read until full or end of file.
        int numRead;
        do {
            numRead = fc.read(headerBB);
        } while (numRead > 0 && headerBB.hasRemaining());
        headerBB.flip();
        maryHdr = new MaryHeader(headerBB);
        if (maryHdr.getType() != MaryHeader.TIMELINE) {
            throw new MaryConfigurationException("File is not a valid timeline file.");
        }
        /* Load the processing info header */
        procHdr = new ProcHeader(headerBB);
        /* Load the timeline dimensions */
        sampleRate = headerBB.getInt();
        numDatagrams = headerBB.getLong();
        if (sampleRate <= 0 || numDatagrams < 0) {
            throw new MaryConfigurationException("Illegal values in timeline file.");
        }
        /* Load the positions of the various subsequent components */
        long datagramsPos = headerBB.getLong();
        long timeIdxPos = headerBB.getLong();
        if (timeIdxPos < datagramsPos) {
            throw new MaryConfigurationException("File seems corrupt: index is expected after data, not before");
        }
        // The positions are stored as 64-bit values but kept in int fields; refuse values that a cast would silently
        // truncate, as well as positions that lie outside the file.
        long fileSize = fc.size();
        if (datagramsPos < 0 || timeIdxPos > Integer.MAX_VALUE || timeIdxPos > fileSize
                || fileSize - timeIdxPos > Integer.MAX_VALUE) {
            throw new MaryConfigurationException("File seems corrupt or too large: datagrams at byte " + datagramsPos
                    + ", index at byte " + timeIdxPos + ", file size " + fileSize);
        }
        datagramsBytePos = (int) datagramsPos;
        timeIdxBytePos = (int) timeIdxPos;
        /* Go fetch the time index at the end of the file */
        fc.position(timeIdxBytePos);
        ByteBuffer indexBB = ByteBuffer.allocate((int) (fileSize - timeIdxBytePos));
        do {
            numRead = fc.read(indexBB);
        } while (numRead > 0 && indexBB.hasRemaining());
        indexBB.flip();
        idx = new Index(indexBB);
        if (tryMemoryMapping) {
            // Try if we can use a mapped byte buffer:
            try {
                mappedBB = fc.map(FileChannel.MapMode.READ_ONLY, datagramsBytePos, timeIdxBytePos - datagramsBytePos);
                file.close(); // if map() succeeded, we don't need the file anymore.
            } catch (IOException ome) {
                MaryUtils.getLogger("Timeline").warn(
                        "Cannot use memory mapping for timeline file '" + fileName + "' -- falling back to piecewise reading");
            }
        }
        if (!tryMemoryMapping || mappedBB == null) { // use piecewise reading
            fileChannel = fc;
            assert fileChannel != null;
            // and leave file open
        }
        // postconditions:
        assert idx != null;
        assert procHdr != null;
        assert fileChannel == null && mappedBB != null || fileChannel != null && mappedBB == null;
        loaded = true;
    } finally {
        if (!loaded) {
            // Do not leak the file handle when the file turns out to be unreadable or invalid.
            try {
                file.close();
            } catch (IOException ignored) {
                // the exception that made loading fail is the one worth reporting
            }
        }
    }
}
/**
 * Return the content of the processing header as a String, as provided by the processing header's getString().
 *
 * @return the string representing the proc header (presumably non-null once load() has succeeded -- not verified here).
 */
public String getProcHeaderContents() {
return procHdr.getString();
}
/**
 * Returns the number of datagrams in the timeline, as read from the file header.
 *
 * @return the (non-negative) number of datagrams, as a long. Non-negativity is only checked by assertion here.
 */
public long getNumDatagrams() {
assert numDatagrams >= 0;
return numDatagrams;
}
/**
 * Returns the position of the datagram zone in the original file.
 *
 * @return the byte position of the datagram zone (the int field, widened to long).
 */
protected long getDatagramsBytePos() {
return datagramsBytePos;
}
/**
 * Returns the timeline's sample rate, as read from the file header.
 *
 * @return the sample rate as a positive integer. Positivity is only checked by assertion here.
 */
public int getSampleRate() {
assert sampleRate > 0;
return sampleRate;
}
/**
 * Provide the accumulated duration of all datagrams in this timeline. The value is computed on the first call, which is
 * expensive, and remembered for later calls; it should not be relied upon in production code paths.
 *
 * @return the total duration in samples, a non-negative long.
 * @throws MaryConfigurationException
 *             if the duration cannot be obtained.
 */
public long getTotalDuration() throws MaryConfigurationException {
    final boolean notYetComputed = (totalDuration == -1);
    if (notYetComputed) {
        computeTotalDuration();
    }
    assert totalDuration >= 0;
    return totalDuration;
}
/**
 * Compute the total duration of a timeline and store it in {@code totalDuration}. This is an expensive method, since it
 * goes through all datagrams to compute this duration. It should not normally be used in production.
 *
 * A timeline that declares zero datagrams has a total duration of 0; no data is read in that case.
 *
 * @throws MaryConfigurationException
 *             if the duration could not be computed, i.e. if reading fails or fewer datagrams than announced in the header
 *             can be read.
 */
protected void computeTotalDuration() throws MaryConfigurationException {
    if (numDatagrams == 0) {
        // nothing to read: avoid asking for a byte buffer that an empty timeline cannot provide
        totalDuration = 0;
        return;
    }
    long time = 0;
    long nRead = 0;
    try {
        Pair<ByteBuffer, Long> p = getByteBufferAtTime(0);
        ByteBuffer bb = p.getFirst();
        assert p.getSecond() == 0;
        while (nRead < numDatagrams) {
            Datagram dat = getNextDatagram(bb);
            if (dat == null) {
                // we may have reached the end of the current byte buffer... try reading another:
                p = getByteBufferAtTime(time);
                bb = p.getFirst();
                assert p.getSecond() == time;
                dat = getNextDatagram(bb);
                if (dat == null) { // no, indeed we cannot read any more
                    break; // abort, we could not read all
                }
            }
            time += dat.getDuration(); // duration in timeline sample rate
            nRead++; // number of datagrams read
        }
    } catch (Exception e) {
        throw new MaryConfigurationException("Could not compute total duration", e);
    }
    if (nRead != numDatagrams) {
        throw new MaryConfigurationException("Could not read all datagrams to compute total duration");
    }
    totalDuration = time;
}
/**
 * The index object, as built by load() from the index section of the file.
 *
 * @return the index object; non-nullness is only checked by assertion here.
 */
public Index getIndex() {
assert idx != null;
return idx;
}
// Helper methods
/**
 * Convert a discrete time given at an external sample rate into the time base used for timeline lookups.
 *
 * When both rates are equal the time is returned untouched; otherwise the result is
 * {@code round(reqSampleRate * targetTimeInSamples / sampleRate)}, computed in double precision.
 *
 * NOTE(review): a conversion from the external rate to the timeline rate would normally multiply by the timeline rate and
 * divide by the external one; this formula does the opposite. Confirm the intended direction against callers and tests
 * before changing it.
 *
 * @param reqSampleRate
 *            the externally given sample rate.
 * @param targetTimeInSamples
 *            a discrete time, with respect to the externally given sample rate.
 * @return the scaled discrete time, rounded to the nearest sample.
 */
protected long scaleTime(int reqSampleRate, long targetTimeInSamples) {
    if (reqSampleRate != sampleRate) {
        double scaled = (double) reqSampleRate * (double) targetTimeInSamples / (double) sampleRate;
        return Math.round(scaled);
    }
    return targetTimeInSamples;
}
/**
 * Convert a discrete time from the timeline's time base back to an external sample rate; the counterpart of
 * scaleTime().
 *
 * When both rates are equal the time is returned untouched; otherwise the result is
 * {@code round(sampleRate * timelineTimeInSamples / reqSampleRate)}, computed in double precision.
 *
 * NOTE(review): as with scaleTime(), the ratio appears inverted relative to a plain rate conversion; confirm the
 * intended direction against callers and tests before changing it.
 *
 * @param reqSampleRate
 *            the externally given sample rate.
 * @param timelineTimeInSamples
 *            a discrete time, with respect to the timeline sample rate.
 * @return the unscaled discrete time, rounded to the nearest sample.
 */
protected long unScaleTime(int reqSampleRate, long timelineTimeInSamples) {
    if (reqSampleRate != sampleRate) {
        double unscaled = (double) sampleRate * (double) timelineTimeInSamples / (double) reqSampleRate;
        return Math.round(unscaled);
    }
    return timelineTimeInSamples;
}
/******************/
/* DATA ACCESSORS */
/******************/
/**
 * Skip the upcoming datagram at the current position of the byte buffer.
 *
 * The datagram header (a long duration followed by an int payload size) is consumed, then the position is advanced past
 * the payload. If the payload is not fully available, the header has already been consumed when the exception is thrown.
 *
 * @param bb
 *            the byte buffer positioned at the start of a datagram
 * @return the duration of the datagram we skipped
 * @throws IOException
 *             if the datagram declares a negative size, or if it is not fully contained in the byte buffer
 * @throws BufferUnderflowException
 *             if fewer bytes than a datagram header remain in the byte buffer
 */
protected long skipNextDatagram(ByteBuffer bb) throws IOException {
    long datagramDuration = bb.getLong();
    int datagramSize = bb.getInt();
    if (datagramSize < 0) {
        // a negative size would move the position backwards instead of skipping
        throw new IOException("cannot skip datagram: negative datagram size " + datagramSize);
    }
    // compare against remaining() rather than position + size, which could overflow for huge sizes
    if (datagramSize > bb.remaining()) {
        throw new IOException("cannot skip datagram: it is not fully contained in byte buffer");
    }
    bb.position(bb.position() + datagramSize);
    return datagramDuration;
}
/**
 * Read the datagram that starts at the current position of the given byte buffer. Subclasses that need a more specific
 * kind of Datagram should override this method.
 *
 * @param bb
 *            the timeline byte buffer to read from; must not be null
 *
 * @return the datagram read, or null if the buffer is exhausted or the datagram could not be created because of an
 *         IOException
 */
protected Datagram getNextDatagram(ByteBuffer bb) {
    assert bb != null;
    // Nothing left in this buffer: signal end of data
    if (!bb.hasRemaining()) {
        return null;
    }
    Datagram next;
    try {
        next = new Datagram(bb);
    } catch (IOException ioe) {
        // an unreadable datagram is reported the same way as end of data
        next = null;
    }
    return next;
}
/**
 * Hop the datagrams in the given byte buffer until the one which begins at or contains the desired time (time is in
 * samples; the sample rate is assumed to be that of the timeline).
 *
 * @param bb
 *            the timeline byte buffer to use. Must not be null.
 * @param currentTimeInSamples
 *            the time position corresponding to the current position of the byte buffer. Must not be negative.
 * @param targetTimeInSamples
 *            the time location to reach. Must not be less than currentTimeInSamples
 *
 * @return the actual time at which we end up after hopping. This is less than or equal to targetTimeInSamples, never
 *         greater than it.
 * @throws IOException
 *             if there is a problem skipping the datagrams
 * @throws IllegalArgumentException
 *             if targetTimeInSamples is less than currentTimeInSamples
 */
protected long hopToTime(ByteBuffer bb, long currentTimeInSamples, long targetTimeInSamples) throws IOException,
        IllegalArgumentException {
    assert bb != null;
    assert currentTimeInSamples >= 0;
    // Enforce the documented contract even when assertions are disabled: datagrams can only be traversed forwards.
    if (targetTimeInSamples < currentTimeInSamples) {
        throw new IllegalArgumentException("Cannot hop back from time " + currentTimeInSamples + " to time "
                + targetTimeInSamples);
    }
    /*
     * If the current time position is the requested time do nothing, you are already at the right position
     */
    if (currentTimeInSamples == targetTimeInSamples) {
        return currentTimeInSamples;
    }
    /* Else hop: */
    int byteBefore = bb.position();
    long timeBefore = currentTimeInSamples;
    /* Hop until the datagram which comes just after the requested time */
    while (currentTimeInSamples <= targetTimeInSamples) { // Stop after the requested time, we will step back
        // to the correct time in case of equality
        timeBefore = currentTimeInSamples;
        byteBefore = bb.position();
        currentTimeInSamples += skipNextDatagram(bb);
    }
    /* Do one step back so that the pointed datagram contains the requested time */
    bb.position(byteBefore);
    return timeBefore;
}
/**
 * This method produces a new byte buffer whose current position represents the requested time. It cannot be assumed that
 * a call to byteBuffer.position() produces any meaningful values. The byte buffer may represent only a part of the
 * available data; however, at least one datagram can be read from the byte buffer. If no further data can be read from
 * it, a new byte buffer must be obtained by calling this method again with a new target time.
 *
 * The memory map is used when one exists; otherwise a chunk of data is loaded from the open file channel.
 *
 * @param targetTimeInSamples
 *            the time position in the file which should be accessed as a byte buffer, in samples. Must be non-negative
 *            and less than the total duration of the timeline.
 * @return a pair representing the byte buffer from which to read, and the exact time corresponding to the current
 *         position of the byte buffer. The position as such is not meaningful; the time is guaranteed to be less than or
 *         equal to targetTimeInSamples.
 * @throws IOException
 *             if the data cannot be read or the datagrams cannot be skipped
 * @throws BufferUnderflowException
 *             if no byte buffer can be obtained for the requested time.
 */
protected Pair<ByteBuffer, Long> getByteBufferAtTime(long targetTimeInSamples) throws IOException,
        BufferUnderflowException {
    if (mappedBB != null) {
        return getMappedByteBufferAtTime(targetTimeInSamples);
    }
    return loadByteBufferAtTime(targetTimeInSamples);
}
/**
 * Memory-mapped variant of getByteBufferAtTime(): position a duplicate of the mapped datagram zone on the datagram that
 * begins at or contains the requested time.
 *
 * The search starts at the index field preceding the requested time and hops forward datagram by datagram. The returned
 * buffer is a duplicate, so its position is independent of the shared memory map.
 *
 * @param targetTimeInSamples
 *            the time to reach, in samples at the timeline's sample rate.
 * @return the positioned byte buffer together with the start time of the datagram at its position.
 * @throws IllegalArgumentException
 *             if the index yields a byte position outside the mapped zone, or a time later than the target
 * @throws IOException
 *             if there is a problem skipping the datagrams
 */
protected Pair<ByteBuffer, Long> getMappedByteBufferAtTime(long targetTimeInSamples) throws IllegalArgumentException,
        IOException {
    assert mappedBB != null;
    /* Seek for the time index which comes just before the requested time */
    IdxField idxFieldBefore = idx.getIdxFieldBefore(targetTimeInSamples);
    long time = idxFieldBefore.timePtr;
    // index byte pointers are relative to the file start, the map starts at the datagram zone
    int bytePos = (int) (idxFieldBefore.bytePtr - datagramsBytePos);
    ByteBuffer bb = mappedBB.duplicate();
    bb.position(bytePos);
    time = hopToTime(bb, time, targetTimeInSamples);
    return new Pair<ByteBuffer, Long>(bb, time);
}
/**
 * Piecewise-reading variant of getByteBufferAtTime(): load chunks of the datagram zone from the file channel until the
 * datagram that begins at or contains the requested time is found, and return a buffer positioned on that datagram.
 *
 * Throughout the method, {@code bytePos} is the file offset corresponding to position 0 of the current buffer; it is
 * advanced every time a new chunk is loaded so that reading resumes at the current datagram and not at the start of the
 * previous chunk. Chunks never extend into the time index that follows the datagram zone.
 *
 * @param targetTimeInSamples
 *            the time to reach, in samples at the timeline's sample rate.
 * @return the byte buffer, positioned at the start of the datagram found, together with that datagram's start time.
 * @throws IOException
 *             if the data cannot be read from the file channel
 * @throws BufferUnderflowException
 *             if the end of the datagram zone is reached before the requested time
 */
protected Pair<ByteBuffer, Long> loadByteBufferAtTime(long targetTimeInSamples) throws IOException {
    assert fileChannel != null;
    // we must load a chunk of data from the FileChannel
    final int bufSize = 0x10000; // 64 kB
    /* Seek for the time index which comes just before the requested time */
    IdxField idxFieldBefore = idx.getIdxFieldBefore(targetTimeInSamples);
    long time = idxFieldBefore.timePtr;
    long bytePos = idxFieldBefore.bytePtr;
    ByteBuffer bb = loadByteBuffer(bytePos, clampToDatagramZone(bytePos, bufSize));
    while (true) {
        if (!canReadDatagramHeader(bb)) {
            if (bb.position() > 0) {
                // the next header straddles the end of this chunk: reload starting at the current datagram
                bytePos += bb.position();
                bb = loadByteBuffer(bytePos, clampToDatagramZone(bytePos, bufSize));
            }
            if (!canReadDatagramHeader(bb)) {
                // no more datagrams before the index: the requested time lies beyond the data
                throw new BufferUnderflowException();
            }
        }
        int posBefore = bb.position();
        // presumably reads the header only and leaves the position at the start of the payload
        Datagram d = new Datagram(bb, false);
        if (time + d.getDuration() > targetTimeInSamples) { // d is our datagram
            bb.position(posBefore);
            int datagramNumBytes = Datagram.NUM_HEADER_BYTES + d.getLength();
            // need to make sure we return a byte buffer from which d can be read
            if (!canReadAmount(bb, datagramNumBytes)) {
                bytePos += posBefore; // the new chunk must start at d, not at the start of the old chunk
                bb = loadByteBuffer(bytePos, clampToDatagramZone(bytePos, Math.max(datagramNumBytes, bufSize)));
            }
            assert canReadAmount(bb, datagramNumBytes);
            break;
        }
        // keep on skipping
        time += d.getDuration();
        if (canReadAmount(bb, d.getLength())) {
            bb.position(bb.position() + d.getLength());
        } else {
            // payload continues beyond this chunk: load the next chunk right after it
            bytePos += bb.position();
            bytePos += d.getLength();
            bb = loadByteBuffer(bytePos, clampToDatagramZone(bytePos, bufSize));
        }
    }
    return new Pair<ByteBuffer, Long>(bb, time);
}

/**
 * Limit a chunk size so that a chunk starting at bytePos does not reach into the time index.
 *
 * @param bytePos
 *            file offset at which the chunk starts
 * @param wantedSize
 *            the preferred chunk size in bytes
 * @return wantedSize, or the (non-negative) number of bytes left before the time index if that is smaller
 */
private int clampToDatagramZone(long bytePos, int wantedSize) {
    long available = Math.max(0L, timeIdxBytePos - bytePos);
    return (int) Math.min((long) wantedSize, available);
}
/**
 * Load a chunk of the file into a freshly allocated byte buffer, using positional reads that leave the channel's own
 * position untouched.
 *
 * @param bytePos
 *            position in fileChannel from which to load the byte buffer
 * @param bufSize
 *            size of the byte buffer
 * @return the byte buffer, with position 0 and its limit set to the number of bytes actually read; this is bufSize unless
 *         the end of the file was reached first
 * @throws IOException
 *             if the data cannot be read from fileChannel
 */
private ByteBuffer loadByteBuffer(long bytePos, int bufSize) throws IOException {
    ByteBuffer bb = ByteBuffer.allocate(bufSize);
    long readPos = bytePos;
    // a single read() may deliver fewer bytes than requested, so keep reading until full or end of file
    while (bb.hasRemaining()) {
        int numRead = fileChannel.read(bb, readPos);
        if (numRead <= 0) {
            break;
        }
        readPos += numRead;
    }
    bb.flip();
    return bb;
}
/**
 * Check whether at least Datagram.NUM_HEADER_BYTES bytes remain between the buffer's position and its limit.
 *
 * @param bb
 *            the byte buffer to inspect
 * @return true if a complete datagram header can be read from the current position
 */
private boolean canReadDatagramHeader(ByteBuffer bb) {
return canReadAmount(bb, Datagram.NUM_HEADER_BYTES);
}
/**
 * Tell whether the given number of bytes is available between the buffer's current position and its limit.
 *
 * @param bb
 *            the byte buffer to inspect
 * @param amount
 *            the number of bytes needed
 * @return true if at least amount bytes remain in bb
 */
private boolean canReadAmount(ByteBuffer bb, int amount) {
    return bb.remaining() >= amount;
}
/**
 * Get a single datagram from a particular time location, given in the timeline's sampling rate.
 *
 * @param targetTimeInSamples
 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
 *
 * @return the datagram starting at or overlapping the given time, or null if end-of-file was encountered
 * @throws IOException
 *             if no byte buffer could be obtained for the given time
 * @throws BufferUnderflowException
 *             if no byte buffer could be obtained for the given time
 */
public Datagram getDatagram(long targetTimeInSamples) throws IOException {
    Pair<ByteBuffer, Long> p = getByteBufferAtTime(targetTimeInSamples);
    return getNextDatagram(p.getFirst());
}
/**
 * Fetch the single datagram located at a given time, where the time is expressed at an arbitrary sample rate.
 *
 * The requested time is first converted with scaleTime(); when the requested rate differs from the timeline's, the
 * duration of the returned datagram is converted back with unScaleTime() so that it is consistent with the request.
 *
 * @param targetTimeInSamples
 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
 * @param reqSampleRate
 *            the sample rate for the requested times.
 *
 * @return the datagram starting at or overlapping the given time, or null if end-of-file was encountered
 * @throws IOException
 *             if no datagram could be created from the data at the given time.
 */
public Datagram getDatagram(long targetTimeInSamples, int reqSampleRate) throws IOException {
    Datagram dat = getDatagram(scaleTime(reqSampleRate, targetTimeInSamples));
    if (dat != null && reqSampleRate != sampleRate) {
        // express the duration in the caller's sample rate
        dat.setDuration(unScaleTime(reqSampleRate, dat.getDuration()));
    }
    return dat;
}
/**
 * Get the datagrams spanning a particular time range, or a given number of datagrams, from a particular time location,
 * and report the time offset between the time request and the actual location of the first returned datagram.
 *
 * Reading stops early, without an exception, when no further datagram can be read; the returned array then holds only
 * the datagrams read so far.
 *
 * @param targetTimeInSamples
 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
 * @param nDatagrams
 *            the number of datagrams to read; values below 1 are treated as 1. Ignored if timeSpanInSamples is positive.
 * @param timeSpanInSamples
 *            the requested time span, in samples. If positive, then datagrams are selected by the given time span.
 * @param reqSampleRate
 *            the sample rate for the requested and returned times. Must be positive.
 * @param returnOffset
 *            an optional output field. If it is not null, it must have length of at least 1, and after the call the first
 *            array field will contain the time difference, in samples, between the time request and the actual beginning
 *            of the first datagram.
 *
 * @return an array with the datagrams read. If less than the requested amount of datagrams can be read, the array only
 *         contains those that could be read.
 * @throws IllegalArgumentException
 *             if targetTimeInSamples is negative, if reqSampleRate is not positive, or if a returnOffset of length 0 is
 *             given.
 * @throws IOException
 *             if no data can be read at the given target time
 */
private Datagram[] getDatagrams(long targetTimeInSamples, int nDatagrams, long timeSpanInSamples, int reqSampleRate,
        long[] returnOffset) throws IllegalArgumentException, IOException {
    /* Check the input arguments before touching any data */
    if (targetTimeInSamples < 0) {
        throw new IllegalArgumentException("Can't get a datagram from a negative time position (given time position was ["
                + targetTimeInSamples + "]).");
    }
    if (reqSampleRate <= 0) {
        throw new IllegalArgumentException("sample rate must be positive, but is " + reqSampleRate);
    }
    if (returnOffset != null && returnOffset.length == 0) {
        throw new IllegalArgumentException("If returnOffset is given, it must have length of at least 1");
    }
    // Get the datagrams by number or by time span?
    boolean byNumber = timeSpanInSamples <= 0;
    if (byNumber && nDatagrams <= 0) {
        nDatagrams = 1; // return at least one datagram
    }
    /*
     * Resample the requested time location, in case the sample times are different between the request and the timeline
     */
    long scaledTargetTime = scaleTime(reqSampleRate, targetTimeInSamples);
    Pair<ByteBuffer, Long> p = getByteBufferAtTime(scaledTargetTime);
    ByteBuffer bb = p.getFirst();
    long time = p.getSecond();
    if (returnOffset != null) { // return offset between target and actual start time
        returnOffset[0] = unScaleTime(reqSampleRate, (scaledTargetTime - time));
    }
    ArrayList<Datagram> datagrams = new ArrayList<Datagram>(byNumber ? nDatagrams : 10);
    // endTime is stop criterion if reading by time scale:
    long endTime = byNumber ? -1 : scaleTime(reqSampleRate, (targetTimeInSamples + timeSpanInSamples));
    int nRead = 0;
    boolean haveReadAll = false;
    while (!haveReadAll) {
        Datagram dat = getNextDatagram(bb);
        if (dat == null) {
            // we may have reached the end of the current byte buffer... try reading another:
            try {
                p = getByteBufferAtTime(time);
            } catch (Exception ioe) {
                // cannot get another byte buffer -- stop reading and return what we have (deliberate best effort).
                break;
            }
            bb = p.getFirst();
            dat = getNextDatagram(bb);
            if (dat == null) { // no, indeed we cannot read any more
                break; // abort, we could not read all
            }
        }
        time += dat.getDuration(); // duration in timeline sample rate
        nRead++; // number of datagrams read
        if (reqSampleRate != sampleRate) {
            dat.setDuration(unScaleTime(reqSampleRate, dat.getDuration())); // convert duration into reqSampleRate
        }
        datagrams.add(dat);
        if (byNumber && nRead == nDatagrams || !byNumber && time >= endTime) {
            haveReadAll = true;
        }
    }
    return datagrams.toArray(new Datagram[0]);
}
// ///////////////////// Convenience methods: variants of getDatagrams() ///////////////////////
// ///////////////////// by time span ////////////////////////////
/**
 * Get the datagrams spanning a particular time range from a particular time location, and return the time offset between
 * the time request and the actual location of the first returned datagram.
 *
 * Delegates to the private general getDatagrams() with a datagram count of -1, so that selection is done by time span
 * whenever timeSpanInSamples is positive; for a non-positive time span, that method reads a single datagram instead.
 *
 * @param targetTimeInSamples
 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
 * @param timeSpanInSamples
 *            the requested time span, in samples. If positive, then datagrams are selected by the given time span.
 * @param reqSampleRate
 *            the sample rate for the requested and returned times. Must be positive.
 * @param returnOffset
 *            an optional output field. If it is not null, it must have length of at least 1, and after the call the first
 *            array field will contain the time difference, in samples, between the time request and the actual beginning
 *            of the first datagram.
 *
 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
 *         can be read are returned.
 * @throws IllegalArgumentException
 *             if targetTimeInSamples is negative, or if a returnOffset of length 0 is given.
 * @throws IOException
 *             if no data can be read at the given target time
 */
public Datagram[] getDatagrams(long targetTimeInSamples, long timeSpanInSamples, int reqSampleRate, long[] returnOffset)
throws IOException {
return getDatagrams(targetTimeInSamples, -1, timeSpanInSamples, reqSampleRate, returnOffset);
}
/**
 * Get the datagrams spanning a particular time range from a particular time location, without reporting the offset of
 * the first datagram. Equivalent to the four-argument time-span variant called with a null returnOffset.
 *
 * @param targetTimeInSamples
 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
 * @param timeSpanInSamples
 *            the requested time span, in samples. If positive, then datagrams are selected by the given time span.
 * @param reqSampleRate
 *            the sample rate for the requested and returned times. Must be positive.
 *
 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
 *         can be read are returned.
 * @throws IllegalArgumentException
 *             if targetTimeInSamples is negative.
 * @throws IOException
 *             if no data can be read at the given target time
 */
public Datagram[] getDatagrams(long targetTimeInSamples, long timeSpanInSamples, int reqSampleRate) throws IOException {
return getDatagrams(targetTimeInSamples, timeSpanInSamples, reqSampleRate, null);
}
/**
 * Get the datagrams spanning a particular time range from a particular time location, with all times expressed in the
 * timeline's own sample rate. Equivalent to the time-span variant called with this timeline's sample rate and a null
 * returnOffset.
 *
 * @param targetTimeInSamples
 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
 * @param timeSpanInSamples
 *            the requested time span, in samples. If positive, then datagrams are selected by the given time span.
 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
 *         can be read are returned.
 * @throws IllegalArgumentException
 *             if targetTimeInSamples is negative.
 * @throws IOException
 *             if no data can be read at the given target time
 */
public Datagram[] getDatagrams(long targetTimeInSamples, long timeSpanInSamples) throws IOException {
return getDatagrams(targetTimeInSamples, timeSpanInSamples, sampleRate, null);
}
// ///////////////////// by number of datagrams ////////////////////////////
/**
 * Get a given number of datagrams from a particular time location, and return the time offset between the time request
 * and the actual location of the first returned datagram.
 *
 * Delegates to the private general getDatagrams() with a time span of -1, so that selection is done by number.
 *
 * @param targetTimeInSamples
 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
 * @param number
 *            the number of datagrams to read. Values less than or equal to 0 are treated as a request for one datagram.
 * @param reqSampleRate
 *            the sample rate for the requested and returned times. Must be positive.
 * @param returnOffset
 *            an optional output field. If it is not null, it must have length of at least 1, and after the call the first
 *            array field will contain the time difference, in samples, between the time request and the actual beginning
 *            of the first datagram.
 *
 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
 *         can be read are returned.
 * @throws IllegalArgumentException
 *             if targetTimeInSamples is negative, or if a returnOffset of length 0 is given.
 * @throws IOException
 *             if no data can be read at the given target time
 */
public Datagram[] getDatagrams(long targetTimeInSamples, int number, int reqSampleRate, long[] returnOffset)
throws IOException {
return getDatagrams(targetTimeInSamples, number, -1, reqSampleRate, returnOffset);
}
// ///////////////////// by unit ////////////////////////////
/**
 * Get the datagrams spanning a particular unit, and return the time offset between the unit request and the actual
 * location of the first returned datagram.
 *
 * The unit's startTime is used as the requested position and its duration, converted to long, as the requested time
 * span; the call is forwarded to the time-span variant of getDatagrams().
 *
 * @param unit
 *            The requested speech unit, containing its own position and duration. Must not be null.
 * @param reqSampleRate
 *            the sample rate for the requested and returned times. Must be positive.
 * @param returnOffset
 *            an optional output field. If it is not null, it must have length of at least 1, and after the call the first
 *            array field will contain the time difference, in samples, between the time request and the actual beginning
 *            of the first datagram.
 *
 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
 *         can be read are returned.
 * @throws IllegalArgumentException
 *             if the unit's start time is negative, or if a returnOffset of length 0 is given.
 * @throws IOException
 *             if no data can be read at the given target time
 */
public Datagram[] getDatagrams(Unit unit, int reqSampleRate, long[] returnOffset) throws IOException {
return getDatagrams(unit.startTime, (long) (unit.duration), reqSampleRate, returnOffset);
}
/**
 * Get the datagrams spanning a particular unit, without reporting the offset of the first datagram. Equivalent to the
 * three-argument unit variant called with a null returnOffset.
 *
 * @param unit
 *            The requested speech unit, containing its own position and duration. Must not be null.
 * @param reqSampleRate
 *            the sample rate for the requested and returned times. Must be positive.
 *
 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
 *         can be read are returned.
 * @throws IllegalArgumentException
 *             if the unit's start time is negative.
 * @throws IOException
 *             if no data can be read at the given target time
 */
public Datagram[] getDatagrams(Unit unit, int reqSampleRate) throws IOException {
return getDatagrams(unit, reqSampleRate, null);
}
/*****************************************/
/* HELPER CLASSES */
/*****************************************/
/**
 * Simple helper class to read the index part of a timeline file. The index points to datagrams at or before a certain point
 * in time.
 *
 * Note: If no datagram starts at the exact index time, it makes sense to point to the previous datagram rather than the
 * following one.
 *
 * If the index stored the location of the datagram coming just AFTER the index position, a time request could fall between
 * the index position and that datagram. Reaching the requested time would then require hopping backwards in time, which is
 * impossible because the datagrams form a singly linked list.
 *
 * By registering the location of the datagram coming BEFORE or ON the index position, any time request finds an index field
 * whose datagram is not later than the request. Thus, forward hopping is always possible and the requested time can always
 * be reached.
 *
 * Since one long datagram may cover several index positions, consecutive index fields may carry identical pointers; the
 * pointers are therefore required to be rising, but not strictly so.
 *
 * @author sacha
 */
public static class Index {
    /** Size in bytes of one serialized index field: a byte pointer and a time pointer, 8 bytes each. */
    private static final int BYTES_PER_FIELD = 16;

    /** Size in bytes of the obsolete "last datagram" memory which trails the index fields in the file format. */
    private static final int TRAILER_BYTES = 16;

    /** Largest number of index fields whose serialized form, including the trailer, still fits into one byte array. */
    private static final int MAX_BULK_FIELDS = (Integer.MAX_VALUE - TRAILER_BYTES) / BYTES_PER_FIELD;

    /** The fixed time interval (in samples) separating two index fields. */
    private int idxInterval = 0;

    /**
     * For index field i, bytePtrs[i] is the position in bytes, from the beginning of the file, of the datagram coming on or
     * just before that index field.
     */
    private long[] bytePtrs;

    /**
     * For index field i, timePtrs[i] is the time position in samples of the datagram coming on or just before that index
     * field.
     */
    private long[] timePtrs;

    /****************/
    /* CONSTRUCTORS */
    /****************/

    /**
     * Construct an index from a data input stream or random access file. Fundamental guarantee: Once created, the index is
     * guaranteed to contain a positive index interval and monotonously rising byte and time pointers.
     *
     * @param raf
     *            data input from which to read the index. Must not be null, and read position must be at start of index.
     * @throws IOException
     *             if there is a problem reading.
     * @throws MaryConfigurationException
     *             if the index is not well-formed.
     */
    private Index(DataInput raf) throws IOException, MaryConfigurationException {
        assert raf != null : "null argument";
        load(raf);
    }

    /**
     * Construct an index from a byte buffer. Fundamental guarantee: Once created, the index is guaranteed to contain a
     * positive index interval and monotonously rising byte and time pointers.
     *
     * @param bb
     *            byte buffer from which to read the index. Must not be null, and read position must be at start of index.
     * @throws BufferUnderflowException
     *             if there is a problem reading.
     * @throws MaryConfigurationException
     *             if the index is not well-formed.
     */
    private Index(ByteBuffer bb) throws BufferUnderflowException, MaryConfigurationException {
        assert bb != null : "null argument";
        load(bb);
    }

    /**
     * Constructor which builds a new index with a specific index interval and a given list of index fields. Fundamental
     * guarantee: Once created, the index is guaranteed to contain a positive index interval and monotonously rising byte
     * and time pointers.
     *
     * @param idxInterval
     *            the index interval, in samples. Must be a positive number.
     * @param indexFields
     *            the actual index data. Must not be null.
     * @throws IllegalArgumentException
     *             if the index interval is not positive, or if a pointer in the index data is smaller than its predecessor.
     * @throws NullPointerException
     *             if indexFields are null.
     */
    public Index(int idxInterval, Vector<IdxField> indexFields) throws IllegalArgumentException, NullPointerException {
        if (idxInterval <= 0) {
            throw new IllegalArgumentException("got index interval <= 0");
        }
        if (indexFields == null) {
            throw new NullPointerException("null argument");
        }
        final int numIdx = indexFields.size();
        final long[] bytes = new long[numIdx];
        final long[] times = new long[numIdx];
        for (int i = 0; i < numIdx; i++) {
            final IdxField f = indexFields.get(i);
            bytes[i] = f.bytePtr;
            times[i] = f.timePtr;
            if (!isRisingAt(bytes, times, i)) {
                throw new IllegalArgumentException("Pointer positions in index fields must be monotonously rising");
            }
        }
        this.idxInterval = idxInterval;
        this.bytePtrs = bytes;
        this.timePtrs = times;
    }

    /*****************/
    /* I/O METHODS */
    /*****************/

    /**
     * Method which loads an index from a data input (random access file or data input stream). The serialized index is
     * fetched with one bulk read and decoded from memory. The state of this object is only replaced once the complete
     * index has been read and validated; if an exception is thrown, the previous state is kept.
     *
     * @param rafIn
     *            data input from which to read the index. Must not be null, and read position must be at start of index.
     * @throws IOException
     *             if there is a problem reading.
     * @throws MaryConfigurationException
     *             if the index is not well-formed: non-positive index interval, negative or implausibly large number of
     *             index fields, or pointers which are not monotonously rising.
     */
    public void load(DataInput rafIn) throws IOException, MaryConfigurationException {
        final int numIdx = rafIn.readInt();
        final int interval = rafIn.readInt();
        checkHeader(numIdx, interval);
        if (numIdx > MAX_BULK_FIELDS) {
            // The size computation below would overflow an int.
            throw new MaryConfigurationException("read too large number of index fields (" + numIdx
                    + ") -- file seems corrupt");
        }
        // 2*8 bytes for each index field + 16 for the obsolete prevBytePos and prevTimePos
        final byte[] data = new byte[BYTES_PER_FIELD * numIdx + TRAILER_BYTES];
        rafIn.readFully(data);
        final long[] bytes = new long[numIdx];
        final long[] times = new long[numIdx];
        // A freshly wrapped buffer is big-endian, which matches the encoding used by DataInput.readLong().
        decodeFields(ByteBuffer.wrap(data), bytes, times);
        // The obsolete "last datagram" memory was consumed by the bulk read above and is ignored.
        this.idxInterval = interval;
        this.bytePtrs = bytes;
        this.timePtrs = times;
    }

    /**
     * Method which loads an index from a byte buffer. The state of this object is only replaced once the complete index
     * has been read and validated; if an exception is thrown, the previous state is kept.
     *
     * @param bb
     *            byte buffer from which to read the index. Must not be null, and read position must be at start of index.
     * @throws BufferUnderflowException
     *             if the buffer does not contain enough bytes for the announced index.
     * @throws MaryConfigurationException
     *             if the index is not well-formed: non-positive index interval, negative number of index fields, or
     *             pointers which are not monotonously rising.
     */
    private void load(ByteBuffer bb) throws BufferUnderflowException, MaryConfigurationException {
        final int numIdx = bb.getInt();
        final int interval = bb.getInt();
        checkHeader(numIdx, interval);
        // Detect a truncated buffer or a corrupt field count before allocating the pointer arrays.
        final long bytesNeeded = (long) BYTES_PER_FIELD * numIdx + TRAILER_BYTES;
        if (bb.remaining() < bytesNeeded) {
            throw new BufferUnderflowException();
        }
        final long[] bytes = new long[numIdx];
        final long[] times = new long[numIdx];
        decodeFields(bb, bytes, times);
        /* Obsolete: Skip the "last datagram" memory */
        /* prevBytePos = */bb.getLong();
        /* prevTimePos = */bb.getLong();
        this.idxInterval = interval;
        this.bytePtrs = bytes;
        this.timePtrs = times;
    }

    /**
     * Validates the two header values of a serialized index.
     *
     * @param numIdx
     *            the number of index fields announced by the header.
     * @param interval
     *            the index interval announced by the header.
     * @throws MaryConfigurationException
     *             if the interval is not positive or the number of fields is negative.
     */
    private static void checkHeader(int numIdx, int interval) throws MaryConfigurationException {
        if (interval <= 0) {
            throw new MaryConfigurationException("read non-positive index interval -- file seems corrupt");
        }
        if (numIdx < 0) {
            throw new MaryConfigurationException("read negative number of index fields -- file seems corrupt");
        }
    }

    /**
     * Reads one (byte pointer, time pointer) pair per array slot from the buffer, verifying that the pointers never fall
     * back.
     *
     * @param src
     *            buffer positioned at the first index field.
     * @param bytes
     *            receives the byte pointers.
     * @param times
     *            receives the time pointers; must have the same length as bytes.
     * @throws MaryConfigurationException
     *             if a pointer is smaller than its predecessor.
     */
    private static void decodeFields(ByteBuffer src, long[] bytes, long[] times) throws MaryConfigurationException {
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = src.getLong();
            times[i] = src.getLong();
            if (!isRisingAt(bytes, times, i)) {
                throw new MaryConfigurationException(
                        "File seems corrupt: Pointer positions in index fields are not monotonously rising");
            }
        }
    }

    /**
     * Tells whether the pointers at position i are not smaller than the ones at position i-1. Equal pointers are accepted.
     *
     * @param bytes
     *            the byte pointers.
     * @param times
     *            the time pointers.
     * @param i
     *            the position to check.
     * @return true if i is 0, or if neither pointer at position i is smaller than its predecessor.
     */
    private static boolean isRisingAt(long[] bytes, long[] times, int i) {
        return i == 0 || (bytes[i] >= bytes[i - 1] && times[i] >= times[i - 1]);
    }

    /**
     * Method which writes an index to a RandomAccessFile, at the current position of the file.
     *
     * @param rafIn
     *            the file to write to.
     * @throws IOException
     *             if writing fails.
     * @return the number of bytes written.
     */
    public long dump(RandomAccessFile rafIn) throws IOException {
        final int numIdx = getNumIdx();
        rafIn.writeInt(numIdx);
        rafIn.writeInt(idxInterval);
        for (int i = 0; i < numIdx; i++) {
            rafIn.writeLong(bytePtrs[i]);
            rafIn.writeLong(timePtrs[i]);
        }
        // Obsolete, kept only for file format compatibility:
        // the "last datagram" memory is written as an additional, zeroed field.
        rafIn.writeLong(0L);
        rafIn.writeLong(0L);
        // Two header ints, then the index fields, then the trailer.
        return 8L + (long) BYTES_PER_FIELD * numIdx + TRAILER_BYTES;
    }

    /**
     * Method which writes an index to stdout
     */
    public void print() {
        System.out.println("");
        int numIdx = getNumIdx();
        System.out.println("interval = " + idxInterval);
        System.out.println("numIdx = " + numIdx);
        for (int i = 0; i < numIdx; i++) {
            System.out.println("( " + bytePtrs[i] + " , " + timePtrs[i] + " )");
        }
        System.out.println(" ");
    }

    /*****************/
    /* ACCESSORS */
    /*****************/

    /**
     * The number of index entries.
     *
     * @return bytePtrs.length
     */
    public int getNumIdx() {
        return bytePtrs.length;
    }

    /**
     * The interval, in samples, between two index entries.
     *
     * @return idxInterval
     */
    public int getIdxInterval() {
        return idxInterval;
    }

    /**
     * Returns a copy of the index field with the given number.
     *
     * @param i
     *            the number of the requested index field, from 0 to getNumIdx()-1.
     * @return a new index field holding the byte and time pointers of field i.
     * @throws IndexOutOfBoundsException
     *             if i is negative or not smaller than the number of index fields.
     */
    public IdxField getIdxField(int i) {
        if (i < 0) {
            throw new IndexOutOfBoundsException("Negative index.");
        }
        if (i >= bytePtrs.length) {
            throw new IndexOutOfBoundsException("Requested index no. " + i + ", but highest is "
                    + (bytePtrs.length - 1));
        }
        return new IdxField(bytePtrs[i], timePtrs[i]);
    }

    /*****************/
    /* OTHER METHODS */
    /*****************/

    /**
     * Returns the index field that comes immediately before or straight on the requested time. Times beyond the range
     * covered by the index are answered with the last index field.
     *
     * @param timePosition
     *            the non-negative time
     * @return an index field representing the index position just before or straight on the requested time.
     * @throws IllegalArgumentException
     *             if the given timePosition is negative
     * @throws IndexOutOfBoundsException
     *             if the index contains no fields at all.
     */
    public IdxField getIdxFieldBefore(long timePosition) {
        if (timePosition < 0) {
            throw new IllegalArgumentException("Negative time given");
        }
        // Integer division between two non-negative values, implying a flooring operation on the decimal result.
        final long fieldNo = timePosition / idxInterval;
        // Clamp BEFORE narrowing to int: a large time must select the last field instead of wrapping around.
        final int index = (int) Math.min(fieldNo, bytePtrs.length - 1L);
        return new IdxField(bytePtrs[index], timePtrs[index]);
    }
}
/**
 * Plain value holder for one field of a timeline index: a byte pointer paired with a time pointer. Both values are
 * exposed as public, mutable fields.
 *
 * @author sacha
 *
 */
public static class IdxField {
    // TODO: rethink if these should be public fields or if we should add accessors.
    /** The byte pointer of this index field. */
    public long bytePtr;

    /** The time pointer of this index field. */
    public long timePtr;

    /**
     * Creates an index field with both pointers set to zero.
     */
    public IdxField() {
        this(0L, 0L);
    }

    /**
     * Creates an index field with the given pointers.
     *
     * @param setBytePtr
     *            the initial value of the byte pointer.
     * @param setTimePtr
     *            the initial value of the time pointer.
     */
    public IdxField(long setBytePtr, long setTimePtr) {
        this.bytePtr = setBytePtr;
        this.timePtr = setTimePtr;
    }
}
/**
 *
 * Simple helper class wrapping the processing header of a timeline, which is held as a single string.
 *
 * @author sacha
 *
 */
public static class ProcHeader {
    /** The text of the processing header; assigned by every constructor. */
    private String procHeader = null;

    /****************/
    /* CONSTRUCTORS */
    /****************/

    /**
     * Constructor which loads the procHeader from a RandomAccessFile. Fundamental guarantee: after creation, the ProcHeader
     * object has a non-null (but possibly empty) string content.
     *
     * @param raf
     *            file from which to load the processing header. Must not be null and must be positioned so that a
     *            processing header can be read from it.
     *
     * @throws IOException
     *             if no proc header can be read at the current position.
     */
    private ProcHeader(RandomAccessFile raf) throws IOException {
        loadProcHeader(raf);
    }

    /**
     * Constructor which loads the procHeader from a byte buffer. Fundamental guarantee: after creation, the ProcHeader
     * object has a non-null (but possibly empty) string content.
     *
     * @param bb
     *            byte buffer from which to load the processing header. Must not be null and must be positioned so that a
     *            processing header can be read from it.
     *
     * @throws BufferUnderflowException
     *             if no proc header can be read at the current position.
     * @throws UTFDataFormatException
     *             if no proc header can be read at the current position.
     */
    private ProcHeader(ByteBuffer bb) throws BufferUnderflowException, UTFDataFormatException {
        loadProcHeader(bb);
    }

    /**
     * Constructor which makes the procHeader from a String. Fundamental guarantee: after creation, the ProcHeader object has
     * a non-null (but possibly empty) string content.
     *
     * @param procStr
     *            a non-null string representing the contents of the ProcHeader.
     * @throws NullPointerException
     *             if procStr is null
     */
    public ProcHeader(String procStr) {
        if (procStr == null) {
            throw new NullPointerException("null argument");
        }
        this.procHeader = procStr;
    }

    /*****************/
    /* I/O METHODS */
    /*****************/

    /**
     * Method which loads the header from a RandomAccessFile, using the file's readUTF() method.
     *
     * @param rafIn
     *            file to read from, must not be null.
     * @throws IOException
     *             if no proc header can be read at the current position.
     */
    private void loadProcHeader(RandomAccessFile rafIn) throws IOException {
        assert rafIn != null : "null argument";
        this.procHeader = rafIn.readUTF();
        assert this.procHeader != null;
    }

    /**
     * Method which loads the header from a byte buffer, using StreamUtils.readUTF().
     *
     * @param bb
     *            byte buffer to read from, must not be null.
     * @throws BufferUnderflowException
     *             if no proc header can be read at the current position.
     * @throws UTFDataFormatException
     *             if no proc header can be read at the current position.
     */
    private void loadProcHeader(ByteBuffer bb) throws BufferUnderflowException, UTFDataFormatException {
        this.procHeader = StreamUtils.readUTF(bb);
        assert this.procHeader != null;
    }

    /**
     * Method which writes the proc header to a RandomAccessFile, using the file's writeUTF() method.
     *
     * @param rafIn
     *            the file to write to, at its current position.
     * @throws IOException
     *             if writing fails.
     * @return the number of written bytes, measured as the distance travelled by the file pointer.
     */
    public long dump(RandomAccessFile rafIn) throws IOException {
        final long startPos = rafIn.getFilePointer();
        rafIn.writeUTF(procHeader);
        return rafIn.getFilePointer() - startPos;
    }

    /****************/
    /* ACCESSORS */
    /****************/

    /**
     * Return the string length of the proc header.
     *
     * @return a non-negative int representing the string length of the proc header.
     */
    public int getCharSize() {
        assert procHeader != null;
        return procHeader.length();
    }

    /**
     * Get the string content of the proc header.
     *
     * @return a non-null string representing the string content of the proc header.
     */
    public String getString() {
        assert procHeader != null;
        return procHeader;
    }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy