All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.unitselection.data.TimelineReader Maven / Gradle / Ivy

The newest version!
/**
 * Portions Copyright 2006 DFKI GmbH.
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute, 
 * Carnegie Mellon University.
 * All Rights Reserved.  Use is subject to license terms.
 * 
 * Permission is hereby granted, free of charge, to use and distribute
 * this software and its documentation without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of this work, and to
 * permit persons to whom this work is furnished to do so, subject to
 * the following conditions:
 * 
 * 1. The code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 * 2. Any modifications must be clearly marked as such.
 * 3. Original authors' names are not deleted.
 * 4. The authors' names are not used to endorse or promote products
 *    derived from this software without specific prior written
 *    permission.
 *
 * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
 * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 * THIS SOFTWARE.
 */
package marytts.unitselection.data;

import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.io.UTFDataFormatException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Vector;

import marytts.exceptions.MaryConfigurationException;
import marytts.util.MaryUtils;
import marytts.util.Pair;
import marytts.util.data.Datagram;
import marytts.util.data.MaryHeader;
import marytts.util.io.StreamUtils;

/**
 * The TimelineReader class provides an interface to read regularly or variably spaced datagrams from a Timeline data file in Mary
 * format.
 * 
 * @author sacha, marc
 *
 */
public class TimelineReader {
	// Both headers are parsed by load() from the beginning of the timeline file.
	protected MaryHeader maryHdr = null; // The standard Mary header
	protected ProcHeader procHdr = null; // The processing info header

	// Built by load() from the bytes between timeIdxBytePos and the end of the file.
	protected Index idx = null; // A global time index for the variable-sized datagrams

	/* Some specific header fields: */
	protected int sampleRate = 0; // load() rejects files where this is <= 0
	protected long numDatagrams = 0; // load() rejects files where this is negative
	/**
	 * The total duration of the timeline data, in samples. This is only computed upon request.
	 * The value -1 means "not computed yet" (see getTotalDuration()).
	 */
	protected long totalDuration = -1;

	// Byte offsets within the file, read by load() as longs from the header and narrowed to int.
	protected int datagramsBytePos = 0; // start of the datagram zone
	protected int timeIdxBytePos = 0; // start of the time index; load() requires it to be >= datagramsBytePos

	// exactly one of the two following variables will be non-null after load():
	private MappedByteBuffer mappedBB = null; // memory map of the datagram zone, if mapping was requested and succeeded
	private FileChannel fileChannel = null; // channel kept open for piecewise reading otherwise

	/****************/
	/* CONSTRUCTORS */
	/****************/

	/**
	 * Create a reader for the timeline stored in the given file, with memory mapping enabled.
	 * 
	 * This simply delegates to {@link #TimelineReader(String, boolean)} with {@code tryMemoryMapping} set to true. The aim is
	 * the fundamental guarantee: if an instance of this class exists, it is usable.
	 * 
	 * @param fileName
	 *            The file to read the timeline from. Must be non-null and point to a valid timeline file.
	 * @throws NullPointerException
	 *             if fileName is null
	 * @throws MaryConfigurationException
	 *             if no timeline reader can be instantiated from fileName
	 */
	public TimelineReader(String fileName) throws MaryConfigurationException {
		// memory mapping is attempted by default
		this(fileName, true);
	}

	/**
	 * Create a reader for the timeline stored in the given file.
	 * 
	 * The aim is the fundamental guarantee: if an instance of this class exists, it is usable. Any exception raised while
	 * loading is therefore wrapped into a MaryConfigurationException, with the original exception as its cause.
	 * 
	 * @param fileName
	 *            The file to read the timeline from. Must be non-null and point to a valid timeline file.
	 * @param tryMemoryMapping
	 *            if true, will attempt to read audio data via a memory map, and fall back to piecewise reading. If false, will
	 *            immediately go for piecewise reading using a RandomAccessFile.
	 * @throws NullPointerException
	 *             if fileName is null
	 * @throws MaryConfigurationException
	 *             if no timeline reader can be instantiated from fileName
	 */
	public TimelineReader(String fileName, boolean tryMemoryMapping) throws MaryConfigurationException {
		if (null == fileName) {
			throw new NullPointerException("Filename is null");
		}
		try {
			load(fileName, tryMemoryMapping);
		} catch (Exception cause) {
			String message = "Cannot load timeline file from " + fileName;
			throw new MaryConfigurationException(message, cause);
		}
	}

	/**
	 * Constructor for subclasses only: creates a TimelineReader without calling {@link #load(String)}. A subclass using this
	 * constructor is itself responsible for ensuring the fundamental guarantee (an existing instance is usable).
	 */
	protected TimelineReader() {
		// intentionally empty: nothing is loaded here
	}

	/**
	 * Load a timeline from a file, attempting memory mapping. Delegates to {@link #load(String, boolean)} with
	 * {@code tryMemoryMapping} set to true.
	 * 
	 * @param fileName
	 *            The file to read the timeline from. Must be non-null and point to a valid timeline file.
	 * 
	 * @throws IOException
	 *             if a problem occurs during reading
	 * @throws BufferUnderflowException
	 *             if a problem occurs during reading
	 * @throws MaryConfigurationException
	 *             if fileName does not point to a valid timeline file
	 */
	protected void load(String fileName) throws IOException, BufferUnderflowException, MaryConfigurationException,
			NullPointerException {
		final boolean withMemoryMapping = true;
		load(fileName, withMemoryMapping);
	}

	/**
	 * Load a timeline from a file.
	 * 
	 * Reads the Mary header, the processing header, the timeline dimensions and the time index, and then either memory-maps
	 * the datagram zone or keeps the file channel open for piecewise reading. If loading fails for any reason, the underlying
	 * file is closed again before the exception is propagated.
	 * 
	 * @param fileName
	 *            The file to read the timeline from. Must be non-null and point to a valid timeline file.
	 * @param tryMemoryMapping
	 *            if true, try to memory-map the datagram zone and fall back to piecewise reading if mapping fails; if false,
	 *            use piecewise reading right away.
	 * @throws IOException
	 *             if a problem occurs during reading
	 * @throws BufferUnderflowException
	 *             if the file is too short to contain the expected header fields
	 * @throws MaryConfigurationException
	 *             if fileName does not point to a valid timeline file
	 * @throws NullPointerException
	 *             if fileName is null
	 */
	protected void load(String fileName, boolean tryMemoryMapping) throws IOException, BufferUnderflowException,
			MaryConfigurationException, NullPointerException {
		assert fileName != null : "filename is null";

		RandomAccessFile file = new RandomAccessFile(fileName, "r");
		boolean loaded = false;
		try {
			FileChannel fc = file.getChannel();
			// Expect header to be no bigger than 64k bytes
			ByteBuffer headerBB = ByteBuffer.allocate(0x10000);
			// A single read() may deliver fewer bytes than requested: read until the buffer is full or EOF is reached
			while (headerBB.hasRemaining() && fc.read(headerBB) > 0) {
				// keep reading
			}
			headerBB.flip();

			maryHdr = new MaryHeader(headerBB);
			if (maryHdr.getType() != MaryHeader.TIMELINE) {
				throw new MaryConfigurationException("File is not a valid timeline file.");
			}
			/* Load the processing info header */
			procHdr = new ProcHeader(headerBB);

			/* Load the timeline dimensions */
			sampleRate = headerBB.getInt();
			numDatagrams = headerBB.getLong();
			if (sampleRate <= 0 || numDatagrams < 0) {
				throw new MaryConfigurationException("Illegal values in timeline file.");
			}

			/* Load the positions of the various subsequent components */
			long datagramsPos = headerBB.getLong();
			long timeIdxPos = headerBB.getLong();
			// The positions are kept as int: reject values that would be corrupted by the narrowing conversion
			if (datagramsPos < 0 || datagramsPos > Integer.MAX_VALUE || timeIdxPos < 0 || timeIdxPos > Integer.MAX_VALUE) {
				throw new MaryConfigurationException("File seems corrupt or is too large: byte positions out of range");
			}
			datagramsBytePos = (int) datagramsPos;
			timeIdxBytePos = (int) timeIdxPos;
			if (timeIdxBytePos < datagramsBytePos) {
				throw new MaryConfigurationException("File seems corrupt: index is expected after data, not before");
			}

			/* Go fetch the time index at the end of the file */
			long indexSize = fc.size() - timeIdxBytePos;
			if (indexSize < 0 || indexSize > Integer.MAX_VALUE) {
				throw new MaryConfigurationException("File seems corrupt: invalid size of time index");
			}
			fc.position(timeIdxBytePos);
			ByteBuffer indexBB = ByteBuffer.allocate((int) indexSize);
			while (indexBB.hasRemaining() && fc.read(indexBB) > 0) {
				// keep reading
			}
			indexBB.flip();
			idx = new Index(indexBB);

			if (tryMemoryMapping) {
				// Try if we can use a mapped byte buffer:
				try {
					mappedBB = fc.map(FileChannel.MapMode.READ_ONLY, datagramsBytePos, timeIdxBytePos - datagramsBytePos);
					file.close(); // if map() succeeded, we don't need the file anymore.
				} catch (IOException ome) {
					MaryUtils.getLogger("Timeline").warn(
							"Cannot use memory mapping for timeline file '" + fileName + "' -- falling back to piecewise reading");
				}
			}
			if (!tryMemoryMapping || mappedBB == null) { // use piecewise reading
				fileChannel = fc;
				assert fileChannel != null;
				// and leave file open
			}

			// postconditions:
			assert idx != null;
			assert procHdr != null;
			assert fileChannel == null && mappedBB != null || fileChannel != null && mappedBB == null;
			loaded = true;
		} finally {
			if (!loaded) {
				// Loading failed: do not leak the file handle.
				try {
					file.close();
				} catch (IOException ignored) {
					// the exception that made loading fail is more informative than a failure to close
				}
			}
		}
	}

	/**
	 * Provide the processing header's content in String form, as returned by the processing header's getString() method.
	 * 
	 * @return a non-null string representing the proc header.
	 */
	public String getProcHeaderContents() {
		String contents = procHdr.getString();
		return contents;
	}

	/**
	 * Tell how many datagrams this timeline contains, as stored in the numDatagrams field.
	 * 
	 * @return the (non-negative) number of datagrams, as a long.
	 */
	public long getNumDatagrams() {
		final long count = numDatagrams;
		assert count >= 0;
		return count;
	}

	/**
	 * Tell where the datagram zone starts in the original file.
	 * 
	 * @return the byte position of the datagram zone (the int field widened to long).
	 */
	protected long getDatagramsBytePos() {
		long position = datagramsBytePos;
		return position;
	}

	/**
	 * Tell the sample rate of this timeline.
	 * 
	 * @return the sample rate as a positive integer.
	 */
	public int getSampleRate() {
		final int rate = sampleRate;
		assert rate > 0;
		return rate;
	}

	/**
	 * Provide the accumulated duration of all datagrams in this timeline. The value is computed on first request (while the
	 * cached value is still -1) by {@link #computeTotalDuration()} and reused afterwards. Implementation note: computing it is
	 * an expensive operation that should not be used in production.
	 * 
	 * @return a non-negative long representing the accumulated duration of all datagrams.
	 * @throws MaryConfigurationException
	 *             if the duration cannot be obtained.
	 */
	public long getTotalDuration() throws MaryConfigurationException {
		boolean notComputedYet = (totalDuration == -1);
		if (notComputedYet) {
			computeTotalDuration();
		}
		assert totalDuration >= 0;
		return totalDuration;
	}

	/**
	 * Compute the total duration of a timeline and store it in the totalDuration field. This is an expensive method, since it
	 * goes through all datagrams to compute this duration. It should not normally be used in production.
	 * 
	 * A timeline that declares zero datagrams has a total duration of 0; no data is read in that case.
	 * 
	 * @throws MaryConfigurationException
	 *             if the duration could not be computed, i.e. if reading fails or fewer than numDatagrams datagrams can be
	 *             read.
	 */
	protected void computeTotalDuration() throws MaryConfigurationException {
		if (numDatagrams == 0) {
			// nothing to read: the accumulated duration of no datagrams is zero
			totalDuration = 0;
			return;
		}
		long time = 0; // accumulated duration, in timeline samples
		long nRead = 0; // number of datagrams read so far
		try {
			Pair<ByteBuffer, Long> p = getByteBufferAtTime(0);
			ByteBuffer bb = p.getFirst();
			assert p.getSecond() == 0;
			while (nRead < numDatagrams) {
				Datagram dat = getNextDatagram(bb);
				if (dat == null) {
					// we may have reached the end of the current byte buffer... try reading another:
					p = getByteBufferAtTime(time);
					bb = p.getFirst();
					assert p.getSecond() == time;
					dat = getNextDatagram(bb);
					if (dat == null) { // no, indeed we cannot read any more
						break; // abort, we could not read all
					}
				}
				time += dat.getDuration(); // duration in timeline sample rate
				nRead++;
			}
		} catch (Exception e) {
			throw new MaryConfigurationException("Could not compute total duration", e);
		}
		if (nRead != numDatagrams) {
			throw new MaryConfigurationException("Could not read all datagrams to compute total duration");
		}
		totalDuration = time;
	}

	/**
	 * Give access to the time index of this timeline.
	 * 
	 * @return the non-null index object.
	 */
	public Index getIndex() {
		final Index index = idx;
		assert index != null;
		return index;
	}

	// Helper methods

	/**
	 * Scales a discrete time to the timeline's sample rate.
	 * 
	 * If both sample rates are equal, the time is returned unchanged. Otherwise the result is
	 * {@code round(reqSampleRate * targetTimeInSamples / sampleRate)}, computed in double precision.
	 * 
	 * NOTE(review): the factor applied here is reqSampleRate / sampleRate -- confirm against the callers that this is the
	 * intended direction of the conversion.
	 * 
	 * @param reqSampleRate
	 *            the externally given sample rate.
	 * @param targetTimeInSamples
	 *            a discrete time, with respect to the externally given sample rate.
	 * @return a discrete time, in samples with respect to the timeline's sample rate.
	 */
	protected long scaleTime(int reqSampleRate, long targetTimeInSamples) {
		if (reqSampleRate != sampleRate) {
			double scaled = (double) reqSampleRate * (double) targetTimeInSamples / (double) sampleRate;
			return Math.round(scaled);
		}
		// same sample rate: nothing to convert
		return targetTimeInSamples;
	}

	/**
	 * Unscales a discrete time from the timeline's sample rate.
	 * 
	 * If both sample rates are equal, the time is returned unchanged. Otherwise the result is
	 * {@code round(sampleRate * timelineTimeInSamples / reqSampleRate)}, computed in double precision.
	 * 
	 * NOTE(review): the factor applied here is sampleRate / reqSampleRate -- confirm against the callers that this is the
	 * intended direction of the conversion.
	 * 
	 * @param reqSampleRate
	 *            the externally given sample rate.
	 * @param timelineTimeInSamples
	 *            a discrete time, with respect to the timeline sample rate.
	 * @return a discrete time, in samples with respect to the externally given sample rate.
	 */
	protected long unScaleTime(int reqSampleRate, long timelineTimeInSamples) {
		if (reqSampleRate != sampleRate) {
			double unscaled = (double) sampleRate * (double) timelineTimeInSamples / (double) reqSampleRate;
			return Math.round(unscaled);
		}
		// same sample rate: nothing to convert
		return timelineTimeInSamples;
	}

	/******************/
	/* DATA ACCESSORS */
	/******************/

	/**
	 * Skip the upcoming datagram at the current position of the byte buffer.
	 * 
	 * The datagram header (a long duration followed by an int size) is read from the buffer, then the buffer position is
	 * advanced by the size read.
	 * 
	 * @param bb
	 *            the byte buffer, positioned at the start of a datagram header
	 * @return the duration of the datagram we skipped
	 * @throws IOException
	 *             if the datagram size read from the header is negative, or if the datagram is not fully contained in the
	 *             byte buffer
	 * @throws BufferUnderflowException
	 *             if the byte buffer does not contain a complete datagram header at its current position
	 */
	protected long skipNextDatagram(ByteBuffer bb) throws IOException {
		long datagramDuration = bb.getLong();
		int datagramSize = bb.getInt();
		if (datagramSize < 0) {
			// a negative size would move the position backwards (or to an illegal value)
			throw new IOException("cannot skip datagram: illegal datagram size " + datagramSize);
		}
		// compare against remaining() rather than position()+size, which could overflow int
		if (datagramSize > bb.remaining()) {
			throw new IOException("cannot skip datagram: it is not fully contained in byte buffer");
		}
		bb.position(bb.position() + datagramSize);
		return datagramDuration;
	}

	/**
	 * Read the upcoming datagram from the given byte buffer and return it. Subclasses should override this method to create
	 * subclasses of Datagram.
	 * 
	 * Nothing is read if the buffer has no bytes left; an IOException raised while constructing the datagram is also
	 * reported as null.
	 * 
	 * @param bb
	 *            the timeline byte buffer to read from
	 * 
	 * @return the current datagram, or null if EOF was encountered
	 */
	protected Datagram getNextDatagram(ByteBuffer bb) {
		assert bb != null;
		Datagram next = null;
		// Only attempt to read if the end of the datagram zone has not been reached
		if (bb.hasRemaining()) {
			try {
				next = new Datagram(bb);
			} catch (IOException ioe) {
				next = null;
			}
		}
		return next;
	}

	/**
	 * Hop the datagrams in the given byte buffer until the one which begins at or contains the desired time (time is in samples;
	 * the sample rate is assumed to be that of the timeline).
	 * 
	 * @param bb
	 *            the timeline byte buffer to use. Must not be null.
	 * @param currentTimeInSamples
	 *            the time position corresponding to the current position of the byte buffer. Must not be negative.
	 * @param targetTimeInSamples
	 *            the time location to reach. Must not be less than currentTimeInSamples
	 * 
	 * @return the actual time at which we end up after hopping. This is less than or equal to targetTimeInSamples, never greater
	 *         than it.
	 * @throws IOException
	 *             if there is a problem skipping the datagrams
	 * @throws IllegalArgumentException
	 *             if targetTimeInSamples is less than currentTimeInSamples
	 */
	protected long hopToTime(ByteBuffer bb, long currentTimeInSamples, long targetTimeInSamples) throws IOException,
			IllegalArgumentException {
		assert bb != null;
		assert currentTimeInSamples >= 0;
		// Enforce the documented contract even when assertions are disabled: without this check, the method
		// would silently return a time greater than the target.
		if (targetTimeInSamples < currentTimeInSamples) {
			throw new IllegalArgumentException("Cannot hop back from time " + currentTimeInSamples + " to time "
					+ targetTimeInSamples);
		}

		/*
		 * If the current time position is the requested time do nothing, you are already at the right position
		 */
		if (currentTimeInSamples == targetTimeInSamples) {
			return currentTimeInSamples;
		}
		/* Else hop: */
		int byteBefore = bb.position();
		long timeBefore = currentTimeInSamples;
		/* Hop until the datagram which comes just after the requested time */
		while (currentTimeInSamples <= targetTimeInSamples) { // Stop after the requested time, we will step back
			// to the correct time in case of equality
			timeBefore = currentTimeInSamples;
			byteBefore = bb.position();
			currentTimeInSamples += skipNextDatagram(bb);
		}
		/* Do one step back so that the pointed datagram contains the requested time */
		bb.position(byteBefore);
		return timeBefore;
	}

	/**
	 * This method produces a new byte buffer whose current position represents the requested target time. It cannot be assumed
	 * that a call to byteBuffer.position() produces any meaningful values. The byte buffer may represent only a part of the
	 * available data; however, at least one datagram can be read from the byte buffer. If no further data can be read from it, a
	 * new byte buffer must be obtained by calling this method again with a new target time.
	 * 
	 * The buffer is taken from the memory map if one was set up by load(), and loaded from the file channel otherwise.
	 * 
	 * @param targetTimeInSamples
	 *            the time position in the file which should be accessed as a byte buffer, in samples. Must be non-negative and
	 *            less than the total duration of the timeline.
	 * @return a pair representing the byte buffer from which to read, and the exact time corresponding to the current position of
	 *         the byte buffer. The position as such is not meaningful; the time is guaranteed to be less than or equal to
	 *         targetTimeInSamples.
	 * @throws IOException
	 *             if the data cannot be read
	 * @throws BufferUnderflowException
	 *             if no byte buffer can be obtained for the requested time.
	 */
	protected Pair<ByteBuffer, Long> getByteBufferAtTime(long targetTimeInSamples) throws IOException,
			BufferUnderflowException {
		if (mappedBB != null) {
			return getMappedByteBufferAtTime(targetTimeInSamples);
		}
		return loadByteBufferAtTime(targetTimeInSamples);
	}

	/**
	 * Memory-mapped variant of {@link #getByteBufferAtTime(long)}: duplicates the mapped byte buffer, positions the duplicate
	 * at the index field preceding the target time, and hops datagram by datagram to the one containing the target time.
	 * 
	 * @param targetTimeInSamples
	 *            the time position to reach, in samples.
	 * @return a pair of the positioned byte buffer and the exact time corresponding to its position.
	 * @throws IllegalArgumentException
	 *             as raised by {@link #hopToTime(ByteBuffer, long, long)} or by positioning the buffer
	 * @throws IOException
	 *             if there is a problem skipping the datagrams
	 */
	protected Pair<ByteBuffer, Long> getMappedByteBufferAtTime(long targetTimeInSamples) throws IllegalArgumentException,
			IOException {
		assert mappedBB != null;
		/* Seek for the time index which comes just before the requested time */
		IdxField idxFieldBefore = idx.getIdxFieldBefore(targetTimeInSamples);
		// the mapped buffer starts at the datagram zone, so file positions must be made relative to it
		int bytePos = (int) (idxFieldBefore.bytePtr - datagramsBytePos);
		ByteBuffer bb = mappedBB.duplicate();
		bb.position(bytePos);
		long time = hopToTime(bb, idxFieldBefore.timePtr, targetTimeInSamples);
		return new Pair<ByteBuffer, Long>(bb, time);
	}

	/**
	 * Piecewise-reading variant of {@link #getByteBufferAtTime(long)}: loads chunks of the datagram zone from the file channel,
	 * starting at the index field preceding the target time, and skips datagram headers until the datagram containing the
	 * target time is found. The returned buffer is positioned at the header of that datagram and contains it completely.
	 * 
	 * @param targetTimeInSamples
	 *            the time position to reach, in samples.
	 * @return a pair of the positioned byte buffer and the exact time corresponding to its position.
	 * @throws IOException
	 *             if the data cannot be read, or the datagram found is corrupt or incomplete
	 * @throws BufferUnderflowException
	 *             if the datagram zone ends before the target time is reached
	 */
	protected Pair<ByteBuffer, Long> loadByteBufferAtTime(long targetTimeInSamples) throws IOException {
		assert fileChannel != null;
		// we must load a chunk of data from the FileChannel
		final int defaultBufSize = 0x10000; // 64 kB
		/* Seek for the time index which comes just before the requested time */
		IdxField idxFieldBefore = idx.getIdxFieldBefore(targetTimeInSamples);
		long time = idxFieldBefore.timePtr;
		// absolute file position corresponding to index 0 of the current buffer
		long bufStart = idxFieldBefore.bytePtr;
		ByteBuffer bb = loadDatagramZoneChunk(bufStart, defaultBufSize);

		while (true) {
			if (!canReadDatagramHeader(bb)) {
				// reload starting at the current position, not at the start of the old buffer
				bufStart += bb.position();
				bb = loadDatagramZoneChunk(bufStart, defaultBufSize);
				if (!canReadDatagramHeader(bb)) {
					throw new BufferUnderflowException(); // end of datagram zone reached
				}
			}
			int posBefore = bb.position();
			Datagram d = new Datagram(bb, false);
			int length = d.getLength();
			if (length < 0) {
				throw new IOException("Corrupt timeline data: illegal datagram length " + length);
			}
			if (time + d.getDuration() > targetTimeInSamples) { // d is our datagram
				bb.position(posBefore);
				int datagramNumBytes = Datagram.NUM_HEADER_BYTES + length;
				// need to make sure we return a byte buffer from which d can be read
				if (!canReadAmount(bb, datagramNumBytes)) {
					bufStart += posBefore;
					bb = loadDatagramZoneChunk(bufStart, Math.max(datagramNumBytes, defaultBufSize));
					if (!canReadAmount(bb, datagramNumBytes)) {
						throw new IOException("Cannot read complete datagram at byte position " + bufStart);
					}
				}
				break;
			}
			// keep on skipping
			time += d.getDuration();
			if (canReadAmount(bb, length)) {
				bb.position(bb.position() + length);
			} else {
				// the datagram body extends beyond this buffer: continue right after it
				bufStart += bb.position() + (long) length;
				bb = loadDatagramZoneChunk(bufStart, defaultBufSize);
			}
		}
		return new Pair<ByteBuffer, Long>(bb, time);
	}

	/**
	 * Load up to wantedSize bytes starting at bytePos, never reading beyond the start of the time index, so that index data is
	 * not mistaken for datagrams.
	 * 
	 * @param bytePos
	 *            absolute position in the file from which to load
	 * @param wantedSize
	 *            the maximum number of bytes to load
	 * @return the loaded byte buffer at position 0; empty if bytePos is at or beyond the start of the time index
	 * @throws IOException
	 *             if the data cannot be read from fileChannel
	 */
	private ByteBuffer loadDatagramZoneChunk(long bytePos, int wantedSize) throws IOException {
		long available = timeIdxBytePos - bytePos;
		if (available <= 0) {
			return ByteBuffer.allocate(0);
		}
		return loadByteBuffer(bytePos, (int) Math.min(wantedSize, available));
	}

	/**
	 * Load a chunk of the file into a newly allocated byte buffer.
	 * 
	 * @param bytePos
	 *            position in fileChannel from which to load the byte buffer
	 * @param bufSize
	 *            size of the byte buffer
	 * @return the byte buffer, loaded and set such that position is 0 and limit is the number of bytes actually read; this is
	 *         bufSize unless the end of the file was reached first
	 * @throws IOException
	 *             if the data cannot be read from fileChannel
	 */
	private ByteBuffer loadByteBuffer(long bytePos, int bufSize) throws IOException {
		ByteBuffer bb = ByteBuffer.allocate(bufSize);
		// A single read() may deliver fewer bytes than requested, so keep reading until the buffer is full or
		// the end of the file is reached. Positional reads do not modify the channel's own position.
		while (bb.hasRemaining()) {
			int nRead = fileChannel.read(bb, bytePos + bb.position());
			if (nRead <= 0) {
				break;
			}
		}
		bb.flip();
		return bb;
	}

	// True if at least Datagram.NUM_HEADER_BYTES bytes are left between the buffer's position and its limit.
	private boolean canReadDatagramHeader(ByteBuffer bb) {
		final int headerBytes = Datagram.NUM_HEADER_BYTES;
		return canReadAmount(bb, headerBytes);
	}

	// True if at least the given number of bytes is left between the buffer's position and its limit.
	private boolean canReadAmount(ByteBuffer bb, int amount) {
		int bytesLeft = bb.limit() - bb.position();
		return amount <= bytesLeft;
	}

	/**
	 * Get a single datagram from a particular time location, given in the timeline's sampling rate.
	 * 
	 * @param targetTimeInSamples
	 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
	 * 
	 * @return the datagram starting at or overlapping the given time, or null if end-of-file was encountered
	 * @throws IOException
	 *             if no datagram could be created from the data at the given time.
	 * @throws BufferUnderflowException
	 *             if no byte buffer can be obtained for the given time.
	 */
	public Datagram getDatagram(long targetTimeInSamples) throws IOException {
		Pair<ByteBuffer, Long> p = getByteBufferAtTime(targetTimeInSamples);
		ByteBuffer bb = p.getFirst();
		return getNextDatagram(bb);
	}

	/**
	 * Get a single datagram from a particular time location, with times expressed in a caller-supplied sample rate.
	 * 
	 * The requested time is converted with {@link #scaleTime(int, long)} before the lookup; if the requested sample rate
	 * differs from the timeline's, the duration of the returned datagram is converted with {@link #unScaleTime(int, long)}.
	 * 
	 * @param targetTimeInSamples
	 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
	 * @param reqSampleRate
	 *            the sample rate for the requested times.
	 * 
	 * @return the datagram starting at or overlapping the given time, or null if end-of-file was encountered
	 * @throws IOException
	 *             if no datagram could be created from the data at the given time.
	 */
	public Datagram getDatagram(long targetTimeInSamples, int reqSampleRate) throws IOException {
		long timelineTime = scaleTime(reqSampleRate, targetTimeInSamples);
		Datagram dat = getDatagram(timelineTime);
		if (dat != null && reqSampleRate != sampleRate) {
			// keep the duration consistent with the time base used by the caller
			long rescaledDuration = unScaleTime(reqSampleRate, dat.getDuration());
			dat.setDuration(rescaledDuration);
		}
		return dat;
	}

	/**
	 * Get the datagrams spanning a particular time range from a particular time location, and return the time offset between the
	 * time request and the actual location of the first returned datagram. Datagrams are selected by time span if
	 * timeSpanInSamples is positive, and by number otherwise.
	 * 
	 * @param targetTimeInSamples
	 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
	 * @param nDatagrams
	 *            the number of datagrams to read. Ignored if timeSpanInSamples is positive; values of 0 or less are treated
	 *            as 1.
	 * @param timeSpanInSamples
	 *            the requested time span, in samples. If positive, then datagrams are selected by the given time span.
	 * @param reqSampleRate
	 *            the sample rate for the requested and returned times. Must be positive.
	 * @param returnOffset
	 *            an optional output field. If it is not null, it must have length of at least 1, and after the call the
	 *            first array field will contain the time difference, in samples, between the time request and the actual
	 *            beginning of the first datagram.
	 * 
	 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
	 *         can be read are returned.
	 * @throws IllegalArgumentException
	 *             if targetTimeInSamples is negative, if reqSampleRate is not positive, or if a returnOffset of length 0 is
	 *             given.
	 * @throws IOException
	 *             if no data can be read at the given target time
	 */
	private Datagram[] getDatagrams(long targetTimeInSamples, int nDatagrams, long timeSpanInSamples, int reqSampleRate,
			long[] returnOffset) throws IllegalArgumentException, IOException {
		/* Check all input arguments before doing any I/O */
		if (targetTimeInSamples < 0) {
			throw new IllegalArgumentException("Can't get a datagram from a negative time position (given time position was ["
					+ targetTimeInSamples + "]).");
		}
		if (reqSampleRate <= 0) {
			throw new IllegalArgumentException("sample rate must be positive, but is " + reqSampleRate);
		}
		if (returnOffset != null && returnOffset.length == 0) {
			throw new IllegalArgumentException("If returnOffset is given, it must have length of at least 1");
		}
		// Get the datagrams by number or by time span?
		boolean byNumber = timeSpanInSamples <= 0;
		if (byNumber && nDatagrams <= 0) {
			nDatagrams = 1; // return at least one datagram
		}

		/*
		 * Resample the requested time location, in case the sample times are different between the request and the timeline
		 */
		long scaledTargetTime = scaleTime(reqSampleRate, targetTimeInSamples);

		Pair<ByteBuffer, Long> p = getByteBufferAtTime(scaledTargetTime);
		ByteBuffer bb = p.getFirst();
		long time = p.getSecond();
		if (returnOffset != null) { // return offset between target and actual start time
			returnOffset[0] = unScaleTime(reqSampleRate, (scaledTargetTime - time));
		}

		ArrayList<Datagram> datagrams = new ArrayList<Datagram>(byNumber ? nDatagrams : 10);
		// endTime is stop criterion if reading by time scale:
		long endTime = byNumber ? -1 : scaleTime(reqSampleRate, (targetTimeInSamples + timeSpanInSamples));
		int nRead = 0;
		boolean haveReadAll = false;
		while (!haveReadAll) {
			Datagram dat = getNextDatagram(bb);
			if (dat == null) {
				// we may have reached the end of the current byte buffer... try reading another:
				try {
					p = getByteBufferAtTime(time);
				} catch (Exception ignored) {
					// cannot get another byte buffer -- stop reading and return what we have.
					break;
				}
				bb = p.getFirst();
				dat = getNextDatagram(bb);
				if (dat == null) { // no, indeed we cannot read any more
					break; // abort, we could not read all
				}
			}
			time += dat.getDuration(); // duration in timeline sample rate
			nRead++; // number of datagrams read
			if (reqSampleRate != sampleRate) {
				dat.setDuration(unScaleTime(reqSampleRate, dat.getDuration())); // convert duration into reqSampleRate
			}
			datagrams.add(dat);
			if (byNumber && nRead == nDatagrams || !byNumber && time >= endTime) {
				haveReadAll = true;
			}
		}
		return datagrams.toArray(new Datagram[0]);
	}

	// ///////////////////// Convenience methods: variants of getDatagrams() ///////////////////////

	// ///////////////////// by time span ////////////////////////////

	/**
	 * Read the datagrams covering a time span that starts at a given time location, and report the offset between the
	 * requested time and the actual start of the first datagram read. Delegates to the general private getDatagrams() method
	 * with a datagram count of -1.
	 * 
	 * @param targetTimeInSamples
	 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
	 * @param timeSpanInSamples
	 *            the requested time span, in samples. If positive, then datagrams are selected by the given time span.
	 * @param reqSampleRate
	 *            the sample rate for the requested and returned times. Must be positive.
	 * @param returnOffset
	 *            an optional output field. If it is not null, it must have length of at least 1, and after the call the
	 *            first array field will contain the time difference, in samples, between the time request and the actual
	 *            beginning of the first datagram.
	 * 
	 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
	 *         can be read are returned.
	 * @throws IllegalArgumentException
	 *             if targetTimeInSamples is negative, or if a returnOffset of length 0 is given.
	 * @throws IOException
	 *             if no data can be read at the given target time
	 */
	public Datagram[] getDatagrams(long targetTimeInSamples, long timeSpanInSamples, int reqSampleRate, long[] returnOffset)
			throws IOException {
		final int noDatagramCount = -1;
		return getDatagrams(targetTimeInSamples, noDatagramCount, timeSpanInSamples, reqSampleRate, returnOffset);
	}

	/**
	 * Read the datagrams covering a time span that starts at a given time location, without reporting an offset. Delegates to
	 * {@link #getDatagrams(long, long, int, long[])} with a null returnOffset.
	 * 
	 * @param targetTimeInSamples
	 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
	 * @param timeSpanInSamples
	 *            the requested time span, in samples. If positive, then datagrams are selected by the given time span.
	 * @param reqSampleRate
	 *            the sample rate for the requested and returned times. Must be positive.
	 * 
	 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
	 *         can be read are returned.
	 * @throws IllegalArgumentException
	 *             if targetTimeInSamples is negative.
	 * @throws IOException
	 *             if no data can be read at the given target time
	 */
	public Datagram[] getDatagrams(long targetTimeInSamples, long timeSpanInSamples, int reqSampleRate) throws IOException {
		final long[] noOffsetWanted = null;
		return getDatagrams(targetTimeInSamples, timeSpanInSamples, reqSampleRate, noOffsetWanted);
	}

	/**
	 * Read the datagrams covering a time span that starts at a given time location, with all times expressed in the timeline's
	 * own sample rate and without reporting an offset. Delegates to {@link #getDatagrams(long, long, int, long[])} using the
	 * timeline's sample rate and a null returnOffset.
	 * 
	 * @param targetTimeInSamples
	 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
	 * @param timeSpanInSamples
	 *            the span in samples
	 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
	 *         can be read are returned.
	 * @throws IllegalArgumentException
	 *             if targetTimeInSamples is negative.
	 * @throws IOException
	 *             if no data can be read at the given target time
	 */
	public Datagram[] getDatagrams(long targetTimeInSamples, long timeSpanInSamples) throws IOException {
		final long[] noOffsetWanted = null;
		return getDatagrams(targetTimeInSamples, timeSpanInSamples, sampleRate, noOffsetWanted);
	}

	// ///////////////////// by number of datagrams ////////////////////////////

	/**
	 * Read a given number of datagrams starting at a given time location, and report the offset between the requested time
	 * and the actual start of the first datagram read. Delegates to the general private getDatagrams() method with a time span
	 * of -1, so that selection is by number.
	 * 
	 * @param targetTimeInSamples
	 *            the requested position, in samples. Must be non-negative and less than the total duration of the timeline.
	 * @param number
	 *            the number of datagrams to read. Values of 0 or less are treated as 1.
	 * @param reqSampleRate
	 *            the sample rate for the requested and returned times. Must be positive.
	 * @param returnOffset
	 *            an optional output field. If it is not null, it must have length of at least 1, and after the call the
	 *            first array field will contain the time difference, in samples, between the time request and the actual
	 *            beginning of the first datagram.
	 * 
	 * @return an array of the datagrams read. If less than the requested amount of datagrams can be read, only those that
	 *         can be read are returned.
	 * @throws IllegalArgumentException
	 *             if targetTimeInSamples is negative, or if a returnOffset of length 0 is given.
	 * @throws IOException
	 *             if no data can be read at the given target time
	 */
	public Datagram[] getDatagrams(long targetTimeInSamples, int number, int reqSampleRate, long[] returnOffset)
			throws IOException {
		final long noTimeSpan = -1;
		return getDatagrams(targetTimeInSamples, number, noTimeSpan, reqSampleRate, returnOffset);
	}

	// ///////////////////// by unit ////////////////////////////

	/**
	 * Read the datagrams spanning a particular unit, and optionally report the time offset between the unit's start time and
	 * the actual location of the first returned datagram. Irrespective of the unit duration, at least one datagram is always
	 * returned.
	 * 
	 * @param unit
	 *            The requested speech unit, containing its own position and duration. Must not be null.
	 * @param reqSampleRate
	 *            the sample rate for the requested and returned times. Must be positive.
	 * @param returnOffset
	 *            an optional output field. If it is not null, it must have a length of at least 1; after the call, its first
	 *            element contains the time difference, in samples, between the time request and the actual beginning of the
	 *            first datagram.
	 * 
	 * @return an array of datagrams containing at least one datagram. If fewer datagrams than requested can be read, only
	 *         those that can be read are returned.
	 * @throws IllegalArgumentException
	 *             if the start time of the unit is negative, or if a returnOffset of length 0 is given.
	 * @throws NullPointerException
	 *             if unit is null.
	 * @throws IOException
	 *             if no data can be read at the given target time
	 */
	public Datagram[] getDatagrams(Unit unit, int reqSampleRate, long[] returnOffset) throws IOException {
		// Keep the explicit conversion to long: it presumably steers overload resolution towards the time-span variant
		// instead of the (long, int, int, long[]) datagram-count variant -- TODO confirm against the type of Unit.duration.
		final long unitSpan = (long) (unit.duration);
		return getDatagrams(unit.startTime, unitSpan, reqSampleRate, returnOffset);
	}

	/**
	 * Read the datagrams spanning a particular unit, without reporting any time offset. Irrespective of the unit duration, at
	 * least one datagram is always returned.
	 * 
	 * @param unit
	 *            The requested speech unit, containing its own position and duration.
	 * @param reqSampleRate
	 *            the sample rate for the requested and returned times. Must be positive.
	 * 
	 * @return an array of datagrams containing at least one datagram. If fewer datagrams than requested can be read, only
	 *         those that can be read are returned.
	 * @throws IllegalArgumentException
	 *             if the start time of the unit is negative.
	 * @throws IOException
	 *             if no data can be read at the given target time
	 */
	public Datagram[] getDatagrams(Unit unit, int reqSampleRate) throws IOException {
		// Delegate to the three-argument variant with no offset output array.
		final long[] noReturnOffset = null;
		return getDatagrams(unit, reqSampleRate, noReturnOffset);
	}

	/*****************************************/
	/* HELPER CLASSES */
	/*****************************************/

	/**
	 * Simple helper class to read the index part of a timeline file. The index points to datagrams at or before a certain point
	 * in time.
	 * 
	 * Note: If no datagram starts at the exact index time, it makes sense to point to the previous datagram rather than the
	 * following one.
	 * 
	 * If one would store the location of the datagram which comes just after the index position, there would be a possibility
	 * that a particular time request falls between the index and the datagram:
	 * 
	 * <pre>
	 * time axis: ---- INDEX ---- REQUEST ---- DATAGRAM ----&gt;
	 * </pre>
	 * 
	 * This would require a subsequent backwards time hopping, which is impossible because the datagrams are a singly linked list.
	 * 
	 * By registering the location of the previous datagram, any time request will find an index which points to a datagram
	 * falling BEFORE or ON the index location:
	 * 
	 * <pre>
	 * time axis: ---- DATAGRAM ---- INDEX ---- REQUEST ----&gt;
	 * </pre>
	 * 
	 * Thus, forward hopping is always possible and the requested time can always be reached.
	 * <p>
	 * Validation note: consecutive index fields may carry equal pointers; only a decrease of the byte or time pointer from one
	 * field to the next is rejected as malformed.
	 * 
	 * @author sacha
	 */
	public static class Index {
		/** Serialized size in bytes of one index field: an 8-byte byte pointer followed by an 8-byte time pointer. */
		private static final int BYTES_PER_FIELD = 16;

		/** Serialized size in bytes of the obsolete "last datagram" record (two longs) that follows the index fields. */
		private static final int TRAILER_BYTES = 16;

		private int idxInterval = 0; // The fixed time interval (in samples) separating two index fields.

		/**
		 * For index field i, bytePtrs[i] is the position in bytes, from the beginning of the file, of the datagram coming on or
		 * just before that index field.
		 */
		private long[] bytePtrs;

		/**
		 * For index field i, timePtrs[i] is the time position in samples of the datagram coming on or just before that index
		 * field.
		 */
		private long[] timePtrs;

		/****************/
		/* CONSTRUCTORS */
		/****************/

		/**
		 * Construct an index from a data input stream or random access file. Fundamental guarantee: Once created, the index is
		 * guaranteed to contain a positive index interval and non-decreasing byte and time pointers.
		 * 
		 * @param raf
		 *            data input from which to read the index. Must not be null, and read position must be at start of index.
		 * @throws IOException
		 *             if there is a problem reading.
		 * @throws MaryConfigurationException
		 *             if the index is not well-formed.
		 */
		private Index(DataInput raf) throws IOException, MaryConfigurationException {
			assert raf != null : "null argument";
			load(raf);
		}

		/**
		 * Construct an index from a byte buffer. Fundamental guarantee: Once created, the index is guaranteed to contain a
		 * positive index interval and non-decreasing byte and time pointers.
		 * 
		 * @param bb
		 *            byte buffer from which to read the index. Must not be null, and read position must be at start of index.
		 * @throws BufferUnderflowException
		 *             if there is a problem reading.
		 * @throws MaryConfigurationException
		 *             if the index is not well-formed.
		 */
		private Index(ByteBuffer bb) throws BufferUnderflowException, MaryConfigurationException {
			assert bb != null : "null argument";
			load(bb);
		}

		/**
		 * Constructor which builds a new index with a specific index interval from a list of index fields. Fundamental
		 * guarantee: Once created, the index is guaranteed to contain a positive index interval and non-decreasing byte and
		 * time pointers.
		 * 
		 * @param idxInterval
		 *            the index interval, in samples. Must be a positive number.
		 * @param indexFields
		 *            the actual index data. Must not be null.
		 * @throws IllegalArgumentException
		 *             if the index interval is not positive, or if a byte or time pointer is smaller than its predecessor.
		 * @throws NullPointerException
		 *             if indexFields are null.
		 */
		public Index(int idxInterval, Vector<IdxField> indexFields) throws IllegalArgumentException, NullPointerException {
			if (idxInterval <= 0) {
				throw new IllegalArgumentException("got index interval <= 0");
			}
			if (indexFields == null) {
				throw new NullPointerException("null argument");
			}
			this.idxInterval = idxInterval;
			bytePtrs = new long[indexFields.size()];
			timePtrs = new long[indexFields.size()];
			for (int i = 0; i < bytePtrs.length; i++) {
				IdxField f = indexFields.get(i);
				bytePtrs[i] = f.bytePtr;
				timePtrs[i] = f.timePtr;
				if (decreasesAt(bytePtrs, timePtrs, i)) {
					throw new IllegalArgumentException("Pointer positions in index fields must be strictly monotonously rising");
				}
			}
		}

		/**
		 * Tell whether the byte or time pointer at position i is smaller than the one at position i-1.
		 * 
		 * @param bytes
		 *            byte pointers, filled at least up to position i.
		 * @param times
		 *            time pointers, filled at least up to position i.
		 * @param i
		 *            the position to check; position 0 has no predecessor and never counts as a decrease.
		 * @return true if either pointer decreases from position i-1 to position i.
		 */
		private static boolean decreasesAt(long[] bytes, long[] times, int i) {
			return i > 0 && (bytes[i] < bytes[i - 1] || times[i] < times[i - 1]);
		}

		/*****************/
		/* I/O METHODS */
		/*****************/

		/**
		 * Method which loads an index from a data input (random access file or data input stream). The fields of this object
		 * are only replaced once the complete index has been read and validated; if an exception is thrown, the previous
		 * content is left untouched.
		 * 
		 * @param rafIn
		 *            data input from which to read the index. Must not be null, and read position must be at start of index.
		 * @throws IOException
		 *             if there is a problem reading.
		 * @throws MaryConfigurationException
		 *             if the index is not well-formed: non-positive index interval, negative or implausibly large number of
		 *             index fields, or decreasing pointers.
		 */
		public void load(DataInput rafIn) throws IOException, MaryConfigurationException {
			int numIdx = rafIn.readInt();
			int interval = rafIn.readInt();
			if (interval <= 0) {
				throw new MaryConfigurationException("read negative index interval -- file seems corrupt");
			}
			if (numIdx < 0) {
				// Without this check, the array allocations below would fail with a NegativeArraySizeException.
				throw new MaryConfigurationException("read negative number of index fields -- file seems corrupt");
			}
			if (numIdx > (Integer.MAX_VALUE - TRAILER_BYTES) / BYTES_PER_FIELD) {
				// The size of the read buffer below is computed in int arithmetic and would overflow.
				throw new MaryConfigurationException("number of index fields too large -- file seems corrupt");
			}

			// 2*8 bytes for each index field + 16 for the obsolete prevBytePos and prevTimePos
			int numBytesToRead = BYTES_PER_FIELD * numIdx + TRAILER_BYTES;
			byte[] data = new byte[numBytesToRead];
			rafIn.readFully(data);
			DataInput bufIn = new DataInputStream(new ByteArrayInputStream(data));

			long[] newBytePtrs = new long[numIdx];
			long[] newTimePtrs = new long[numIdx];
			for (int i = 0; i < numIdx; i++) {
				newBytePtrs[i] = bufIn.readLong();
				newTimePtrs[i] = bufIn.readLong();
				if (decreasesAt(newBytePtrs, newTimePtrs, i)) {
					throw new MaryConfigurationException(
							"File seems corrupt: Pointer positions in index fields are not strictly monotonously rising");
				}
			}
			// The obsolete "last datagram" memory (prevBytePos, prevTimePos) has already been consumed from rafIn by
			// readFully() above; its value is not used.

			idxInterval = interval;
			bytePtrs = newBytePtrs;
			timePtrs = newTimePtrs;
		}

		/**
		 * Method which loads an index from a byte buffer. The fields of this object are only replaced once the complete index
		 * has been read and validated; if an exception is thrown, the previous content is left untouched.
		 * 
		 * @param bb
		 *            byte buffer from which to read the index. Must not be null, and read position must be at start of index.
		 * @throws BufferUnderflowException
		 *             if the buffer does not hold as many bytes as the index announces.
		 * @throws MaryConfigurationException
		 *             if the index is not well-formed: non-positive index interval, negative number of index fields, or
		 *             decreasing pointers.
		 */
		private void load(ByteBuffer bb) throws BufferUnderflowException, MaryConfigurationException {
			int numIdx = bb.getInt();
			int interval = bb.getInt();
			if (interval <= 0) {
				throw new MaryConfigurationException("read negative index interval -- file seems corrupt");
			}
			if (numIdx < 0) {
				// Without this check, the array allocations below would fail with a NegativeArraySizeException.
				throw new MaryConfigurationException("read negative number of index fields -- file seems corrupt");
			}
			// Fail before allocating anything if the buffer cannot possibly hold the announced index plus its trailer;
			// the size is computed in long arithmetic so that it cannot overflow.
			if (bb.remaining() < (long) BYTES_PER_FIELD * numIdx + TRAILER_BYTES) {
				throw new BufferUnderflowException();
			}

			long[] newBytePtrs = new long[numIdx];
			long[] newTimePtrs = new long[numIdx];
			for (int i = 0; i < numIdx; i++) {
				newBytePtrs[i] = bb.getLong();
				newTimePtrs[i] = bb.getLong();
				if (decreasesAt(newBytePtrs, newTimePtrs, i)) {
					throw new MaryConfigurationException(
							"File seems corrupt: Pointer positions in index fields are not strictly monotonously rising");
				}
			}
			/* Obsolete: Read the "last datagram" memory, to leave the buffer positioned after the index */
			/* prevBytePos = */bb.getLong();
			/* prevTimePos = */bb.getLong();

			idxInterval = interval;
			bytePtrs = newBytePtrs;
			timePtrs = newTimePtrs;
		}

		/**
		 * Method which writes an index to a RandomAccessFile, at the file's current position. The layout is: number of index
		 * fields (int), index interval (int), one (byte pointer, time pointer) pair of longs per index field, and two zero
		 * longs in place of the obsolete "last datagram" memory.
		 * 
		 * @param rafIn
		 *            the file to write to
		 * @throws IOException
		 *             if writing fails
		 * @return the number of bytes written
		 * */
		public long dump(RandomAccessFile rafIn) throws IOException {
			long nBytes = 0;
			int numIdx = getNumIdx();
			rafIn.writeInt(numIdx);
			nBytes += 4;
			rafIn.writeInt(idxInterval);
			nBytes += 4;
			for (int i = 0; i < numIdx; i++) {
				rafIn.writeLong(bytePtrs[i]);
				rafIn.writeLong(timePtrs[i]);
				nBytes += BYTES_PER_FIELD;
			}
			// Obsolete, keep only for file format compatibility:
			// the "last datagram" memory (prevBytePos, prevTimePos) is written as two zero fields.
			rafIn.writeLong(0L);
			rafIn.writeLong(0L);
			nBytes += TRAILER_BYTES;

			return nBytes;
		}

		/**
		 * Method which writes an index to stdout
		 * */
		public void print() {
			System.out.println("");
			int numIdx = getNumIdx();
			System.out.println("interval = " + idxInterval);
			System.out.println("numIdx = " + numIdx);
			for (int i = 0; i < numIdx; i++) {
				System.out.println("( " + bytePtrs[i] + " , " + timePtrs[i] + " )");
			}
			System.out.println("");
		}

		/*****************/
		/* ACCESSORS */
		/*****************/
		/**
		 * The number of index entries.
		 * 
		 * @return bytePtrs.length
		 */
		public int getNumIdx() {
			return bytePtrs.length;
		}

		/**
		 * The interval, in samples, between two index entries.
		 * 
		 * @return idxInterval
		 */
		public int getIdxInterval() {
			return idxInterval;
		}

		/**
		 * Get a copy of the index field at the given position.
		 * 
		 * @param i
		 *            the position of the requested index field, from 0 to getNumIdx()-1.
		 * @return a new IdxField holding the byte and time pointer stored at position i.
		 * @throws IndexOutOfBoundsException
		 *             if i is negative or not smaller than getNumIdx().
		 */
		public IdxField getIdxField(int i) {
			if (i < 0) {
				throw new IndexOutOfBoundsException("Negative index.");
			}
			if (i >= bytePtrs.length) {
				// The highest valid index is length-1, not length.
				throw new IndexOutOfBoundsException("Requested index no. " + i + ", but highest is " + (bytePtrs.length - 1));
			}
			return new IdxField(bytePtrs[i], timePtrs[i]);
		}

		/*****************/
		/* OTHER METHODS */
		/*****************/

		/**
		 * Returns the index field that comes immediately before or straight on the requested time. Times lying beyond the last
		 * index field are answered with the last index field.
		 * 
		 * @param timePosition
		 *            the non-negative time
		 * @return an index field representing the index position just before or straight on the requested time.
		 * @throws IllegalArgumentException
		 *             if the given timePosition is negative
		 * @throws ArrayIndexOutOfBoundsException
		 *             if the index contains no index fields at all
		 */
		public IdxField getIdxFieldBefore(long timePosition) {
			if (timePosition < 0) {
				throw new IllegalArgumentException("Negative time given");
			}
			if (bytePtrs.length == 0) {
				throw new ArrayIndexOutOfBoundsException("Cannot get index field before time=[" + timePosition
						+ "]: the index is empty.");
			}
			// Integer division between two non-negative longs floors the result. The clamping against "time out of bounds"
			// is done in long arithmetic BEFORE narrowing to int: narrowing first would silently discard the high-order
			// bits of a large quotient and could yield a negative or otherwise wrong array position.
			long slot = Math.min(timePosition / idxInterval, bytePtrs.length - 1L);
			int index = (int) slot;
			return new IdxField(bytePtrs[index], timePtrs[index]);
		}
	}

	/**
	 * Simple helper class holding one index field of a timeline: a byte pointer paired with a time pointer.
	 * 
	 * @author sacha
	 *
	 */
	public static class IdxField {
		// TODO: rethink if these should be public fields or if we should add accessors.
		/** Byte pointer of this index field; 0 unless set. */
		public long bytePtr;
		/** Time pointer of this index field; 0 unless set. */
		public long timePtr;

		/**
		 * Create an index field with both pointers set to 0.
		 */
		public IdxField() {
			this(0L, 0L);
		}

		/**
		 * Create an index field with the given pointers.
		 * 
		 * @param setBytePtr
		 *            the value for bytePtr
		 * @param setTimePtr
		 *            the value for timePtr
		 */
		public IdxField(long setBytePtr, long setTimePtr) {
			this.bytePtr = setBytePtr;
			this.timePtr = setTimePtr;
		}
	}

	/**
	 * 
	 * Simple helper class to load, hold and write the processing header of a timeline, which is kept as a single string.
	 * 
	 * @author sacha
	 *
	 */
	public static class ProcHeader {

		/** The text of the processing header. */
		private String procHeader;

		/****************/
		/* CONSTRUCTORS */
		/****************/

		/**
		 * Constructor which loads the procHeader from a RandomAccessFile. Fundamental guarantee: after creation, the ProcHeader
		 * object has a non-null (but possibly empty) string content.
		 * 
		 * @param raf
		 *            input from which to load the processing header. Must not be null and must be positioned so that a processing
		 *            header can be read from it.
		 *
		 * @throws IOException
		 *             if no proc header can be read at the current position.
		 */
		private ProcHeader(RandomAccessFile raf) throws IOException {
			this.loadProcHeader(raf);
		}

		/**
		 * Constructor which loads the procHeader from a byte buffer. Fundamental guarantee: after creation, the ProcHeader
		 * object has a non-null (but possibly empty) string content.
		 * 
		 * @param bb
		 *            byte buffer from which to load the processing header. Must not be null and must be positioned so that a
		 *            processing header can be read from it.
		 *
		 * @throws BufferUnderflowException
		 *             if no proc header can be read at the current position.
		 * @throws UTFDataFormatException
		 *             if no proc header can be read at the current position.
		 */
		private ProcHeader(ByteBuffer bb) throws BufferUnderflowException, UTFDataFormatException {
			this.loadProcHeader(bb);
		}

		/**
		 * Constructor which makes the procHeader from a String. Fundamental guarantee: after creation, the ProcHeader object has
		 * a non-null (but possibly empty) string content.
		 * 
		 * @param procStr
		 *            a non-null string representing the contents of the ProcHeader.
		 * @throws NullPointerException
		 *             if procStr is null
		 * */
		public ProcHeader(String procStr) {
			if (null == procStr) {
				throw new NullPointerException("null argument");
			}
			this.procHeader = procStr;
		}

		/****************/
		/* ACCESSORS */
		/****************/

		/**
		 * Return the string length of the proc header.
		 * 
		 * @return a non-negative int representing the string length of the proc header.
		 */
		public int getCharSize() {
			assert this.procHeader != null;
			final int numChars = this.procHeader.length();
			return numChars;
		}

		/**
		 * Get the string content of the proc header.
		 * 
		 * @return a non-null string representing the string content of the proc header.
		 */
		public String getString() {
			assert this.procHeader != null;
			return this.procHeader;
		}

		/*****************/
		/* I/O METHODS */
		/*****************/

		/**
		 * Method which loads the header from a RandomAccessFile, reading it with readUTF() at the current file position.
		 * 
		 * @param rafIn
		 *            file to read from, must not be null.
		 * @throws IOException
		 *             if no proc header can be read at the current position.
		 */
		private void loadProcHeader(RandomAccessFile rafIn) throws IOException {
			assert rafIn != null : "null argument";
			this.procHeader = rafIn.readUTF();
			assert this.procHeader != null;
		}

		/**
		 * Method which loads the header from a byte buffer, reading it with StreamUtils.readUTF().
		 * 
		 * @param bb
		 *            byte buffer to read from, must not be null.
		 * @throws BufferUnderflowException
		 *             if no proc header can be read at the current position.
		 * @throws UTFDataFormatException
		 *             if no proc header can be read at the current position.
		 */
		private void loadProcHeader(ByteBuffer bb) throws BufferUnderflowException, UTFDataFormatException {
			this.procHeader = StreamUtils.readUTF(bb);
			assert this.procHeader != null;
		}

		/**
		 * Method which writes the proc header to a RandomAccessFile with writeUTF(), at the current file position.
		 * 
		 * @param rafIn
		 *            the file to write to
		 * @throws IOException
		 *             if writing fails
		 * @return the number of written bytes, measured as the distance the file pointer has advanced.
		 * */
		public long dump(RandomAccessFile rafIn) throws IOException {
			final long startPos = rafIn.getFilePointer();
			rafIn.writeUTF(this.procHeader);
			return rafIn.getFilePointer() - startPos;
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy