All downloads are free. The search and download features use the official Maven repository.

marytts.tools.voiceimport.WaveTimelineMaker Maven / Gradle / Ivy

The newest version!
/**
 * Portions Copyright 2006 DFKI GmbH.
 * Portions Copyright 2001 Sun Microsystems, Inc.
 * Portions Copyright 1999-2001 Language Technologies Institute, 
 * Carnegie Mellon University.
 * All Rights Reserved.  Use is subject to license terms.
 * 
 * Permission is hereby granted, free of charge, to use and distribute
 * this software and its documentation without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of this work, and to
 * permit persons to whom this work is furnished to do so, subject to
 * the following conditions:
 * 
 * 1. The code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 * 2. Any modifications must be clearly marked as such.
 * 3. Original authors' names are not deleted.
 * 4. The authors' names are not used to endorse or promote products
 *    derived from this software without specific prior written
 *    permission.
 *
 * DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
 * CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 * THIS SOFTWARE.
 */
package marytts.tools.voiceimport;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.text.DecimalFormat;
import java.util.SortedMap;
import java.util.TreeMap;

import marytts.util.data.Datagram;
import marytts.util.data.ESTTrackReader;

/**
 * The WaveTimelineMaker class takes a database root directory and a list of basenames, and splits the waveforms into
 * datagrams to be stored in a timeline in Mary format.
 * <p>
 * Each waveform is cut at the pitchmark times read from the matching EST track file; every resulting frame is written
 * as one datagram of 16-bit samples.
 * 
 * @author sacha
 */
public class WaveTimelineMaker extends VoiceImportComponent {

	/** Database layout used to resolve directories and file extensions; set by {@link #getDefaultProps}. */
	protected DatabaseLayout db = null;
	/** Progress of the current {@link #compute()} run, as a percentage of the basenames already started. */
	protected int percent = 0;
	/** Property key holding the path of the waveform timeline file written by this component. */
	public final String WAVETIMELINE = "WaveTimelineMaker.waveTimeline";

	/** Database property key whose value is used as the path prefix of the pitchmark (EST track) files. */
	public final String PMDIR = "db.pmDir";
	/** Database property key whose value is used as the path suffix of the pitchmark (EST track) files. */
	public final String PMEXT = "db.pmExtension";

	/**
	 * @return the fixed component name, "WaveTimelineMaker".
	 */
	public final String getName() {
		return "WaveTimelineMaker";
	}

	/**
	 * Remembers the database layout and lazily creates the default properties of this component.
	 * 
	 * @param db
	 *            the database layout from which the default timeline location is derived
	 * @return the property map; created on the first call, returned unchanged afterwards
	 */
	public SortedMap getDefaultProps(DatabaseLayout db) {
		this.db = db;
		if (props == null) {
			props = new TreeMap();
			props.put(WAVETIMELINE, db.getProp(db.FILEDIR) + "timeline_waveforms" + db.getProp(db.MARYEXT));
		}
		return props;
	}

	/**
	 * Fills the help texts for the properties of this component.
	 */
	protected void setupHelp() {
		props2Help = new TreeMap();
		props2Help.put(WAVETIMELINE, "file containing all wave files. Will be created by this module");
	}

	/**
	 * Reads and concatenates a list of waveforms into one single timeline file.
	 * <p>
	 * The sample rate of the first wav file is taken as the global sample rate of the timeline. A pitchmark that lies
	 * beyond the end of its wave data is clamped to the last available sample (with a warning) instead of aborting the
	 * whole run.
	 * 
	 * @return true on success; false if the basename list is empty or if any exception occurred while reading the input
	 *         files or writing the timeline
	 */
	public boolean compute() {
		System.out.println("---- Making a pitch synchronous waveform timeline\n\n");

		/* Export the basename list into an array of strings */
		String[] baseNameArray = bnl.getListAsArray();
		System.out.println("Processing [" + baseNameArray.length + "] utterances.\n");

		/* Without at least one utterance there is no wav file to take the reference sample rate from. */
		if (baseNameArray.length == 0) {
			System.err.println("Error: the basename list is empty, there is nothing to put into the waveform timeline.");
			return false;
		}

		// NOTE(review): if an exception is thrown below before waveTimeline.close() is reached, the writer is never
		// closed. It is deliberately not closed on failure here, since the effect of close() on a partially written
		// timeline is not visible from this file -- confirm against TimelineWriter before changing.
		try {
			/*
			 * 1) Determine the reference sampling rate as being the sample rate of the first encountered wav file
			 */
			WavReader wav = new WavReader(db.getProp(db.WAVDIR) + baseNameArray[0] + db.getProp(db.WAVEXT));
			int globSampleRate = wav.getSampleRate();
			System.out.println("---- Detected a global sample rate of: [" + globSampleRate + "] Hz.");

			System.out.println("---- Folding the wav files according to the pitchmarks...");

			/* 2) Open the destination timeline file */

			/* Make the file name */
			String waveTimelineName = getProp(WAVETIMELINE);
			System.out.println("Will create the waveform timeline in file [" + waveTimelineName + "].");

			/* Processing header: */
			String processingHeader = "\n";

			/* Instantiate the TimelineWriter: */
			TimelineWriter waveTimeline = new TimelineWriter(waveTimelineName, processingHeader, globSampleRate, 0.1);

			/* 3) Write the datagrams and feed the index */

			float totalDuration = 0.0f; // Accumulator for the total timeline duration, in seconds (from the EST tracks)
			long totalTime = 0l; // Accumulator for the total timeline duration, in samples
			int numDatagrams = 0;

			/* For each EST track file: */
			for (int i = 0; i < baseNameArray.length; i++) {
				percent = 100 * i / baseNameArray.length;

				/* - open+load */
				System.out.println(baseNameArray[i]);
				ESTTrackReader pmFile = new ESTTrackReader(db.getProp(PMDIR) + baseNameArray[i] + db.getProp(PMEXT));
				totalDuration += pmFile.getTimeSpan();
				wav = new WavReader(db.getProp(db.WAVDIR) + baseNameArray[i] + db.getProp(db.WAVEXT));
				short[] wave = wav.getSamples();

				/* - Reset the frame location in the local file */
				int frameEnd = 0;

				/* - For each frame in the WAV file: */
				for (int f = 0; f < pmFile.getNumFrames(); f++) {

					/* Locate the corresponding segment in the wave file: it starts where the previous frame ended */
					int frameStart = frameEnd;
					frameEnd = (int) ((double) pmFile.getTime(f) * (double) (globSampleRate));

					/*
					 * Handle the case when a pitchmark falls beyond the end of the signal: truncate the frame at the last
					 * available sample. An assert alone is not enough here, because assertions are disabled by default
					 * and the copy loop below would then run past the end of the array.
					 */
					if (frameEnd > wave.length) {
						System.err.println("Warning: in [" + baseNameArray[i] + "], pitchmark [" + f + "] at sample ["
								+ frameEnd + "] lies beyond the end of the wave data [" + wave.length
								+ " samples]; truncating the frame.");
						frameEnd = wave.length;
					}

					/* Serialize the frame as consecutive 16-bit samples */
					int duration = frameEnd - frameStart;
					ByteArrayOutputStream buff = new ByteArrayOutputStream(2 * duration);
					DataOutputStream subWave = new DataOutputStream(buff);
					for (int k = 0; k < duration; k++) {
						subWave.writeShort(wave[frameStart + k]);
					}

					/* Feed the datagram to the timeline */
					waveTimeline.feed(new Datagram(duration, buff.toByteArray()), globSampleRate);
					totalTime += duration;
					numDatagrams++;
				}
			}
			waveTimeline.close();

			System.out.println("---- Done.");

			/* 4) Print some stats */
			System.out.println("---- Waveform timeline result:");
			System.out.println("Number of files scanned: " + baseNameArray.length);
			System.out.println("Total speech duration: [" + totalTime + "] samples / ["
					+ ((float) (totalTime) / (float) (globSampleRate)) + "] seconds.");
			System.out.println("(Speech duration approximated from EST Track float times: [" + totalDuration + "] seconds.)");
			System.out.println("Number of frames: [" + numDatagrams + "].");
			System.out.println("Size of the index: [" + waveTimeline.getIndex().getNumIdx() + "] ("
					+ (waveTimeline.getIndex().getNumIdx() * 16) + " bytes, i.e. "
					+ new DecimalFormat("#.##").format((double) (waveTimeline.getIndex().getNumIdx()) * 16.0 / 1048576.0)
					+ " megs).");
			System.out.println("---- Waveform timeline done.");

		} catch (SecurityException e) {
			System.err.println("Error: you don't have write access to the target database directory.");
			e.printStackTrace();
			return false;
		} catch (Exception e) {
			e.printStackTrace();
			System.err.println(e);
			return false;
		}

		return (true);
	}

	/**
	 * Provide the progress of computation, in percent, or -1 if that feature is not implemented.
	 * 
	 * @return -1 if not implemented, or an integer between 0 and 100.
	 */
	public int getProgress() {
		return percent;
	}

	/**
	 * Command-line entry point: builds a database layout around a new WaveTimelineMaker and runs it.
	 * 
	 * @param args
	 *            ignored
	 * @throws Exception
	 *             if the database layout cannot be set up or the computation fails with an uncaught error
	 */
	public static void main(String[] args) throws Exception {
		VoiceImportComponent vic = new WaveTimelineMaker();
		// NOTE(review): db is not used directly; presumably the DatabaseLayout constructor initializes the
		// component it is given -- confirm before removing.
		DatabaseLayout db = new DatabaseLayout(vic);
		vic.compute();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy