All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.voiceimport.vocalizations.VocalizationIntonationWriter Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2010 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

package marytts.tools.voiceimport.vocalizations;

import java.io.BufferedOutputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.SortedMap;
import java.util.TreeMap;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;

import marytts.exceptions.MaryConfigurationException;
import marytts.signalproc.analysis.F0TrackerAutocorrelationHeuristic;
import marytts.signalproc.analysis.PitchFileHeader;
import marytts.signalproc.analysis.PitchReaderWriter;
import marytts.signalproc.analysis.SPTKPitchReaderWriter;
import marytts.tools.voiceimport.DatabaseLayout;
import marytts.tools.voiceimport.VoiceImportComponent;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.MaryHeader;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.io.BasenameList;
import marytts.util.math.Polynomial;
import marytts.util.signal.SignalProcUtils;
import marytts.vocalizations.VocalizationIntonationReader;
import marytts.vocalizations.VocalizationUnitFileReader;

/**
 * Writes vocalization intonation data into a timeline file. This class creates a timeline file containing the intonation
 * (F0) contours of the vocalizations and their polynomial coefficients.
 * 
 * @author sathish pammi
 *
 */
public class VocalizationIntonationWriter extends VoiceImportComponent {

	// Not assigned or read in the visible part of this class.
	protected String vocalizationsDir;
	// Basenames of the vocalizations; loaded in initialiseComp() and iterated when writing features.
	protected BasenameList bnlVocalizations;
	// Reader for the vocalization unit file; created in compute().
	protected VocalizationUnitFileReader listenerUnits;

	// Database layout; stored by getDefaultProps().
	protected DatabaseLayout db = null;
	// NOTE(review): presumably a progress percentage; it is never updated in the visible code.
	protected int percent = 0;

	// Property keys of this component. Each key is the component name followed by a suffix.

	// Directory from which external F0 files are loaded.
	public final String PITCHDIR = getName() + ".pitchDir";
	// Wave directory (default: <vocalizations dir>/wav); not read in the visible code.
	public final String WAVEDIR = getName() + ".inputWaveDir";
	// Order of the polynomial fitted to each F0 contour.
	public final String POLYORDER = getName() + ".polynomialOrder";
	// "true" to load F0 from external files; any other value computes F0 from the wave file.
	public final String ISEXTERNALF0 = getName() + ".isExternalF0Usage";
	// Format of the external F0 files; "sptk" and "ptc" are the values handled.
	public final String EXTERNALF0FORMAT = getName() + ".externalF0Format";
	// File name extension of the external F0 files (default ".lf0").
	public final String EXTERNALEXT = getName() + ".externalF0Extention";
	// Path of the vocalization unit file read by compute().
	public final String UNITFILE = getName() + ".unitFile";
	// Skip size written to the output file (default "0.005"; presumably seconds -- TODO confirm).
	public final String SKIPSIZE = getName() + ".skipSize";
	// Window size written to the output file (default "0.005"; presumably seconds -- TODO confirm).
	public final String WINDOWSIZE = getName() + ".windowSize";
	// Path of the intonation timeline file written by compute().
	public final String F0TIMELINE = getName() + ".intonationTimeLineFile";
	// Path of the F0 feature definition file; not read in the visible code.
	public final String F0FEATDEF = getName() + ".intonationFeatureDefinition";

	/**
	 * Returns the name of this component. The same string is used as the prefix of every property key declared by this
	 * class.
	 * 
	 * @return the constant "VocalizationIntonationWriter"
	 */
	public String getName() {
		return "VocalizationIntonationWriter";
	}

	/**
	 * Prepares the component: ensures that the "files" sub-directory of the vocalizations directory exists and loads the
	 * list of vocalization basenames.
	 * <p>
	 * The basenames are read from "basenames.lst" in the vocalizations directory when that file exists; otherwise they are
	 * collected from the ".wav" files in the "wav" sub-directory. An IOException while loading is only printed, in which
	 * case the basename list is left unchanged.
	 * 
	 * @throws Error
	 *             if the "files" directory is missing and cannot be created
	 */
	@Override
	protected void initialiseComp() {

		final String vocalizationsRoot = db.getProp(db.VOCALIZATIONSDIR);

		// make sure the output directory for timeline files is there
		final File timelineFolder = new File(vocalizationsRoot + File.separator + "files");
		if (!timelineFolder.exists()) {
			System.out.println("vocalizations/files directory does not exist; ");
			if (!timelineFolder.mkdirs()) {
				throw new Error("Could not create vocalizations/files");
			}
			System.out.println("Created successfully.\n");
		}

		final String basenameFile = vocalizationsRoot + File.separator + "basenames.lst";
		final boolean useWavDirectory = !new File(basenameFile).exists();
		try {
			if (useWavDirectory) {
				// no explicit list: fall back to whatever wave files are present
				final String vocalWavDir = vocalizationsRoot + File.separator + "wav";
				System.out.println("Loading basenames of vocalizations from '" + vocalWavDir + "' directory...");
				bnlVocalizations = new BasenameList(vocalWavDir, ".wav");
				System.out.println("Found " + bnlVocalizations.getLength() + " vocalizations in " + vocalWavDir + " directory");
			} else {
				System.out.println("Loading basenames of vocalizations from '" + basenameFile + "' list...");
				bnlVocalizations = new BasenameList(basenameFile);
				System.out.println("Found " + bnlVocalizations.getLength() + " vocalizations in basename list");
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Stores the given database layout and returns the property map of this component, filling it with default values the
	 * first time it is requested.
	 * 
	 * @param db
	 *            the database layout to remember and to derive default paths from
	 * @return the property map; created and populated here if it did not exist yet
	 */
	public SortedMap getDefaultProps(DatabaseLayout db) {
		this.db = db;
		if (props != null) {
			return props;
		}

		props = new TreeMap();

		// common path prefixes shared by several defaults
		final String vocalizationsPrefix = db.getProp(db.VOCALIZATIONSDIR) + File.separator;
		final String filesPrefix = vocalizationsPrefix + "files" + File.separator;
		final String maryExtension = db.getProp(db.MARYEXT);

		props.put(WAVEDIR, vocalizationsPrefix + "wav");
		props.put(UNITFILE, filesPrefix + "vocalization_units" + maryExtension);
		props.put(POLYORDER, "3");
		props.put(ISEXTERNALF0, "true");
		props.put(EXTERNALF0FORMAT, "sptk");
		props.put(EXTERNALEXT, ".lf0");
		props.put(PITCHDIR, vocalizationsPrefix + "lf0");
		props.put(SKIPSIZE, "0.005");
		props.put(WINDOWSIZE, "0.005");
		props.put(F0TIMELINE, filesPrefix + "vocalization_intonation" + maryExtension);
		props.put(F0FEATDEF, vocalizationsPrefix + "features" + File.separator + "vocalization_f0_feature_definition.txt");

		return props;
	}

	/**
	 * Creates the (empty) help map for this component. No help texts are registered for any of the properties.
	 */
	protected void setupHelp() {
		props2Help = new TreeMap();

	}

	/**
	 * Writes the intonation timeline file and verifies it by reading it back.
	 * <p>
	 * The unit file named by the UNITFILE property is opened first. Then the header and the per-vocalization intonation
	 * data are written to the file named by the F0TIMELINE property. Finally the written file is re-opened with a
	 * {@link VocalizationIntonationReader} and its number of units is compared with that of the unit file.
	 * 
	 * @return true if the written file reports the same number of units as the unit file, false otherwise
	 * @throws IOException
	 *             if the unit file cannot be read or the timeline file cannot be written or read back
	 * @throws MaryConfigurationException
	 *             if raised by one of the readers or writers used here
	 */
	@Override
	public boolean compute() throws IOException, MaryConfigurationException {

		listenerUnits = new VocalizationUnitFileReader(getProp(UNITFILE));

		// write features into timeline file
		DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(getProp(F0TIMELINE)))));
		try {
			writeHeaderTo(out);
			writeUnitFeaturesTo(out);
		} finally {
			// release the file handle even when writing fails half-way
			out.close();
		}

		// sanity check: the file just written must be readable and contain the expected number of units
		VocalizationIntonationReader tester = new VocalizationIntonationReader(getProp(F0TIMELINE));
		int unitsOnDisk = tester.getNumberOfUnits();
		if (unitsOnDisk == listenerUnits.getNumberOfUnits()) {
			System.out.println("Can read right number of units");
			return true;
		} else {
			System.out.println("Read wrong number of units: " + unitsOnDisk);
			return false;
		}
	}

	/**
	 * Writes the intonation data of all vocalizations to the given output.
	 * <p>
	 * Layout written: window size (float), skip size (float), number of units (int), then for each basename in the
	 * vocalization list two length-prefixed float arrays: the polynomial coefficients of the F0 contour, followed by the
	 * F0 contour itself. A length of 0 is written for an array that is not available.
	 * <p>
	 * If the F0 contour of a vocalization cannot be obtained because of an UnsupportedAudioFileException, the stack trace
	 * is printed and both arrays are written with length 0.
	 * 
	 * @param out
	 *            the output to write to
	 * @throws IOException
	 *             if writing fails or an F0 source cannot be read
	 */
	protected void writeUnitFeaturesTo(DataOutput out) throws IOException {

		int numUnits = listenerUnits.getNumberOfUnits();
		float windowSize = Float.parseFloat(getProp(WINDOWSIZE));
		float skipSize = Float.parseFloat(getProp(SKIPSIZE));

		out.writeFloat(windowSize);
		out.writeFloat(skipSize);
		out.writeInt(numUnits);

		// NOTE(review): the count written above comes from the unit file, while the loop below runs over the basename
		// list; presumably both always have the same size -- confirm, otherwise the file is inconsistent.
		for (int i = 0; i < bnlVocalizations.getLength(); i++) {

			double[] f0Array = null;

			try {
				f0Array = getVocalizationF0(bnlVocalizations.getName(i), false);
			} catch (UnsupportedAudioFileException e) {
				// best effort: keep going and write empty arrays for this vocalization
				e.printStackTrace();
			}

			// number of coefficients followed by the coefficients
			writeLengthPrefixedFloats(out, getPolynomialCoeffs(f0Array));

			// contour length followed by the contour values
			writeLengthPrefixedFloats(out, f0Array);
		}
	}

	/**
	 * Writes the number of elements of {@code values} as an int (0 if {@code values} is null), followed by every element
	 * converted to float.
	 * 
	 * @param out
	 *            the output to write to
	 * @param values
	 *            the values to write; may be null
	 * @throws IOException
	 *             if writing fails
	 */
	private void writeLengthPrefixedFloats(DataOutput out, double[] values) throws IOException {
		if (values == null) {
			out.writeInt(0);
			return;
		}
		out.writeInt(values.length);
		for (double value : values) {
			out.writeFloat((float) value);
		}
	}

	/**
	 * Obtains the F0 contour of one vocalization.
	 * <p>
	 * When the ISEXTERNALF0 property is "true", the contour is loaded from the file
	 * {@code <PITCHDIR>/<baseName><EXTERNALEXT>} using the reader selected by EXTERNALF0FORMAT ("sptk" or "ptc"); any other
	 * format value yields null. Otherwise the contour is computed from the wave file
	 * {@code <vocalizations dir>/wav/<baseName><wav extension>} with an autocorrelation-based F0 tracker.
	 * 
	 * @param baseName
	 *            basename of the vocalization (file name without directory and extension)
	 * @param doInterpolate
	 *            whether to pass the contour through interpolateF0Array before returning it
	 * @return interpolateF0Array(f0Array) if doInterpolate, f0Array otherwise; may be null
	 * @throws UnsupportedAudioFileException
	 *             if the wave file is not in a supported audio format
	 * @throws IOException
	 *             if an input file cannot be read
	 */
	private double[] getVocalizationF0(String baseName, boolean doInterpolate) throws UnsupportedAudioFileException, IOException {

		double[] f0Array = null;

		if ("true".equals(getProp(ISEXTERNALF0))) {

			String externalFormat = getProp(EXTERNALF0FORMAT);
			String fileName = getProp(PITCHDIR) + File.separator + baseName + getProp(EXTERNALEXT);
			System.out.println("Loading f0 contour from file : " + fileName);
			if ("sptk".equals(externalFormat)) {
				SPTKPitchReaderWriter sprw = new SPTKPitchReaderWriter(fileName);
				f0Array = sprw.getF0Contour();
			} else if ("ptc".equals(externalFormat)) {
				PitchReaderWriter sprw = new PitchReaderWriter(fileName);
				f0Array = sprw.contour;
			}
			// any other format value leaves f0Array null
		} else {
			PitchFileHeader params = new PitchFileHeader();
			F0TrackerAutocorrelationHeuristic tracker = new F0TrackerAutocorrelationHeuristic(params);
			// The separator between "wav" and the basename is required; without it the path would point to
			// "<vocalizations dir>/wav<baseName>..." instead of a file inside the wav directory.
			// NOTE(review): the WAVEDIR property is not consulted here -- confirm whether it should be.
			String waveFile = db.getProp(db.VOCALIZATIONSDIR) + File.separator + "wav" + File.separator + baseName
					+ db.getProp(db.WAVEXT);
			System.out.println("Computing f0 contour from wave file: " + waveFile);
			AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(waveFile));
			try {
				// Enforce PCM_SIGNED encoding
				if (!inputAudio.getFormat().getEncoding().equals(AudioFormat.Encoding.PCM_SIGNED)) {
					inputAudio = AudioSystem.getAudioInputStream(AudioFormat.Encoding.PCM_SIGNED, inputAudio);
				}

				// NOTE(review): the sample rate of the stream is not handed to the tracker; presumably the
				// PitchFileHeader defaults are expected to match the recordings -- confirm.
				AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
				double[] sentenceAudio = signal.getAllData();
				tracker.pitchAnalyze(new BufferedDoubleDataSource(sentenceAudio));
				f0Array = tracker.getF0Contour();
			} finally {
				// all samples have been copied into memory (or reading failed); release the audio file
				inputAudio.close();
			}
		}

		if (doInterpolate) {
			return interpolateF0Array(f0Array);
		}

		return f0Array;
	}

	/**
	 * Fits a polynomial to an F0 contour and returns its coefficients.
	 * <p>
	 * The contour is first passed through cutStartEndUnvoicedSegments and interpolateF0Array; the polynomial order is
	 * taken from the POLYORDER property.
	 * 
	 * @param f0Array
	 *            the F0 contour; may be null
	 * @return null if f0Array == null, otherwise the coefficients returned by Polynomial.fitPolynomial
	 * @throws NumberFormatException
	 *             if the POLYORDER property is not a valid integer
	 */
	private double[] getPolynomialCoeffs(double[] f0Array) {

		if (f0Array == null) {
			return null;
		}

		double[] trimmedContour = cutStartEndUnvoicedSegments(f0Array);
		double[] interpolatedContour = interpolateF0Array(trimmedContour);
		int polynomialOrder = Integer.parseInt(getProp(POLYORDER));
		return Polynomial.fitPolynomial(interpolatedContour, polynomialOrder);
	}

	/**
	 * Fills the gaps (unvoiced frames) of an F0 contour by linear interpolation.
	 * <p>
	 * Steps: every value equal to 0 is replaced by NaN; contours with at least 3 values are then passed through
	 * SignalProcUtils.medianFilter with argument 5 (presumably the window length); each run of NaN values that is followed
	 * by a valid value is filled with values on a straight line between the neighbouring valid values. A run at the very
	 * start has no previous value and is filled with the first valid value. A run at the very end is not filled.
	 * <p>
	 * Note that the replacement of 0 by NaN is written into the array passed in by the caller.
	 * 
	 * @param f0Array
	 *            the F0 contour; may be null; its zero entries are overwritten with NaN
	 * @return null if f0Array == null, otherwise the result of combineF0andInterpolate applied to the (filtered) contour
	 *         and the interpolated values
	 */
	private double[] interpolateF0Array(double[] f0Array) {

		if (f0Array == null) {
			return null;
		}

		// mark unvoiced frames
		for (int j = 0; j < f0Array.length; j++) {
			if (f0Array[j] == 0) {
				f0Array[j] = Double.NaN;
			}
		}
		if (f0Array.length >= 3) {
			f0Array = SignalProcUtils.medianFilter(f0Array, 5);
		}

		// holds only the interpolated values; positions with a valid F0 value stay NaN
		double[] interpol = new double[f0Array.length];
		Arrays.fill(interpol, Double.NaN);
		int iLastValid = -1;
		for (int j = 0; j < f0Array.length; j++) {
			if (Double.isNaN(f0Array[j])) {
				continue;
			}
			if (iLastValid != j - 1) {
				// there is a gap between the last valid value and this one
				// without a previous valid value, the current one is used, which gives a flat line
				double prevF0 = (iLastValid < 0) ? f0Array[j] : f0Array[iLastValid];
				double delta = (f0Array[j] - prevF0) / (j - iLastValid);
				double f0 = prevF0;
				for (int k = iLastValid + 1; k < j; k++) {
					f0 += delta;
					interpol[k] = f0;
				}
			}
			iLastValid = j;
		}

		return combineF0andInterpolate(f0Array, interpol);
	}

	/**
	 * cut begin-end unvoiced segments
	 * 
	 * @param array
	 *            array
	 * @return null if array == null, newArray
	 */
	private double[] cutStartEndUnvoicedSegments(double[] array) {

		if (array == null)
			return null;

		int startIndex = 0;
		int endIndex = array.length;

		// find start index
		for (int i = 0; i < array.length; i++) {
			if (array[i] != 0) {
				startIndex = i;
				break;
			}
		}

		// find end index
		for (int i = (array.length - 1); i > 0; i--) {
			if (array[i] != 0) {
				endIndex = i;
				break;
			}
		}

		int newArraySize = endIndex - startIndex;

		double[] newArray = new double[newArraySize];
		System.arraycopy(array, startIndex, newArray, 0, newArraySize);

		/*
		 * for ( int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy