All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.voiceimport.vocalizations.VocalizationF0PolyFeatureFileWriter Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.voiceimport.vocalizations;

import java.awt.Color;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;

import javax.sound.sampled.AudioFormat;
import javax.swing.JFrame;

import marytts.exceptions.MaryConfigurationException;
import marytts.features.FeatureDefinition;
import marytts.features.FeatureVector;
import marytts.signalproc.analysis.F0TrackerAutocorrelationHeuristic;
import marytts.signalproc.analysis.PitchFileHeader;
import marytts.signalproc.display.FunctionGraph;
import marytts.tools.voiceimport.DatabaseLayout;
import marytts.tools.voiceimport.VoiceImportComponent;
import marytts.unitselection.data.FeatureFileReader;
import marytts.unitselection.data.TimelineReader;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.Datagram;
import marytts.util.data.DatagramDoubleDataSource;
import marytts.util.data.MaryHeader;
import marytts.util.data.audio.AudioPlayer;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.ArrayUtils;
import marytts.util.math.Polynomial;
import marytts.util.signal.SignalProcUtils;
import marytts.vocalizations.VocalizationUnitFileReader;

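/**
 * Voice import component that writes a feature file containing polynomial coefficients approximating F0 contours.
 * <p>
 * NOT COMPLETED (kept because it may be useful in the future).
 * 
 * @author sathish
 *
 */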
public class VocalizationF0PolyFeatureFileWriter extends VoiceImportComponent {
	/** Output directory taken from the db.FILEDIR property; set (and created if missing) in compute(). */
	protected File maryDir;
	/**
	 * Reader for the per-unit input feature vectors used by writeUnitFeaturesTo().
	 * NOTE(review): the only assignment visible in this part of the file is commented out in compute() -- confirm
	 * that this field is initialised before writeUnitFeaturesTo() dereferences it.
	 */
	protected FeatureFileReader features;
	/** NOTE(review): only referenced from commented-out code in compute() in the visible part of the file. */
	protected FeatureDefinition inFeatureDefinition;
	/** File the F0 polynomial features are written to; set in compute() from the F0FEATUREFILE property. */
	protected File outFeatureFile;
	/** Feature definition of the output file, generated in compute() with one continuous feature per coefficient. */
	protected FeatureDefinition outFeatureDefinition;
	/** Unit file reader opened in compute() from the UNITFILE property. */
	protected VocalizationUnitFileReader listenerUnits;
	/** Waveform timeline reader opened in compute() from the WAVETIMELINE property. */
	protected TimelineReader audio;
	/** Database layout remembered by getDefaultProps(). */
	protected DatabaseLayout db = null;
	/** Progress indicator; writeUnitFeaturesTo() sets it to 100 * i / numUnits while iterating over the units. */
	protected int percent = 0;

	/** Component name; also used as the prefix of every property key below. */
	private final String name = "F0PolynomialFeatureFileWriter";
	// Property keys of this component, each built as name + ".<suffix>".
	public final String UNITFILE = name + ".unitFile";
	public final String WAVETIMELINE = name + ".waveTimeLine";
	public final String FEATUREFILE = name + ".featureFile";
	public final String F0FEATUREFILE = name + ".f0FeatureFile";
	public final String POLYNOMORDER = name + ".polynomOrder";
	public final String SHOWGRAPH = name + ".showGraph";
	public final String INTERPOLATE = name + ".interpolate";
	public final String MINPITCH = name + ".minPitch";
	public final String MAXPITCH = name + ".maxPitch";

	/**
	 * Returns the name of this component.
	 *
	 * @return the fixed component name
	 */
	public String getName() {
		return this.name;
	}

	/**
	 * Remembers the database layout and, on the first call, creates the default properties of this component.
	 * <p>
	 * File locations are derived from the layout's FILEDIR and MARYEXT properties. The pitch range defaults depend on
	 * the layout's GENDER property: 100-600 Hz for "female", 60-400 Hz for anything else (including a missing value).
	 *
	 * @param db
	 *            the database layout to read the file directory, file extension and gender from
	 * @return the property map of this component (created once, then reused on later calls)
	 */
	public SortedMap getDefaultProps(DatabaseLayout db) {
		this.db = db;
		if (props == null) {
			props = new TreeMap();
			String fileDir = db.getProp(db.FILEDIR);
			String maryExt = db.getProp(db.MARYEXT);
			props.put(UNITFILE, fileDir + "halfphoneUnits" + maryExt);
			props.put(WAVETIMELINE, fileDir + "timeline_waveforms" + maryExt);
			props.put(FEATUREFILE, fileDir + "halfphoneFeatures" + maryExt);
			props.put(F0FEATUREFILE, fileDir + "vocalizationF0Polynomials" + maryExt);
			props.put(POLYNOMORDER, "3");
			props.put(SHOWGRAPH, "false");
			props.put(INTERPOLATE, "true");
			// Constant-first comparison: an unset gender property selects the non-female defaults
			// instead of throwing a NullPointerException.
			if ("female".equals(db.getProp(db.GENDER))) {
				props.put(MINPITCH, "100");
				props.put(MAXPITCH, "600");
			} else {
				props.put(MINPITCH, "60");
				props.put(MAXPITCH, "400");
			}
		}
		return props;
	}

	/**
	 * Creates, on the first call, the help text for every property key of this component.
	 * <p>
	 * The pitch defaults quoted in the help text are kept in line with the values set in getDefaultProps()
	 * (female 100-600 Hz, male 60-400 Hz).
	 */
	protected void setupHelp() {
		if (props2Help == null) {
			props2Help = new TreeMap();
			props2Help.put(UNITFILE, "file containing all halfphone units");
			props2Help.put(WAVETIMELINE, "file containing all waveforms or models that can generate them");
			props2Help.put(FEATUREFILE, "file containing all halfphone units and their target cost features");
			props2Help.put(F0FEATUREFILE, "file containing syllable-based polynom coefficients on vowels");
			props2Help.put(POLYNOMORDER, "order of the polynoms used to approximate syllable F0 curves");
			props2Help.put(SHOWGRAPH, "whether to show a graph with f0 approximations for each sentence");
			props2Help.put(INTERPOLATE, "whether to interpolate F0 across unvoiced regions");
			props2Help.put(MINPITCH, "minimum value for the pitch (in Hz). Default: female 100, male 60");
			props2Help.put(MAXPITCH, "maximum value for the pitch (in Hz). Default: female 600, male 400");
		}
	}

	/**
	 * Generates the output feature definition, writes the F0 polynomial feature file and checks that it can be read
	 * back.
	 * <p>
	 * The generated feature definition has no byte and no short features, and one continuous feature
	 * "f0contour_aN" per polynomial coefficient, listed from N = POLYNOMORDER down to N = 0.
	 *
	 * @return true if the number of units read back from the written file equals the number of units in the unit
	 *         file, false otherwise
	 * @throws IOException
	 *             if the output directory cannot be created, or if reading or writing one of the files fails
	 * @throws MaryConfigurationException
	 *             declared by the signature; NOTE(review): presumably raised by the file readers -- confirm
	 */
	@Override
	public boolean compute() throws IOException, MaryConfigurationException {
		logger.info("F0 polynomial feature file writer started.");

		maryDir = new File(db.getProp(db.FILEDIR));
		if (!maryDir.exists()) {
			// mkdirs() signals failure through its return value only; report success only if the directory is there.
			if (!maryDir.mkdirs() && !maryDir.isDirectory()) {
				throw new IOException("Could not create the output directory [" + db.getProp(db.FILEDIR) + "]");
			}
			System.out.println("Created the output directory [" + (db.getProp(db.FILEDIR)) + "] to store the feature file.");
		}
		listenerUnits = new VocalizationUnitFileReader(getProp(UNITFILE));
		audio = new TimelineReader(getProp(WAVETIMELINE));

		// NOTE(review): the input feature reader is not loaded here (the two lines below are disabled), yet
		// writeUnitFeaturesTo() reads from the 'features' field -- confirm where it is meant to be initialised.
		// features = new FeatureFileReader(getProp(FEATUREFILE));
		// inFeatureDefinition = features.getFeatureDefinition();
		StringWriter sw = new StringWriter();
		PrintWriter pw = new PrintWriter(sw);
		pw.println(FeatureDefinition.BYTEFEATURES); // no byte features
		pw.println(FeatureDefinition.SHORTFEATURES); // no short features
		pw.println(FeatureDefinition.CONTINUOUSFEATURES);
		int polynomOrder = Integer.parseInt(getProp(POLYNOMORDER));
		for (int i = polynomOrder; i >= 0; i--) {
			pw.println("0 linear | f0contour_a" + i);
		}
		pw.close();
		String fd = sw.toString();
		logger.debug("Generated the following feature definition:");
		logger.debug(fd);
		StringReader sr = new StringReader(fd);
		BufferedReader br = new BufferedReader(sr);
		outFeatureDefinition = new FeatureDefinition(br, true);

		outFeatureFile = new File(getProp(F0FEATUREFILE));
		DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outFeatureFile)));
		try {
			writeHeaderTo(out);
			writeUnitFeaturesTo(out);
		} finally {
			// Always release the file handle, also when writing fails half-way.
			out.close();
		}
		logger.debug("Number of processed units: " + listenerUnits.getNumberOfUnits());

		// Sanity check: re-open the file just written and compare the unit counts.
		FeatureFileReader tester = FeatureFileReader.getFeatureFileReader(getProp(F0FEATUREFILE));
		int unitsOnDisk = tester.getNumberOfUnits();
		if (unitsOnDisk == listenerUnits.getNumberOfUnits()) {
			System.out.println("Can read right number of units");
			return true;
		} else {
			System.out.println("Read wrong number of units: " + unitsOnDisk);
			return false;
		}
	}

	/**
	 * @param out
	 *            out
	 * @throws IOException
	 *             IOException
	 * @throws UnsupportedEncodingException
	 *             UnsupportedEncodingException
	 * @throws FileNotFoundException
	 *             FileNotFoundException
	 */
	protected void writeUnitFeaturesTo(DataOutput out) throws IOException, UnsupportedEncodingException, FileNotFoundException {
		int numUnits = listenerUnits.getNumberOfUnits();
		int unitSampleRate = listenerUnits.getSampleRate();
		int audioSampleRate = audio.getSampleRate();
		boolean showGraph = Boolean.parseBoolean(getProp(SHOWGRAPH));
		boolean interpolate = Boolean.parseBoolean(getProp(INTERPOLATE));
		int polynomOrder = Integer.parseInt(getProp(POLYNOMORDER));
		float[] zeros = new float[polynomOrder + 1];
		int unitIndex = 0;

		out.writeInt(numUnits);
		logger.debug("Number of units : " + numUnits);

		FeatureDefinition featureDefinition = features.getFeatureDefinition();
		int fiPhoneme = featureDefinition.getFeatureIndex("phone");
		byte fvPhoneme_0 = featureDefinition.getFeatureValueAsByte(fiPhoneme, "0");
		byte fvPhoneme_Silence = featureDefinition.getFeatureValueAsByte(fiPhoneme, "_");
		int fiLR = featureDefinition.getFeatureIndex("halfphone_lr");
		byte fvLR_L = featureDefinition.getFeatureValueAsByte(fiLR, "L");
		byte fvLR_R = featureDefinition.getFeatureValueAsByte(fiLR, "R");
		int fiSylStart = featureDefinition.getFeatureIndex("segs_from_syl_start");
		int fiSylEnd = featureDefinition.getFeatureIndex("segs_from_syl_end");
		int fiSentenceStart = featureDefinition.getFeatureIndex("words_from_sentence_start");
		int fiSentenceEnd = featureDefinition.getFeatureIndex("words_from_sentence_end");
		int fiWordStart = featureDefinition.getFeatureIndex("segs_from_word_start");
		int fiWordEnd = featureDefinition.getFeatureIndex("segs_from_word_end");
		int fiVowel = featureDefinition.getFeatureIndex("ph_vc");
		byte fvVowel_Plus = featureDefinition.getFeatureValueAsByte(fiVowel, "+");

		boolean haveUnitLogF0 = false;
		int fiUnitLogF0 = -1;
		int fiUnitLogF0delta = -1;
		if (featureDefinition.hasFeature("unit_logf0") && featureDefinition.hasFeature("unit_logf0delta")) {
			haveUnitLogF0 = true;
			fiUnitLogF0 = featureDefinition.getFeatureIndex("unit_logf0");
			fiUnitLogF0delta = featureDefinition.getFeatureIndex("unit_logf0delta");
		}

		FunctionGraph f0Graph = null;
		JFrame jf = null;
		int iSentenceStart = -1;
		int iSentenceEnd = -1;
		List iSylStarts = new ArrayList();
		List iSylEnds = new ArrayList();
		List iSylVowels = new ArrayList();
		if (showGraph) {
			f0Graph = new FunctionGraph(0, 1, new double[1]);
			f0Graph.setYMinMax(50, 300);
			f0Graph.setPrimaryDataSeriesStyle(Color.BLUE, FunctionGraph.DRAW_DOTS, FunctionGraph.DOT_FULLCIRCLE);
			jf = f0Graph.showInJFrame("Sentence", false, true);
		}

		for (int i = 0; i < numUnits; i++) {
			percent = 100 * i / numUnits;
			FeatureVector fv = features.getFeatureVector(i);
			// System.out.print(featureDefinition.getFeatureValueAsString("phone", fv));
			// if (fv.getByteFeature(fiPhoneme) == fvPhoneme_0
			// || fv.getByteFeature(fiPhoneme) == fvPhoneme_Silence) continue;
			if (iSentenceStart == -1 && fv.getByteFeature(fiSentenceStart) == 0 && fv.getByteFeature(fiWordStart) == 0
					&& fv.getByteFeature(fiLR) == fvLR_L) { // first unit in sentence
				iSentenceStart = i;
				iSylStarts.clear();
				iSylEnds.clear();
				iSylVowels.clear();
				// System.out.print(", is sentence start");
			}
			// Silence and edge units cannot be part of syllables, but they can
			// mark start/end of sentence:
			if (fv.getByteFeature(fiPhoneme) != fvPhoneme_0 && fv.getByteFeature(fiPhoneme) != fvPhoneme_Silence) {
				if (fv.getByteFeature(fiSylStart) == 0 && fv.getByteFeature(fiLR) == fvLR_L) { // first segment in syllable
					if (iSylStarts.size() > iSylEnds.size()) {
						System.err.println("Syllable ends before other syllable starts!");
					}
					iSylStarts.add(i);
					// System.out.print(", is syl start");
				}
				if (fv.getByteFeature(fiVowel) == fvVowel_Plus && iSylVowels.size() < iSylStarts.size()) { // first vowel unit in
																											// syllable
					iSylVowels.add(i);
					// System.out.print(", is vowel");
				}
				if (fv.getByteFeature(fiSylEnd) == 0 && fv.getByteFeature(fiLR) == fvLR_R) { // last segment in syllable
					iSylEnds.add(i);
					// System.out.print(", is syl end");
					assert iSylStarts.size() == iSylEnds.size();
					if (iSylVowels.size() < iSylEnds.size()) {
						// System.err.println("Syllable contains no vowel -- skipping");
						iSylStarts.remove(iSylStarts.size() - 1);
						iSylEnds.remove(iSylEnds.size() - 1);
					}
				}
			}
			if (iSentenceStart != -1 && fv.getByteFeature(fiSentenceEnd) == 0 && fv.getByteFeature(fiWordEnd) == 0
					&& fv.getByteFeature(fiLR) == fvLR_R) { // last unit in sentence
				iSentenceEnd = i;
				// System.out.print(", is sentence end");
				if (iSylEnds.size() < iSylStarts.size()) {
					System.err.println("Last syllable in sentence is not properly closed");
					iSylEnds.add(i);
				}
			}
			// System.out.println();

			if (iSentenceStart >= 0 && iSentenceEnd >= iSentenceStart && iSylVowels.size() > 0) {
				assert iSylStarts.size() == iSylEnds.size() : "Have " + iSylStarts.size() + " syllable starts, but "
						+ iSylEnds.size() + " syllable ends!";
				assert iSylStarts.size() == iSylVowels.size();
				long tsSentenceStart = listenerUnits.getUnit(iSentenceStart).startTime;
				long tsSentenceEnd = listenerUnits.getUnit(iSentenceEnd).startTime + listenerUnits.getUnit(iSentenceEnd).duration;
				long tsSentenceDuration = tsSentenceEnd - tsSentenceStart;
				Datagram[] sentenceData = audio.getDatagrams(tsSentenceStart, tsSentenceDuration);
				DatagramDoubleDataSource ddds = new DatagramDoubleDataSource(sentenceData);
				double[] sentenceAudio = ddds.getAllData();
				AudioPlayer ap = null;
				if (showGraph) {
					ap = new AudioPlayer(new DDSAudioInputStream(new BufferedDoubleDataSource(sentenceAudio), new AudioFormat(
							AudioFormat.Encoding.PCM_SIGNED, audioSampleRate, // samples per second
							16, // bits per sample
							1, // mono
							2, // nr. of bytes per frame
							audioSampleRate, // nr. of frames per second
							true))); // big-endian;))
					ap.start();
				}
				PitchFileHeader params = new PitchFileHeader();
				params.fs = audioSampleRate;
				params.minimumF0 = Double.parseDouble(getProp(MINPITCH));
				params.maximumF0 = Double.parseDouble(getProp(MAXPITCH));
				F0TrackerAutocorrelationHeuristic tracker = new F0TrackerAutocorrelationHeuristic(params);
				tracker.pitchAnalyze(new BufferedDoubleDataSource(sentenceAudio));
				double frameShiftTime = tracker.getSkipSizeInSeconds();
				double[] f0Array = tracker.getF0Contour();
				if (f0Array != null) {
					for (int j = 0; j < f0Array.length; j++) {
						if (f0Array[j] == 0) {
							f0Array[j] = Double.NaN;
						}
					}
					if (f0Array.length >= 3) {
						f0Array = SignalProcUtils.medianFilter(f0Array, 5);
					}
					if (showGraph) {
						f0Graph.updateData(0, tsSentenceDuration / (double) audioSampleRate / f0Array.length, f0Array);
						jf.repaint();
					}

					double[] f0AndInterpol;
					if (interpolate) {
						double[] interpol = new double[f0Array.length];
						Arrays.fill(interpol, Double.NaN);
						f0AndInterpol = new double[f0Array.length];
						int iLastValid = -1;
						for (int j = 0; j < f0Array.length; j++) {
							if (!Double.isNaN(f0Array[j])) { // a valid value
								if (iLastValid == j - 1) {
									// no need to interpolate
									f0AndInterpol[j] = f0Array[j];
								} else {
									// need to interpolate
									double prevF0;
									if (iLastValid < 0) { // we don't have a previous value -- use current one
										prevF0 = f0Array[j];
									} else {
										prevF0 = f0Array[iLastValid];
									}
									double delta = (f0Array[j] - prevF0) / (j - iLastValid);
									double f0 = prevF0;
									for (int k = iLastValid + 1; k < j; k++) {
										f0 += delta;
										interpol[k] = f0;
										f0AndInterpol[k] = f0;
									}
								}
								iLastValid = j;
							}
						}
						if (showGraph) {
							f0Graph.addDataSeries(interpol, Color.GREEN, FunctionGraph.DRAW_DOTS, FunctionGraph.DOT_EMPTYCIRCLE);
							jf.repaint();
						}
					} else {
						f0AndInterpol = f0Array.clone();
					}

					for (int j = 0; j < f0AndInterpol.length; j++) {
						if (f0AndInterpol[j] == 0)
							f0AndInterpol[j] = Double.NaN;
						else
							f0AndInterpol[j] = Math.log(f0AndInterpol[j]);
					}
					double[] approx = new double[f0Array.length];
					Arrays.fill(approx, Double.NaN);
					for (int s = 0; s < iSylStarts.size(); s++) {
						long tsSylStart = listenerUnits.getUnit(iSylStarts.get(s)).startTime;
						long tsSylEnd = listenerUnits.getUnit(iSylEnds.get(s)).startTime
								+ listenerUnits.getUnit(iSylEnds.get(s)).duration;
						long tsSylDuration = tsSylEnd - tsSylStart;
						int iSylVowel = iSylVowels.get(s);
						// now map time to position in f0AndInterpol array:
						int iSylStart = (int) (((double) (tsSylStart - tsSentenceStart) / tsSentenceDuration) * f0AndInterpol.length);
						assert iSylStart >= 0;
						int iSylEnd = iSylStart + (int) ((double) tsSylDuration / tsSentenceDuration * f0AndInterpol.length) + 1;
						if (iSylEnd > f0AndInterpol.length)
							iSylEnd = f0AndInterpol.length;
						// System.out.println("Syl "+s+" from "+iSylStart+" to "+iSylEnd+" out of "+f0AndInterpol.length);
						double[] sylF0 = new double[iSylEnd - iSylStart];
						System.arraycopy(f0AndInterpol, iSylStart, sylF0, 0, sylF0.length);
						double[] coeffs = Polynomial.fitPolynomial(sylF0, polynomOrder);
						if (coeffs != null) {
							if (showGraph) {
								double[] sylPred = Polynomial.generatePolynomialValues(coeffs, sylF0.length, 0, 1);
								System.arraycopy(sylPred, 0, approx, iSylStart, sylPred.length);
							}
							// Write coefficients to file
							while (unitIndex < iSylVowel) {
								FeatureVector outFV = outFeatureDefinition.toFeatureVector(unitIndex, null, null, zeros);
								outFV.writeTo(out);
								unitIndex++;
							}
							float[] fcoeffs = ArrayUtils.copyDouble2Float(coeffs);
							// System.out.print("Polynomial values (unit "+unitIndex+") ");
							// for (int p=0; p




© 2015 - 2025 Weber Informatics LLC | Privacy Policy