All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.voiceimport.F0PolynomialInspector Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.voiceimport;

import java.awt.Color;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;

import javax.sound.sampled.AudioFormat;
import javax.swing.JFrame;

import marytts.exceptions.MaryConfigurationException;
import marytts.features.FeatureDefinition;
import marytts.features.FeatureVector;
import marytts.signalproc.analysis.F0TrackerAutocorrelationHeuristic;
import marytts.signalproc.analysis.PitchFileHeader;
import marytts.signalproc.display.FunctionGraph;
import marytts.unitselection.data.FeatureFileReader;
import marytts.unitselection.data.HnmTimelineReader;
import marytts.unitselection.data.TimelineReader;
import marytts.unitselection.data.UnitFileReader;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.Datagram;
import marytts.util.data.DatagramDoubleDataSource;
import marytts.util.data.audio.AudioPlayer;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.ArrayUtils;
import marytts.util.math.Polynomial;
import marytts.util.signal.SignalProcUtils;

/**
 * Voice-import component that walks through a unit database sentence by sentence, plays the
 * sentence audio and plots several F0 views of it in one graph:
 * <ul>
 * <li>blue dots: F0 tracked from the sentence audio (unvoiced frames left out),</li>
 * <li>green circles: linear interpolation across the unvoiced gaps,</li>
 * <li>red line: the contour generated from the per-syllable polynomial coefficients stored in the
 * F0 feature file,</li>
 * <li>black line (only if the features <code>unit_logf0</code> and <code>unit_logf0delta</code>
 * exist): a per-unit linear contour.</li>
 * </ul>
 * The component does not write any file; it is a visual/auditory inspection tool.
 */
public class F0PolynomialInspector extends VoiceImportComponent {
	protected FeatureFileReader features;
	protected FeatureDefinition inFeatureDefinition;
	protected UnitFileReader units;
	protected FeatureFileReader contours;
	protected TimelineReader audio;
	protected DatabaseLayout db = null;
	/** Progress of the unit loop in {@link #displaySentences()}, in percent. */
	protected int percent = 0;

	private final String name = "F0PolynomialInspector";
	public final String UNITFILE = name + ".unitFile";
	public final String WAVETIMELINE = name + ".waveTimeLine";
	public final String ISHNMTIMELINE = name + ".isHnmTimeline";
	public final String FEATUREFILE = name + ".featureFile";
	public final String F0FEATUREFILE = name + ".f0FeatureFile";

	/**
	 * @return the component name, which is also the prefix of all property keys of this component
	 */
	public String getName() {
		return name;
	}

	/**
	 * Remember the database layout and lazily create the default property values. All file
	 * defaults are built from the layout's FILEDIR and MARYEXT properties.
	 * 
	 * @param db
	 *            the database layout to take directory and file extension from
	 * @return the property map of this component (created on first call, reused afterwards)
	 */
	public SortedMap<String, String> getDefaultProps(DatabaseLayout db) {
		this.db = db;
		if (props == null) {
			props = new TreeMap<String, String>();
			String fileDir = db.getProp(db.FILEDIR);
			String maryExt = db.getProp(db.MARYEXT);
			props.put(UNITFILE, fileDir + "halfphoneUnits" + maryExt);
			props.put(WAVETIMELINE, fileDir + "timeline_waveforms" + maryExt);
			props.put(ISHNMTIMELINE, "false");
			props.put(FEATUREFILE, fileDir + "halfphoneFeatures_ac" + maryExt);
			props.put(F0FEATUREFILE, fileDir + "syllableF0Polynomials" + maryExt);
		}
		return props;
	}

	/**
	 * Lazily create the help texts for the properties of this component.
	 */
	protected void setupHelp() {
		if (props2Help == null) {
			props2Help = new TreeMap<String, String>();
			props2Help.put(UNITFILE, "file containing all halfphone units");
			props2Help.put(WAVETIMELINE, "file containing all waveforms or models that can generate them");
			props2Help.put(ISHNMTIMELINE,
					"\"true\" if the wave timeline is an HNM timeline (read with HnmTimelineReader), \"false\" otherwise");
			props2Help.put(FEATUREFILE, "file containing all halfphone units and their target cost features");
			props2Help.put(F0FEATUREFILE, "file containing syllable-based polynom coefficients on vowels");
		}
	}

	/**
	 * Open the unit file, the audio timeline, the halfphone feature file and the F0 polynomial
	 * feature file named by the component properties, then run {@link #displaySentences()}.
	 * 
	 * @return always true
	 * @throws IOException
	 *             if one of the files cannot be read
	 * @throws MaryConfigurationException
	 *             if one of the readers rejects its file
	 */
	@Override
	public boolean compute() throws IOException, MaryConfigurationException {
		logger.info("F0 polynomial inspector started.");

		units = new UnitFileReader(getProp(UNITFILE));
		// Null-safe, case-insensitive flag check: anything but "true" selects the plain reader.
		if ("true".equalsIgnoreCase(getProp(ISHNMTIMELINE))) {
			audio = new HnmTimelineReader(getProp(WAVETIMELINE));
		} else {
			audio = new TimelineReader(getProp(WAVETIMELINE));
		}

		features = new FeatureFileReader(getProp(FEATUREFILE));
		inFeatureDefinition = features.getFeatureDefinition();
		contours = new FeatureFileReader(getProp(F0FEATUREFILE));

		displaySentences();
		return true;
	}

	/**
	 * Iterate over all units, detect sentence and syllable boundaries from the unit features and,
	 * for every complete sentence that contains at least one syllable with a vowel, play its audio
	 * and plot the tracked, interpolated and polynomial-predicted F0 contours. After each sentence
	 * the method waits for playback to finish plus four seconds before moving on.
	 * <p>
	 * If the calling thread is interrupted while waiting, the interrupt flag is restored and the
	 * method returns without processing the remaining sentences.
	 * 
	 * @throws IOException
	 *             IOException
	 */
	protected void displaySentences() throws IOException {
		int numUnits = units.getNumberOfUnits();
		int audioSampleRate = audio.getSampleRate();

		logger.debug("Number of units : " + numUnits);

		FeatureDefinition featureDefinition = features.getFeatureDefinition();
		int fiPhoneme = featureDefinition.getFeatureIndex("phone");
		byte fvPhoneme_0 = featureDefinition.getFeatureValueAsByte(fiPhoneme, "0");
		byte fvPhoneme_Silence = featureDefinition.getFeatureValueAsByte(fiPhoneme, "_");
		int fiLR = featureDefinition.getFeatureIndex("halfphone_lr");
		byte fvLR_L = featureDefinition.getFeatureValueAsByte(fiLR, "L");
		byte fvLR_R = featureDefinition.getFeatureValueAsByte(fiLR, "R");
		int fiSylStart = featureDefinition.getFeatureIndex("segs_from_syl_start");
		int fiSylEnd = featureDefinition.getFeatureIndex("segs_from_syl_end");
		int fiSentenceStart = featureDefinition.getFeatureIndex("words_from_sentence_start");
		int fiSentenceEnd = featureDefinition.getFeatureIndex("words_from_sentence_end");
		int fiWordStart = featureDefinition.getFeatureIndex("segs_from_word_start");
		int fiWordEnd = featureDefinition.getFeatureIndex("segs_from_word_end");
		int fiVowel = featureDefinition.getFeatureIndex("ph_vc");
		byte fvVowel_Plus = featureDefinition.getFeatureValueAsByte(fiVowel, "+");

		// The per-unit linear contour is optional: it needs both features to be present.
		boolean haveUnitLogF0 = false;
		int fiUnitLogF0 = -1;
		int fiUnitLogF0delta = -1;
		if (featureDefinition.hasFeature("unit_logf0") && featureDefinition.hasFeature("unit_logf0delta")) {
			haveUnitLogF0 = true;
			fiUnitLogF0 = featureDefinition.getFeatureIndex("unit_logf0");
			fiUnitLogF0delta = featureDefinition.getFeatureIndex("unit_logf0delta");
		}

		int iSentenceStart = -1;
		int iSentenceEnd = -1;
		// Parallel lists: for syllable s, the unit index of its first unit, last unit and first vowel unit.
		List<Integer> iSylStarts = new ArrayList<Integer>();
		List<Integer> iSylEnds = new ArrayList<Integer>();
		List<Integer> iSylVowels = new ArrayList<Integer>();

		FunctionGraph f0Graph = new FunctionGraph(0, 1, new double[1]);
		f0Graph.setYMinMax(50, 300);
		f0Graph.setPrimaryDataSeriesStyle(Color.BLUE, FunctionGraph.DRAW_DOTS, FunctionGraph.DOT_FULLCIRCLE);
		JFrame jf = f0Graph.showInJFrame("Sentence", false, true);

		for (int i = 0; i < numUnits; i++) {
			// long arithmetic so that 100 * i cannot overflow for very large databases
			percent = (int) (100L * i / numUnits);
			FeatureVector fv = features.getFeatureVector(i);

			if (iSentenceStart == -1 && fv.getByteFeature(fiSentenceStart) == 0 && fv.getByteFeature(fiWordStart) == 0
					&& fv.getByteFeature(fiLR) == fvLR_L) { // first unit in sentence
				iSentenceStart = i;
				iSylStarts.clear();
				iSylEnds.clear();
				iSylVowels.clear();
			}
			// Silence and edge units cannot be part of syllables, but they can
			// mark start/end of sentence:
			if (fv.getByteFeature(fiPhoneme) != fvPhoneme_0 && fv.getByteFeature(fiPhoneme) != fvPhoneme_Silence) {
				if (fv.getByteFeature(fiSylStart) == 0 && fv.getByteFeature(fiLR) == fvLR_L) { // first segment in syllable
					if (iSylStarts.size() > iSylEnds.size()) {
						logger.warn("Syllable ends before other syllable starts!");
					}
					iSylStarts.add(i);
				}
				if (fv.getByteFeature(fiVowel) == fvVowel_Plus && iSylVowels.size() < iSylStarts.size()) {
					// first vowel unit in syllable
					iSylVowels.add(i);
				}
				if (fv.getByteFeature(fiSylEnd) == 0 && fv.getByteFeature(fiLR) == fvLR_R) { // last segment in syllable
					iSylEnds.add(i);
					assert iSylStarts.size() == iSylEnds.size();
					if (iSylVowels.size() < iSylEnds.size()) {
						// Syllable contains no vowel -- drop it, so the three lists stay aligned
						iSylStarts.remove(iSylStarts.size() - 1);
						iSylEnds.remove(iSylEnds.size() - 1);
					}
				}
			}
			if (iSentenceStart != -1 && fv.getByteFeature(fiSentenceEnd) == 0 && fv.getByteFeature(fiWordEnd) == 0
					&& fv.getByteFeature(fiLR) == fvLR_R) { // last unit in sentence
				iSentenceEnd = i;
				if (iSylEnds.size() < iSylStarts.size()) {
					logger.warn("Last syllable in sentence is not properly closed");
					iSylEnds.add(i);
				}
			}

			if (iSentenceStart >= 0 && iSentenceEnd >= iSentenceStart && iSylVowels.size() > 0) {
				assert iSylStarts.size() == iSylEnds.size() : "Have " + iSylStarts.size() + " syllable starts, but "
						+ iSylEnds.size() + " syllable ends!";
				assert iSylStarts.size() == iSylVowels.size();

				long tsSentenceStart = units.getUnit(iSentenceStart).startTime;
				long tsSentenceEnd = units.getUnit(iSentenceEnd).startTime + units.getUnit(iSentenceEnd).duration;
				long tsSentenceDuration = tsSentenceEnd - tsSentenceStart;
				Datagram[] sentenceData = audio.getDatagrams(tsSentenceStart, tsSentenceDuration);
				double[] sentenceAudio = new DatagramDoubleDataSource(sentenceData).getAllData();

				// Start playback first so that the listener hears the sentence while the graph is built.
				AudioPlayer ap = new AudioPlayer(new DDSAudioInputStream(new BufferedDoubleDataSource(sentenceAudio),
						new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, audioSampleRate, // samples per second
								16, // bits per sample
								1, // mono
								2, // nr. of bytes per frame
								audioSampleRate, // nr. of frames per second
								true))); // big-endian
				ap.start();

				PitchFileHeader params = new PitchFileHeader();
				params.fs = audioSampleRate;
				F0TrackerAutocorrelationHeuristic tracker = new F0TrackerAutocorrelationHeuristic(params);
				tracker.pitchAnalyze(new BufferedDoubleDataSource(sentenceAudio));
				double[] f0Array = tracker.getF0Contour();
				if (f0Array != null) {
					// A tracked value of 0 is turned into NaN so that it is handled as a gap below.
					for (int j = 0; j < f0Array.length; j++) {
						if (f0Array[j] == 0) {
							f0Array[j] = Double.NaN;
						}
					}
					if (f0Array.length >= 3) {
						f0Array = SignalProcUtils.medianFilter(f0Array, 5);
					}
					// NOTE(review): the series added with addDataSeries() below are added once per
					// sentence; presumably updateData() discards the previous ones -- confirm.
					f0Graph.updateData(0, tsSentenceDuration / (double) audioSampleRate / f0Array.length, f0Array);
					jf.repaint();

					double[] interpol = interpolateGaps(f0Array);
					f0Graph.addDataSeries(interpol, Color.GREEN, FunctionGraph.DRAW_DOTS, FunctionGraph.DOT_EMPTYCIRCLE);
					jf.repaint();

					double[] approx = syllableContour(iSylStarts, iSylEnds, iSylVowels, tsSentenceStart, tsSentenceDuration,
							f0Array.length);
					expInPlace(approx);
					f0Graph.addDataSeries(approx, Color.RED, FunctionGraph.DRAW_LINE, -1);

					if (haveUnitLogF0) {
						double[] unitF0 = unitContour(iSentenceStart, iSentenceEnd, fiUnitLogF0, fiUnitLogF0delta,
								tsSentenceStart, tsSentenceDuration, f0Array.length);
						expInPlace(unitF0);
						f0Graph.addDataSeries(unitF0, Color.BLACK, FunctionGraph.DRAW_LINE, -1);
					}
				}
				try {
					ap.join();
					Thread.sleep(4000);
				} catch (InterruptedException ie) {
					// Somebody wants this thread to stop: keep the interrupt flag set for the
					// caller and abandon the remaining sentences instead of racing through them.
					Thread.currentThread().interrupt();
					return;
				}
				iSentenceStart = -1;
				iSentenceEnd = -1;
				iSylStarts.clear();
				iSylEnds.clear();
				iSylVowels.clear();
			}
		}
		percent = 100;
	}

	/**
	 * Linearly interpolate across the NaN gaps of an F0 track.
	 * 
	 * @param f0
	 *            F0 track in which missing values are NaN
	 * @return an array of the same length that is NaN everywhere except inside gaps that are
	 *         followed by a valid value; a gap at the very beginning is filled with the first valid
	 *         value, a gap at the very end stays NaN
	 */
	private static double[] interpolateGaps(double[] f0) {
		double[] interpol = new double[f0.length];
		Arrays.fill(interpol, Double.NaN);
		int iLastValid = -1;
		for (int j = 0; j < f0.length; j++) {
			if (Double.isNaN(f0[j])) {
				continue;
			}
			if (iLastValid != j - 1) { // there is a gap between the previous valid value and this one
				// without a previous value, extend the current one backwards (delta becomes 0)
				double prevF0 = iLastValid < 0 ? f0[j] : f0[iLastValid];
				double delta = (f0[j] - prevF0) / (j - iLastValid);
				double value = prevF0;
				for (int k = iLastValid + 1; k < j; k++) {
					value += delta;
					interpol[k] = value;
				}
			}
			iLastValid = j;
		}
		return interpol;
	}

	/**
	 * Build the contour predicted by the per-syllable polynomials, on the frame grid of the tracked
	 * F0 contour. The values are whatever the polynomials yield (the caller applies exp()).
	 * 
	 * @param sylStarts
	 *            unit index of the first unit of each syllable
	 * @param sylEnds
	 *            unit index of the last unit of each syllable
	 * @param sylVowels
	 *            unit index of the first vowel unit of each syllable; the polynomial coefficients
	 *            are read from this unit's entry in the F0 feature file
	 * @param tsSentenceStart
	 *            start time of the sentence, in unit-file time stamps
	 * @param tsSentenceDuration
	 *            duration of the sentence, in unit-file time stamps
	 * @param numFrames
	 *            number of frames of the tracked F0 contour
	 * @return array of length numFrames, NaN outside the syllables
	 * @throws IOException
	 *             if a reader fails
	 */
	private double[] syllableContour(List<Integer> sylStarts, List<Integer> sylEnds, List<Integer> sylVowels,
			long tsSentenceStart, long tsSentenceDuration, int numFrames) throws IOException {
		double[] approx = new double[numFrames];
		Arrays.fill(approx, Double.NaN);
		for (int s = 0; s < sylStarts.size(); s++) {
			long tsSylStart = units.getUnit(sylStarts.get(s)).startTime;
			long tsSylEnd = units.getUnit(sylEnds.get(s)).startTime + units.getUnit(sylEnds.get(s)).duration;
			long tsSylDuration = tsSylEnd - tsSylStart;
			int iSylVowel = sylVowels.get(s);
			// now map time to position in the frame array:
			int iSylStart = (int) (((double) (tsSylStart - tsSentenceStart) / tsSentenceDuration) * numFrames);
			assert iSylStart >= 0;
			int iSylEnd = iSylStart + (int) ((double) tsSylDuration / tsSentenceDuration * numFrames) + 1;
			if (iSylEnd > numFrames) {
				iSylEnd = numFrames;
			}
			int sylLength = iSylEnd - iSylStart;
			if (iSylStart < 0 || sylLength <= 0) {
				// nothing of this syllable falls onto the frame grid
				continue;
			}
			float[] coeffs = contours.getFeatureVector(iSylVowel).getContinuousFeatures();
			double[] sylPred = Polynomial.generatePolynomialValues(ArrayUtils.copyFloat2Double(coeffs), sylLength, 0, 1);
			System.arraycopy(sylPred, 0, approx, iSylStart, sylPred.length);
		}
		return approx;
	}

	/**
	 * Build a piecewise linear contour from the per-unit features <code>unit_logf0</code> and
	 * <code>unit_logf0delta</code>, on the frame grid of the tracked F0 contour. The values are on
	 * the scale of the features (the caller applies exp()).
	 * 
	 * @param iSentenceStart
	 *            index of the first unit of the sentence
	 * @param iSentenceEnd
	 *            index of the last unit of the sentence (inclusive)
	 * @param fiUnitLogF0
	 *            feature index of unit_logf0
	 * @param fiUnitLogF0delta
	 *            feature index of unit_logf0delta
	 * @param tsSentenceStart
	 *            start time of the sentence, in unit-file time stamps
	 * @param tsSentenceDuration
	 *            duration of the sentence, in unit-file time stamps
	 * @param numFrames
	 *            number of frames of the tracked F0 contour
	 * @return array of length numFrames, NaN where no unit contributed a value
	 * @throws IOException
	 *             if a reader fails
	 */
	private double[] unitContour(int iSentenceStart, int iSentenceEnd, int fiUnitLogF0, int fiUnitLogF0delta,
			long tsSentenceStart, long tsSentenceDuration, int numFrames) throws IOException {
		double[] unitF0 = new double[numFrames];
		Arrays.fill(unitF0, Double.NaN);
		for (int u = iSentenceStart; u <= iSentenceEnd; u++) {
			FeatureVector localFV = features.getFeatureVector(u);
			long tsUnitStart = units.getUnit(u).startTime;
			long tsUnitDuration = units.getUnit(u).duration;
			int iUnitStartInArray = (int) (unitF0.length * (tsUnitStart - tsSentenceStart) / tsSentenceDuration);
			int iUnitDurationInArray = (int) (unitF0.length * tsUnitDuration / tsSentenceDuration);
			// never copy beyond the end of the frame array
			int copyLength = Math.min(iUnitDurationInArray, unitF0.length - iUnitStartInArray);
			if (copyLength > 0) {
				float logF0 = localFV.getContinuousFeature(fiUnitLogF0);
				float logF0delta = localFV.getContinuousFeature(fiUnitLogF0delta);
				double[] coeffs = new double[2];
				// logF0 is value at 0.5, logF0delta is slope
				// coeffs[0] is slope, coeffs[1] is value at 0 => coeffs[1] + 0.5*slope = logF0
				coeffs[0] = logF0delta;
				coeffs[1] = logF0 - 0.5 * logF0delta;
				double[] pred = Polynomial.generatePolynomialValues(coeffs, iUnitDurationInArray, 0, 1);
				System.arraycopy(pred, 0, unitF0, iUnitStartInArray, copyLength);
			}
		}
		return unitF0;
	}

	/**
	 * Replace every element by its exponential; NaN stays NaN.
	 * 
	 * @param values
	 *            array to modify in place
	 */
	private static void expInPlace(double[] values) {
		for (int j = 0; j < values.length; j++) {
			values[j] = Math.exp(values[j]);
		}
	}

	/**
	 * Provide the progress of computation, in percent, or -1 if that feature is not implemented.
	 * 
	 * @return -1 if not implemented, or an integer between 0 and 100.
	 */
	public int getProgress() {
		return percent;
	}

	/**
	 * Run the inspector as a stand-alone program.
	 * 
	 * @param args
	 *            args (not used)
	 * @throws Exception
	 *             Exception
	 */
	public static void main(String[] args) throws Exception {
		F0PolynomialInspector inspector = new F0PolynomialInspector();
		// Created only for its side effects; presumably the layout initialises the
		// component's properties -- TODO confirm against DatabaseLayout.
		new DatabaseLayout(inspector);
		inspector.compute();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy