All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.voiceimport.SnackVoiceQualityProcessor Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2010 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.voiceimport;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;

import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;

import marytts.signalproc.analysis.PitchMarks;
import marytts.signalproc.analysis.PitchReaderWriter;
import marytts.signalproc.analysis.VoiceQuality;
import marytts.signalproc.window.HammingWindow;
import marytts.signalproc.window.Window;
import marytts.util.io.StreamGobbler;
import marytts.util.MaryUtils;
import marytts.util.data.ESTTrackReader;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.display.DisplayUtils;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;

public class SnackVoiceQualityProcessor extends VoiceImportComponent {

	// Database layout; assigned in getDefaultProps()
	protected DatabaseLayout db;
	// Component name returned by getName()
	private String name = "SnackVoiceQualityProcessor";

	// Extension of the per-utterance file that holds snack's f0/formant/bandwidth output
	protected String snackExtension = ".snack";
	// Extension of the per-utterance voice quality file written by compute()
	protected String voiceQualityExtension = ".vq";
	// Path of the generated Tcl script; assigned in initialiseComp()
	protected String scriptFileName;

	// number of voice quality parameters extracted from the sound files:
	// OQG, GOG, SKG, RCG, IC
	int numVqParams = 5;

	// Progress of compute() in percent, reported by getProgress()
	private int percent = 0;
	// private final String FRAMELENGTH = "0.01"; // Default for snack
	// private final String WINDOWLENGTH = "0.025"; // Default for f0 snack ( formants uses a bigger window)

	// Keys of the configurable properties of this component (defaults are set in getDefaultProps())
	public final String SAMPLINGRATE = "SnackVoiceQualityProcessor.samplingRate";
	public final String MINPITCH = "SnackVoiceQualityProcessor.minPitch";
	public final String MAXPITCH = "SnackVoiceQualityProcessor.maxPitch";
	public final String FRAMELENGTH = "SnackVoiceQualityProcessor.frameLength";
	public final String WINDOWLENGTH = "SnackVoiceQualityProcessor.windowLength";
	public final String NUMFORMANTS = "SnackVoiceQualityProcessor.numFormants";
	public final String LPCORDER = "SnackVoiceQualityProcessor.lpcOrder";
	public final String FFTSIZE = "SnackVoiceQualityProcessor.fftSize";
	public final String VQDIR = "SnackVoiceQualityProcessor.vqDir";

	/**
	 * Fills {@code props2Help} with a help text for every configurable property of this component. Does nothing when
	 * the help map has already been created.
	 */
	protected void setupHelp() {
		if (props2Help == null) {
			props2Help = new TreeMap();
			props2Help.put(SAMPLINGRATE, "Sampling frequency in Hertz. Default: 16000");
			// The defaults quoted below must agree with getDefaultProps(), which uses 60/400 Hz regardless of gender.
			props2Help.put(MINPITCH, "minimum value for the pitch (in Hz). Default: 60");
			props2Help.put(MAXPITCH, "maximum value for the pitch (in Hz). Default: 400");
			props2Help.put(FRAMELENGTH, "frame length (in seconds) for VQ calculation Default: 0.005 sec.");
			props2Help.put(WINDOWLENGTH, "window length (in seconds) for VQ calculation Default: 0.025 sec.");
			props2Help.put(NUMFORMANTS, "Default 4, maximum 7");
			props2Help.put(LPCORDER, "Default 12, if NUMFORMANTS=4 min LPCORDER=12\n" + "if NUMFORMANTS=5 min LPCORDER=14\n"
					+ "if NUMFORMANTS=6 min LPCORDER=16\n" + "if NUMFORMANTS=7 min LPCORDER=18\n");
			props2Help.put(FFTSIZE, "Default 512");
			props2Help.put(VQDIR, "directory containing the voice quality files. Will be created if it does not exist");
		}
	}

	/**
	 * Returns the name under which this voice import component is known.
	 * 
	 * @return the component name
	 */
	public final String getName() {
		return this.name;
	}

	/**
	 * Derives the location of the generated Tcl script: it is placed in the database's temporary directory.
	 */
	@Override
	protected void initialiseComp() {
		String tempDir = db.getProp(db.TEMPDIR);
		scriptFileName = tempDir + "f0_formants.tcl";
	}

	/**
	 * Remembers the database layout and returns the configurable properties of this component, creating them with their
	 * default values on the first call.
	 * 
	 * @param db
	 *            the database layout; stored in {@link #db} on every call
	 * @return the property map of this component (created once, then reused)
	 */
	public SortedMap getDefaultProps(DatabaseLayout db) {
		this.db = db;
		if (props == null) {
			props = new TreeMap();
			props.put(SAMPLINGRATE, "16000");
			// The pitch range is the same for every gender: compute() notes that snack needs minpitch 60 and
			// maxpitch 400 to yield the same number of f0 and formant frames. The former female/male branches
			// were identical, so the gender lookup has been dropped.
			props.put(MINPITCH, "60");
			props.put(MAXPITCH, "400");
			props.put(FRAMELENGTH, "0.005");
			props.put(WINDOWLENGTH, "0.025");
			props.put(NUMFORMANTS, "4");
			props.put(LPCORDER, "12");
			props.put(FFTSIZE, "512");
			props.put(VQDIR, db.getProp(db.ROOTDIR) + "vq" + System.getProperty("file.separator"));
		}
		return props;
	}

	/**
	 * The standard compute() method of the VoiceImportComponent interface.
	 * <p>
	 * Generates a Tcl/snack script, runs it with tclsh on every utterance of the basename list to obtain f0, formants and
	 * bandwidths (written to a {@code .snack} file in VQDIR), and then computes the voice quality parameters of the
	 * utterance and writes them to a {@code .vq} file in VQDIR.
	 * 
	 * @return true once all utterances have been processed
	 * @throws IOException
	 *             if the output directory cannot be created or tclsh exits with a non-zero status
	 * @throws Exception
	 *             any other failure while running snack or computing the voice quality parameters
	 */
	public boolean compute() throws Exception {

		writeSnackScript(new File(scriptFileName));

		String[] baseNameArray = bnl.getListAsArray();
		// to test String[] baseNameArray = {"curious", "u"};
		System.out.println("Computing voice quality for " + baseNameArray.length + " utterances.");

		/* Ensure the existence of the target voice quality directory */
		File dir = new File(getProp(VQDIR));
		if (!dir.exists()) {
			System.out.println("Creating the directory [" + getProp(VQDIR) + "].");
			// mkdirs() also creates missing parents; re-check isDirectory() in case it was created concurrently
			if (!dir.mkdirs() && !dir.isDirectory()) {
				throw new IOException("Could not create the directory [" + getProp(VQDIR) + "]");
			}
		}

		// Some general parameters that apply to all the sound files
		int samplingRate = Integer.parseInt(getProp(SAMPLINGRATE));
		int numFormants = Integer.parseInt(getProp(NUMFORMANTS));
		// frameLength and windowLength in samples
		int frameLength = Math.round(Float.parseFloat(getProp(FRAMELENGTH)) * samplingRate);
		int windowLength = Math.round(Float.parseFloat(getProp(WINDOWLENGTH)) * samplingRate);

		// get a Hamming window
		Window hammWin = new HammingWindow(windowLength);

		// Matrix for calculating Bark spectrum
		int fftSize = Integer.parseInt(getProp(FFTSIZE));
		int nfilts = (int) Math.ceil(SignalProcUtils.hz2bark(samplingRate / 2)) + 1;
		int minfreq = 0;
		int maxfreq = samplingRate / 2;
		int bwidth = 1;
		double barkMatrix[][] = SignalProcUtils.fft2barkmx(fftSize, samplingRate, nfilts, bwidth, minfreq, maxfreq);

		String tclsh = db.getExternal(db.TCLPATH) + "/tclsh";

		/* execute snack and voice quality parameters extraction */
		for (int i = 0; i < baseNameArray.length; i++) {
			percent = 100 * i / baseNameArray.length;
			String wavFile = db.getProp(db.WAVDIR) + baseNameArray[i] + db.getProp(db.WAVEXT);
			String snackFile = getProp(VQDIR) + baseNameArray[i] + snackExtension;
			String vqFile = getProp(VQDIR) + baseNameArray[i] + voiceQualityExtension;

			System.out.println("Writing f0+formants+bandWidths to " + snackFile);

			// Pass the command as an argument array so that paths containing blanks are not split into several
			// arguments. The argument order must match the [lindex $argv N] references in the generated script.
			List<String> command = new ArrayList<String>();
			if (MaryUtils.isWindows()) {
				command.add("cmd.exe");
				command.add("/c");
			}
			command.add(tclsh);
			command.add(scriptFileName);
			command.add(wavFile);
			command.add(snackFile);
			command.add(getProp(MAXPITCH));
			command.add(getProp(MINPITCH));
			command.add(getProp(FRAMELENGTH));
			command.add(getProp(NUMFORMANTS));
			command.add(getProp(LPCORDER));

			// System.out.println("Executing: " + command);
			Process snack = Runtime.getRuntime().exec(command.toArray(new String[command.size()]));
			StreamGobbler errorGobbler = new StreamGobbler(snack.getErrorStream(), "err");
			// read from output stream
			StreamGobbler outputGobbler = new StreamGobbler(snack.getInputStream(), "out");
			// start reading from the streams
			errorGobbler.start();
			outputGobbler.start();
			// wait for tclsh and fail early instead of reading a missing or incomplete snack file
			int exitCode = snack.waitFor();
			if (exitCode != 0) {
				throw new IOException("tclsh exited with status " + exitCode + " while processing " + wavFile);
			}

			// Read F0, formants and bandwidths
			double[][] snackData = readSnackData(numFormants, snackFile);
			// System.out.println("f0_formants size=" + snackData.length);

			// Read the sound file
			WavReader soundFile = new WavReader(wavFile);

			// Check sampling rate of sound file
			assert samplingRate == soundFile.getSampleRate();

			// calculate voice quality parameters for this file
			VoiceQuality vq = new VoiceQuality(numVqParams, samplingRate, frameLength / (float) samplingRate, windowLength
					/ (float) samplingRate);

			calculateVoiceQuality(snackData, samplingRate, frameLength, windowLength, soundFile, hammWin, barkMatrix, fftSize,
					vq, false);

			System.out.println("Writing vq parameters to " + vqFile);
			vq.writeVqFile(vqFile);

		}
		percent = 100;
		return true;
	}

	/**
	 * Writes the Tcl script that lets snack compute f0 and formants (with bandwidths) for one sound file, replacing any
	 * existing file.
	 * <p>
	 * In order to get the same number of frames when calculating f0 and formants with snack, we should keep constant the
	 * following variables: -maxpitch 400 for F0 calculation, -minpitch 60 for F0 calculation, -windowlength 0.03 for
	 * formants calculation. -framelength should be the same for f0, formants and this SnackVoiceQualityProcessor; this
	 * value can be changed, e.g. 0.005, 0.01 etc.
	 * 
	 * @param script
	 *            the script file to (re)create
	 * @throws IOException
	 *             if the script file cannot be opened for writing
	 */
	private void writeSnackScript(File script) throws IOException {
		if (script.exists())
			script.delete();
		PrintWriter toScript = new PrintWriter(new FileWriter(script));
		try {
			toScript.println("# extracting pitch anf formants using snack");
			toScript.println("package require snack");
			toScript.println("snack::sound s");
			toScript.println("s read [lindex $argv 0]");
			toScript.println("set fd [open [lindex $argv 1] w]");
			toScript.println("set f0 [s pitch -method esps -maxpitch [lindex $argv 2] -minpitch [lindex $argv 3] -framelength [lindex $argv 4] ]");
			toScript.println("set f0_length [llength $f0]");
			// toScript.println("puts \"f0 length = $f0_length\"");
			toScript.println("set formants [s formant -numformants [lindex $argv 5] -lpcorder [lindex $argv 6] -framelength [lindex $argv 4] -windowlength 0.03]");
			toScript.println("set formants_length [llength $formants]");
			// toScript.println("puts \"formants length = $formants_length\"");
			toScript.println("set n 0");
			toScript.println("foreach line $f0 {");
			toScript.println("puts -nonewline $fd \"[lindex $line 0] \"");
			toScript.println("puts $fd [lindex $formants $n]");
			toScript.println("incr n");
			toScript.println("}");
			toScript.println("close $fd");
			toScript.println("exit");
		} finally {
			// release the file handle even if writing fails
			toScript.close();
		}
	}

	/**
	 * Loads the f0 + formants[numFormants] + band widths[numFormants] from the snackFile.
	 * <p>
	 * Every line of the file becomes one row of {@code numFormants * 2 + 1} values, read as whitespace-separated numbers.
	 * Lines with fewer values leave the remaining columns at 0.0; surplus values on a line are ignored.
	 * 
	 * @param numFormants
	 *            number of formants per frame
	 * @param snackFile
	 *            path of the file written by the snack script
	 * @return one row per line of the file; an empty array for an empty file, never null
	 * @throws IOException
	 *             if the file cannot be read or contains a value that is not a number
	 */
	static double[][] readSnackData(int numFormants, String snackFile) throws IOException {
		// numFormants*2 + 1 : each row contains f0 + the formants + their bandwidths
		int numData = numFormants * 2 + 1;
		List<double[]> rows = new ArrayList<double[]>();
		BufferedReader reader = new BufferedReader(new FileReader(snackFile));
		try {
			String line;
			int lineNumber = 0;
			while ((line = reader.readLine()) != null) {
				lineNumber++;
				double[] row = new double[numData];
				StringTokenizer tokens = new StringTokenizer(line);
				for (int j = 0; j < numData && tokens.hasMoreTokens(); j++) {
					String token = tokens.nextToken();
					try {
						row[j] = Double.parseDouble(token);
					} catch (NumberFormatException nfe) {
						// report the problem instead of silently returning a partially filled matrix
						throw new IOException("Invalid number '" + token + "' in " + snackFile + " at line " + lineNumber,
								nfe);
					}
				}
				rows.add(row);
			}
		} finally {
			// always release the file handle
			reader.close();
		}
		return rows.toArray(new double[rows.size()][]);
	}

	/**
	 * 
	 * @param snack
	 *            : array containing f0+formants+band widths
	 * @param samplingRate
	 *            samplingRate
	 * @param frameLength
	 *            : in samples
	 * @param windowLength
	 *            : in samples
	 * @param sound
	 *            sound
	 * @param hammWin
	 *            hammWin
	 * @param barkMatrix
	 *            barkMatrix
	 * @param fftSize
	 *            fftSize
	 * @param vq
	 *            vq
	 * @param debug
	 *            debug
	 * @throws Exception
	 *             Exception
	 */
	public void calculateVoiceQuality(double snack[][], int samplingRate, int frameLength, int windowLength, WavReader sound,
			Window hammWin, double[][] barkMatrix, int fftSize, VoiceQuality vq, boolean debug) throws Exception {

		int i, j, k, n, T, T2, index, index1, index2, index3;
		short x_signal[] = sound.getSamples();
		double x[] = new double[windowLength];
		double magf[] = null; // spectrum of a window
		double magfdB[] = null; // spectrum of a window in dB
		double barkmagfdB[] = null; // Bark spectrum of a window in dB
		double Xpeak[] = null; // the harmonic peaks
		int windowType = 1; // 1: Hamming window
		int maxFreqIndex = fftSize / 2;
		double Fp, Fp2, Fp3, F1, F2, F3, F4, B1, B2, B3, B4, H1, H2, H3, F1p, F2p, F3p, F4p, A1p, A2p, A3p, A4p;
		double hatH1, hatH2, hatA1p, hatA2p, hatA3p;
		double OQ, OQG, GO, GOG, SK, SKG, RC, RCG, IC;
		double f0;
		int f0Length = snack.length;
		double parVq[][] = new double[vq.params.dimension][f0Length];

		// Normalise the signal before processing between 1 and -1
		// This was for getting similar values as in octave
		/*
		 * double x_signal_double[] = new double[sound.getNumSamples()]; for (i=0; i 60 HZ
		// 2. 1.5 H1 < H2 < 3H1
		// 3. H1 < F1 - B1

		// process per window
		int numFrame = 0;
		int numFrameVq = 0;
		for (n = 0; n < (sound.getNumSamples() - windowLength) && numFrame < f0Length; n = n + frameLength) {

			f0 = snack[numFrame][0];
			if (debug)
				System.out.format("\npitch=%.2f numFrame=%d n=%d \n", f0, (numFrame + 1), n);

			// First Lugger conditions
			// 1. H1 > 60 HZ
			if (f0 > 60.0) {

				// get the window frame
				for (i = 0; i < windowLength; i++)
					x[i] = x_signal[n + i];
				// x[i] = x_signal_double[n+i]; // HERE USING NORMALISED SIGNAL to get similiar values as in octave

				// apply Hamming window
				x = hammWin.apply(x);
				// MaryUtils.plot(x, "x");

				// get the spectrum in dB (20*log10(F))
				// SignalProcUtils.displayDFTSpectrumInDB(x, fftSize, windowType);
				// should be this in dB???
				// SPECTRUM
				magf = SignalProcUtils.getFrameMagnitudeSpectrum(x, fftSize, windowType);
				// magf = SignalProcUtils.getFrameHalfMagnitudeSpectrum(x, fftSize, windowType);
				// MaryUtils.plot(magf, "magf");
				// System.out.print("A=[");
				// for(i=0; i<(fftSize/2); i++)
				// System.out.format("%.3f ", magf[i]);
				// System.out.println("];\n");

				// SPECTRUM dB
				magfdB = MathUtils.amp2db(magf);
				// MaryUtils.plot(magfdB, "magfdB");

				// BARK SPECTRUM
				// double barkmagf[] = MathUtils.matrixProduct(wts, magf);
				// MaryUtils.plot(barkmagf, "barkX");
				barkmagfdB = MathUtils.amp2db(MathUtils.matrixProduct(barkMatrix, magf));
				// MaryUtils.plot(barkmagfdB, "barkmagfdB");

				// TODO: These steps of finding peaks and magnitudes, need to be improved
				// the f0 from snack not always get close to the first peak found in the spectrum calculated here, also for 2f0.
				// get the harmonic peak frequencies
				Xpeak = SignalProcUtils.getPeakAmplitudeFrequencies(magf, f0, 30, fftSize, (double) samplingRate, false);
				// MaryUtils.plot(Xpeak, "Xpeak");
				// for(j=1; j 60.0
				// set VQ measures to NAN for voiceless frames or frames whose F0 < 60.0
				// CHECK: not sure if this is a good solution to do not loose time info...
				parVq[0][numFrame] = Double.NaN;
				parVq[1][numFrame] = Double.NaN;
				parVq[2][numFrame] = Double.NaN;
				parVq[3][numFrame] = Double.NaN;
				parVq[4][numFrame] = Double.NaN;

				if (debug)
					System.out.println("1 Lugger cond.");
			}
			numFrame++;
		}

		vq.allocate(numFrame, parVq);

	}

	/**
	 * Returns the index of the harmonic peak that lies closest to f.
	 * <p>
	 * Only peaks whose distance to f is strictly smaller than maxFreqIndex are considered; if there is none (or peaks is
	 * empty) the result is 0. Of several equally close peaks the first one wins.
	 * 
	 * @param peaks
	 *            the harmonic peaks to search
	 * @param f
	 *            the value to look for
	 * @param maxFreqIndex
	 *            upper bound (exclusive) on the accepted distance between f and a peak
	 * @return index of the closest peak, or 0 if no peak is close enough
	 */
	public int findClosestHarmonicPeak(double peaks[], double f, int maxFreqIndex) {
		int best = 0;
		double smallestGap = maxFreqIndex;
		int position = 0;

		for (double peak : peaks) {
			double gap = Math.abs(f - peak);
			if (gap < smallestGap) {
				smallestGap = gap;
				best = position;
			}
			position++;
		}
		return best;
	}

	/**
	 * Compensation of the vocal tract influence.
	 * <p>
	 * Evaluates (formant^2 + (bandWidth/2)^2) / sqrt(((freq - formant)^2 + (bandWidth/2)^2) * ((freq + formant)^2 +
	 * (bandWidth/2)^2)) and returns it in dB (20 * log10).
	 * 
	 * @param freq
	 *            frequency at which the compensation is evaluated
	 * @param formant
	 *            formant frequency
	 * @param bandWidth
	 *            formant bandwidth
	 * @return the compensation in dB, or 0.0 (after printing a warning) when the ratio is not greater than zero
	 */
	public double vocalTractCompensation(double freq, double formant, double bandWidth) {
		double halfBandWidthSquared = Math.pow((bandWidth / 2), 2.0);
		double numerator = Math.pow(formant, 2.0) + halfBandWidthSquared;
		double below = Math.pow((freq - formant), 2.0) + halfBandWidthSquared;
		double above = Math.pow((freq + formant), 2.0) + halfBandWidthSquared;
		double ratio = numerator / Math.sqrt(below * above);

		// the negated test also routes NaN to the warning branch
		if (!(ratio > 0.0)) {
			System.out.println("vocalTractCompensation: warning value < 0.0");
			return 0.0;
		}
		return (20 * Math.log10(ratio));
	}

	/**
	 * Reports how far the computation has advanced.
	 * 
	 * @return the current value of the progress counter in percent (the general contract also allows -1 for components
	 *         that do not implement progress reporting)
	 */
	public int getProgress() {
		return this.percent;
	}

	/**
	 * To test/compare vq values of several files: reads each voice quality file and prints its parameters followed by
	 * their mean and standard deviation.
	 * 
	 * @param args
	 *            not used
	 * @throws Exception
	 *             if one of the files cannot be read
	 */
	public static void main3(String[] args) throws Exception {

		// String wavFile = "/project/mary/marcela/HMM-voices/arctic_test/wav/curious.wav";
		// String wavFile = "/project/mary/marcela/HMM-voices/arctic_test/wav/a.wav";
		String[] vqFiles = { "/project/mary/marcela/HMM-voices/arctic_test/vq/whisper.vq",
				"/project/mary/marcela/HMM-voices/arctic_test/vq/modal.vq",
				"/project/mary/marcela/HMM-voices/arctic_test/vq/creak.vq",
				"/project/mary/marcela/HMM-voices/arctic_test/vq/harsh.vq" };

		// Handling all files in one loop guarantees that the parameters printed belong to the file just read
		// (the unrolled version printed the creak parameters a second time instead of the harsh ones).
		for (int i = 0; i < vqFiles.length; i++) {
			VoiceQuality vq = new VoiceQuality();
			System.out.println("Reading: " + vqFiles[i]);
			vq.readVqFile(vqFiles[i]);
			vq.printPar();
			vq.printMeanStd();
		}

	}

	/**
	 * To test write and read vq files: computes the voice quality parameters of one sound file from an existing snack
	 * file, writes them to a vq file, and reads that file back, printing the parameters both times.
	 * 
	 * @param args
	 *            not used
	 * @throws Exception
	 *             if a file cannot be read or written, or the computation fails
	 */
	public static void main2(String[] args) throws Exception {

		int numFormants = 4;
		// String wavFile = "/project/mary/marcela/HMM-voices/arctic_test/wav/curious.wav";
		// String snackFile = "/project/mary/marcela/HMM-voices/arctic_test/vq/curious.snack";
		String wavFile = "/project/mary/marcela/HMM-voices/arctic_test/wav/a.wav";
		String snackFile = "/project/mary/marcela/HMM-voices/arctic_test/vq/a.snack";
		String vqFile = "/project/mary/marcela/HMM-voices/arctic_test/vq/a.vq";

		SnackVoiceQualityProcessor vqCalc = new SnackVoiceQualityProcessor();

		// This example assumes that the F0, formants and bandwidths have been already calculated and stored
		// in a file name.snack.
		// Read F0, formants and bandwidths
		double[][] snackData = readSnackData(numFormants, snackFile);
		// System.out.println("f0_formants size=" + snackData.length);

		// Read the sound file; its sampling rate is used for every step below
		WavReader soundFile = new WavReader(wavFile);
		int sampleRate = soundFile.getSampleRate();

		// calculate voice quality parameters for this file
		int frameLength = 80; // in samples
		int windowLength = 400; // in samples
		int numVqParams = 5;
		// get a Hamming window
		Window hammWin = new HammingWindow(windowLength);

		// Matrix for calculating Bark spectrum
		int fftSize = 512;
		int nfilts = (int) Math.ceil(SignalProcUtils.hz2bark(sampleRate / 2)) + 1;
		int minfreq = 0;
		int maxfreq = sampleRate / 2;
		int bwidth = 1;
		double barkMatrix[][] = SignalProcUtils.fft2barkmx(fftSize, sampleRate, nfilts, bwidth, minfreq, maxfreq);

		// Cast before dividing: frameLength / sampleRate in int arithmetic is 0, not the length in seconds
		VoiceQuality vq = new VoiceQuality(numVqParams, sampleRate, frameLength / (float) sampleRate, windowLength
				/ (float) sampleRate);
		vqCalc.calculateVoiceQuality(snackData, sampleRate, frameLength, windowLength, soundFile, hammWin, barkMatrix, fftSize,
				vq, false);
		vq.writeVqFile(vqFile);
		vq.printPar();

		VoiceQuality vq1 = new VoiceQuality();
		vq1.readVqFile(vqFile);
		vq1.printPar();

	}

	// to test the spectrum in bark scale
	public static void main1(String[] args) throws Exception {
		String wavFile = "/project/mary/marcela/HMM-voices/arctic_test/wav/a.wav";

		int i;
		int Fs = 16000;
		int Nfft = 512;
		int nfilts = (int) Math.ceil(SignalProcUtils.hz2bark(Fs / 2)) + 1;
		int minfreq = 0;
		int maxfreq = Fs / 2;
		int bwidth = 1;
		double wts[][] = SignalProcUtils.fft2barkmx(Nfft, Fs, nfilts, bwidth, minfreq, maxfreq);
		DisplayUtils.plot(wts[10]);
		// for(int i=0; i " + 1 / (pmIn[i] - pmIn[i - 1]));

		int[] pmInSamples = SignalProcUtils.time2sample(pmIn, samplingRate);
		double[] pmConvF0 = SignalProcUtils.pitchMarks2PitchContour(pmInSamples, (float) f0.header.windowSizeInSeconds,
				(float) f0.header.skipSizeInSeconds, samplingRate);
		System.out.println("F0 after converting pitch marks in pm files:");
		for (int i = 1; i < pmConvF0.length; i++)
			System.out.println(i + ": F0=" + pmConvF0[i]);

		ESTTrackReader mcepFile; // Structure that holds the mcep track data
		float[] current; // local [min,max] vector for the current mcep track file
		float mcepMin, mcepMax, mcepRange; // Global min/max/range values for the mcep coefficients
		float totalDuration = 0.0f; // Accumulator for the total timeline duration
		long numDatagrams = 0l; // Total number of mcep datagrams in the timeline file
		int numMCep = 0; // Number of mcep channels, assumed from the first mcep file

		mcepFile = new ESTTrackReader(mcepFileName);
		System.out.println("pitch marks in MFCCs files:");
		for (int i = 1; i < mcepFile.getTimes().length; i++)
			System.out.println(i + " pm: " + mcepFile.getTime(i) + " --> " + 1 / (mcepFile.getTime(i) - mcepFile.getTime(i - 1)));

	}

	/**
	 * Test driver: prints the f0 contour stored in a snack file and the pitch marks that
	 * SignalProcUtils.pitchContour2pitchMarks derives from it for the corresponding sound file.
	 * 
	 * @param args
	 *            not used
	 * @throws Exception
	 *             if the sound file or the snack file cannot be read
	 */
	public static void main5(String[] args) throws Exception {

		String wavFile = "/project/mary/marcela/HMM-voices/arctic_test/wav/atapa.wav";
		String strPitchFile = "/project/mary/marcela/HMM-voices/arctic_test/ptc/atapa.ptc";
		String mcepFileName = "/project/mary/marcela/HMM-voices/arctic_test/mcep/atapa.mcep";
		String pmFileName = "/project/mary/marcela/HMM-voices/arctic_test/pm/atapa.pm";
		String snackFile = "/project/mary/marcela/HMM-voices/arctic_test/vq/atapa.snack";

		// load the complete signal together with its sampling rate
		AudioInputStream audioIn = AudioSystem.getAudioInputStream(new File(wavFile));
		int samplingRate = (int) audioIn.getFormat().getSampleRate();
		double[] samples = new AudioDoubleDataSource(audioIn).getAllData();

		double windowSizeInSeconds = 0.025;
		double skipSizeInSeconds = 0.005;

		// the first column of every snack row is the f0 value of that frame
		double[][] snackRows = readSnackData(4, snackFile);
		double[] contour = new double[snackRows.length];

		System.out.println("F0 contour in snack file");
		int frame = 0;
		while (frame < snackRows.length) {
			contour[frame] = snackRows[frame][0];
			System.out.println(frame + " f0: " + contour[frame]);
			frame++;
		}

		int pitchMarkOffset = 0;
		PitchMarks marks = SignalProcUtils.pitchContour2pitchMarks(contour, samplingRate, samples.length,
				windowSizeInSeconds, skipSizeInSeconds, true, pitchMarkOffset);
		System.out.println("pitch marks after contour2pm");
		for (int k = 0; k < marks.f0s.length; k++) {
			System.out.println(k + ": pm=" + marks.pitchMarks[k] + " = " + (marks.pitchMarks[k] * 1.0) / samplingRate
					+ "  f0=" + marks.f0s[k]);
		}

	}

	/**
	 * Entry point for manual experiments. Currently runs main3, which compares the vq values of several files.
	 * 
	 * @param args
	 *            passed on to the selected test driver
	 * @throws Exception
	 *             whatever the selected test driver throws
	 */
	public static void main(String[] args) throws Exception {

		// Other drivers that can be enabled instead:
		// main1(args); - test the spectrum in bark scale
		// main2(args); - test write and read vq files
		// main4(args);
		// main5(args);

		// test/compare vq values of several files
		main3(args);

	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy