All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.vocalizations.KMeansClusterer Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2010 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see .
 *
 */
package marytts.vocalizations;

import java.awt.Color;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import javax.swing.JFrame;

import marytts.machinelearning.KMeansClusteringTrainerParams;
import marytts.machinelearning.PolynomialCluster;
import marytts.machinelearning.PolynomialKMeansClusteringTrainer;
import marytts.signalproc.display.FunctionGraph;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.data.audio.AudioPlayer;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.Polynomial;

public class KMeansClusterer {

	private ArrayList baseNames;
	Polynomial[] f0Polynomials;
	private int polynomialOrder = 3;
	private int numberOfSamples = 0;
	private int numberOfClusters;
	private HashMap mapPolynomialBaseNames;

	public KMeansClusterer() {
		baseNames = new ArrayList();
		mapPolynomialBaseNames = new HashMap();
	}

	public void loadF0Polynomials(String fileName) throws IOException {

		BufferedReader bfr = new BufferedReader(new FileReader(new File(fileName)));
		String line;
		ArrayList lines = new ArrayList();
		while ((line = bfr.readLine()) != null) {
			lines.add(line.trim());
		}
		double[][] f0PolynomialCoeffs;
		String[] words = lines.get(0).trim().split("\\s+");
		polynomialOrder = words.length - 1;
		numberOfSamples = lines.size();
		f0PolynomialCoeffs = new double[numberOfSamples][polynomialOrder];
		f0Polynomials = new Polynomial[numberOfSamples];

		for (int i = 0; i < lines.size(); i++) {
			line = lines.get(i);
			words = line.trim().split("\\s+");
			baseNames.add(words[0].trim());

			for (int j = 0; j < polynomialOrder; j++) {
				f0PolynomialCoeffs[i][j] = (new Double(words[j + 1].trim())).doubleValue();
			}

			// System.out.println(f0PolynomialCoeffs[i][0]+" "+f0PolynomialCoeffs[i][1]+" "+f0PolynomialCoeffs[i][2]+" "+f0PolynomialCoeffs[i][3]);
			// System.out.println(words[0]+" "+words[1]+" "+words[2]+" "+words[3]+" "+words[4]);
		}

		// testing
		for (int i = 0; i < numberOfSamples; i++) {
			f0Polynomials[i] = new Polynomial(f0PolynomialCoeffs[i]);
			mapPolynomialBaseNames.put(f0Polynomials[i], baseNames.get(i));
			System.out.print(baseNames.get(i) + " ");
			for (int j = 0; j < polynomialOrder; j++) {
				System.out.print(f0PolynomialCoeffs[i][j] + " ");
			}
			System.out.println();

			double coeff[] = f0Polynomials[i].coeffs;
			System.out.print(baseNames.get(i) + " ");
			for (int j = 0; j < coeff.length; j++) {
				System.out.print(coeff[j] + " ");
			}
			System.out.println();
		}

	}

	public void trainer(int numClusters) throws UnsupportedAudioFileException, IOException {
		this.numberOfClusters = numClusters;
		KMeansClusteringTrainerParams params = new KMeansClusteringTrainerParams();
		params.numClusters = numClusters;
		params.maxIterations = 10000;
		// Train:
		PolynomialCluster[] clusters = PolynomialKMeansClusteringTrainer.train(f0Polynomials, params);

		// Visualise:
		FunctionGraph clusterGraph = new FunctionGraph(0, 1, new double[1]);
		clusterGraph.setYMinMax(-550, 500);
		clusterGraph.setPrimaryDataSeriesStyle(Color.BLUE, FunctionGraph.DRAW_DOTS, FunctionGraph.DOT_FULLCIRCLE);
		JFrame jf = clusterGraph.showInJFrame("", false, true);
		for (int i = 0; i < clusters.length; i++) {
			double[] meanValues = clusters[i].getMeanPolynomial().generatePolynomialValues(100, 0, 1);
			clusterGraph.updateData(0, 1. / meanValues.length, meanValues);

			Polynomial[] members = clusters[i].getClusterMembers();
			System.out.print("Cluster " + i + " : ");
			for (int m = 0; m < members.length; m++) {
				double[] pred = members[m].generatePolynomialValues(meanValues.length, 0, 1);
				clusterGraph.addDataSeries(pred, Color.GRAY, FunctionGraph.DRAW_LINE, -1);
				String baseName = mapPolynomialBaseNames.get(members[m]);
				System.out.print(baseName + " ");
				jf.repaint();

				String waveFile = "/home/sathish/Work/phd/voices/f0desc-listener/vocalizations/wav/" + File.separator + baseName
						+ ".wav";
				AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(waveFile));

				// Enforce PCM_SIGNED encoding
				if (!inputAudio.getFormat().getEncoding().equals(AudioFormat.Encoding.PCM_SIGNED)) {
					inputAudio = AudioSystem.getAudioInputStream(AudioFormat.Encoding.PCM_SIGNED, inputAudio);
				}

				int audioSampleRate = (int) inputAudio.getFormat().getSampleRate();
				AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
				double[] sentenceAudio = signal.getAllData();
				AudioPlayer ap = new AudioPlayer(new DDSAudioInputStream(new BufferedDoubleDataSource(sentenceAudio),
						new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, audioSampleRate, // samples per second
								16, // bits per sample
								1, // mono
								2, // nr. of bytes per frame
								audioSampleRate, // nr. of frames per second
								true))); // big-endian;))
				ap.start();

				try {
					ap.join();
					Thread.sleep(10);
				} catch (InterruptedException ie) {
				}

			}
			System.out.println();
			jf.setTitle("Cluster " + (i + 1) + " of " + clusters.length + ": " + members.length + " members");
			jf.repaint();

			try {
				Thread.sleep(5000);
			} catch (InterruptedException ie) {
			}
		}
	}

	/**
	 * @param args
	 *            args
	 * @throws IOException
	 *             IOException
	 * @throws UnsupportedAudioFileException
	 *             UnsupportedAudioFileException
	 */
	public static void main(String[] args) throws IOException, UnsupportedAudioFileException {
		KMeansClusterer kmc = new KMeansClusterer();
		// String fileName = "/home/sathish/phd/voices/en-GB-listener/vocal-polynomials/all.listener.polynomials.txt";
		// String fileName = "/home/sathish/phd/voices/en-GB-listener/vocal-polynomials/SpiVocalizationF0PolyFeatureFile.txt";
		String fileName = "/home/sathish/phd/voices/en-GB-listener/yeahPrudenceVocalizationF0PolyFeatureFile.txt";
		kmc.loadF0Polynomials(fileName);
		kmc.trainer(5);
		System.exit(0);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy