All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.voiceimport.F0SoPTrainer Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2010 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.voiceimport;

import java.awt.Dimension;
import java.awt.FlowLayout;
import java.awt.GridBagConstraints;
import java.awt.GridBagLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.SortedMap;
import java.util.TreeMap;

import javax.swing.JButton;
import javax.swing.JEditorPane;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JScrollPane;

import marytts.features.FeatureDefinition;
import marytts.features.FeatureVector;
import marytts.machinelearning.SFFS;
import marytts.machinelearning.SoP;
import marytts.unitselection.data.FeatureFileReader;
import marytts.unitselection.data.TimelineReader;
import marytts.unitselection.data.UnitFileReader;
import marytts.util.data.Datagram;

/**
 * Voice import component that trains three SoP models (equations of the form b0 + b1X1 + .. bnXn) predicting the F0 at
 * the left edge, the middle and the right edge of the voiced part of each syllable.
 * <p>
 * {@link #compute()} lets the user pick linguistic factors in a dialog, exports one training row per syllable to three
 * feature files in the temp directory, runs {@link SFFS} on each of them and saves the resulting {@link SoP} models.
 * <p>
 * Example of how to use SoP modules in a voice configuration file:
 * 
 * <pre>
 * #Modules to use for predicting acoustic target features for this voice:
 * #these are the names that will be used to identify different models of the same type, sop, cart or hmm
 * voice.cmu-slt-hsmm.acousticModels = duration F0 midF0 rightF0
 * 
 * voice.cmu-slt-hsmm.duration.model = sop
 * voice.cmu-slt-hsmm.duration.data = MARY_BASE/lib/voices/cmu-slt-hsmm/dur.sop
 * voice.cmu-slt-hsmm.duration.attribute = d
 * 
 * voice.cmu-slt-hsmm.leftF0.model = sop
 * voice.cmu-slt-hsmm.leftF0.data = MARY_BASE/lib/voices/cmu-slt-hsmm/f0.left.sop
 * voice.cmu-slt-hsmm.leftF0.attribute = f0
 * voice.cmu-slt-hsmm.leftF0.attribute.format = (0,%.0f)
 * 
 * voice.cmu-slt-hsmm.midF0.model = sop
 * voice.cmu-slt-hsmm.midF0.data = MARY_BASE/lib/voices/cmu-slt-hsmm/f0.mid.sop
 * voice.cmu-slt-hsmm.midF0.attribute = f0
 * voice.cmu-slt-hsmm.midF0.attribute.format = (50,%.0f)
 * 
 * voice.cmu-slt-hsmm.rightF0.model = sop
 * voice.cmu-slt-hsmm.rightF0.data = MARY_BASE/lib/voices/cmu-slt-hsmm/f0.right.sop
 * voice.cmu-slt-hsmm.rightF0.attribute = f0
 * voice.cmu-slt-hsmm.rightF0.attribute.format = (100,%.0f)
 * </pre>
 * 
 * @author marcela
 *
 */
public class F0SoPTrainer extends VoiceImportComponent {
	/**
	 * Feature-name fragments identifying phone, phonological and halfphone features; features containing any of them are
	 * not offered in the selection dialog.
	 */
	private static final String[] EXCLUDED_FEATURE_MARKERS = { "_vc", "_vlng", "_vheight", "_vfront", "_vrnd", "_ctype",
			"_cplace", "_cvox", "_phone", "ph_", "halfphone_" };

	protected File f0Dir;
	protected String leftF0FeaturesFileName;
	protected String midF0FeaturesFileName;
	protected String rightF0FeaturesFileName;
	protected File leftSoPFile;
	protected File midSoPFile;
	protected File rightSoPFile;
	protected String featureExt = ".pfeats";
	protected String labelExt = ".lab";
	protected DatabaseLayout db = null;
	protected int percent = 0;
	// Written by the Swing button listeners and read by the thread polling the dialog, hence volatile.
	protected volatile boolean success = true;
	protected boolean interceptTerm = true;
	protected boolean logF0 = false;
	protected int solutionSize;

	private final String name = "F0SoPTrainer";

	public final String FEATUREFILE = name + ".featureFile";
	public final String UNITFILE = name + ".unitFile";
	public final String WAVETIMELINE = name + ".waveTimeline";
	public final String LABELDIR = name + ".labelDir";
	public final String FEATUREDIR = name + ".featureDir";
	public final String F0LeftSoPFILE = name + ".f0LeftSoPFile";
	public final String F0MidSoPFILE = name + ".f0MidSoPFile";
	public final String F0RightSoPFILE = name + ".f0RightSoPFile";
	private final String SOLUTIONSIZE = name + ".solutionSize";
	private final String INTERCEPTTERM = name + ".interceptTerm";
	private final String LOGF0SOLUTION = name + ".logF0";

	/** Creates the component and registers the help texts of its properties. */
	public F0SoPTrainer() {
		setupHelp();
	}

	/**
	 * @return the name of this component, which is also the prefix of all its property keys.
	 */
	public String getName() {
		return name;
	}

	/**
	 * Makes sure the temp directory exists, derives the names of the three intermediate feature files inside it and reads
	 * the training settings (intercept term, log F0, solution size) from the component properties.
	 */
	@Override
	protected void initialiseComp() {
		String f0DirName = db.getProp(db.TEMPDIR);
		this.f0Dir = new File(f0DirName);
		if (!f0Dir.exists()) {
			System.out.print("temp dir " + f0DirName + " does not exist; ");
			if (!f0Dir.mkdirs()) {
				throw new Error("Could not create F0DIR");
			}
			System.out.print("Created successfully.\n");
		}
		// A File prints without trailing separator, so the file names must be resolved against the directory
		// instead of being appended to its string form.
		this.leftF0FeaturesFileName = new File(f0Dir, "f0.sop.left.feats").getPath();
		this.midF0FeaturesFileName = new File(f0Dir, "f0.sop.mid.feats").getPath();
		this.rightF0FeaturesFileName = new File(f0Dir, "f0.sop.right.feats").getPath();
		this.interceptTerm = Boolean.parseBoolean(getProp(INTERCEPTTERM));
		this.logF0 = Boolean.parseBoolean(getProp(LOGF0SOLUTION));
		this.solutionSize = Integer.parseInt(getProp(SOLUTIONSIZE));
	}

	/**
	 * Remembers the database layout and, on first call, fills the property map with default values derived from it.
	 * 
	 * @param dbl
	 *            the database layout providing root, file and temp directories
	 * @return the property map of this component
	 */
	public SortedMap getDefaultProps(DatabaseLayout dbl) {
		this.db = dbl;
		if (props == null) {
			props = new TreeMap();
			String filedir = db.getProp(db.FILEDIR);
			String maryext = db.getProp(db.MARYEXT);
			String separator = System.getProperty("file.separator");
			props.put(FEATUREDIR, db.getProp(db.ROOTDIR) + "phonefeatures" + separator);
			props.put(LABELDIR, db.getProp(db.ROOTDIR) + "phonelab" + separator);
			props.put(FEATUREFILE, filedir + "phoneFeatures" + maryext);
			props.put(UNITFILE, filedir + "phoneUnits" + maryext);
			props.put(WAVETIMELINE, db.getProp(db.FILEDIR) + "timeline_waveforms" + db.getProp(db.MARYEXT));
			props.put(F0LeftSoPFILE, filedir + "f0.left.sop");
			props.put(F0MidSoPFILE, filedir + "f0.mid.sop");
			props.put(F0RightSoPFILE, filedir + "f0.right.sop");
			props.put(INTERCEPTTERM, "true");
			props.put(LOGF0SOLUTION, "false");
			props.put(SOLUTIONSIZE, "5");
		}
		return props;
	}

	/** Fills the help map with a short description of every property of this component. */
	protected void setupHelp() {
		props2Help = new TreeMap();
		props2Help.put(FEATUREDIR, "directory containing the phonefeatures");
		props2Help.put(LABELDIR, "directory containing the phone label files");
		props2Help.put(FEATUREFILE, "file containing all phone units and their target cost features");
		props2Help.put(UNITFILE, "file containing all phone units");
		props2Help.put(WAVETIMELINE, "file containing all waveforms or models that can genarate them");
		props2Help.put(F0LeftSoPFILE, "file containing the f0 left SoP model. Will be created by this module");
		props2Help.put(F0MidSoPFILE, "file containing the f0 mid SoP model. Will be created by this module");
		props2Help.put(F0RightSoPFILE, "file containing the f0 right SoP model. Will be created by this module");
		props2Help.put(INTERCEPTTERM, "whether to include interceptTerm (b0) on the solution equation : b0 + b1X1 + .. bnXn");
		props2Help.put(LOGF0SOLUTION, "whether to use log(independent variable)");
		props2Help.put(SOLUTIONSIZE, "size of the solution, number of dependend variables");
	}

	/**
	 * Runs the whole training: asks the user for the linguistic factors, exports the per-syllable F0 training data and
	 * trains and saves the left, mid and right F0 SoP models.
	 * 
	 * @return true when the three models were trained and saved; false when the user cancelled the factor selection or
	 *         selected no factor at all
	 * @throws Exception
	 *             if reading the voice data, writing the output files or the training itself fails
	 */
	public boolean compute() throws Exception {
		FeatureFileReader featureFile = FeatureFileReader.getFeatureFileReader(getProp(FEATUREFILE));
		UnitFileReader unitFile = new UnitFileReader(getProp(UNITFILE));
		TimelineReader waveTimeline = new TimelineReader(getProp(WAVETIMELINE));
		FeatureDefinition featureDefinition = featureFile.getFeatureDefinition();

		// Select the features that can be used for SFFS selection
		String[] lingFactorsToSelect = selectLinguisticFactors(featureDefinition.getFeatureNames(),
				"Select linguistic factors for vowels:");
		if (lingFactorsToSelect == null || lingFactorsToSelect.length == 0) {
			// The dialog was cancelled or left empty: there is nothing to train on.
			System.out.println("F0 SoP trainer: no linguistic factors selected, nothing to do");
			return false;
		}

		System.out.println("F0 SoP trainer: exporting f0 features");
		int nSyllables = exportF0Features(featureFile, unitFile, waveTimeline, featureDefinition, lingFactorsToSelect);
		System.out.println("F0 features extracted for " + nSyllables + " syllables");

		double percentToTrain = 0.9;
		SFFS sffs = new SFFS(solutionSize, interceptTerm, logF0);

		System.out.println("\n==================================\nProcessing Left F0:");
		trainAndSaveSoP(sffs, featureDefinition, lingFactorsToSelect, leftF0FeaturesFileName, nSyllables, percentToTrain,
				getProp(F0LeftSoPFILE), "f0.left");

		System.out.println("\n==================================\nProcessing Mid F0:");
		trainAndSaveSoP(sffs, featureDefinition, lingFactorsToSelect, midF0FeaturesFileName, nSyllables, percentToTrain,
				getProp(F0MidSoPFILE), "f0.mid");

		System.out.println("\n==================================\nProcessing Right F0:");
		trainAndSaveSoP(sffs, featureDefinition, lingFactorsToSelect, rightF0FeaturesFileName, nSyllables, percentToTrain,
				getProp(F0RightSoPFILE), "f0.right");

		percent = 100;

		return true;
	}

	/**
	 * Writes one training row per syllable to each of the left, mid and right feature files: the byte values of the
	 * selected linguistic factors of the syllable's vowel followed by the F0 value (or its logarithm when logF0 is set).
	 * 
	 * @return the number of syllables (rows) written
	 */
	private int exportF0Features(FeatureFileReader featureFile, UnitFileReader unitFile, TimelineReader waveTimeline,
			FeatureDefinition featureDefinition, String[] lingFactors) throws Exception {
		// Resolve the selected factor names once instead of once per syllable.
		int[] factorIndices = new int[lingFactors.length];
		for (int k = 0; k < lingFactors.length; k++) {
			factorIndices[k] = featureDefinition.getFeatureIndex(lingFactors[k]);
		}

		byte isVowel = featureDefinition.getFeatureValueAsByte("ph_vc", "+");
		int iVC = featureDefinition.getFeatureIndex("ph_vc");
		int iSegsFromSylStart = featureDefinition.getFeatureIndex("segs_from_syl_start");
		int iSegsFromSylEnd = featureDefinition.getFeatureIndex("segs_from_syl_end");
		int iCVoiced = featureDefinition.getFeatureIndex("ph_cvox");
		byte isCVoiced = featureDefinition.getFeatureValueAsByte("ph_cvox", "+");

		int nSyllables = 0;
		PrintWriter toLeftFeaturesFile = null;
		PrintWriter toMidFeaturesFile = null;
		PrintWriter toRightFeaturesFile = null;
		try {
			toLeftFeaturesFile = new PrintWriter(new FileOutputStream(leftF0FeaturesFileName));
			toMidFeaturesFile = new PrintWriter(new FileOutputStream(midF0FeaturesFileName));
			toRightFeaturesFile = new PrintWriter(new FileOutputStream(rightF0FeaturesFileName));

			for (int i = 0, len = unitFile.getNumberOfUnits(); i < len; i++) {
				// We estimate that feature extraction takes 1/10 of the total time
				// (that's probably wrong, but never mind)
				percent = 10 * i / len;
				FeatureVector fv = featureFile.getFeatureVector(i);
				if (fv.getByteFeature(iVC) != isVowel) {
					continue;
				}
				// Found a vowel, i.e. found a syllable.
				int mid = i;
				FeatureVector fvMid = fv;
				// Now find first/last voiced unit in the syllable:
				int first = i;
				for (int j = 1, lookLeft = (int) fvMid.getByteFeature(iSegsFromSylStart); j < lookLeft; j++) {
					fv = featureFile.getFeatureVector(mid - j); // units are in sequential order
					// No need to check if there are any vowels to the left of this one,
					// because of the way we do the search in the top-level loop.
					if (fv.getByteFeature(iCVoiced) != isCVoiced) {
						break; // mid-j is not voiced
					}
					first = mid - j; // OK, the unit we are currently looking at is part of the voiced syllable section
				}
				int last = i;
				for (int j = 1, lookRight = (int) fvMid.getByteFeature(iSegsFromSylEnd); j < lookRight; j++) {
					fv = featureFile.getFeatureVector(mid + j); // units are in sequential order

					if (fv.getByteFeature(iVC) != isVowel && fv.getByteFeature(iCVoiced) != isCVoiced) {
						break; // mid+j is not voiced
					}
					last = mid + j; // OK, the unit we are currently looking at is part of the voiced syllable section
				}
				// TODO: make this more robust, e.g. by fitting two straight lines to the data:
				Datagram[] midDatagrams = waveTimeline.getDatagrams(unitFile.getUnit(mid), unitFile.getSampleRate());
				Datagram[] leftDatagrams = waveTimeline.getDatagrams(unitFile.getUnit(first), unitFile.getSampleRate());
				Datagram[] rightDatagrams = waveTimeline.getDatagrams(unitFile.getUnit(last), unitFile.getSampleRate());
				if (midDatagrams != null && midDatagrams.length > 0 && leftDatagrams != null && leftDatagrams.length > 0
						&& rightDatagrams != null && rightDatagrams.length > 0) {
					// F0 is taken as sample rate divided by the duration of a single datagram.
					double midF0 = waveTimeline.getSampleRate() / (float) midDatagrams[midDatagrams.length / 2].getDuration();
					double leftF0 = waveTimeline.getSampleRate() / (float) leftDatagrams[0].getDuration();
					double rightF0 = waveTimeline.getSampleRate()
							/ (float) rightDatagrams[rightDatagrams.length - 1].getDuration();

					// The factor columns are identical in the three files; only the last column (F0) differs.
					StringBuilder factorColumns = new StringBuilder();
					for (int k = 0; k < factorIndices.length; k++) {
						factorColumns.append(fvMid.getByteFeature(factorIndices[k])).append(' ');
					}
					toLeftFeaturesFile.print(factorColumns);
					toMidFeaturesFile.print(factorColumns);
					toRightFeaturesFile.print(factorColumns);

					// last column is the F0 value
					if (Double.isInfinite(midF0)) {
						// A zero-length datagram gives an infinite value; fall back to the mean of both edges.
						System.out.format("Syllable at %d (length %d ): left = %.3f, mid = %.3f, right = %.3f ", mid, (last
								- first + 1), leftF0, midF0, rightF0);
						midF0 = (leftF0 + rightF0) / 2.0;
						System.out.format("midF0 is infinite --> changed to (leftF0 + rightF0) / 2.0 = %.3f\n", midF0);
					}

					if (logF0) {
						toLeftFeaturesFile.println(Math.log(leftF0));
						toMidFeaturesFile.println(Math.log(midF0));
						toRightFeaturesFile.println(Math.log(rightF0));
					} else {
						toLeftFeaturesFile.println(leftF0);
						toMidFeaturesFile.println(midF0);
						toRightFeaturesFile.println(rightF0);
					}
					nSyllables++;
				}
				// Skip the part we just covered:
				i = last;
			}

			// PrintWriter swallows I/O errors; make sure the training data is complete before it is used.
			if (toLeftFeaturesFile.checkError() || toMidFeaturesFile.checkError() || toRightFeaturesFile.checkError()) {
				throw new IOException("Could not write F0 feature files to " + f0Dir);
			}
		} finally {
			closeIfOpen(toLeftFeaturesFile);
			closeIfOpen(toMidFeaturesFile);
			closeIfOpen(toRightFeaturesFile);
		}
		return nSyllables;
	}

	/**
	 * Trains one SoP model on the given feature file and saves it: the output file receives the feature definition, an
	 * empty line, the model name and the selected features of the trained model.
	 */
	private void trainAndSaveSoP(SFFS sffs, FeatureDefinition featureDefinition, String[] lingFactors,
			String featuresFileName, int nSyllables, double percentToTrain, String sopFileName, String modelName)
			throws Exception {
		// the final regression will be saved in this file
		PrintWriter toSopFile = new PrintWriter(new FileOutputStream(sopFileName));
		try {
			// Save first the features definition on the output file
			featureDefinition.writeTo(toSopFile, false);
			toSopFile.println();
			SoP sop = new SoP(featureDefinition);
			sffs.trainModel(lingFactors, featuresFileName, nSyllables, percentToTrain, sop);
			toSopFile.println(modelName);
			sop.saveSelectedFeatures(toSopFile);
			if (toSopFile.checkError()) {
				throw new IOException("Could not write SoP model file " + sopFileName);
			}
		} finally {
			toSopFile.close();
		}
	}

	/** Closes the writer unless it was never opened. */
	private static void closeIfOpen(PrintWriter writer) {
		if (writer != null) {
			writer.close();
		}
	}

	/**
	 * Shows an editable list of candidate features (one per line) and blocks until the dialog is closed.
	 * 
	 * @param featureNames
	 *            all available feature names, separated by single spaces
	 * @param label
	 *            title of the dialog window
	 * @return the feature names left in the editor when the dialog was saved, or null when it was cancelled or the
	 *         waiting thread was interrupted
	 * @throws IOException
	 *             declared for compatibility with existing callers
	 */
	public String[] selectLinguisticFactors(String featureNames, String label) throws IOException {
		String[] lingFactors = null;
		String features = checkFeatureList(featureNames);

		final JFrame frame = new JFrame(label);
		GridBagLayout gridBagLayout = new GridBagLayout();
		GridBagConstraints gridC = new GridBagConstraints();
		frame.getContentPane().setLayout(gridBagLayout);

		final JEditorPane editPane = new JEditorPane();
		editPane.setPreferredSize(new Dimension(500, 500));
		editPane.setText(features);

		JButton saveButton = new JButton("Save");
		saveButton.addActionListener(new ActionListener() {
			public void actionPerformed(ActionEvent e) {
				setSuccess(true);
				frame.setVisible(false);
			}
		});
		JButton cancelButton = new JButton("Cancel");
		cancelButton.addActionListener(new ActionListener() {
			public void actionPerformed(ActionEvent e) {
				setSuccess(false);
				frame.setVisible(false);
			}
		});

		gridC.gridx = 0;
		gridC.gridy = 0;
		// resize scroll pane:
		gridC.weightx = 1;
		gridC.weighty = 1;
		gridC.fill = GridBagConstraints.HORIZONTAL;
		JScrollPane scrollPane = new JScrollPane(editPane);
		scrollPane.setPreferredSize(editPane.getPreferredSize());
		gridBagLayout.setConstraints(scrollPane, gridC);
		frame.getContentPane().add(scrollPane);
		gridC.gridy = 1;
		// do not resize buttons:
		gridC.weightx = 0;
		gridC.weighty = 0;
		JPanel buttonPanel = new JPanel();
		buttonPanel.setLayout(new FlowLayout());
		buttonPanel.add(saveButton);
		buttonPanel.add(cancelButton);
		gridBagLayout.setConstraints(buttonPanel, gridC);
		frame.getContentPane().add(buttonPanel);
		frame.pack();
		frame.setVisible(true);

		// Poll until one of the buttons (or the window manager) hides the frame.
		do {
			try {
				Thread.sleep(10);
			} catch (InterruptedException e) {
				// Keep the interrupt visible to the caller and stop waiting: an interrupted selection counts as
				// cancelled. Continuing to sleep would only throw again immediately.
				Thread.currentThread().interrupt();
				setSuccess(false);
				frame.setVisible(false);
				break;
			}
		} while (frame.isVisible());
		frame.dispose();

		if (success) {
			try {
				lingFactors = saveFeatures(editPane.getText());
			} catch (Exception ex) {
				throw new Error("Error defining replacements", ex);
			}
		}
		return lingFactors;
	}

	/**
	 * Builds the initial content of the selection dialog: every feature name on its own line, leaving out phone,
	 * phonological and halfphone features.
	 * 
	 * @param featureNames
	 *            feature names separated by single spaces
	 * @return the remaining feature names, each terminated by a newline
	 */
	private String checkFeatureList(String featureNames) {
		StringBuilder featureList = new StringBuilder();
		String[] candidates = featureNames.split(" ");
		for (int i = 0; i < candidates.length; i++) {
			String candidate = candidates[i];
			// Exclude phone and phonological, those are by default used in makeLabes and makeQuestions
			// Also exclude the halfphone features not used in HMM voices
			// use this when finding the features that better predict f0, without including phonological features
			if (candidate.length() > 0 && !isExcludedFeature(candidate)) {
				featureList.append(candidate).append('\n');
			}
		}
		return featureList.toString();
	}

	/** Tells whether the feature is "phone" itself or contains one of the excluded markers. */
	private static boolean isExcludedFeature(String featureName) {
		if ("phone".equals(featureName)) {
			return true;
		}
		for (int i = 0; i < EXCLUDED_FEATURE_MARKERS.length; i++) {
			if (featureName.contains(EXCLUDED_FEATURE_MARKERS[i])) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Records whether the selection dialog was confirmed (true) or cancelled (false).
	 * 
	 * @param val
	 *            the outcome of the dialog
	 */
	protected void setSuccess(boolean val) {
		success = val;
	}

	/**
	 * Turns the text of the selection dialog into the list of selected factors and prints them.
	 * 
	 * @param newFeatures
	 *            editor content with one feature name per line
	 * @return the trimmed, non-empty lines in their original order; empty when there are none
	 */
	private String[] saveFeatures(String newFeatures) {
		String[] lines = (newFeatures == null) ? new String[0] : newFeatures.split("\n");
		// Trim to drop a trailing carriage return or stray spaces, and skip blank lines: neither is a feature name.
		String[] kept = new String[lines.length];
		int count = 0;
		for (int i = 0; i < lines.length; i++) {
			String candidate = lines[i].trim();
			if (candidate.length() > 0) {
				kept[count++] = candidate;
			}
		}
		String[] lingFactors = new String[count];
		System.arraycopy(kept, 0, lingFactors, 0, count);

		System.out.print("Selected linguistic factors (" + lingFactors.length + "):");
		for (int i = 0; i < lingFactors.length; i++) {
			System.out.print(lingFactors[i] + " ");
		}
		System.out.println();
		return lingFactors;
	}

	/**
	 * Provide the progress of computation, in percent, or -1 if that feature is not implemented.
	 * 
	 * @return -1 if not implemented, or an integer between 0 and 100.
	 */
	public int getProgress() {
		return percent;
	}

	/**
	 * Runs this trainer stand-alone on the database layout set up for it.
	 * 
	 * @param args
	 *            ignored
	 * @throws Exception
	 *             if setting up the database layout or the training fails
	 */
	public static void main(String[] args) throws Exception {
		F0SoPTrainer f0SoPTrainer = new F0SoPTrainer();
		DatabaseLayout db = new DatabaseLayout(f0SoPTrainer);
		f0SoPTrainer.compute();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy