marytts.tools.voiceimport.DurationSoPTrainer Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2010 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.tools.voiceimport;
import java.awt.Dimension;
import java.awt.FlowLayout;
import java.awt.GridBagConstraints;
import java.awt.GridBagLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.util.Scanner;
import java.util.SortedMap;
import java.util.TreeMap;
import javax.swing.JButton;
import javax.swing.JEditorPane;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import marytts.features.FeatureDefinition;
import marytts.features.FeatureVector;
import marytts.machinelearning.SFFS;
import marytts.machinelearning.SoP;
import marytts.modules.phonemiser.AllophoneSet;
import marytts.unitselection.data.FeatureFileReader;
import marytts.unitselection.data.Unit;
import marytts.unitselection.data.UnitFileReader;
/**
* Models phone durations with a sum of products (SoP). The SoP is fitted by multiple linear regression, and the
* features it uses are selected with Sequential Floating Forward Search (SFFS).
*
* @author marcela
*/
public class DurationSoPTrainer extends VoiceImportComponent {
// protected String features;
// Database layout handed in through getDefaultProps(); source of the directory and file-name properties.
protected DatabaseLayout db = null;
// Progress of compute() in percent, as reported by getProgress().
protected int percent = 0;
// Outcome of the most recent feature-selection dialog; set by its Save/Cancel buttons through setSuccess().
// NOTE(review): written from the button listeners and read by the polling loop in selectLinguisticFactors();
// the field is not volatile - confirm that cross-thread visibility is adequate.
protected boolean success = true;
// Whether the regression includes an intercept term (b0); read from the interceptTerm property.
protected boolean interceptTerm;
// Whether compute() writes log(duration) instead of the duration; read from the logDuration property.
protected boolean logDuration;
// Solution size handed to SFFS; read from the solutionSize property.
protected int solutionSize;
// Directory named by the labelDir property.
protected File unitlabelDir;
// Directory named by the featureDir property.
protected File unitfeatureDir;
// Component name; also the prefix of every property key below.
private final String name = "DurationSoPTrainer";
// Property keys of this component (see getDefaultProps() for defaults and setupHelp() for descriptions).
private final String LABELDIR = name + ".labelDir";
private final String FEATUREDIR = name + ".featureDir";
private final String FEATUREFILE = name + ".featureFile";
private final String UNITFILE = name + ".unitFile";
private final String SOLUTIONSIZE = name + ".solutionSize";
private final String INTERCEPTTERM = name + ".interceptTerm";
private final String LOGDURATION = name + ".logDuration";
private final String DURSOPFILE = name + ".durSopFile";
/**
 * Returns the name of this voice-import component.
 *
 * @return the constant component name, which is also the prefix of its property keys
 */
public String getName() {
    return this.name;
}
/**
 * Loads this component's settings from its properties: the label and feature directories, the two boolean
 * switches (intercept term, log duration) and the solution size.
 *
 * @throws NumberFormatException
 *             if the solution-size property is not a valid integer
 */
@Override
protected void initialiseComp() {
    this.unitlabelDir = new File(getProp(LABELDIR));
    this.unitfeatureDir = new File(getProp(FEATUREDIR));
    // parseBoolean yields the primitive directly; anything other than "true" (ignoring case) is false
    this.interceptTerm = Boolean.parseBoolean(getProp(INTERCEPTTERM));
    this.logDuration = Boolean.parseBoolean(getProp(LOGDURATION));
    this.solutionSize = Integer.parseInt(getProp(SOLUTIONSIZE));
}
/**
 * Remembers the database layout and, on the first call, fills the property map with this component's
 * default values. Later calls return the map built earlier.
 *
 * @param dbl
 *            the database layout that supplies the root directory, file directory and MARY file extension
 * @return the property map of this component
 */
public SortedMap getDefaultProps(DatabaseLayout dbl) {
    this.db = dbl;
    if (props == null) {
        // look every layout property up once instead of once per entry
        String rootDir = db.getProp(db.ROOTDIR);
        String fileDir = db.getProp(db.FILEDIR);
        String maryExt = db.getProp(db.MARYEXT);
        props = new TreeMap<String, String>();
        props.put(FEATUREDIR, rootDir + "phonefeatures" + File.separator);
        props.put(LABELDIR, rootDir + "phonelab" + File.separator);
        props.put(FEATUREFILE, fileDir + "phoneFeatures" + maryExt);
        props.put(UNITFILE, fileDir + "phoneUnits" + maryExt);
        props.put(INTERCEPTTERM, "true");
        props.put(LOGDURATION, "true");
        props.put(SOLUTIONSIZE, "10");
        props.put(DURSOPFILE, fileDir + "dur.sop");
    }
    return props;
}
/**
 * Builds the map from property key to the help text shown for that property.
 * <p>
 * Duration is the dependent variable of the regression and the linguistic factors are the independent
 * variables, so the help texts for the log-duration switch and the solution size are worded accordingly.
 */
protected void setupHelp() {
    props2Help = new TreeMap<String, String>();
    props2Help.put(FEATUREDIR, "directory containing the phonefeatures");
    props2Help.put(LABELDIR, "directory containing the phone labels");
    props2Help.put(FEATUREFILE, "file containing all phone units and their target cost features");
    props2Help.put(UNITFILE, "file containing all phone units");
    props2Help.put(INTERCEPTTERM, "whether to include interceptTerm (b0) on the solution equation : b0 + b1X1 + .. bnXn");
    // the logarithm is applied to the duration, i.e. to the dependent variable
    props2Help.put(LOGDURATION, "whether to use log(dependent variable), i.e. log(duration)");
    // the solution consists of selected linguistic factors, i.e. independent variables
    props2Help.put(SOLUTIONSIZE, "size of the solution, number of independent variables");
    props2Help.put(DURSOPFILE, "file containing the dur SoP model. Will be created by this module");
}
/**
 * Records whether the feature-selection dialog was confirmed or cancelled.
 *
 * @param val
 *            the new value of the success flag
 */
protected void setSuccess(boolean val) {
    this.success = val;
}
/**
 * Trains the duration SoP models and writes them to the file named by the durSopFile property.
 * <p>
 * Steps: (1) ask the user for the linguistic factors of vowels, consonants and pauses; (2) write one
 * training file per phone class into the temp directory, each row holding the selected byte features of a
 * unit followed by its duration (or log duration); (3) run SFFS on each training file and append the
 * selected features to the SoP file, after the feature definition and under a "vowel", "consonant" or
 * "pause" label.
 * <p>
 * Units whose phone feature is not positive or whose duration is below 0.01 are skipped. Every phone that
 * is neither a pause nor a vowel is treated as a consonant.
 *
 * @return true when all three models were trained and saved; false when the user cancelled one of the
 *         factor-selection dialogs
 * @throws Exception
 *             if reading the unit/feature files, writing the output files or training fails
 */
public boolean compute() throws Exception {
    String durDir = db.getProp(db.TEMPDIR);
    String vowelsFile = durDir + "vowels.feats";
    String consonantsFile = durDir + "consonants.feats";
    String pauseFile = durDir + "pause.feats";

    AllophoneSet allophoneSet = db.getAllophoneSet();
    FeatureFileReader featureFile = FeatureFileReader.getFeatureFileReader(getProp(FEATUREFILE));
    UnitFileReader unitFile = new UnitFileReader(getProp(UNITFILE));
    FeatureDefinition featureDefinition = featureFile.getFeatureDefinition();
    String featureNames = featureDefinition.getFeatureNames();

    // Select the features used as linguistic factors of the regression. A cancelled dialog yields null;
    // stop right away instead of failing later with a NullPointerException.
    String[] lingFactorsVowel = selectLinguisticFactors(featureNames, "Select linguistic factors for vowels:");
    if (lingFactorsVowel == null) {
        return false;
    }
    String[] lingFactorsConsonant = selectLinguisticFactors(featureNames, "Select linguistic factors for consonants:");
    if (lingFactorsConsonant == null) {
        return false;
    }
    String[] lingFactorsPause = selectLinguisticFactors(featureNames, "Select linguistic factors for pause:");
    if (lingFactorsPause == null) {
        return false;
    }

    // Resolve the feature indices once; they do not change from unit to unit.
    int[] vowelIndices = featureIndices(featureDefinition, lingFactorsVowel);
    int[] consonantIndices = featureIndices(featureDefinition, lingFactorsConsonant);
    int[] pauseIndices = featureIndices(featureDefinition, lingFactorsPause);
    int phoneIndex = featureDefinition.getFeatureIndex("phone");

    int numVowels = 0;
    int numConsonants = 0;
    int numPause = 0;

    // The following files contain the selected features in columns, the duration in the last column.
    PrintWriter toVowelsFile = null;
    PrintWriter toConsonantsFile = null;
    PrintWriter toPauseFile = null;
    try {
        toVowelsFile = new PrintWriter(new FileOutputStream(vowelsFile));
        toConsonantsFile = new PrintWriter(new FileOutputStream(consonantsFile));
        toPauseFile = new PrintWriter(new FileOutputStream(pauseFile));

        for (int i = 0, len = unitFile.getNumberOfUnits(); i < len; i++) {
            // We estimate that feature extraction takes 1/10 of the total time
            // (that's probably wrong, but never mind)
            percent = 10 * i / len;
            Unit u = unitFile.getUnit(i);
            double dur = u.duration / (float) unitFile.getSampleRate();
            FeatureVector fv = featureFile.getFeatureVector(i);
            if (fv.getByteFeature(phoneIndex) > 0 && dur >= 0.01) {
                String phone = fv.getFeatureAsString(phoneIndex, featureDefinition);
                // first check for pause, then vowel; everything else is considered a consonant
                if (allophoneSet.getAllophone(phone).isPause()) {
                    writeFeatureRow(toPauseFile, fv, pauseIndices, dur);
                    numPause++;
                } else if (allophoneSet.getAllophone(phone).isVowel()) {
                    writeFeatureRow(toVowelsFile, fv, vowelIndices, dur);
                    numVowels++;
                } else {
                    writeFeatureRow(toConsonantsFile, fv, consonantIndices, dur);
                    numConsonants++;
                }
            }
        }
        // PrintWriter never throws on write errors; ask explicitly so that truncated training data is noticed
        failOnWriteError(toVowelsFile, vowelsFile);
        failOnWriteError(toConsonantsFile, consonantsFile);
        failOnWriteError(toPauseFile, pauseFile);
    } finally {
        if (toVowelsFile != null) {
            toVowelsFile.close();
        }
        if (toConsonantsFile != null) {
            toConsonantsFile.close();
        }
        if (toPauseFile != null) {
            toPauseFile.close();
        }
    }
    percent = 10;

    double percentToTrain = 0.7;
    String sopFileName = getProp(DURSOPFILE);
    // the final regression is saved in this file: one entry for vowels, one for consonants, one for pause
    PrintWriter toSopFile = new PrintWriter(new FileOutputStream(sopFileName));
    try {
        // Save first the feature definition on the output file, followed by an empty separator line
        featureDefinition.writeTo(toSopFile, false);
        toSopFile.println();
        SFFS sffs = new SFFS(solutionSize, interceptTerm, logDuration);
        trainAndSave(sffs, featureDefinition, "Vowels", "vowel", lingFactorsVowel, vowelsFile, numVowels,
                percentToTrain, toSopFile);
        trainAndSave(sffs, featureDefinition, "Consonants", "consonant", lingFactorsConsonant, consonantsFile,
                numConsonants, percentToTrain, toSopFile);
        trainAndSave(sffs, featureDefinition, "Pause", "pause", lingFactorsPause, pauseFile, numPause,
                percentToTrain, toSopFile);
        failOnWriteError(toSopFile, sopFileName);
    } finally {
        toSopFile.close();
    }
    percent = 100;
    return true;
}

/** Maps each linguistic factor name to its index in the feature definition. */
private int[] featureIndices(FeatureDefinition featureDefinition, String[] lingFactors) {
    int[] indices = new int[lingFactors.length];
    for (int j = 0; j < lingFactors.length; j++) {
        indices[j] = featureDefinition.getFeatureIndex(lingFactors[j]);
    }
    return indices;
}

/** Writes one training row: the selected byte features, then the duration (log duration if configured). */
private void writeFeatureRow(PrintWriter out, FeatureVector fv, int[] factorIndices, double dur) {
    for (int j = 0; j < factorIndices.length; j++) {
        out.print(fv.getByteFeature(factorIndices[j]) + " ");
    }
    // last column is the dependent variable, in this case duration
    out.println(logDuration ? Math.log(dur) : dur);
}

/** Trains one SoP model on the given training file and appends its label and selected features. */
private void trainAndSave(SFFS sffs, FeatureDefinition featureDefinition, String heading, String label,
        String[] lingFactors, String trainingFile, int numUnits, double percentToTrain, PrintWriter toSopFile)
        throws Exception {
    System.out.println("\n==================================\nProcessing " + heading + ":");
    SoP sop = new SoP(featureDefinition);
    sffs.trainModel(lingFactors, trainingFile, numUnits, percentToTrain, sop);
    toSopFile.println(label);
    sop.saveSelectedFeatures(toSopFile);
}

/** Flushes the writer and throws if any earlier write to it failed. */
private static void failOnWriteError(PrintWriter writer, String fileName) throws IOException {
    if (writer.checkError()) {
        throw new IOException("Error while writing " + fileName);
    }
}
/**
 * Shows an editable list of feature names (one per line) and blocks until the window is closed.
 * <p>
 * Only the "Save" button commits the list. "Cancel", closing the window, or an interruption of the waiting
 * thread all count as a cancellation, so the result never depends on how a previous dialog ended.
 *
 * @param featureNames
 *            the available feature names, separated by single spaces
 * @param label
 *            the window title
 * @return the feature names left in the editor when "Save" was pressed, or null if the dialog was cancelled
 * @throws IOException
 *             declared for the feature-list preparation step
 */
public String[] selectLinguisticFactors(String featureNames, String label) throws IOException {
    String features = checkFeatureList(featureNames);
    // Start from "not saved": closing the window must not reuse the outcome of an earlier dialog.
    setSuccess(false);

    final JFrame frame = new JFrame(label);
    GridBagLayout gridBagLayout = new GridBagLayout();
    GridBagConstraints gridC = new GridBagConstraints();
    frame.getContentPane().setLayout(gridBagLayout);

    final JEditorPane editPane = new JEditorPane();
    editPane.setPreferredSize(new Dimension(500, 500));
    editPane.setText(features);

    JButton saveButton = new JButton("Save");
    saveButton.addActionListener(new ActionListener() {
        public void actionPerformed(ActionEvent e) {
            setSuccess(true);
            frame.setVisible(false);
        }
    });
    JButton cancelButton = new JButton("Cancel");
    cancelButton.addActionListener(new ActionListener() {
        public void actionPerformed(ActionEvent e) {
            setSuccess(false);
            frame.setVisible(false);
        }
    });

    gridC.gridx = 0;
    gridC.gridy = 0;
    // resize scroll pane: it receives all extra space, and must fill it in both directions to actually grow
    gridC.weightx = 1;
    gridC.weighty = 1;
    gridC.fill = GridBagConstraints.BOTH;
    JScrollPane scrollPane = new JScrollPane(editPane);
    scrollPane.setPreferredSize(editPane.getPreferredSize());
    gridBagLayout.setConstraints(scrollPane, gridC);
    frame.getContentPane().add(scrollPane);

    gridC.gridy = 1;
    // do not resize buttons:
    gridC.weightx = 0;
    gridC.weighty = 0;
    gridC.fill = GridBagConstraints.HORIZONTAL;
    JPanel buttonPanel = new JPanel();
    buttonPanel.setLayout(new FlowLayout());
    buttonPanel.add(saveButton);
    buttonPanel.add(cancelButton);
    gridBagLayout.setConstraints(buttonPanel, gridC);
    frame.getContentPane().add(buttonPanel);

    frame.pack();
    frame.setVisible(true);
    try {
        // wait until one of the buttons (or the window's close control) hides the frame
        do {
            Thread.sleep(10);
        } while (frame.isVisible());
    } catch (InterruptedException e) {
        // keep the interrupt visible to the caller and give up waiting: treated as a cancellation
        Thread.currentThread().interrupt();
        setSuccess(false);
        frame.setVisible(false);
    }
    frame.dispose();

    if (!success) {
        return null;
    }
    try {
        return saveFeatures(editPane.getText());
    } catch (Exception ex) {
        // same error type and message as before, but with the cause attached instead of printed and dropped
        throw new Error("Error defining replacements", ex);
    }
}
/**
 * Turns a space-separated list of feature names into the text shown in the selection editor: one name per
 * line, each followed by a newline.
 * <p>
 * TODO(review): the original author left open whether some features (e.g. ones with zero variance) should
 * be excluded here and whether a set of recommended features should be listed first; currently every name
 * is passed through unchanged and in its original order.
 *
 * @param featureNames
 *            feature names separated by single spaces
 * @return the names joined with a newline after each one
 * @throws IOException
 *             never thrown by the current implementation; kept for signature compatibility
 */
private String checkFeatureList(String featureNames) throws IOException {
    StringBuilder featureList = new StringBuilder();
    for (String feature : featureNames.split(" ")) {
        featureList.append(feature).append("\n");
    }
    return featureList.toString();
}
/**
 * Parses the text of the selection editor into the list of chosen linguistic factors and echoes the
 * selection on standard output.
 * <p>
 * Each line is trimmed and blank lines are dropped, so empty lines, trailing spaces or carriage returns
 * left by editing do not end up as (invalid) feature names.
 *
 * @param newFeatures
 *            editor text with one feature name per line
 * @return the non-blank, trimmed feature names in their original order
 */
private String[] saveFeatures(String newFeatures) {
    String[] lines = newFeatures.split("\n");
    String[] kept = new String[lines.length];
    int count = 0;
    for (String line : lines) {
        String feature = line.trim();
        if (feature.length() > 0) {
            kept[count++] = feature;
        }
    }
    String[] lingFactors = new String[count];
    System.arraycopy(kept, 0, lingFactors, 0, count);

    System.out.print("Selected linguistic factors (" + lingFactors.length + "):");
    for (String feature : lingFactors) {
        System.out.print(feature + " ");
    }
    System.out.println();
    return lingFactors;
}
/**
 * Reports how far compute() has got.
 *
 * @return the current progress in percent
 */
public int getProgress() {
    return this.percent;
}
/**
 * Debugging entry point: loads a dur.sop file and prints the coefficients of the models it contains.
 * <p>
 * The file starts with a feature definition terminated by an empty line, followed by the models. compute()
 * writes a label line ("vowel", "consonant", "pause") in front of each model; such a label is skipped here
 * if present, so files with and without labels are both accepted.
 * NOTE(review): this assumes each model occupies a single line - confirm against SoP.saveSelectedFeatures.
 *
 * @param args
 *            optional: args[0] is the path of the dur.sop file; without it the historical default path is used
 * @throws Exception
 *             if the file cannot be read or parsed
 */
public static void main(String[] args) throws Exception {
    String sopFileName = (args != null && args.length > 0) ? args[0]
            : "/project/mary/marcela/UnitSel-voices/slt-arctic/temp/dur.sop";
    Scanner s = null;
    try {
        s = new Scanner(new BufferedReader(new FileReader(sopFileName)));
        // Everything up to the first empty line is the feature definition
        StringBuilder strContext = new StringBuilder();
        while (s.hasNextLine()) {
            String nextLine = s.nextLine();
            if (nextLine.trim().equals("")) {
                break;
            }
            strContext.append(nextLine).append("\n");
        }
        // the featureDefinition is the same for vowel, consonant and Pause
        FeatureDefinition voiceFeatDef = new FeatureDefinition(
                new BufferedReader(new StringReader(strContext.toString())), false);

        String vowelLine = readModelLine(s, "vowel");
        if (vowelLine != null) {
            System.out.println("line vowel = " + vowelLine);
            SoP sopVowel = new SoP(vowelLine, voiceFeatDef);
            sopVowel.printCoefficients();
        }
        String consonantLine = readModelLine(s, "consonant");
        if (consonantLine != null) {
            System.out.println("line consonants = " + consonantLine);
            SoP sopConsonants = new SoP(consonantLine, voiceFeatDef);
            sopConsonants.printCoefficients();
        }
        String pauseLine = readModelLine(s, "pause");
        if (pauseLine != null) {
            System.out.println("line pause = " + pauseLine);
            SoP sopPause = new SoP(pauseLine, voiceFeatDef);
            sopPause.printCoefficients();
        }
    } finally {
        if (s != null) {
            s.close();
        }
    }
}

/** Returns the next model line, skipping blank lines and one leading label line; null at end of input. */
private static String readModelLine(Scanner s, String label) {
    String line = nextNonBlankLine(s);
    if (line != null && line.trim().equals(label)) {
        line = nextNonBlankLine(s);
    }
    return line;
}

/** Returns the next line that is not blank, or null when the input is exhausted. */
private static String nextNonBlankLine(Scanner s) {
    while (s.hasNextLine()) {
        String line = s.nextLine();
        if (line.trim().length() > 0) {
            return line;
        }
    }
    return null;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy