marytts.tools.voiceimport.HMMParameterExtractor Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2009 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.tools.voiceimport;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.SortedMap;
import java.util.TreeMap;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import marytts.htsengine.HMMData;
import marytts.htsengine.HTSParameterGeneration;
import marytts.htsengine.HTSUttModel;
import marytts.htsengine.HTSVocoder;
import marytts.modules.HTSEngine;
/**
 * A voice import component that runs an HMM voice over the context feature file of every basename and, depending on the
 * configured options, writes realised phone durations (.lab), state-level durations (.slab) and a synthesised waveform
 * (.wav) into the output directory. When label output is enabled, a half-phone label file (.hplab) is additionally derived
 * from each .lab file.
 *
 * @author Marcela, Sathish Pammi
 *
 */
public class HMMParameterExtractor extends VoiceImportComponent {
    // Property keys. They are kept as instance fields (not static) so existing code that reads them keeps working.
    public final String MARYBASE = "HMMParameterExtractor.maryBase";
    public final String VOICECONFIG = "HMMParameterExtractor.voiceConfigFile";
    public final String VOICENAME = "HMMParameterExtractor.voiceName";
    public final String PHONEFEATS = "HMMParameterExtractor.phonefeaturesDir";
    public final String OUTHMMDIR = "HMMParameterExtractor.outputDir";
    public final String PPARAMETERS = "HMMParameterExtractor.printParameters";
    public final String PLAB = "HMMParameterExtractor.printLab";
    public final String PWAVE = "HMMParameterExtractor.printWave";
    // NOTE: the capital 'P' in the key is inconsistent with the other keys but is part of existing configurations.
    public final String PSLAB = "HMMParameterExtractor.PrintStateLab";
    public final String USEGV = "HMMParameterExtractor.useGV";

    private DatabaseLayout db;
    private String sCostDirectory;

    // File name extensions of the files read and written by this component.
    private String mfccExt = ".mfcc";
    private String pfeatExt = ".pfeats";
    private String slabExt = ".slab";
    private String labExt = ".lab";
    private String wavExt = ".wav";
    private String hplabExt = ".hplab";

    /** Progress of {@link #compute()} in percent, as reported by {@link #getProgress()}. */
    protected int percent = 0;

    // Both are created in compute(); generateParameters() requires them to be set.
    private HTSEngine hmm_tts;
    private HMMData htsData;

    /**
     * Fills the help text shown for each configurable property.
     */
    protected void setupHelp() {
        props2Help = new TreeMap<String, String>();
        props2Help.put(MARYBASE, "Mary Base. ex: /home/user/MARY400/ ");
        // The property holds the voice configuration file name, not a locale.
        props2Help.put(VOICECONFIG, "Voice configuration file. ex: english-hsmm-slt.config");
        props2Help.put(VOICENAME, "Voice name ex: slt-arctic");
        props2Help.put(PHONEFEATS, "Phonefeatures directory");
        props2Help.put(OUTHMMDIR, "Output directory to store hmm generated parameters");
        props2Help.put(PPARAMETERS, "Generate parameters like MFCC, PITCH files?");
        props2Help.put(PLAB, "Generate HMM Label files?");
        props2Help.put(PWAVE, "Generate HMM WAVE files?");
        props2Help.put(PSLAB, "Generate HMM State Label files?");
        props2Help.put(USEGV, "Use Global variance(GV) in parameter generation?");
    }

    /**
     * Remembers the database layout and, on first call, creates the default property values.
     *
     * @param db
     *            the database layout providing the MARY base and root directory
     * @return the (possibly already existing) property map of this component
     */
    public SortedMap<String, String> getDefaultProps(DatabaseLayout db) {
        this.db = db;
        if (props == null) {
            props = new TreeMap<String, String>();
            props.put(MARYBASE, db.getProp(db.MARYBASE));
            props.put(VOICECONFIG, "english-hsmm-slt.config");
            props.put(VOICENAME, "hsmm-slt");
            props.put(PHONEFEATS, db.getProp(db.ROOTDIR) + File.separator + "phonefeatures");
            props.put(OUTHMMDIR, db.getProp(db.ROOTDIR) + File.separator + "sCost" + File.separator + "hmmparams");
            props.put(PPARAMETERS, "true");
            props.put(PLAB, "true");
            props.put(PWAVE, "false");
            props.put(PSLAB, "false");
            props.put(USEGV, "false");
        }
        return props;
    }

    /**
     * @return the name of this component, "HMMParameterExtractor"
     */
    public final String getName() {
        return "HMMParameterExtractor";
    }

    /**
     * Creates the "sCost" directory below the database root directory if it does not exist yet.
     *
     * @throws Error
     *             if the directory cannot be created
     */
    @Override
    protected void initialiseComp() {
        sCostDirectory = db.getProp(db.ROOTDIR) + File.separator + "sCost";
        makeDirIfMissing(new File(sCostDirectory));
    }

    /**
     * Initialises the HMM voice and processes every basename: generates the requested output files and then the half-phone
     * label file.
     *
     * @return true if all basenames were processed; false as soon as a half-phone label file could not be created
     * @throws Exception
     *             if initialising the HMM data fails
     * @throws Error
     *             if the output directory cannot be created
     */
    public boolean compute() throws Exception {
        // Directory that receives all generated files; may be nested, so it is created with all missing parents.
        String outputDir = getProp(OUTHMMDIR);
        makeDirIfMissing(new File(outputDir));

        /*
         * For initialise provide the name of the hmm voice and the name of its configuration file, also indicate the name of
         * your MARY_BASE directory.
         */
        String maryBase = getProp(MARYBASE);
        String voice = getProp(VOICENAME);
        String configFile = getProp(VOICECONFIG);
        // directory where the context features of each file are
        String contextFeaDir = getProp(PHONEFEATS);

        hmm_tts = new HTSEngine();
        htsData = new HMMData();
        htsData.initHMMData(voice, maryBase, configFile);
        htsData.setUseGV(flagIsSet(USEGV));
        // Mixed excitation is always switched on.
        htsData.setUseMixExc(true);

        // Now process all files, one by one
        for (int i = 0; i < bnl.getLength(); i++) {
            percent = 100 * i / bnl.getLength();
            String baseName = bnl.getName(i);
            generateParameters(baseName, contextFeaDir, outputDir);
            if (!createHalfPhoneLab(baseName)) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return the progress of {@link #compute()} in percent
     */
    public int getProgress() {
        return percent;
    }

    /**
     * Generates the configured output files for one basename. The context feature file
     * <code>contextFeaDir/file.pfeats</code> is turned into an utterance model; depending on the properties, the realised
     * durations (.lab), the state-level durations (.slab) and the synthesised waveform (.wav) are written to
     * <code>outputDir</code>.
     * <p>
     * Failures while processing are reported on standard error and not rethrown, so that the caller can continue; when
     * label output is enabled, {@link #compute()} notices the missing .lab file in the following step.
     * {@link #compute()} must have initialised the HMM voice before this method is called.
     *
     * @param file
     *            basename of the file to process, without directory and extension
     * @param contextFeaDir
     *            directory containing the context feature (.pfeats) files
     * @param outputDir
     *            existing directory the generated files are written to
     * @throws IOException
     *             declared for compatibility; processing errors are currently caught and reported instead
     * @throws InterruptedException
     *             declared for compatibility; an interruption is reported and the thread's interrupt flag is restored
     */
    public void generateParameters(String file, String contextFeaDir, String outputDir) throws IOException, InterruptedException {
        // Frame period divided by rate; used below to convert state durations into the time stamps of the .slab file.
        float fperiodsec = ((float) htsData.getFperiod() / (float) htsData.getRate());
        try {
            HTSParameterGeneration pdf2par = new HTSParameterGeneration();
            HTSVocoder par2speech = new HTSVocoder();

            /* Process label file of Mary context features and create the utterance model. */
            // Directories and file names are joined through File so a directory without trailing separator works.
            String feaFile = new File(contextFeaDir, file + pfeatExt).getPath();
            HTSUttModel um = hmm_tts.processUttFromFile(feaFile, htsData);

            if (flagIsSet(PLAB)) {
                writeRealisedDurations(new File(outputDir, file + labExt));
            }
            if (flagIsSet(PSLAB)) {
                writeStateLabels(um, new File(outputDir, file + slabExt), fperiodsec);
            }

            boolean waveRequested = flagIsSet(PWAVE);
            if (flagIsSet(PPARAMETERS) || waveRequested) {
                /*
                 * Generate the sequence of speech parameter vectors out of the sequence of pdfs. The vocoder below consumes
                 * this result, so generation also has to run when only the waveform is requested.
                 */
                // NOTE(review): nothing in this class writes the generated mfcc/f0 values to disk -- confirm whether
                // HTSParameterGeneration is expected to do that itself.
                pdf2par.htsMaximumLikelihoodParameterGeneration(um, htsData);
            }
            if (waveRequested) {
                writeWaveFile(pdf2par, par2speech, new File(outputDir, file + wavExt));
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Do not lose the interruption request just because the error is only reported here.
                Thread.currentThread().interrupt();
            }
            // Print the exception itself: getMessage() alone may be null and hides the exception type.
            System.err.println("Exception while generating HMM parameters for " + file + ": " + e);
        }
    }

    /** Returns true if the given property is set to the exact string "true"; an unset property counts as false. */
    private boolean flagIsSet(String propKey) {
        return "true".equals(getProp(propKey));
    }

    /** Creates the directory (including missing parents) if it does not exist; throws Error when creation fails. */
    private void makeDirIfMissing(File dir) {
        if (dir.exists()) {
            return;
        }
        System.out.print(dir.getAbsolutePath() + " does not exist; ");
        if (!dir.mkdirs()) {
            throw new Error("Could not create " + dir.getAbsolutePath());
        }
        System.out.print("Created successfully.\n");
    }

    /** Saves the realised durations reported by the HTS engine for the last processed utterance in a lab file. */
    private void writeRealisedDurations(File labFile) throws IOException {
        try (FileWriter out = new FileWriter(labFile)) {
            out.write(hmm_tts.getRealisedDurations());
        }
    }

    /**
     * Saves state-level durations: one line per state with the cumulative end time, a flag (1 for the last state of a
     * model, 0 otherwise) and the phone name of the model.
     */
    private void writeStateLabels(HTSUttModel um, File slabFile, float fperiodsec) throws IOException {
        int numStates = htsData.getCartTreeSet().getNumStates();
        float totalDur = 0;
        try (FileWriter out = new FileWriter(slabFile)) {
            out.write("#\n");
            for (int i = 0; i < um.getNumModel(); i++) {
                for (int j = 0; j < numStates; j++) {
                    totalDur += (um.getUttModel(i).getDur(j) * fperiodsec);
                    String lastStateFlag = (j < (numStates - 1)) ? " 0 " : " 1 ";
                    out.write(totalDur + lastStateFlag + um.getUttModel(i).getPhoneName() + "\n");
                }
            }
        }
    }

    /** Synthesises the waveform from the generated parameters and saves it as a WAVE file. */
    private void writeWaveFile(HTSParameterGeneration pdf2par, HTSVocoder par2speech, File wavFile) throws Exception {
        try (AudioInputStream ais = par2speech.htsMLSAVocoder(pdf2par, htsData)) {
            System.out.println("saving to file: " + wavFile.getPath());
            if (AudioSystem.isFileTypeSupported(AudioFileFormat.Type.WAVE, ais)) {
                AudioSystem.write(ais, AudioFileFormat.Type.WAVE, wavFile);
            } else {
                System.err.println("WAVE file type is not supported for the generated audio; " + wavFile.getPath()
                        + " was not written");
            }
        }
    }

    /**
     * Derives the half-phone label file (.hplab) from the .lab file of the given basename in the output directory. Every
     * unit is split at its temporal midpoint into a left ("_L") and a right ("_R") half; the halves are numbered
     * consecutively starting at 1.
     *
     * @param baseName
     *            basename of the file to process
     * @return true on success or if label output is disabled; false if the lab file could not be read or the half-phone
     *         file could not be written
     */
    private boolean createHalfPhoneLab(String baseName) {
        if (!flagIsSet(PLAB)) {
            // Without label output there is no .lab file to derive the half-phone labels from.
            return true;
        }
        String hmmDir = getProp(OUTHMMDIR);
        File labFile = new File(hmmDir, baseName + labExt);
        File hplabFile = new File(hmmDir, baseName + hplabExt);
        try {
            UnitLabel[] unitLab = UnitLabel.readLabFile(labFile.getPath());
            try (PrintWriter pw = new PrintWriter(new FileWriter(hplabFile))) {
                pw.println("#");
                int unitIndex = 0;
                for (int i = 0; i < unitLab.length; i++) {
                    double duration = unitLab[i].endTime - unitLab[i].startTime;
                    assert duration > 0 : "Duration is not > 0 for phone " + unitLab[i].unitName + " (" + baseName + ")";
                    double midTime = unitLab[i].startTime + duration / 2;
                    unitIndex++;
                    pw.println(midTime + " " + unitIndex + " " + unitLab[i].unitName + "_L");
                    unitIndex++;
                    pw.println(unitLab[i].endTime + " " + unitIndex + " " + unitLab[i].unitName + "_R");
                }
                // PrintWriter never throws on write errors; checkError() flushes and reveals them.
                if (pw.checkError()) {
                    System.err.println("Could not write half-phone label file " + hplabFile.getPath());
                    return false;
                }
            }
        } catch (IOException e) {
            System.err.println("Could not create half-phone label file for " + baseName + ": " + e);
            return false;
        }
        return true;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy