All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.voiceimport.HMMVoiceConfigure Maven / Gradle / Ivy

The newest version!
/* ----------------------------------------------------------------- */
/*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
/*           developed by HTS Working Group                          */
/*           http://hts-engine.sourceforge.net/                      */
/* ----------------------------------------------------------------- */
/*                                                                   */
/*  Copyright (c) 2001-2010  Nagoya Institute of Technology          */
/*                           Department of Computer Science          */
/*                                                                   */
/*                2001-2008  Tokyo Institute of Technology           */
/*                           Interdisciplinary Graduate School of    */
/*                           Science and Engineering                 */
/*                                                                   */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/* - Redistributions of source code must retain the above copyright  */
/*   notice, this list of conditions and the following disclaimer.   */
/* - Redistributions in binary form must reproduce the above         */
/*   copyright notice, this list of conditions and the following     */
/*   disclaimer in the documentation and/or other materials provided */
/*   with the distribution.                                          */
/* - Neither the name of the HTS working group nor the names of its  */
/*   contributors may be used to endorse or promote products derived */
/*   from this software without specific prior written permission.   */
/*                                                                   */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
/* POSSIBILITY OF SUCH DAMAGE.                                       */
/* ----------------------------------------------------------------- */
/**
 * Copyright 2011 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

package marytts.tools.voiceimport;

import java.io.File;
import java.util.SortedMap;
import java.util.TreeMap;

import marytts.util.io.General;

public class HMMVoiceConfigure extends VoiceImportComponent {

	private DatabaseLayout db;
	private String name = "HMMVoiceConfigure";
	public final String ADAPTSCRIPTS = name + ".adaptScripts";

	/** settings for speaker independent training */
	public final String CONFIGUREFILE = name + ".configureFile";
	public final String SPEAKER = name + ".speaker";
	public final String DATASET = name + ".dataSet";
	public final String LOWERF0 = name + ".lowerF0";
	public final String UPPERF0 = name + ".upperF0";
	public final String NUMTESTFILES = name + ".numTestFiles";

	public final String VER = name + ".version";
	public final String QNUM = name + ".questionsNum";
	public final String FRAMELEN = name + ".frameLen";
	public final String FRAMESHIFT = name + ".frameShift";
	public final String WINDOWTYPE = name + ".windowType";
	public final String NORMALIZE = name + ".normalize";
	public final String FFTLEN = name + ".fftLen";
	public final String FREQWARP = name + ".freqWarp";
	public final String GAMMA = name + ".gamma";
	public final String MGCORDER = name + ".mgcOrder";
	public final String STRORDER = name + ".strOrder";
	public final String STRFILTERNAME = name + ".strFilterFileName";
	public final String MGCBANDWIDTH = name + ".mgcBandWidth";
	public final String STRBANDWIDTH = name + ".strBandWidth";
	public final String LF0BANDWIDTH = name + ".lf0BandWidth";

	public final String LNGAIN = name + ".lnGain";
	public final String SAMPFREQ = name + ".sampfreq";
	public final String NSTATE = name + ".numState";
	public final String NITER = name + ".numIterations";

	/** settings for HTS ADAPT training scripts */
	public final String ADAPTTRAINSPKR = name + ".adaptTrainSpkr";
	public final String ADAPTSPKR = name + ".adaptSpkr";
	public final String ADAPTF0_RANGES = name + ".adaptF0Ranges";
	public final String ADAPTSPKRMASK = name + ".adaptSpkrMask";
	public final String ADAPTHEAD = name + ".adaptHead";

	public final String ADAPTTREEKIND = name + ".adaptTreeKind";
	public final String ADAPTTRANSKIND = name + ".adaptTransKind";

	public String getName() {
		return name;
	}

	/**
	 * Get the map of properties2values containing the default values
	 * 
	 * @param db
	 *            db
	 * @return map of props2values
	 */
	public SortedMap getDefaultProps(DatabaseLayout db) {
		this.db = db;
		if (props == null) {
			props = new TreeMap();
			String rootdir = db.getProp(db.ROOTDIR);

			props.put(CONFIGUREFILE, rootdir + "hts/configure");
			props.put(SPEAKER, "slt");
			props.put(DATASET, "cmu_us_arctic");
			props.put(LOWERF0, "110");
			props.put(UPPERF0, "280");
			props.put(NUMTESTFILES, "10");
			props.put(VER, "1");
			props.put(QNUM, "001");

			// Frame period in point 80 for 16Khz; 240 for 48Khz (Frame period in point = sampRate*0.005sec)");
			int sampRate = db.getSamplingRate();
			int frameLen = (int) Math.round(sampRate * 0.025);
			int frameShift = (int) Math.round(sampRate * 0.005);
			props.put(SAMPFREQ, db.getProp(DatabaseLayout.SAMPLINGRATE));
			props.put(FRAMELEN, Integer.toString(frameLen));
			props.put(FRAMESHIFT, Integer.toString(frameShift));

			if (sampRate >= 48000) {
				props.put(FFTLEN, "2048");
				props.put(FREQWARP, "0.55"); // default HTS-2.2
			} else if (sampRate >= 44100) {
				props.put(FFTLEN, "2048");
				props.put(FREQWARP, "0.53");
			} else if (sampRate >= 22050) {
				props.put(FFTLEN, "1024");
				props.put(FREQWARP, "0.45");
			} else if (sampRate >= 16000) {
				props.put(FFTLEN, "512");
				props.put(FREQWARP, "0.42"); // default HTS-2.1
			} else if (sampRate >= 12000) {
				props.put(FFTLEN, "512");
				props.put(FREQWARP, "0.37");
			} else if (sampRate >= 10000) {
				props.put(FFTLEN, "512");
				props.put(FREQWARP, "0.35");
			} else {// sampRate >= 8000)
				props.put(FFTLEN, "256");
				props.put(FREQWARP, "0.31");
			}

			props.put(WINDOWTYPE, "1");
			props.put(NORMALIZE, "1");
			props.put(GAMMA, "0");
			props.put(MGCORDER, "34");
			props.put(STRORDER, "5");

			if (sampRate >= 48000) {
				props.put(STRFILTERNAME, "filters/mix_excitation_5filters_199taps_48Kz.txt");
			} else {
				props.put(STRFILTERNAME, "filters/mix_excitation_5filters_99taps_16Kz.txt");
			}
			props.put(MGCBANDWIDTH, "35");
			props.put(STRBANDWIDTH, "5");
			props.put(LF0BANDWIDTH, "1");

			props.put(LNGAIN, "1");
			props.put(NSTATE, "5");
			props.put(NITER, "5");

			/** settings for HTS ADAPT training scripts */
			props.put(ADAPTTRAINSPKR, "'bdl clb jmk rms'");
			props.put(ADAPTSPKR, "slt");
			// props.put(F0_RANGES, "'awb 40 280  bdl 40 280  clb 80 350  jmk 40 280  rms 40 280  slt 80 350'");
			props.put(ADAPTF0_RANGES, "'bdl 40 210 clb 130 260 jmk 50 180 rms 40 200 slt 110 280'");
			props.put(ADAPTSPKRMASK, "*/cmu_us_arctic_%%%_*");
			props.put(ADAPTHEAD, "b05");
			props.put(NUMTESTFILES, "5");

			props.put(ADAPTTREEKIND, "dec");
			props.put(ADAPTTRANSKIND, "feat");

			props.put(ADAPTSCRIPTS, "false");
		}

		return props;
	}

	protected void setupHelp() {
		props2Help = new TreeMap();

		props2Help.put(CONFIGUREFILE, "Path and name of configure file.");
		props2Help.put(SPEAKER, "speaker name (default=slt)");
		props2Help.put(DATASET, "dataset (default=cmu_us_arctic)");
		props2Help.put(LOWERF0, "Lower limit for F0 extraction in Hz (default slt=80 female=80, male=40)");
		props2Help.put(UPPERF0, "Upper limit for F0 extraction in Hz (default slt=350 female=350, male=280)");
		props2Help.put(NUMTESTFILES, "Number of test files used for testing, these are copied from phonefeatures set.");

		props2Help.put(VER, "version number of this setting (default=1)");
		props2Help.put(QNUM, "question set number (default='001')");
		props2Help.put(SAMPFREQ, "Sampling frequency in Hz (default=48000)");
		props2Help.put(FRAMELEN, "Frame length in point (16Khz: 400, 48Khz:1200 default=1200)");
		props2Help.put(FRAMESHIFT, "Frame shift in point (16Khz: 80, 48Khz: 240, default=240)");
		props2Help.put(WINDOWTYPE, "Window type -> 0: Blackman 1: Hamming 2: Hanning (default=1)");
		props2Help.put(NORMALIZE, "Normalization -> 0: none 1: by power 2: by magnitude (default=1)");
		props2Help.put(FFTLEN, "FFT length in point (16KHz: 512, 48KHz: 2048,  default=2048)");
		props2Help.put(FREQWARP, "Frequency warping factor +" + "8000  FREQWARP=0.31 " + "10000 FREQWARP=0.35 "
				+ "12000 FREQWARP=0.37 " + "16000 FREQWARP=0.42 " + "22050 FREQWARP=0.45 " + "32000 FREQWARP=0.45 "
				+ "44100 FREQWARP=0.53 " + "48000 FREQWARP=0.55  default=0.55)");
		props2Help
				.put(GAMMA,
						"Pole/Zero weight factor (0: mel-cepstral analysis 1: LPC analysis 2,3,...,N: mel-generalized cepstral (MGC) analysis) (default=0)");
		props2Help.put(MGCORDER, "Order of MGC analysis (default=24 for cepstral form, default=12 for LSP form)");
		props2Help.put(STRORDER, "Order of strengths analysis (default=5 for 5 filter bands)");
		props2Help
				.put(STRFILTERNAME,
						"Name of file containig the filters for voicing strengths analysis in mixed excitation "
								+ "(default 48Kz = filters/mix_excitation_5filters_199taps_48Kz.txt "
								+ " default 16Kz = filters/mix_excitation_5filters_99taps_16Kz.txt). More than 5 filters can be defined so STRORDER should"
								+ "be defined accordingly.");
		props2Help.put(MGCBANDWIDTH, "band width for MGC transforms (default=24 for cepstral form, derault=1 for LSP form)");
		props2Help.put(STRBANDWIDTH, "band width for STR transforms (default=5)");
		props2Help.put(LF0BANDWIDTH, "band width for log F0 transforms (default=1)");

		props2Help.put(LNGAIN, "Use logarithmic gain instead of linear gain (default=1)");
		props2Help.put(NSTATE, "number of HMM states (default=5)");
		props2Help.put(NITER, "number of iterations of embedded training (default=5)");

		// for scripts adapt
		props2Help.put(ADAPTTRAINSPKR, "only ADAPTSCRIPTS: speakers for training (default='awb bdl clb jmk rms')");
		props2Help.put(ADAPTSPKR, "only ADAPTSCRIPTS: speakers for adaptation (default=slt)");
		props2Help
				.put(ADAPTSPKRMASK,
						"only ADAPTSCRIPTS: speaker name pattern (mask for file names, -h option in HERest) (default=*/cmu_us_arctic_%%%_*)");
		props2Help.put(ADAPTHEAD, "only ADAPTSCRIPTS: file name header for adaptation data (default=b05)");
		props2Help
				.put(ADAPTF0_RANGES,
						"only ADAPTSCRIPTS: F0 search ranges (spkr1 lower1 upper1  spkr2 lower2 upper2...). "
								+ "only ADAPTSCRIPTS: Order of speakers in F0_RANGES should be equal to that in ALLSPKR=$(TRAINSPKR) $(ADAPTSPKR)"
								+ "(default='bdl 40 210 clb 130 260 jmk 50 180 rms 40 200 slt 110 280')");
		props2Help.put(ADAPTTREEKIND,
				"only ADAPTSCRIPTS: regression class tree kind (dec: decision tree, reg: regression tree, default=dec)");
		props2Help.put(ADAPTTRANSKIND,
				"only ADAPTSCRIPTS: adaptation transform kind (mean: MLLRMEAN, cov: MLLRCOV, feat: CMLLR, default=feat)");

		props2Help.put(ADAPTSCRIPTS,
				"ADAPTSCRIPTS=false: speaker dependent scripts, ADAPTSCRIPTS=true: speaker adaptation/adaptive scripts.  ");
	}

	/**
	 * Do the computations required by this component.
	 * 
	 * @throws Exception
	 *             Exception
	 * @return true on success, false on failure
	 */
	public boolean compute() throws Exception {

		System.out.println("\nChecking directories and files for running HTS training scripts...");

		String filedir = db.getProp(DatabaseLayout.ROOTDIR);
		String cmdLine;
		boolean speech_transcriptions = true;

		File dirWav = new File(filedir + "wav");
		File dirText = new File(filedir + "text");
		File dirRaw = new File(filedir + "hts/data/raw");
		File dirUtt = new File(filedir + "hts/data/utts");
		File dirFea = new File(filedir + "phonefeatures");
		File dirLab = new File(filedir + "phonelab");

		if (getProp(ADAPTSCRIPTS).contentEquals("false")) {
			// Check if wav directory exist and have files
			if (!dirWav.exists() || dirWav.list().length == 0 || !dirRaw.exists() || dirRaw.list().length == 0) {
				System.out.println("Problem with wav and hts/data/raw directories: wav files and raw files do not exist"
						+ " in current directory: " + filedir);
				speech_transcriptions = false;
			}
			// check if hts/data/raw directory exist and have files
			if (!dirWav.exists() || dirWav.list().length == 0 || !dirRaw.exists() || dirRaw.list().length == 0) {
				System.out.println("Problem with wav and hts/data/raw directories: wav files and raw files do not exist"
						+ " in current directory: " + filedir);
				speech_transcriptions = false;
			}
			// Check if text directory exist and have files
			if ((!dirText.exists() || dirText.list().length == 0) && (!dirUtt.exists() || dirUtt.list().length == 0)) {
				System.out
						.println("Problem with transcription directories text or hts/data/utts (Festival format): utts files and text files do not exist"
								+ " in current directory: " + filedir);
				System.out
						.println(" the transcriptions in the directory text will be used to generate the phonelab directory, if there are no hts/data/utts files"
								+ "(in Festival format), please provide the transcriptions of the files you are going to use for trainning.");
				speech_transcriptions = false;
			}
			// Check if phonefeatures and phonelab directory exist and have files
			if ((!dirFea.exists() || dirFea.list().length == 0) && (!dirLab.exists() || dirLab.list().length == 0)) {
				System.out.println("Problems with directories phonefeatures or phonelab, they do not exist or they are empty.");
				speech_transcriptions = false;
			}
		} else { // configuration for ADAPTSCRIPT
			// Get the speakers directories
			File dirSpeakersFea = new File(filedir + "/phonefeatures");
			File dirSpeakersLab = new File(filedir + "/phonelab");

			String[] speakers;
			if (dirSpeakersFea.exists() && dirSpeakersFea.list().length > 0 && dirSpeakersLab.exists()
					&& dirSpeakersLab.list().length > 0) {
				speakers = dirSpeakersFea.list();
				for (int i = 0; i < speakers.length; i++) {
					File dirSpeakerFea = new File(filedir + "/phonefeatures/" + speakers[i]);
					File dirSpeakerLab = new File(filedir + "/phonelab/" + speakers[i]);
					if (dirSpeakerFea.exists() && dirSpeakerFea.list().length > 0 && dirSpeakerLab.exists()
							&& dirSpeakerLab.list().length > 0) {
						speech_transcriptions = true;
					} else {
						System.out.println("Error: directories " + filedir + "/phonefeatures/" + speakers[i] + " and/or "
								+ filedir + "/phonelab/" + speakers[i] + " do not contain files.");
						speech_transcriptions = false;
						break;
					}
				}
			} else {
				System.out.println("Error: directories " + filedir + "phonefeatures and/or " + filedir
						+ "phonelab do not contain files.");
				speech_transcriptions = false;
			}
		}

		if (speech_transcriptions) {

			if (getProp(ADAPTSCRIPTS).contentEquals("false")) {
				/* if previous files and directories exist then run configure */
				/* first it should go to the hts directory and there run ./configure */
				System.out.println("Running make configure: ");
				cmdLine = "chmod +x " + getProp(CONFIGUREFILE);
				General.launchProc(cmdLine, "configure", filedir);

				cmdLine = "cd " + filedir + "hts\n" + getProp(CONFIGUREFILE) + " --with-tcl-search-path="
						+ db.getExternal(DatabaseLayout.TCLPATH) + " --with-sptk-search-path="
						+ db.getExternal(DatabaseLayout.SPTKPATH) + " --with-hts-search-path="
						+ db.getExternal(DatabaseLayout.HTSPATH) + " --with-hts-engine-search-path="
						+ db.getExternal(DatabaseLayout.HTSENGINEPATH) + " --with-sox-search-path="
						+ db.getExternal(DatabaseLayout.SOXPATH) + " SPEAKER=" + getProp(SPEAKER) + " DATASET="
						+ getProp(DATASET) + " LOWERF0=" + getProp(LOWERF0) + " UPPERF0=" + getProp(UPPERF0) + " VER="
						+ getProp(VER) + " QNUM=" + getProp(QNUM) + " FRAMELEN=" + getProp(FRAMELEN) + " FRAMESHIFT="
						+ getProp(FRAMESHIFT) + " WINDOWTYPE=" + getProp(WINDOWTYPE) + " NORMALIZE=" + getProp(NORMALIZE)
						+ " FFTLEN=" + getProp(FFTLEN) + " FREQWARP=" + getProp(FREQWARP) + " GAMMA=" + getProp(GAMMA)
						+ " MGCORDER=" + getProp(MGCORDER) + " STRORDER=" + getProp(STRORDER) + " STRFILTERNAME="
						+ getProp(STRFILTERNAME) + " LNGAIN=" + getProp(LNGAIN) + " SAMPFREQ=" + getProp(SAMPFREQ) + " NSTATE="
						+ getProp(NSTATE) + " NITER=" + getProp(NITER);

			} else {
				/* if previous files and directories exist then run configure */
				System.out.println("Running make configure: ");
				cmdLine = "chmod +x " + getProp(CONFIGUREFILE);
				General.launchProc(cmdLine, "configure", filedir);

				cmdLine = "cd " + filedir + "hts\n" + getProp(CONFIGUREFILE) + " --with-tcl-search-path="
						+ db.getExternal(DatabaseLayout.TCLPATH) + " --with-sptk-search-path="
						+ db.getExternal(DatabaseLayout.SPTKPATH) + " --with-hts-search-path="
						+ db.getExternal(DatabaseLayout.HTSPATH) + " --with-hts-engine-search-path="
						+ db.getExternal(DatabaseLayout.HTSENGINEPATH) + " --with-sox-search-path="
						+ db.getExternal(DatabaseLayout.SOXPATH) + " SPEAKER=" + getProp(SPEAKER) + " DATASET="
						+ getProp(DATASET) + " TRAINSPKR=" + getProp(ADAPTTRAINSPKR) + " ADAPTSPKR=" + getProp(ADAPTSPKR)
						+ " F0_RANGES=" + getProp(ADAPTF0_RANGES) + " SPKRMASK=" + getProp(ADAPTSPKRMASK) + " ADAPTHEAD="
						+ getProp(ADAPTHEAD) + " VER=" + getProp(VER) + " QNUM=" + getProp(QNUM) + " FRAMELEN="
						+ getProp(FRAMELEN) + " FRAMESHIFT=" + getProp(FRAMESHIFT) + " WINDOWTYPE=" + getProp(WINDOWTYPE)
						+ " NORMALIZE=" + getProp(NORMALIZE) + " FFTLEN=" + getProp(FFTLEN) + " FREQWARP=" + getProp(FREQWARP)
						+ " GAMMA=" + getProp(GAMMA) + " MGCORDER=" + getProp(MGCORDER) + " STRORDER=" + getProp(STRORDER)
						+ " STRFILTERNAME=" + getProp(STRFILTERNAME) + " LNGAIN=" + getProp(LNGAIN) + " SAMPFREQ="
						+ getProp(SAMPFREQ) + " NSTATE=" + getProp(NSTATE) + " NITER=" + getProp(NITER) + " MGCBANDWIDTH="
						+ getProp(MGCBANDWIDTH) + " STRBANDWIDTH=" + getProp(STRBANDWIDTH) + " LF0BANDWIDTH="
						+ getProp(LF0BANDWIDTH) + " TREEKIND=" + getProp(ADAPTTREEKIND) + " TRANSKIND=" + getProp(ADAPTTRANSKIND);
			}

			General.launchBatchProc(cmdLine, "Configure", filedir);

		} else {
			System.out.println("Problems with directories phonefeatures or phonelab, they do not exist or they are empty.");
			System.out.println("Problems with directories wav, text or hts/data/raw, they do not exist or they are empty.");
			return false;
		}

		return true;

	}

	/**
	 * Provide the progress of computation, in percent, or -1 if that feature is not implemented.
	 * 
	 * @return -1 if not implemented, or an integer between 0 and 100.
	 */
	public int getProgress() {
		return -1;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy