marytts.htsengine.HTSEngineTest Maven / Gradle / Ivy
The newest version!
//* ----------------------------------------------------------------- */
/* The HMM-Based Speech Synthesis Engine "hts_engine API" */
/* developed by HTS Working Group */
/* http://hts-engine.sourceforge.net/ */
/* ----------------------------------------------------------------- */
/* */
/* Copyright (c) 2001-2010 Nagoya Institute of Technology */
/* Department of Computer Science */
/* */
/* 2001-2008 Tokyo Institute of Technology */
/* Interdisciplinary Graduate School of */
/* Science and Engineering */
/* */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials provided */
/* with the distribution. */
/* - Neither the name of the HTS working group nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* ----------------------------------------------------------------- */
/**
* Copyright 2011 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
*/
package marytts.htsengine;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.Scanner;
import java.util.Vector;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import marytts.modules.HTSEngine;
import marytts.signalproc.analysis.Mfccs;
import marytts.util.data.audio.AudioPlayer;
import marytts.util.data.text.SnackTextfileDoubleDataSource;
import marytts.util.io.LEDataInputStream;
/***
* Several functions for running the htsEngine or other components stand alone
*
* @author Marcela Charfuelan
*
*/
public class HTSEngineTest {
public class PhonemeDuration {
private String phone;
private float duration;
public PhonemeDuration(String ph, float dur) {
phone = ph;
duration = dur;
}
public void setPhoneme(String str) {
phone = str;
}
public void setDuration(float fval) {
duration = fval;
}
public String getPhoneme() {
return phone;
}
public float getDuration() {
return duration;
}
}
/**
* Generation of speech using external specification of duration: using ContinuousFeatureProcessors of TARGETFEATURES Input: a
* TARGETFEATURES (.pfeats) file, this file should contain ContinuousFeatureProcessors: unit_duration float unit_logf0 float
* unit_logf0delta float The features unit_duration and unit_logf0 are used as external prosody, unit_logf0Delta is not used.
* The TARGETFEATURES (.pfeats) file including ContinuousFeatureProcessors values can be generated with a unitselection voice
* or a mbrola voice, it can NOT be generated with HMM voices.
*
*
* @throws Exception
* Exception
*/
public void synthesisWithContinuousFeatureProcessors() throws Exception {
int i, j, n, t;
// context features file
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/red_ball.pfeats";
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/THAT_ball.pfeats";
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/RED_ball.pfeats";
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/red_BALL.pfeats";
String feaFile = "/project/mary/marcela/f0-hsmm-experiment/THAT_BALL.pfeats";
//
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/us1-mbrola.pfeats";
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/cmu-slt-unit-selection.pfeats";
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/welcome.pfeats";
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/canadian.pfeats";
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/arctic_a0003.pfeats";
// String feaFile = "/project/mary/marcela/f0-hsmm-experiment/author.pfeats";
HTSEngine hmm_tts = new HTSEngine();
HMMData htsData = new HMMData();
/* For initialise provide the name of the hmm voice and the name of its configuration file, */
String MaryBase = "/project/mary/marcela/openmary/"; /* MARY_BASE directory. */
String voiceName = "cmu-slt-hsmm"; /* voice name */
String voiceConfig = "en_US-cmu-slt-hsmm.config"; /* voice configuration file name. */
String outWavFile = MaryBase + "tmp/tmp.wav"; /* to save generated audio file */
htsData.initHMMData(voiceName, MaryBase, voiceConfig);
// Set these variables so the htsEngine use the ContinuousFeatureProcessors features
htsData.setUseAcousticModels(true);
// The settings for using GV and MixExc can besynthesisWithExternalProsodySpecificationFiles changed in this way:
htsData.setUseGV(true);
htsData.setUseMixExc(true);
htsData.setUseFourierMag(true); // if the voice was trained with Fourier magnitudes
/**
* The utterance model, um, is a Vector (or linked list) of Model objects. It will contain the list of models for current
* label file.
*/
HTSUttModel um = new HTSUttModel();
HTSParameterGeneration pdf2par = new HTSParameterGeneration();
HTSVocoder par2speech = new HTSVocoder();
AudioInputStream ais;
try {
/* Process Mary context features file and creates UttModel um. */
um = hmm_tts.processUttFromFile(feaFile, htsData);
/* Generate sequence of speech parameter vectors, generate parameters out of sequence of pdf's */
/* the generated parameters will be saved in tmp.mfc and tmp.f0, including Mary header. */
boolean debug = false; /* so it DOES NOT save the generated parameters in parFile */
pdf2par.htsMaximumLikelihoodParameterGeneration(um, htsData);
/* Synthesize speech waveform, generate speech out of sequence of parameters */
ais = par2speech.htsMLSAVocoder(pdf2par, htsData);
System.out.println("saving to file: " + outWavFile);
File fileOut = new File(outWavFile);
if (AudioSystem.isFileTypeSupported(AudioFileFormat.Type.WAVE, ais)) {
AudioSystem.write(ais, AudioFileFormat.Type.WAVE, fileOut);
}
System.out.println("Calling audioplayer:");
AudioPlayer player = new AudioPlayer(fileOut);
player.start();
player.join();
System.out.println("audioplayer finished...");
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
}
} /* main method */
/**
* Generation of speech using external specification of duration: duration and logf0 in external files Input: a TARGETFEATURES
* (.pfeats) file
*
*
* @throws Exception
* Exception
*/
public void synthesisWithProsodySpecificationInExternalFiles() throws Exception {
int i, j, n, t;
// context features file
String feaFile = "/project/mary/marcela/openmary/lib/voices/cmu-slt-hsmm/cmu_us_arctic_slt_a0001.pfeats";
HTSEngine hmm_tts = new HTSEngine();
HMMData htsData = new HMMData();
/* For initialise provide the name of the hmm voice and the name of its configuration file, */
String MaryBase = "/project/mary/marcela/openmary/"; /* MARY_BASE directory. */
String voiceName = "cmu-slt-hsmm"; /* voice name */
String voiceConfig = "en_US-cmu-slt-hsmm.config"; /* voice configuration file name. */
String outWavFile = MaryBase + "tmp/tmp.wav"; /* to save generated audio file */
htsData.initHMMData(voiceName, MaryBase, voiceConfig);
// The settings for using GV and MixExc can be changed in this way:
htsData.setUseGV(true);
htsData.setUseMixExc(true);
htsData.setUseFourierMag(true); // if the voice was trained with Fourier magnitudes
/**
* The utterance model, um, is a Vector (or linked list) of Model objects. It will contain the list of models for current
* label file.
*/
HTSUttModel um = new HTSUttModel();
HTSParameterGeneration pdf2par = new HTSParameterGeneration();
HTSVocoder par2speech = new HTSVocoder();
AudioInputStream ais;
// Specify external files:
// external duration extracted with the voice import tools - EHMM
String labFile = "/project/mary/marcela/f0-hsmm-experiment/cmu_us_arctic_slt_a0001.lab";
// external duration obtained with MARY, there is a problem with this because it does not have an initial sil
// String labFile = "/project/mary/marcela/f0-hsmm-experiment/cmu_us_arctic_slt_a0001.realised_durations";
// external F0 contour obtained with SPTK during HMMs creation
String lf0File = "/project/mary/marcela/f0-hsmm-experiment/cmu_us_arctic_slt_a0001.lf0";
// Load and set external durations
// ---this is not working in MARY 4.1
// ---htsData.setUseDurationFromExternalFile(true);
float totalDuration;
int totalDurationFrames;
float fperiodsec = ((float) htsData.getFperiod() / (float) htsData.getRate());
hmm_tts.setPhonemeAlignmentForDurations(true);
Vector durations = new Vector();
totalDuration = loadDurationsForAlignment(labFile, durations);
// set the external durations
hmm_tts.setAlignDurations(durations);
totalDurationFrames = (int) ((totalDuration / fperiodsec));
// Depending on how well aligned the durations and the lfo file are
// this factor can be used to extend or shrink the durations per phoneme so
// it syncronize with the number of frames in the lf0 file
hmm_tts.setNewStateDurationFactor(0.37);
// set external logf0
htsData.setUseAcousticModels(true);
try {
/* Process Mary context features file and creates UttModel um. */
um = hmm_tts.processUttFromFile(feaFile, htsData);
/* Generate sequence of speech parameter vectors, generate parameters out of sequence of pdf's */
/* the generated parameters will be saved in tmp.mfc and tmp.f0, including Mary header. */
boolean debug = false; /* so it DOES NOT save the generated parameters in parFile */
pdf2par.htsMaximumLikelihoodParameterGeneration(um, htsData);
/* Synthesize speech waveform, generate speech out of sequence of parameters */
ais = par2speech.htsMLSAVocoder(pdf2par, htsData);
System.out.println("saving to file: " + outWavFile);
File fileOut = new File(outWavFile);
if (AudioSystem.isFileTypeSupported(AudioFileFormat.Type.WAVE, ais)) {
AudioSystem.write(ais, AudioFileFormat.Type.WAVE, fileOut);
}
System.out.println("Calling audioplayer:");
AudioPlayer player = new AudioPlayer(fileOut);
player.start();
player.join();
System.out.println("audioplayer finished...");
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
}
} /* main method */
/***
* Load durations for phone alignment when the durations have been generated by EHMMs.
*
* @param fileName
* the format is the same as for phonelab.
* @param alignDur
* alignDur
* @return totalDuration
*/
public float loadDurationsForAlignment(String fileName, Vector alignDur) {
Scanner s = null;
String line;
float totalDuration = 0;
float previous = 0;
float current = 0;
try {
s = new Scanner(new File(fileName));
int i = 0;
while (s.hasNext()) {
line = s.nextLine();
if (!line.startsWith("#") && !line.startsWith("format")) {
String val[] = line.split(" ");
current = Float.parseFloat(val[0]);
PhonemeDuration var;
if (previous == 0)
alignDur.add(new PhonemeDuration(val[2], current));
else
alignDur.add(new PhonemeDuration(val[2], (current - previous)));
totalDuration += alignDur.get(i).getDuration();
System.out.println("phone = " + alignDur.get(i).getPhoneme() + " dur(" + i + ")="
+ alignDur.get(i).getDuration() + " totalDuration=" + totalDuration);
i++;
previous = current;
}
}
System.out.println();
s.close();
} catch (IOException e) {
e.printStackTrace();
}
// return alignDur;
return totalDuration;
}
/***
* Load logf0, in HTS format, create a voiced array and set this values in pdf2par This contour should be aligned with the
* durations, so the total duration in frames should be the same as in the lf0 file
*
* @param lf0File
* : in HTS formant
* @param totalDurationFrames
* : the total duration in frames can be calculated as: totalDurationFrames = totalDurationInSeconds /
* (framePeriodInSamples / SamplingFrequencyInHz)
* @param pdf2par
* : HTSParameterGeneration object
* @throws Exception
* If the number of frames in the lf0 file is not the same as represented in the total duration (in frames).
*/
public void loadF0contour(String lf0File, int totalDurationFrames, HTSParameterGeneration pdf2par) throws Exception {
HTSPStream lf0Pst = null;
boolean[] voiced = null;
LEDataInputStream lf0Data;
int lf0Vsize = 3;
int totalFrame = 0;
int lf0VoicedFrame = 0;
float fval;
lf0Data = new LEDataInputStream(new BufferedInputStream(new FileInputStream(lf0File)));
/* First i need to know the size of the vectors */
try {
while (true) {
fval = lf0Data.readFloat();
totalFrame++;
if (fval > 0)
lf0VoicedFrame++;
}
} catch (EOFException e) {
}
lf0Data.close();
// Here we need to check that the total duration in frames is the same as the number of frames
// (NOTE: it can be a problem afterwards when the durations per phone are aligned to the lenght of each state
// in htsEngine._processUtt() )
if (totalDurationFrames != totalFrame) {
System.out.println("The total duration in frames " + totalDurationFrames
+ " is not the same as the number of frames " + totalFrame + " in the lf0 file: " + lf0File);
} else
System.out.println("totalDurationFrames = " + totalDurationFrames + " totalF0Frames = " + totalFrame);
voiced = new boolean[totalFrame];
lf0Pst = new HTSPStream(lf0Vsize, totalFrame, HMMData.FeatureType.LF0, 0);
/* load lf0 data */
/* for lf0 i just need to load the voiced values */
lf0VoicedFrame = 0;
lf0Data = new LEDataInputStream(new BufferedInputStream(new FileInputStream(lf0File)));
for (int i = 0; i < totalFrame; i++) {
fval = lf0Data.readFloat();
if (fval < 0) {
voiced[i] = false;
System.out.println("frame: " + i + " = 0.0");
} else {
voiced[i] = true;
lf0Pst.setPar(lf0VoicedFrame, 0, fval);
lf0VoicedFrame++;
System.out.format("frame: %d = %.2f\n", i, fval);
}
}
lf0Data.close();
// Set lf0 and voiced in pdf2par
pdf2par.setlf0Pst(lf0Pst);
pdf2par.setVoicedArray(voiced);
}
/**
* Stand alone testing using a TARGETFEATURES file as input. Generates duration: file.lab, duration state level: file.slab,
* f0: file.f0, mfcc: file.mfcc and sound file: file.wav out of HMM models
*
* @throws IOException
* IOException
* @throws InterruptedException
* InterruptedException
* @throws Exception
* Exception
*/
public void generateParameters() throws IOException, InterruptedException, Exception {
int i, j;
/*
* For initialise provide the name of the hmm voice and the name of its configuration file, also indicate the name of your
* MARY_BASE directory.
*/
String MaryBase = "/project/mary/marcela/openmary/";
String locale = "english";
String voice = "hsmm-slt";
String configFile = locale + "-" + voice + ".config";
// directory where the context features of each file are
String contextFeaDir = "/project/mary/marcela/quality-control-experiment/slt/phonefeatures/";
// the output dir has to be created already
String outputDir = "/project/mary/marcela/quality-control-experiment/slt/hmmGenerated/";
// list of contex features files, the file names contain the basename without path and ext
String filesList = "/project/mary/marcela/quality-control-experiment/slt/phonefeatures-list.txt";
// Create a htsengine object
HTSEngine hmm_tts = new HTSEngine();
// Create and set HMMData
HMMData htsData = new HMMData();
htsData.initHMMData(voice, MaryBase, configFile);
float fperiodmillisec = ((float) htsData.getFperiod() / (float) htsData.getRate()) * 1000;
float fperiodsec = ((float) htsData.getFperiod() / (float) htsData.getRate());
// Settings for using GV, mixed excitation
htsData.setUseGV(true);
htsData.setUseMixExc(true);
/* generate files out of HMMs */
String file, feaFile, parFile, durStateFile, durFile, mgcModifiedFile, outWavFile;
try {
Scanner filesScanner = new Scanner(new BufferedReader(new FileReader(filesList)));
while (filesScanner.hasNext()) {
file = filesScanner.nextLine();
feaFile = contextFeaDir + file + ".pfeats";
parFile = outputDir + file; /* generated parameters mfcc and f0, Mary format */
durFile = outputDir + file + ".lab"; /* realised durations */
durStateFile = outputDir + file + ".slab"; /* state level realised durations */
outWavFile = outputDir + file + ".wav"; /* generated wav file */
/*
* The utterance model, um, is a Vector (or linked list) of Model objects. It will contain the list of models for
* the current label file.
*/
HTSUttModel um = new HTSUttModel();
HTSParameterGeneration pdf2par = new HTSParameterGeneration();
HTSVocoder par2speech = new HTSVocoder();
AudioInputStream ais;
/* Process label file of Mary context features and creates UttModel um. */
um = hmm_tts.processUttFromFile(feaFile, htsData);
/* save realised durations in a lab file */
FileWriter outputStream;
outputStream = new FileWriter(durFile);
outputStream.write(hmm_tts.getRealisedDurations());
outputStream.close();
/* save realised durations at state label in a slab file */
float totalDur = 0;
int numStates = htsData.getCartTreeSet().getNumStates();
outputStream = new FileWriter(durStateFile);
outputStream.write("#\n");
for (i = 0; i < um.getNumModel(); i++) {
for (j = 0; j < numStates; j++) {
totalDur += (um.getUttModel(i).getDur(j) * fperiodsec);
if (j < (numStates - 1))
outputStream.write(totalDur + " 0 " + um.getUttModel(i).getPhoneName() + "\n");
else
outputStream.write(totalDur + " 1 " + um.getUttModel(i).getPhoneName() + "\n");
}
}
outputStream.close();
/* Generate sequence of speech parameter vectors, generate parameters out of sequence of pdf's */
boolean debug = true; /*
* with debug=true it saves the generated parameters f0 and mfcc in parFile.f0 and
* parFile.mfcc in Mary format.
*/
pdf2par.htsMaximumLikelihoodParameterGeneration(um, htsData);
/* Synthesize speech waveform, generate speech out of sequence of parameter */
ais = par2speech.htsMLSAVocoder(pdf2par, htsData);
System.out.println("saving to file: " + outWavFile);
File fileOut = new File(outWavFile);
if (AudioSystem.isFileTypeSupported(AudioFileFormat.Type.WAVE, ais)) {
AudioSystem.write(ais, AudioFileFormat.Type.WAVE, fileOut);
}
/*
* // uncomment to listen the files System.out.println("Calling audioplayer:"); AudioPlayer player = new
* AudioPlayer(fileOut); player.start(); player.join(); System.out.println("audioplayer finished...");
*/
} // while files in testFiles
filesScanner.close();
} catch (Exception e) {
System.err.println("Exception: " + e.getMessage());
}
} /* main method */
/***
* Calculate mfcc using SPTK, uses sox to convert wav→raw
*
* @throws IOException
* IOException
* @throws InterruptedException
* InterruptedException
* @throws Exception
* Exception
*/
public void getSptkMfcc() throws IOException, InterruptedException, Exception {
String inFile = "/project/mary/marcela/quality-control-experiment/slt/cmu_us_arctic_slt_a0001.wav";
String outFile = "/project/mary/marcela/quality-control-experiment/slt/cmu_us_arctic_slt_a0001.mfc";
String tmpFile = "/project/mary/marcela/quality-control-experiment/slt/tmp.mfc";
String tmpRawFile = "/project/mary/marcela/quality-control-experiment/slt/tmp.raw";
String cmd;
// SPTK parameters
int fs = 16000;
int frameLength = 400;
int frameLengthOutput = 512;
int framePeriod = 80;
int mgcOrder = 24;
int mgcDimension = 25;
// Mary header parameters
double ws = (frameLength / fs); // window size in seconds
double ss = (framePeriod / fs); // skip size in seconds
// SOX and SPTK commands
String sox = "/usr/bin/sox";
String x2x = " /project/mary/marcela/sw/SPTK-3.1/bin/x2x";
String frame = " /project/mary/marcela/sw/SPTK-3.1/bin/frame";
String window = " /project/mary/marcela/sw/SPTK-3.1/bin/window";
String mcep = " /project/mary/marcela/sw/SPTK-3.1/bin/mcep";
String swab = "/project/mary/marcela/sw/SPTK-3.1/bin/swab";
// convert the wav file to raw file with sox
cmd = sox + " " + inFile + " " + tmpRawFile;
launchProc(cmd, "sox", inFile);
System.out.println("Extracting MGC coefficients from " + inFile);
cmd = x2x + " +sf " + tmpRawFile + " | " + frame + " +f -l " + frameLength + " -p " + framePeriod + " | " + window
+ " -l " + frameLength + " -L " + frameLengthOutput + " -w 1 -n 1 | " + mcep + " -a 0.42 -m " + mgcOrder
+ " -l " + frameLengthOutput + " | " + swab + " +f > " + tmpFile;
System.out.println("cmd=" + cmd);
launchBatchProc(cmd, "getSptkMfcc", inFile);
// Now get the data and add the Mary header
int numFrames;
DataInputStream mfcData = null;
Vector mfc = new Vector();
mfcData = new DataInputStream(new BufferedInputStream(new FileInputStream(tmpFile)));
try {
while (true) {
mfc.add(mfcData.readFloat());
}
} catch (EOFException e) {
}
mfcData.close();
numFrames = mfc.size();
int numVectors = numFrames / mgcDimension;
Mfccs mgc = new Mfccs(numVectors, mgcDimension);
int k = 0;
for (int i = 0; i < numVectors; i++) {
for (int j = 0; j < mgcDimension; j++) {
mgc.mfccs[i][j] = mfc.get(k);
k++;
}
}
// Mary header parameters
mgc.params.samplingRate = fs; /* samplingRateInHz */
mgc.params.skipsize = (float) ss; /* skipSizeInSeconds */
mgc.params.winsize = (float) ws; /* windowSizeInSeconds */
mgc.writeMfccFile(outFile);
}
/***
* Calculate mfcc using SPTK, uses sox to convert wav→raw
*
* @throws IOException
* IOException
* @throws InterruptedException
* InterruptedException
* @throws Exception
* Exception
*/
public void getSptkSnackLf0() throws IOException, InterruptedException, Exception {
String inFile = "/project/mary/marcela/quality-control-experiment/slt/cmu_us_arctic_slt_a0001.wav";
String outFile = "/project/mary/marcela/quality-control-experiment/slt/cmu_us_arctic_slt_a0001.lf0";
String tmpFile = "/project/mary/marcela/quality-control-experiment/slt/tmp.mfc";
String tmpRawFile = "/project/mary/marcela/quality-control-experiment/slt/tmp.raw";
String tmpRawLongFile = "/project/mary/marcela/quality-control-experiment/slt/tmp_long.raw";
String scriptFileName = "/project/mary/marcela/quality-control-experiment/slt/lf0.tcl";
String snackFile = "/project/mary/marcela/quality-control-experiment/slt/tmp.lf0";
String MAXPITCH;
String MINPITCH;
String gender = "female";
if (gender.contentEquals("female")) {
MAXPITCH = "500";
MINPITCH = "100";
} else { // male
MAXPITCH = "300";
MINPITCH = "75";
}
String FRAMELENGTH = "0.005";
String FRAMERATE = "16000";
String cmd;
// SOX and SPTK commands
String sox = "/usr/bin/sox";
String x2x = " /project/mary/marcela/sw/SPTK-3.1/bin/x2x";
String step = "/project/mary/marcela/sw/SPTK-3.1/bin/step";
String nrand = "/project/mary/marcela/sw/SPTK-3.1/bin/nrand";
String sopr = "/project/mary/marcela/sw/SPTK-3.1/bin/sopr";
String vopr = "/project/mary/marcela/sw/SPTK-3.1/bin/vopr";
String SNACKDIR = "/project/mary/marcela/sw/snack2.2.10/";
// convert the wav file to raw file with sox
cmd = sox + " " + inFile + " " + tmpRawFile;
launchProc(cmd, "sox", inFile);
// create temporary raw file, with 0.005 ms of silence (with a bit noise) added
// at the beginning and 0.025 at the end
System.out.println("Create temporary raw file" + inFile);
cmd = step + " -l 80 -v 0.0 | x2x +fs > tmp.head\n" + step + " -l 400 -v 0.0 | x2x +fs > tmp.tail\n" + "cat tmp.head "
+ tmpRawFile + " tmp.tail | x2x +sf > tmp.long\n" + "leng=`x2x +fa tmp.long | /usr/bin/wc -l`\n"
+ "echo \"leng=$leng\"\n" + nrand + " -l $leng | " + sopr + " -m 50 | " + vopr + " -a tmp.long | " + x2x
+ " +fs > " + tmpRawLongFile + "\n" + "rm tmp.tail tmp.long tmp.head " + tmpRawFile + "\n";
System.out.println("cmd=" + cmd);
launchBatchProc(cmd, "getSptkSnackLf0", tmpRawFile);
// Now extract F0 with snack and the modified raw file
System.out.println("scriptFileName = " + scriptFileName);
File script = new File(scriptFileName);
System.out.println("Extracting LF0 coefficients from " + inFile);
if (script.exists())
script.delete();
PrintWriter toScript = new PrintWriter(new FileWriter(script));
toScript.println("#!" + SNACKDIR);
toScript.println("");
toScript.println("package require snack");
toScript.println("");
toScript.println("snack::sound s");
toScript.println("");
toScript.println("s read [lindex $argv 0] -fileformat RAW -rate [lindex $argv 1] -encoding Lin16 -byteorder littleEndian");
toScript.println("");
toScript.println("set fd [open [lindex $argv 2] w]");
toScript.println("set tmp [s pitch -method esps -maxpitch [lindex $argv 3] "
+ "-minpitch [lindex $argv 4] -framelength [lindex $argv 5]]\n" + "foreach line $tmp {\n"
+ " set x [lindex $line 0]\n" + " if { $x == 0 } {\n" + " puts $fd -1.0e+10\n" + " } else {\n"
+ " puts $fd [expr log($x)]\n" + " }\n" + "}\n");
toScript.println("close $fd");
toScript.println("");
toScript.println("exit");
toScript.println("");
toScript.close();
cmd = "tcl " + scriptFileName + " " + tmpRawLongFile + " " + FRAMERATE + " " + snackFile + " " + MAXPITCH + " "
+ MINPITCH + " " + FRAMELENGTH;
System.out.println("cmd=" + cmd);
launchProc(cmd, "getSptkSnackLf0", tmpRawLongFile);
double[] f0 = new SnackTextfileDoubleDataSource(new FileReader(snackFile)).getAllData();
for (int j = 0; j < f0.length; j++) {
System.out.println(j + " f0[" + j + "]= " + f0[j]);
}
}
/**
* A general process launcher for the various tasks (copied from ESTCaller.java)
*
* @param cmdLine
* the command line to be launched.
* @param task
* a task tag for error messages, such as "Pitchmarks" or "LPC".
* @param baseName
* basename of the file currently processed, for error messages.
*/
private void launchProc(String cmdLine, String task, String baseName) {
Process proc = null;
BufferedReader procStdout = null;
String line = null;
try {
proc = Runtime.getRuntime().exec(cmdLine);
/* Collect stdout and send it to System.out: */
procStdout = new BufferedReader(new InputStreamReader(proc.getInputStream()));
while (true) {
line = procStdout.readLine();
if (line == null)
break;
System.out.println(line);
}
/* Wait and check the exit value */
proc.waitFor();
if (proc.exitValue() != 0) {
throw new RuntimeException(task + " computation failed on file [" + baseName + "]!\n" + "Command line was: ["
+ cmdLine + "].");
}
} catch (IOException e) {
throw new RuntimeException(task + " computation provoked an IOException on file [" + baseName + "].", e);
} catch (InterruptedException e) {
throw new RuntimeException(task + " computation interrupted on file [" + baseName + "].", e);
}
}
/**
* A general process launcher for the various tasks but using an intermediate batch file (copied from ESTCaller.java)
*
* @param cmdLine
* the command line to be launched.
* @param task
* a task tag for error messages, such as "Pitchmarks" or "LPC".
* @param baseName
* basename of the file currently processed, for error messages.
*/
private void launchBatchProc(String cmdLine, String task, String baseName) {
Process proc = null;
Process proctmp = null;
BufferedReader procStdout = null;
String line = null;
String tmpFile = "./tmp.bat";
try {
FileWriter tmp = new FileWriter(tmpFile);
tmp.write(cmdLine);
tmp.close();
/* make it executable... */
proctmp = Runtime.getRuntime().exec("chmod +x " + tmpFile);
proctmp.waitFor();
proc = Runtime.getRuntime().exec(tmpFile);
/* Collect stdout and send it to System.out: */
procStdout = new BufferedReader(new InputStreamReader(proc.getInputStream()));
while (true) {
line = procStdout.readLine();
if (line == null)
break;
System.out.println(line);
}
/* Wait and check the exit value */
proc.waitFor();
if (proc.exitValue() != 0) {
throw new RuntimeException(task + " computation failed on file [" + baseName + "]!\n" + "Command line was: ["
+ cmdLine + "].");
}
} catch (IOException e) {
throw new RuntimeException(task + " computation provoked an IOException on file [" + baseName + "].", e);
} catch (InterruptedException e) {
throw new RuntimeException(task + " computation interrupted on file [" + baseName + "].", e);
}
}
public static void main(String[] args) throws Exception {
/* configure log info */
org.apache.log4j.BasicConfigurator.configure();
HTSEngineTest test = new HTSEngineTest();
// generate parameters out of a hsmm voice
// test.generateParameters();
// extract mfcc from a wav file using sptk
// test.getSptkMfcc();
// extract lf0 from a wav file using sptk and snack
// test.getSptkSnackLf0();
// Synthesis with external duration and f0
// it requires ContinuousFeatureProcessors in the TARGETFEATURES file
test.synthesisWithContinuousFeatureProcessors();
// Synthesis with external duration and f0
// it requires two external files: labels file .lab and logf0 file .lf0
// The duration indicated in the lab file must correspond to the number of frames in the .lf0 file
// The lf0 file must be generated frame syncronous.
// test.synthesisWithProsodySpecificationInExternalFiles();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy