marytts.tools.voiceimport.EndpointDetector Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2000-2009 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.tools.voiceimport;
import java.io.File;
import java.io.IOException;
import java.util.SortedMap;
import java.util.TreeMap;
import javax.sound.sampled.UnsupportedAudioFileException;
import marytts.util.data.audio.AudioConverterUtils;
import marytts.util.io.BasenameList;
import marytts.util.math.MathUtils;
/**
 * Identify and remove endpoints (initial and final silences) from a given set of wave files.
 * <p>
 * Every file with the extension {@link #waveExt} found in the configured input directory is passed to
 * {@code AudioConverterUtils.removeEndpoints} and the result is written, under the same file name, to the
 * configured output directory.
 *
 * @author Sathish and Oytun
 *
 */
public class EndpointDetector extends VoiceImportComponent {
    /** Not used by this class itself; kept because it is visible to subclasses. */
    protected File textDir;
    /** Directory the wave files are read from; assigned in {@link #compute()}. */
    protected File inputWavDir;
    /** Directory the processed wave files are written to; assigned in {@link #compute()}. */
    protected File outputWavDir;
    /** File extension (including the dot) used to collect input files and to name output files. */
    protected String waveExt = ".wav";
    /** Basenames of the wave files found in {@link #inputWavDir}. */
    private BasenameList bnlist;
    /** Database layout handed in through {@link #getDefaultProps(DatabaseLayout)}. */
    protected DatabaseLayout db = null;
    /** Progress of the current or last {@link #compute()} run, in percent. */
    protected int percent = 0;

    // Property keys of this component.
    public String INPUTWAVDIR = "EndpointDetector.inputWaveDirectory";
    public String OUTPUTWAVDIR = "EndpointDetector.outputWaveDirectory";
    public String ENERGYBUFFERLENGTH = "EndpointDetector.energyBufferLength";
    public String SPEECHSTARTLIKELIHOOD = "EndpointDetector.speechStartLikelihood";
    public String SPEECHENDLIKELIHOOD = "EndpointDetector.speechEndLikelihood";
    public String SHIFTFROMMINIMUMENERGYCENTER = "EndpointDetector.shiftFromMinimumEnergyCenter";
    public String NUMENERGYCLUSTERS = "EndpointDetector.numEnergyClusters";
    public String MINIMUMSTARTSILENCEINSECONDS = "EndpointDetector.minimumStartSilenceInSeconds";
    public String MINIMUMENDSILENCEINSECONDS = "EndpointDetector.minimumEndSilenceInSeconds";

    /**
     * @return the name of this component, {@code "EndpointDetector"}.
     */
    public String getName() {
        return "EndpointDetector";
    }

    /**
     * Remember the database layout and, on the first call only, fill the property map with default values.
     * The {@code props} map is not declared in this class (presumably inherited from
     * {@code VoiceImportComponent}).
     *
     * @param theDb
     *            database layout; its root directory property is used as the prefix of the default input and
     *            output directories
     * @return the property map of this component
     */
    public SortedMap<String, String> getDefaultProps(DatabaseLayout theDb) {
        this.db = theDb;
        if (props == null) {
            props = new TreeMap<String, String>();
            // The root directory property is used as a plain prefix, exactly as before.
            String rootDir = db.getProp(db.ROOTDIR);
            props.put(INPUTWAVDIR, rootDir + "inputwav" + File.separator);
            props.put(OUTPUTWAVDIR, rootDir + "outputwav" + File.separator);
            props.put(ENERGYBUFFERLENGTH, "20");
            props.put(SPEECHSTARTLIKELIHOOD, "0.1");
            props.put(SPEECHENDLIKELIHOOD, "0.1");
            props.put(SHIFTFROMMINIMUMENERGYCENTER, "0.0");
            props.put(NUMENERGYCLUSTERS, "4");
            props.put(MINIMUMSTARTSILENCEINSECONDS, "1.0");
            props.put(MINIMUMENDSILENCEINSECONDS, "1.0");
        }
        return props;
    }

    /**
     * Fill the help map with one description per property. The ranges quoted in the texts are the limits that
     * {@link #compute()} clamps the values to.
     */
    protected void setupHelp() {
        props2Help = new TreeMap<String, String>();
        props2Help.put(INPUTWAVDIR, "input wave files directory.");
        props2Help.put(OUTPUTWAVDIR, "output directory to store initial-end silences removed wave files. "
                + "Will be created if it does not exist");
        props2Help.put(ENERGYBUFFERLENGTH,
                "number of consecutive speech frames when searching for speech/silence start events. "
                        + "Range [1, 1000], decrease to detect more events");
        props2Help.put(SPEECHSTARTLIKELIHOOD, "likelihood of speech starting event. "
                + "Range [0.0,1.0], decrease to get more silence before speech segments");
        props2Help.put(SPEECHENDLIKELIHOOD, "likelihood of speech ending event. "
                + "Range [0.0,1.0], decrease to get more silence after speech segments");
        props2Help.put(SHIFTFROMMINIMUMENERGYCENTER,
                "multiplied by lowest energy cluster mean to generate speech/silence energy threshold. "
                        + "Range [0.0,5.0], decrease to get more silence in speech segments");
        props2Help.put(NUMENERGYCLUSTERS, "number of energy clusters. "
                + "Range [1,20], decrease to get more silence in speech segments");
        props2Help.put(MINIMUMSTARTSILENCEINSECONDS, "minimum silence in the beginning of the output files in seconds. "
                + "Range [0.0,30.0], increase to get more silence in the beginning");
        props2Help.put(MINIMUMENDSILENCEINSECONDS, "minimum silence at the end of the output files in seconds. "
                + "Range [0.0,30.0], increase to get more silence at the end");
    }

    /**
     * Remove the endpoints of every wave file in the input directory and write the results to the output
     * directory, creating the output directory (including missing parents) if necessary.
     *
     * @return {@code true} once all files have been processed
     * @throws IOException
     *             declared for the underlying audio processing
     * @throws UnsupportedAudioFileException
     *             declared for the underlying audio processing
     * @throws Error
     *             if the input directory does not exist or the output directory cannot be created
     * @throws NumberFormatException
     *             if a numeric property is missing or cannot be parsed
     */
    public boolean compute() throws IOException, UnsupportedAudioFileException {
        percent = 0;

        // The input location must be an existing directory; a plain file would only fail later.
        inputWavDir = new File(getProp(INPUTWAVDIR));
        if (!inputWavDir.isDirectory()) {
            throw new Error("Could not find input Directory: " + getProp(INPUTWAVDIR));
        }

        // Create the output directory when it is missing. mkdirs() also creates missing parent
        // directories; the isDirectory() re-check tolerates the directory appearing concurrently.
        outputWavDir = new File(getProp(OUTPUTWAVDIR));
        if (!outputWavDir.exists()) {
            System.out.print(OUTPUTWAVDIR + " " + getProp(OUTPUTWAVDIR) + " does not exist; ");
            if (!outputWavDir.mkdirs() && !outputWavDir.isDirectory()) {
                throw new Error("Could not create OUTPUTWAVDIR");
            }
            System.out.print("Created successfully.\n");
        }

        // Automatically collect all ".wav" files from given directory
        bnlist = new BasenameList(inputWavDir + File.separator, waveExt);

        // Read the tuning parameters, each clamped to the range documented in setupHelp().
        int energyBufferLength = intProp(ENERGYBUFFERLENGTH, 1, 1000);
        double speechStartLikelihood = doubleProp(SPEECHSTARTLIKELIHOOD, 0.0, 1.0);
        double speechEndLikelihood = doubleProp(SPEECHENDLIKELIHOOD, 0.0, 1.0);
        double shiftFromMinimumEnergyCenter = doubleProp(SHIFTFROMMINIMUMENERGYCENTER, 0.0, 5.0);
        int numClusters = intProp(NUMENERGYCLUSTERS, 1, 20);
        double minimumStartSilenceInSeconds = doubleProp(MINIMUMSTARTSILENCEINSECONDS, 0.0, 30.0);
        double minimumEndSilenceInSeconds = doubleProp(MINIMUMENDSILENCEINSECONDS, 0.0, 30.0);

        int numFiles = bnlist.getLength();
        System.out.println("Removing endpoints for " + numFiles + " wave files");
        for (int i = 0; i < numFiles; i++) {
            percent = 100 * i / numFiles;
            String basename = bnlist.getName(i);
            String inputFile = inputWavDir + File.separator + basename + waveExt;
            String outputFile = outputWavDir + File.separator + basename + waveExt;
            AudioConverterUtils.removeEndpoints(inputFile, outputFile, energyBufferLength, speechStartLikelihood,
                    speechEndLikelihood, shiftFromMinimumEnergyCenter, numClusters, minimumStartSilenceInSeconds,
                    minimumEndSilenceInSeconds);
            System.out.println(" " + basename);
        }
        // Report completion; without this the progress would stay below 100 (or at 0 for an empty list).
        percent = 100;
        System.out.println("...Done.");
        return true;
    }

    /**
     * Read an integer property and clamp it to {@code [min, max]}.
     */
    private int intProp(String key, int min, int max) {
        String raw = getProp(key);
        if (raw == null) {
            throw new NumberFormatException("Property " + key + " is not set");
        }
        try {
            return MathUtils.CheckLimits(Integer.parseInt(raw.trim()), min, max);
        } catch (NumberFormatException e) {
            NumberFormatException named = new NumberFormatException(
                    "Property " + key + " is not a valid integer: \"" + raw + "\"");
            named.initCause(e);
            throw named;
        }
    }

    /**
     * Read a floating point property and clamp it to {@code [min, max]}.
     */
    private double doubleProp(String key, double min, double max) {
        String raw = getProp(key);
        if (raw == null) {
            throw new NumberFormatException("Property " + key + " is not set");
        }
        try {
            return MathUtils.CheckLimits(Double.parseDouble(raw.trim()), min, max);
        } catch (NumberFormatException e) {
            NumberFormatException named = new NumberFormatException(
                    "Property " + key + " is not a valid number: \"" + raw + "\"");
            named.initCause(e);
            throw named;
        }
    }

    /**
     * Provide the progress of computation, in percent, or -1 if that feature is not implemented.
     *
     * @return -1 if not implemented, or an integer between 0 and 100.
     */
    public int getProgress() {
        return percent;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy