marytts.modules.synthesis.HMMSynthesizer Maven / Gradle / Ivy
The newest version!
/* ----------------------------------------------------------------- */
/* The HMM-Based Speech Synthesis Engine "hts_engine API" */
/* developed by HTS Working Group */
/* http://hts-engine.sourceforge.net/ */
/* ----------------------------------------------------------------- */
/* */
/* Copyright (c) 2001-2010 Nagoya Institute of Technology */
/* Department of Computer Science */
/* */
/* 2001-2008 Tokyo Institute of Technology */
/* Interdisciplinary Graduate School of */
/* Science and Engineering */
/* */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials provided */
/* with the distribution. */
/* - Neither the name of the HTS working group nor the names of its */
/* contributors may be used to endorse or promote products derived */
/* from this software without specific prior written permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* ----------------------------------------------------------------- */
/**
* Copyright 2011 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.modules.synthesis;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.exceptions.SynthesisException;
import marytts.features.FeatureRegistry;
import marytts.features.TargetFeatureComputer;
import marytts.htsengine.HMMVoice;
import marytts.modules.HTSEngine;
import marytts.modules.MaryModule;
import marytts.modules.ModuleRegistry;
import marytts.modules.TargetFeatureLister;
import marytts.modules.synthesis.Voice.Gender;
import marytts.server.MaryProperties;
import marytts.unitselection.select.Target;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.NodeIterator;
import org.w3c.dom.traversal.TreeWalker;
/**
 * HTS-HMM synthesiser.
 * <p>
 * Java port and extension of HTS engine version 2.0. Extension: mixed excitation.
 * <p>
 * The synthesiser delegates the actual work to two MARY modules: a {@link TargetFeatureLister}, which computes the
 * target features for a list of segments and boundaries, and an {@link HTSEngine}, which turns those targets into
 * audio. Both are obtained (or created) in {@link #startup()}, which must therefore be called before any other
 * method: the logger and both modules are only initialised there.
 *
 * @author Marc Schröder, Marcela Charfuelan
 */
public class HMMSynthesizer implements WaveformSynthesizer {

    /** Module used to compute the target features; set in {@link #startup()}. */
    private TargetFeatureLister targetFeatureLister;

    /** Module used to generate audio from target features; set in {@link #startup()}. */
    private HTSEngine htsEngine;

    /** Logger named after {@link #toString()}; set in {@link #startup()}. */
    private Logger logger;

    public HMMSynthesizer() {
    }

    /**
     * Prepare this synthesiser for use.
     * <p>
     * Looks up the {@link TargetFeatureLister} and {@link HTSEngine} modules in the {@link ModuleRegistry}. If a module
     * is not available, a private instance is created and started; if it is available but offline, it is started.
     * Afterwards, one {@link HMMVoice} is created and registered for every voice name listed in the property
     * <code>hmm.voices.list</code>.
     *
     * @throws Exception
     *             if starting one of the modules or creating one of the voices fails.
     */
    public void startup() throws Exception {
        logger = MaryUtils.getLogger(this.toString());

        // Try to get instances of our tools from Mary; if we cannot get them,
        // instantiate new objects.
        try {
            targetFeatureLister = (TargetFeatureLister) ModuleRegistry.getModule(TargetFeatureLister.class);
        } catch (NullPointerException npe) {
            // Deliberate best effort: a failing lookup is treated like "module not registered".
            targetFeatureLister = null;
        }
        if (targetFeatureLister == null) {
            logger.info("Starting my own TargetFeatureLister");
            targetFeatureLister = new TargetFeatureLister();
            targetFeatureLister.startup();
        } else if (targetFeatureLister.getState() == MaryModule.MODULE_OFFLINE) {
            targetFeatureLister.startup();
        }

        try {
            htsEngine = (HTSEngine) ModuleRegistry.getModule(HTSEngine.class);
        } catch (NullPointerException npe) {
            // Deliberate best effort: a failing lookup is treated like "module not registered".
            htsEngine = null;
        }
        if (htsEngine == null) {
            logger.info("Starting my own HTSEngine");
            htsEngine = new HTSEngine();
            htsEngine.startup();
        } else if (htsEngine.getState() == MaryModule.MODULE_OFFLINE) {
            htsEngine.startup();
        }

        // Register HMM voices:
        List<String> voiceNames = MaryProperties.getList("hmm.voices.list");
        for (String voiceName : voiceNames) {
            logger.debug("Voice '" + voiceName + "'");
            // When creating a HMMVoice object it should create and initialise a TreeSet ts, a ModelSet ms
            // and load the context feature list used in this voice.
            HMMVoice v = new HMMVoice(voiceName, this);
            Voice.registerVoice(v);
        }
        logger.info("started.");
    }

    /**
     * Perform a power-on self test by processing some example input data.
     * <p>
     * The test uses the first voice registered for this synthesiser and the ACOUSTPARAMS example text for that voice's
     * locale. If this synthesiser has no voices the method returns immediately; if there is no example text the test is
     * skipped with a debug message.
     *
     * @throws Error
     *             if the module does not work properly; the original failure is attached as the cause.
     */
    public synchronized void powerOnSelfTest() throws Error {
        logger.info("Starting power-on self test.");
        try {
            Collection<Voice> myVoices = Voice.getAvailableVoices(this);
            if (myVoices.isEmpty()) {
                return;
            }
            Voice v = myVoices.iterator().next();
            MaryData in = new MaryData(MaryDataType.ACOUSTPARAMS, v.getLocale());
            String exampleText = MaryDataType.ACOUSTPARAMS.exampleText(v.getLocale());
            if (exampleText != null) {
                in.readFrom(new StringReader(exampleText));
                in.setDefaultVoice(v);
                assert v instanceof HMMVoice : "Expected voice to be a HMMVoice, but it is a " + v.getClass().toString();

                // Obtain the target feature computer matching the feature set of this voice
                String features = ((HMMVoice) v).getHMMData().getFeatureDefinition().getFeatureNames();
                TargetFeatureComputer comp = FeatureRegistry.getTargetFeatureComputer(v, features);
                in.setOutputParams(features);
                Document doc = in.getDocument();

                // First, get the list of segments and boundaries in the current document
                TreeWalker tw = MaryDomUtils.createTreeWalker(doc, doc, MaryXML.PHONE, MaryXML.BOUNDARY);
                List<Element> segmentsAndBoundaries = new ArrayList<Element>();
                Element e;
                while ((e = (Element) tw.nextNode()) != null) {
                    segmentsAndBoundaries.add(e);
                }

                List<Target> targetFeaturesList = targetFeatureLister.getListTargetFeatures(comp, segmentsAndBoundaries);

                // The actual durations are already fixed in htsEngine.process();
                // segments and boundaries are passed so that the realised acoustparams, dur and f0 can be updated.
                // There is no token list in the self test, hence the null.
                MaryData audio = htsEngine.process(in, targetFeaturesList, segmentsAndBoundaries, null);
                assert audio.getAudio() != null;
            } else {
                logger.debug("No example text -- no power-on self test!");
            }
        } catch (Throwable t) {
            throw new Error("Module " + toString() + ": Power-on self test failed.", t);
        }
        logger.info("Power-on self test complete.");
    }

    /**
     * @return the fixed name "HMMSynthesizer", which is also used as the logger name.
     */
    @Override
    public String toString() {
        return "HMMSynthesizer";
    }

    /**
     * {@inheritDoc}
     * <p>
     * Boundary elements are used as they are; every other element is treated as a token and replaced by the phone
     * elements found below it. The target features for the resulting list are computed with the feature set of the
     * given voice and passed to the HTS engine.
     *
     * @param tokensAndBoundaries
     *            the token and boundary elements of one sentence.
     * @param voice
     *            the voice to synthesise with; its synthesizer must be this object.
     * @param outputParams
     *            not used by this implementation.
     * @return the audio produced by the HTS engine.
     * @throws IllegalArgumentException
     *             if <code>voice</code> does not belong to this synthesiser.
     * @throws SynthesisException
     *             if feature computation or waveform generation fails; the original exception is the cause.
     */
    public AudioInputStream synthesize(List<Element> tokensAndBoundaries, Voice voice, String outputParams)
            throws SynthesisException {
        if (!voice.synthesizer().equals(this)) {
            throw new IllegalArgumentException("Voice " + voice.getName() + " is not an HMM voice.");
        }
        logger.info("Synthesizing one sentence.");

        // from tokens and boundaries, extract segments and boundaries:
        List<Element> segmentsAndBoundaries = new ArrayList<Element>();
        Document doc = null;
        for (Element tOrB : tokensAndBoundaries) {
            if (tOrB.getTagName().equals(MaryXML.BOUNDARY)) {
                segmentsAndBoundaries.add(tOrB);
            } else { // a token -- add all segments below it
                if (doc == null) {
                    // the owner document is looked up once, from the first token encountered
                    doc = tOrB.getOwnerDocument();
                }
                NodeIterator ni = MaryDomUtils.createNodeIterator(doc, tOrB, MaryXML.PHONE);
                Element s;
                while ((s = (Element) ni.nextNode()) != null) {
                    segmentsAndBoundaries.add(s);
                }
            }
        }

        try {
            assert voice instanceof HMMVoice : "Expected voice to be a HMMVoice, but it is a " + voice.getClass().toString();

            // The feature computer is looked up on every call. According to the original author's note, doing it
            // only once in powerOnSelfTest() did not work because the computer of the default voice was used.
            // TODO check whether a per-voice cache would be sufficient.
            String features = ((HMMVoice) voice).getHMMData().getFeatureDefinition().getFeatureNames();
            TargetFeatureComputer comp = FeatureRegistry.getTargetFeatureComputer(voice, features);

            MaryData d = new MaryData(targetFeatureLister.outputType(), voice.getLocale());
            d.setDefaultVoice(voice);

            // Targets are passed as a list rather than being serialised to a feature string first.
            List<Target> targetFeaturesList = targetFeatureLister.getListTargetFeatures(comp, segmentsAndBoundaries);

            // The actual durations are already fixed in htsEngine.process();
            // segments and boundaries are passed so that the realised acoustparams, dur and f0 can be updated.
            MaryData audio = htsEngine.process(d, targetFeaturesList, segmentsAndBoundaries, tokensAndBoundaries);
            return audio.getAudio();
        } catch (Exception e) {
            throw new SynthesisException("HMM Synthesiser could not synthesise: ", e);
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy