marytts.unitselection.UnitSelectionSynthesizer Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
*/
package marytts.unitselection;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.exceptions.SynthesisException;
import marytts.modules.synthesis.Voice;
import marytts.modules.synthesis.WaveformSynthesizer;
import marytts.modules.synthesis.Voice.Gender;
import marytts.server.MaryProperties;
import marytts.unitselection.concat.UnitConcatenator;
import marytts.unitselection.concat.BaseUnitConcatenator.UnitData;
import marytts.unitselection.data.Unit;
import marytts.unitselection.data.UnitDatabase;
import marytts.unitselection.select.HalfPhoneTarget;
import marytts.unitselection.select.SelectedUnit;
import marytts.unitselection.select.Target;
import marytts.unitselection.select.UnitSelector;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryNormalisedWriter;
import marytts.util.dom.NameNodeFilter;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.TreeWalker;
/**
* Builds and synthesizes unit selection voices
*
* @author Marc Schröder, Anna Hunecke
*
*/
public class UnitSelectionSynthesizer implements WaveformSynthesizer {
/**
* A map with Voice objects as keys, and Lists of UtteranceProcessors as values. Idea: For a given voice, find the list of
* utterance processors to apply.
*/
private Logger logger;
public UnitSelectionSynthesizer() {
}
/**
* Start up the waveform synthesizer. This must be called once before calling synthesize().
*
* @throws Exception
* Exception
*/
public void startup() throws Exception {
logger = MaryUtils.getLogger("UnitSelectionSynthesizer");
// Register UnitSelection voices:
logger.debug("Register UnitSelection voices:");
List voiceNames = MaryProperties.getList("unitselection.voices.list");
for (String voiceName : voiceNames) {
long time = System.currentTimeMillis();
Voice unitSelVoice = new UnitSelectionVoice(voiceName, this);
logger.debug("Voice '" + unitSelVoice + "'");
Voice.registerVoice(unitSelVoice);
long newtime = System.currentTimeMillis() - time;
logger.info("Loading of voice " + voiceName + " took " + newtime + " milliseconds");
}
logger.info("started.");
}
/**
* Perform a power-on self test by processing some example input data.
*
* @throws Error
* if the module does not work properly.
*/
public void powerOnSelfTest() throws Error {
try {
Collection myVoices = Voice.getAvailableVoices(this);
if (myVoices.size() == 0) {
return;
}
UnitSelectionVoice unitSelVoice = (UnitSelectionVoice) myVoices.iterator().next();
assert unitSelVoice != null;
MaryData in = new MaryData(MaryDataType.get("ACOUSTPARAMS"), unitSelVoice.getLocale());
if (!unitSelVoice.getDomain().equals("general")) {
logger.info("Cannot perform power-on self test using limited-domain voice '" + unitSelVoice.getName()
+ "' - skipping.");
return;
}
String exampleText = MaryDataType.ACOUSTPARAMS.exampleText(unitSelVoice.getLocale());
if (exampleText != null) {
in.readFrom(new StringReader(exampleText));
in.setDefaultVoice(unitSelVoice);
if (in == null) {
System.out.println(exampleText + " is null");
}
List tokensAndBoundaries = new ArrayList();
TreeWalker tw = ((DocumentTraversal) in.getDocument()).createTreeWalker(in.getDocument(),
NodeFilter.SHOW_ELEMENT, new NameNodeFilter(new String[] { MaryXML.TOKEN, MaryXML.BOUNDARY }), false);
Element el = null;
while ((el = (Element) tw.nextNode()) != null)
tokensAndBoundaries.add(el);
AudioInputStream ais = synthesize(tokensAndBoundaries, unitSelVoice, null);
assert ais != null;
} else {
logger.debug("No example text -- no power-on self test!");
}
} catch (Throwable t) {
t.printStackTrace();
throw new Error("Module " + toString() + ": Power-on self test failed.", t);
}
logger.info("Power-on self test complete.");
}
/**
* {@inheritDoc}
*
* @param tokensAndBoundaries
* tokensAndBoundaries
* @param voice
* voice
* @param outputParams
* outputParams
* @throws SynthesisException
* SynthesisException
* @return audio
*/
public AudioInputStream synthesize(List tokensAndBoundaries, Voice voice, String outputParams)
throws SynthesisException {
assert voice instanceof UnitSelectionVoice;
UnitSelectionVoice v = (UnitSelectionVoice) voice;
UnitDatabase udb = v.getDatabase();
// Select:
UnitSelector unitSel = v.getUnitSelector();
UnitConcatenator unitConcatenator;
if (outputParams != null && outputParams.contains("MODIFICATION")) {
unitConcatenator = v.getModificationConcatenator();
} else {
unitConcatenator = v.getConcatenator();
}
// TODO: check if we actually need to access v.getDatabase() here
UnitDatabase database = v.getDatabase();
logger.debug("Selecting units with a " + unitSel.getClass().getName() + " from a " + database.getClass().getName());
List selectedUnits = unitSel.selectUnits(tokensAndBoundaries, voice);
// if (logger.getEffectiveLevel().equals(Level.DEBUG)) {
// StringWriter sw = new StringWriter();
// PrintWriter pw = new PrintWriter(sw);
// for (Iterator selIt=selectedUnits.iterator(); selIt.hasNext(); )
// pw.println(selIt.next());
// logger.debug("Units selected:\n"+sw.toString());
// }
// Concatenate:
logger.debug("Now creating audio with a " + unitConcatenator.getClass().getName());
AudioInputStream audio = null;
try {
audio = unitConcatenator.getAudio(selectedUnits);
} catch (IOException ioe) {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
for (Iterator selIt = selectedUnits.iterator(); selIt.hasNext();)
pw.println(selIt.next());
throw new SynthesisException("Problems generating audio for unit chain: " + sw.toString(), ioe);
}
// Propagate unit durations to XML tree:
float endInSeconds = 0;
float durLeftHalfInSeconds = 0;
String unitString = "";
String unitAttrName = "units"; // name of the attribute that is added for unit selection diagnostics
for (SelectedUnit su : selectedUnits) {
Target t = su.getTarget();
boolean halfphone = (t instanceof HalfPhoneTarget);
Object concatenationData = su.getConcatenationData();
assert concatenationData instanceof UnitData;
UnitData unitData = (UnitData) concatenationData;
Unit unit = su.getUnit();
// For the unit durations, keep record in floats because of precision;
// convert to millis only at export time, and re-compute duration in millis
// from the end in millis, to avoid discrepancies due to rounding
int unitDurationInSamples = unitData.getUnitDuration();
float unitDurationInSeconds = unitDurationInSamples / (float) database.getUnitFileReader().getSampleRate();
int prevEndInMillis = (int) (1000 * endInSeconds);
endInSeconds += unitDurationInSeconds;
int endInMillis = (int) (1000 * endInSeconds);
int unitDurationInMillis = endInMillis - prevEndInMillis;
unitString = t.getName() + " " + udb.getFilename(unit) + " " + unit.index + " " + unitDurationInSeconds;
if (halfphone) {
if (((HalfPhoneTarget) t).isLeftHalf()) {
durLeftHalfInSeconds = unitDurationInSeconds;
} else { // right half
// re-compute unit duration from both halves
float totalUnitDurInSeconds = durLeftHalfInSeconds + unitDurationInSeconds;
float prevEndInSeconds = endInSeconds - totalUnitDurInSeconds;
prevEndInMillis = (int) (1000 * prevEndInSeconds);
unitDurationInMillis = endInMillis - prevEndInMillis;
durLeftHalfInSeconds = 0;
}
}
Element maryxmlElement = t.getMaryxmlElement();
if (maryxmlElement != null) {
if (maryxmlElement.getNodeName().equals(MaryXML.PHONE)) {
if (!maryxmlElement.hasAttribute("d") || !maryxmlElement.hasAttribute("end")) {
throw new IllegalStateException("No duration information in MaryXML -- check log file"
+ " for messages warning about unloadable acoustic models"
+ " instead of voice-specific acoustic feature predictors");
}
// int oldD = Integer.parseInt(maryxmlElement.getAttribute("d"));
// int oldEnd = Integer.parseInt(maryxmlElement.getAttribute("end"));
// double doubleEnd = Double.parseDouble(maryxmlElement.getAttribute("end"));
// int oldEnd = (int)(doubleEnd * 1000);
maryxmlElement.setAttribute("d", String.valueOf(unitDurationInMillis));
maryxmlElement.setAttribute("end", String.valueOf(endInSeconds));
// the following messes up all end values!
// if (oldEnd == oldD) {
// // start new end computation
// endInSeconds = unitDurationInSeconds;
// }
} else { // not a PHONE
assert maryxmlElement.getNodeName().equals(MaryXML.BOUNDARY);
maryxmlElement.setAttribute("duration", String.valueOf(unitDurationInMillis));
}
if (maryxmlElement.hasAttribute(unitAttrName)) {
String prevUnitString = maryxmlElement.getAttribute(unitAttrName);
maryxmlElement.setAttribute(unitAttrName, prevUnitString + "; " + unitString);
} else {
maryxmlElement.setAttribute(unitAttrName, unitString);
}
} else {
logger.debug("Unit " + su.getTarget().getName() + " of length " + unitDurationInMillis
+ " ms has no maryxml element.");
}
}
if (logger.getEffectiveLevel().equals(Level.DEBUG)) {
try {
MaryNormalisedWriter writer = new MaryNormalisedWriter();
ByteArrayOutputStream debugOut = new ByteArrayOutputStream();
writer.output(tokensAndBoundaries.get(0).getOwnerDocument(), debugOut);
logger.debug("Propagating the realised unit durations to the XML tree: \n" + debugOut.toString());
} catch (Exception e) {
logger.warn("Problem writing XML to logfile: " + e);
}
}
return audio;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy