marytts.vocalizations.VocalizationSynthesizer Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2000-2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
*/
package marytts.vocalizations;
import java.io.IOException;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import marytts.datatypes.MaryXML;
import marytts.exceptions.MaryConfigurationException;
import marytts.exceptions.SynthesisException;
import marytts.features.FeatureDefinition;
import marytts.modules.synthesis.Voice;
import marytts.server.MaryProperties;
import marytts.unitselection.data.Unit;
import marytts.util.MaryUtils;
import org.apache.log4j.Logger;
import org.w3c.dom.Element;
/**
* The vocalization synthesis module.
*
* @author Sathish Pammi
*/
public class VocalizationSynthesizer {
protected VocalizationSynthesisTechnology vSynthesizer;
protected VocalizationSelector vSelector;
protected VocalizationUnitFileReader unitFileReader;
protected boolean f0ContourImposeSupport;
protected Logger logger = MaryUtils.getLogger("Vocalization Synthesizer");
public VocalizationSynthesizer(Voice voice) throws MaryConfigurationException {
if (!voice.hasVocalizationSupport()) {
throw new MaryConfigurationException("This voice " + voice.toString() + " doesn't support synthesis of vocalizations");
}
String unitFileName = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.unitfile");
try {
this.unitFileReader = new VocalizationUnitFileReader(unitFileName);
} catch (IOException e) {
throw new MaryConfigurationException("can't read unit file");
}
String intonationFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.intonationfile");
String technology = MaryProperties.getProperty("voice." + voice.getName() + ".vocalization.synthesisTechnology",
"fdpsola");
f0ContourImposeSupport = MaryProperties.getBoolean("voice." + voice.getName() + ".f0ContourImposeSupport", false);
if ("fdpsola".equals(technology)) {
String timelineFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.timeline");
vSynthesizer = new FDPSOLASynthesisTechnology(timelineFile, unitFileName, intonationFile, f0ContourImposeSupport);
} else if ("mlsa".equals(technology)) {
boolean imposePolynomialContour = MaryProperties.getBoolean("voice." + voice.getName()
+ ".vocalization.imposePolynomialContour", true);
String mlsaFeatureFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.mlsafeaturefile");
String mixedExcitationFilter = MaryProperties.getFilename("voice." + voice.getName()
+ ".vocalization.mixedexcitationfilter");
vSynthesizer = new MLSASynthesisTechnology(mlsaFeatureFile, intonationFile, mixedExcitationFilter,
imposePolynomialContour);
} else if ("hnm".equals(technology)) {
String timelineFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.timeline");
String hnmFeatureFile = MaryProperties.getFilename("voice." + voice.getName() + ".vocalization.hnmfeaturefile");
vSynthesizer = new HNMSynthesisTechnology(timelineFile, unitFileName, hnmFeatureFile, intonationFile,
f0ContourImposeSupport);
} else {
throw new MaryConfigurationException("the property 'voice." + voice.getName()
+ ".vocalization.synthesisTechnology' should be one among 'hnm', 'mlsa' and 'fdpsola'");
}
this.vSelector = new VocalizationSelector(voice);
}
/**
* Handle a request for synthesis of vocalization
*
* @param voice
* the selected voice
* @param aft
* AudioFileFormat of the output AudioInputStream
* @param domElement
* target xml element ('vocalization' element)
* @return AudioInputStream of requested vocalization it returns null if the voice doesn't support synthesis of vocalizations
* @throws Exception
* if domElement contains 'variant' attribute value is greater than available number of vocalizations
*/
public AudioInputStream synthesize(Voice voice, AudioFileFormat aft, Element domElement) throws Exception {
if (!voice.hasVocalizationSupport())
return null;
if (domElement.hasAttribute("variant")) {
return synthesizeVariant(aft, domElement);
}
if (f0ContourImposeSupport) {
return synthesizeImposedIntonation(aft, domElement);
}
return synthesizeVocalization(aft, domElement);
}
/**
* Synthesize a "variant" vocalization
*
* @param aft
* AudioFileFormat of the output AudioInputStream
* @param domElement
* target 'vocalization' xml element
* @return AudioInputStream of requested vocalization
* @throws SynthesisException
* if it can't synthesize vocalization
* @throws IllegalArgumentException
* if domElement contains 'variant' attribute value is greater than available number of vocalizations
*/
private AudioInputStream synthesizeVariant(AudioFileFormat aft, Element domElement) throws SynthesisException {
int numberOfBackChannels = unitFileReader.getNumberOfUnits();
int backchannelNumber = 0;
if (domElement.hasAttribute("variant")) {
backchannelNumber = Integer.parseInt(domElement.getAttribute("variant"));
}
if (backchannelNumber >= numberOfBackChannels) {
throw new IllegalArgumentException("This voice has " + numberOfBackChannels
+ " backchannels only. so it doesn't support unit number " + backchannelNumber);
}
return synthesizeSelectedVocalization(backchannelNumber, aft, domElement);
}
/**
* Synthesize a vocalization which fits better for given target
*
* @param aft
* AudioFileFormat of the output AudioInputStream
* @param domElement
* target 'vocalization' xml element
* @return AudioInputStream output audio
* @throws SynthesisException
* if it can't synthesize vocalization
*/
private AudioInputStream synthesizeVocalization(AudioFileFormat aft, Element domElement) throws SynthesisException {
int numberOfBackChannels = unitFileReader.getNumberOfUnits();
int backchannelNumber = vSelector.getBestMatchingCandidate(domElement);
// here it is a bug, if getBestMatchingCandidate select a backchannelNumber greater than numberOfBackChannels
assert backchannelNumber < numberOfBackChannels : "This voice has " + numberOfBackChannels
+ " backchannels only. so it doesn't support unit number " + backchannelNumber;
return synthesizeSelectedVocalization(backchannelNumber, aft, domElement);
}
/**
* Synthesize a vocalization which fits better for given target, in addition, impose intonation from closest best vocalization
* according to given feature definition for intonation selection
*
* @param aft
* AudioFileFormat of the output AudioInputStream
* @param domElement
* target 'vocalization' xml element
* @return AudioInputStream output audio
* @throws SynthesisException
* if it can't synthesize vocalization
*/
private AudioInputStream synthesizeImposedIntonation(AudioFileFormat aft, Element domElement) throws SynthesisException {
SourceTargetPair imposeF0Data = vSelector.getBestCandidatePairtoImposeF0(domElement);
int targetIndex = imposeF0Data.getTargetUnitIndex();
int sourceIndex = imposeF0Data.getSourceUnitIndex();
logger.debug("Synthesizing candidate " + sourceIndex + " with intonation contour " + targetIndex);
if (targetIndex == sourceIndex) {
return synthesizeSelectedVocalization(sourceIndex, aft, domElement);
}
return imposeF0ContourOnVocalization(sourceIndex, targetIndex, aft, domElement);
}
/**
* Impose a target f0 contour onto a (source) unit
*
* @param sourceIndex
* unit index of segmentalform unit
* @param targetIndex
* unit index of target f0 contour
* @param aft
* AudioFileFormat of the output AudioInputStream
* @param domElement
* target 'vocalization' xml element
* @return AudioInputStream of requested vocalization
* @throws SynthesisException
* if no data can be read at the given target time or if audio processing fails
*/
private AudioInputStream imposeF0ContourOnVocalization(int sourceIndex, int targetIndex, AudioFileFormat aft,
Element domElement) throws SynthesisException {
int numberOfBackChannels = unitFileReader.getNumberOfUnits();
if (targetIndex >= numberOfBackChannels) {
throw new IllegalArgumentException("This voice has " + numberOfBackChannels
+ " backchannels only. so it doesn't support unit number " + targetIndex);
}
if (sourceIndex >= numberOfBackChannels) {
throw new IllegalArgumentException("This voice has " + numberOfBackChannels
+ " backchannels only. so it doesn't support unit number " + sourceIndex);
}
VocalizationUnit bUnit = unitFileReader.getUnit(sourceIndex);
Unit[] units = bUnit.getUnits();
String[] unitNames = bUnit.getUnitNames();
long endTime = 0l;
for (int i = 0; i < units.length; i++) {
int unitDuration = units[i].duration * 1000 / unitFileReader.getSampleRate();
endTime += unitDuration;
Element element = MaryXML.createElement(domElement.getOwnerDocument(), MaryXML.PHONE);
element.setAttribute("d", Integer.toString(unitDuration));
element.setAttribute("end", Long.toString(endTime));
element.setAttribute("p", unitNames[i]);
domElement.appendChild(element);
}
return this.vSynthesizer.synthesizeUsingImposedF0(sourceIndex, targetIndex, aft);
}
/**
* Synthesize a selected vocalization
*
* @param backchannelNumber
* unit index number
* @param aft
* AudioFileFormat of the output AudioInputStream
* @param domElement
* target 'vocalization' xml element
* @return AudioInputStream output audio
* @throws SynthesisException
* if it can't synthesize vocalization
* @throws IllegalArgumentException
* if given backchannelNumber > no. of available vocalizations
*/
private AudioInputStream synthesizeSelectedVocalization(int backchannelNumber, AudioFileFormat aft, Element domElement)
throws SynthesisException {
int numberOfBackChannels = unitFileReader.getNumberOfUnits();
if (backchannelNumber >= numberOfBackChannels) {
throw new IllegalArgumentException("This voice has " + numberOfBackChannels
+ " backchannels only. so it doesn't support unit number " + backchannelNumber);
}
VocalizationUnit bUnit = unitFileReader.getUnit(backchannelNumber);
Unit[] units = bUnit.getUnits();
String[] unitNames = bUnit.getUnitNames();
long endTime = 0l;
for (int i = 0; i < units.length; i++) {
int unitDuration = units[i].duration * 1000 / unitFileReader.getSampleRate();
endTime += unitDuration;
Element element = MaryXML.createElement(domElement.getOwnerDocument(), MaryXML.PHONE);
element.setAttribute("d", Integer.toString(unitDuration));
element.setAttribute("end", Long.toString(endTime));
element.setAttribute("p", unitNames[i]);
domElement.appendChild(element);
}
return this.vSynthesizer.synthesize(backchannelNumber, aft);
}
/**
* List the possible vocalization names that are available for the given voice. These values can be used in the "name"
* attribute of the vocalization tag.
*
* @return an array of Strings, each string containing one unique vocalization name.
*/
public String[] listAvailableVocalizations() {
FeatureDefinition featureDefinition = vSelector.getFeatureDefinition();
assert featureDefinition.hasFeature("name");
int nameIndex = featureDefinition.getFeatureIndex("name");
return featureDefinition.getPossibleValues(nameIndex);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy