marytts.unitselection.interpolation.InterpolatingSynthesizer Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
*/
package marytts.unitselection.interpolation;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Iterator;
import java.util.List;
import javax.sound.sampled.AudioInputStream;
import marytts.datatypes.MaryXML;
import marytts.exceptions.SynthesisException;
import marytts.modules.synthesis.Voice;
import marytts.modules.synthesis.WaveformSynthesizer;
import marytts.signalproc.process.FramewiseMerger;
import marytts.signalproc.process.LSFInterpolator;
import marytts.unitselection.UnitSelectionVoice;
import marytts.unitselection.concat.BaseUnitConcatenator;
import marytts.unitselection.concat.UnitConcatenator;
import marytts.unitselection.select.SelectedUnit;
import marytts.unitselection.select.UnitSelector;
import marytts.util.MaryUtils;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.Datagram;
import marytts.util.data.DoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.dom.MaryDomUtils;
import org.apache.log4j.Logger;
import org.w3c.dom.Element;
/**
* @author marc
*
*/
public class InterpolatingSynthesizer implements WaveformSynthesizer {
protected Logger logger;
/**
*
*/
public InterpolatingSynthesizer() {
}
/**
* Start up the waveform synthesizer. This must be called once before calling synthesize().
*
* @throws Exception
* Exception
*/
public void startup() throws Exception {
logger = MaryUtils.getLogger("InterpolatingSynthesizer");
// Register interpolating voice:
Voice.registerVoice(new InterpolatingVoice(this, "interpolatingvoice"));
logger.info("started.");
}
/**
* Perform a power-on self test by processing some example input data.
*
* @throws Error
* if the module does not work properly.
*/
public void powerOnSelfTest() throws Error {
}
/**
* {@inheritDoc}
*
* @param tokensAndBoundaries
* tokensAndBoundaries
* @param voice
* voice
* @param outputParams
* outputParams
* @throws SynthesisException
* SynthesisException
* @return outputAudio
*/
public AudioInputStream synthesize(List tokensAndBoundaries, Voice voice, String outputParams)
throws SynthesisException {
if (tokensAndBoundaries.size() == 0)
return null;
// 1. determine the two voices involved;
Element first = (Element) tokensAndBoundaries.get(0);
Element voiceElement = (Element) MaryDomUtils.getAncestor(first, MaryXML.VOICE);
String name = voiceElement.getAttribute("name");
// name has the form: voice1 with XY% voice2
// We trust that InerpolatingVoice.hasName() has done its job to verify that
// name actually has that form.
String[] parts = name.split("\\s+");
assert parts.length == 4;
assert parts[1].equals("with");
assert parts[2].endsWith("%");
int percent = Integer.parseInt(parts[2].substring(0, parts[2].length() - 1));
Voice voice1 = Voice.getVoice(parts[0]);
assert voice1 != null;
Voice voice2 = Voice.getVoice(parts[3]);
assert voice2 != null;
// 2. do unit selection with each;
if (!(voice1 instanceof UnitSelectionVoice)) {
throw new IllegalArgumentException("Voices of type " + voice.getClass().getName() + " not supported!");
}
if (!(voice2 instanceof UnitSelectionVoice)) {
throw new IllegalArgumentException("Voices of type " + voice.getClass().getName() + " not supported!");
}
UnitSelectionVoice usv1 = (UnitSelectionVoice) voice1;
UnitSelectionVoice usv2 = (UnitSelectionVoice) voice2;
UnitSelector unitSel1 = usv1.getUnitSelector();
List selectedUnits1 = unitSel1.selectUnits(tokensAndBoundaries, voice);
UnitSelector unitSel2 = usv2.getUnitSelector();
List selectedUnits2 = unitSel2.selectUnits(tokensAndBoundaries, voice);
assert selectedUnits1.size() == selectedUnits2.size() : "Unexpected difference in number of units: "
+ selectedUnits1.size() + " vs. " + selectedUnits2.size();
int numUnits = selectedUnits1.size();
// 3. do unit concatenation with each, retrieve actual unit durations from list of units;
UnitConcatenator unitConcatenator1 = usv1.getConcatenator();
AudioInputStream audio1;
try {
audio1 = unitConcatenator1.getAudio(selectedUnits1);
} catch (IOException ioe) {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
for (Iterator selIt = selectedUnits1.iterator(); selIt.hasNext();)
pw.println(selIt.next());
throw new SynthesisException("For voice " + voice1.getName() + ", problems generating audio for unit chain: "
+ sw.toString(), ioe);
}
DoubleDataSource audioSource1 = new AudioDoubleDataSource(audio1);
UnitConcatenator unitConcatenator2 = usv2.getConcatenator();
AudioInputStream audio2;
try {
audio2 = unitConcatenator2.getAudio(selectedUnits2);
} catch (IOException ioe) {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
for (Iterator selIt = selectedUnits2.iterator(); selIt.hasNext();)
pw.println(selIt.next());
throw new SynthesisException("For voice " + voice2.getName() + ", problems generating audio for unit chain: "
+ sw.toString(), ioe);
}
DoubleDataSource audioSource2 = new AudioDoubleDataSource(audio2);
// Retrieve actual durations from list of units:
int sampleRate1 = (int) usv1.dbAudioFormat().getSampleRate();
double[] label1 = new double[numUnits];
double t1 = 0;
int sampleRate2 = (int) usv2.dbAudioFormat().getSampleRate();
double[] label2 = new double[numUnits];
double t2 = 0;
for (int i = 0; i < numUnits; i++) {
SelectedUnit u1 = selectedUnits1.get(i);
SelectedUnit u2 = selectedUnits2.get(i);
BaseUnitConcatenator.UnitData ud1 = (BaseUnitConcatenator.UnitData) u1.getConcatenationData();
int unitDuration1 = ud1.getUnitDuration();
if (unitDuration1 < 0) { // was not set by the unit concatenator, have to count ourselves
unitDuration1 = 0;
Datagram[] d = ud1.getFrames();
for (int id = 0; id < d.length; id++) {
unitDuration1 += d[id].getDuration();
}
}
t1 += unitDuration1 / (float) sampleRate1;
label1[i] = t1;
BaseUnitConcatenator.UnitData ud2 = (BaseUnitConcatenator.UnitData) u2.getConcatenationData();
int unitDuration2 = ud2.getUnitDuration();
if (unitDuration2 < 0) { // was not set by the unit concatenator, have to count ourselves
unitDuration2 = 0;
Datagram[] d = ud2.getFrames();
for (int id = 0; id < d.length; id++) {
unitDuration2 += d[id].getDuration();
}
}
t2 += unitDuration2 / (float) sampleRate2;
label2[i] = t2;
logger.debug(usv1.getName() + " [" + u1.getTarget() + "] " + label1[i] + " -- " + usv2.getName() + " ["
+ u2.getTarget() + "] " + label2[i]);
}
// 4. with these unit durations, run the LSFInterpolator on the two audio streams.
int frameLength = Integer.getInteger("signalproc.lpcanalysisresynthesis.framelength", 512).intValue();
int predictionOrder = Integer.getInteger("signalproc.lpcanalysisresynthesis.predictionorder", 20).intValue();
double r = (double) percent / 100;
assert r >= 0;
assert r <= 1;
FramewiseMerger foas = new FramewiseMerger(audioSource1, frameLength, sampleRate1, new BufferedDoubleDataSource(label1),
audioSource2, sampleRate2, new BufferedDoubleDataSource(label2), new LSFInterpolator(predictionOrder, r));
DDSAudioInputStream outputAudio = new DDSAudioInputStream(new BufferedDoubleDataSource(foas), audio1.getFormat());
return outputAudio;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy