// marytts.unitselection.analysis.HnmVoiceDataDumper Maven / Gradle / Ivy
/**
* Copyright 2010 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.unitselection.analysis;
import java.io.IOException;
import javax.sound.sampled.AudioFormat;
import marytts.exceptions.MaryConfigurationException;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechFrame;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal;
import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizedSignal;
import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizer;
import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizerParams;
import marytts.unitselection.data.HnmDatagram;
import marytts.unitselection.data.HnmTimelineReader;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.Datagram;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.MathUtils;
/**
* Convenience class to dump relevant data from a HNM unit selection voice to a Praat TextGrid and a wav file for inspection of
* timeline data in external tools (e.g. Praat, WaveSurfer, etc.)
*
* @author steiner
*
*/
public class HnmVoiceDataDumper extends VoiceDataDumper {

    /**
     * PCM format used to pack the resynthesized samples into bytes. Assigned in {@link #loadAudioTimeline(String)}; it is
     * <code>null</code> until that method has been called.
     */
    private AudioFormat audioformat;

    public HnmVoiceDataDumper() {
        super();
    }

    /**
     * {@inheritDoc}
     *
     * Also sets the {@link AudioFormat} needed in {@link #getSamples(Datagram[])}: 16 bit signed PCM, mono, big-endian, at the
     * sample rate reported by the timeline.
     *
     * @param fileName
     *            name of the HNM timeline file to open
     * @return the timeline reader opened on <code>fileName</code>
     * @throws IOException
     *             propagated from the {@link HnmTimelineReader} constructor
     * @throws MaryConfigurationException
     *             propagated from the {@link HnmTimelineReader} constructor
     */
    @Override
    protected HnmTimelineReader loadAudioTimeline(String fileName) throws IOException, MaryConfigurationException {
        HnmTimelineReader audioTimeline = new HnmTimelineReader(fileName);
        int sampleRate = audioTimeline.getSampleRate();
        this.audioformat = new AudioFormat(AudioFormat.Encoding.PCM_SIGNED, // encoding
                sampleRate, // samples per second
                16, // bits per sample
                1, // mono
                2, // nr. of bytes per frame
                sampleRate, // nr. of frames per second
                true); // big-endian
        return audioTimeline;
    }

    /**
     * {@inheritDoc}
     *
     * For {@link HnmDatagram}s, the samples must be resynthesized from the HntmSpeechFrame in each HnmDatagram. This requires
     * quite a bit of processing.
     * <p>
     * Note that the frames held by the datagrams are modified in place: their <code>tAnalysisInSeconds</code> is overwritten
     * with the running sum of the <code>deltaAnalysisTimeInSeconds</code> values.
     *
     * @param datagrams
     *            the datagrams to resynthesize; every element must be an {@link HnmDatagram}
     * @return the resynthesized audio, packed as bytes according to the audio format set up in
     *         {@link #loadAudioTimeline(String)}
     * @throws IOException
     *             if the audio stream cannot be read, or if its length cannot be held in a single byte array
     * @throws IllegalStateException
     *             if {@link #loadAudioTimeline(String)} has not been called yet, so that no audio format is available
     */
    @Override
    protected byte[] getSamples(Datagram[] datagrams) throws IOException {
        // fail early with a clear message instead of a NullPointerException further down:
        if (audioformat == null) {
            throw new IllegalStateException("Audio format is not set; loadAudioTimeline() must be called before getSamples()");
        }

        // init required objects:
        HntmSynthesizer hnmSynthesizer = new HntmSynthesizer();
        HntmAnalyzerParams hnmAnalysisParams = new HntmAnalyzerParams();
        HntmSynthesizerParams hnmSynthesisParams = new HntmSynthesizerParams();

        // get duration from datagrams:
        float originalDurationInSeconds = 0;
        for (Datagram datagram : datagrams) {
            HnmDatagram hnmDatagram = (HnmDatagram) datagram;
            originalDurationInSeconds += hnmDatagram.getFrame().deltaAnalysisTimeInSeconds;
        }

        // generate HNM signal from frames, correcting the analysis times:
        HntmSpeechSignal hnmSpeechSignal = new HntmSpeechSignal(datagrams.length, unitDB.getAudioTimeline().getSampleRate(),
                originalDurationInSeconds);
        float tAnalysisInSeconds = 0;
        for (int i = 0; i < datagrams.length; i++) {
            HntmSpeechFrame hnmSpeechFrame = ((HnmDatagram) datagrams[i]).getFrame();
            // correct analysis time (this writes into the frame object owned by the datagram):
            tAnalysisInSeconds += hnmSpeechFrame.deltaAnalysisTimeInSeconds;
            hnmSpeechFrame.tAnalysisInSeconds = tAnalysisInSeconds;
            hnmSpeechSignal.frames[i] = hnmSpeechFrame;
        }

        // synthesize signal (the four arguments between the signal and the parameter objects are passed as null):
        HntmSynthesizedSignal hnmSynthesizedSignal = hnmSynthesizer.synthesize(hnmSpeechSignal, null, null, null, null,
                hnmAnalysisParams, hnmSynthesisParams);

        // scale amplitude:
        // NOTE(review): presumably the synthesizer output is in 16 bit integer range while DDSAudioInputStream expects
        // values in [-1, 1] -- confirm against HntmSynthesizer / DDSAudioInputStream.
        double[] output = MathUtils.multiply(hnmSynthesizedSignal.output, 1.0 / 32768.0);

        // repack output into byte array:
        return packIntoBytes(output);
    }

    /**
     * Convert a signal to bytes by streaming it through a {@link DDSAudioInputStream} using {@link #audioformat}.
     *
     * @param signal
     *            the samples to convert
     * @return the bytes read from the audio stream
     * @throws IOException
     *             if reading fails, or if the stream length is unspecified or too large for one byte array
     */
    private byte[] packIntoBytes(double[] signal) throws IOException {
        BufferedDoubleDataSource buffer = new BufferedDoubleDataSource(signal);
        DDSAudioInputStream audio = new DDSAudioInputStream(buffer, audioformat);
        try {
            // compute the size in long arithmetic, so that an unspecified (negative) or oversized length is detected
            // instead of silently producing a wrong array size:
            long numBytes = audio.getFrameLength() * audioformat.getFrameSize();
            if (numBytes < 0 || numBytes > Integer.MAX_VALUE) {
                throw new IOException("Cannot buffer resynthesized audio: unsupported length of " + numBytes + " bytes");
            }
            byte[] samples = new byte[(int) numBytes];
            // a single read() is not guaranteed to fill the array, so keep reading until it is full or the stream ends:
            int offset = 0;
            while (offset < samples.length) {
                int numRead = audio.read(samples, offset, samples.length - offset);
                if (numRead <= 0) {
                    break;
                }
                offset += numRead;
            }
            return samples;
        } finally {
            audio.close();
        }
    }

    /**
     * Command line entry point: creates a dumper and passes the first argument to <code>dumpData</code>.
     *
     * @param args
     *            command line arguments; <code>args[0]</code> is handed to <code>dumpData</code> (presumably it identifies
     *            the voice to dump -- see VoiceDataDumper)
     * @throws IllegalArgumentException
     *             if no argument is given
     * @throws Exception
     *             whatever <code>dumpData</code> throws
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException("Usage: java " + HnmVoiceDataDumper.class.getName() + " <voice>");
        }
        new HnmVoiceDataDumper().dumpData(args[0]);
    }
}