marytts.unitselection.data.HnmTimelineReader Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*
*/
package marytts.unitselection.data;
import java.io.ByteArrayInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.LinkedList;
import java.util.Properties;
import javax.sound.sampled.UnsupportedAudioFileException;
import marytts.exceptions.MaryConfigurationException;
import marytts.signalproc.adaptation.prosody.BasicProsodyModifierParams;
import marytts.signalproc.sinusoidal.hntm.analysis.FrameNoisePartWaveform;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechFrame;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal;
import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizedSignal;
import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizer;
import marytts.signalproc.sinusoidal.hntm.synthesis.HntmSynthesizerParams;
import marytts.util.data.Datagram;
import marytts.util.io.FileUtils;
import marytts.util.math.ArrayUtils;
import marytts.util.math.MathUtils;
/**
* A reader class for the harmonics plus noise timeline file.
*
* @author Oytun Türk
*
*/
public class HnmTimelineReader extends TimelineReader {
public HntmAnalyzerParams analysisParams;
public HnmTimelineReader(String fileName) throws IOException, MaryConfigurationException {
super();
load(fileName);
}
protected void load(String fileName) throws IOException, MaryConfigurationException {
super.load(fileName);
// Now make sense of the processing header
Properties props = new Properties();
ByteArrayInputStream bais = new ByteArrayInputStream(procHdr.getString().getBytes("latin1"));
props.load(bais);
ensurePresent(props, "hnm.noiseModel");
analysisParams = new HntmAnalyzerParams();
analysisParams.noiseModel = Integer.parseInt(props.getProperty("hnm.noiseModel"));
analysisParams.hnmPitchVoicingAnalyzerParams.numFilteringStages = Integer
.parseInt(props.getProperty("hnm.numFiltStages"));
analysisParams.hnmPitchVoicingAnalyzerParams.medianFilterLength = Integer
.parseInt(props.getProperty("hnm.medianFiltLen"));
analysisParams.hnmPitchVoicingAnalyzerParams.movingAverageFilterLength = Integer.parseInt(props
.getProperty("hnm.maFiltLen"));
analysisParams.hnmPitchVoicingAnalyzerParams.cumulativeAmpThreshold = Float.parseFloat(props.getProperty("hnm.cumAmpTh"));
analysisParams.hnmPitchVoicingAnalyzerParams.maximumAmpThresholdInDB = Float
.parseFloat(props.getProperty("hnm.maxAmpTh"));
analysisParams.hnmPitchVoicingAnalyzerParams.harmonicDeviationPercent = Float.parseFloat(props
.getProperty("hnm.harmDevPercent"));
analysisParams.hnmPitchVoicingAnalyzerParams.sharpPeakAmpDiffInDB = Float.parseFloat(props
.getProperty("hnm.sharpPeakAmpDiff"));
analysisParams.hnmPitchVoicingAnalyzerParams.minimumTotalHarmonics = Integer.parseInt(props
.getProperty("hnm.minHarmonics"));
analysisParams.hnmPitchVoicingAnalyzerParams.maximumTotalHarmonics = Integer.parseInt(props
.getProperty("hnm.maxHarmonics"));
analysisParams.hnmPitchVoicingAnalyzerParams.minimumVoicedFrequencyOfVoicing = Float.parseFloat(props
.getProperty("hnm.minVoicedFreq"));
analysisParams.hnmPitchVoicingAnalyzerParams.maximumVoicedFrequencyOfVoicing = Float.parseFloat(props
.getProperty("hnm.maxVoicedFreq"));
analysisParams.hnmPitchVoicingAnalyzerParams.maximumFrequencyOfVoicingFinalShift = Float.parseFloat(props
.getProperty("hnm.maxFreqVoicingFinalShift"));
analysisParams.hnmPitchVoicingAnalyzerParams.neighsPercent = Float.parseFloat(props.getProperty("hnm.neighsPercent"));
analysisParams.harmonicPartCepstrumOrder = Integer.parseInt(props.getProperty("hnm.harmCepsOrder"));
analysisParams.regularizedCepstrumWarpingMethod = Integer.parseInt(props.getProperty("hnm.regCepWarpMethod"));
analysisParams.regularizedCepstrumLambdaHarmonic = Float.parseFloat(props.getProperty("hnm.regCepsLambda"));
analysisParams.noisePartLpOrder = Integer.parseInt(props.getProperty("hnm.noiseLpOrder"));
analysisParams.preemphasisCoefNoise = Float.parseFloat(props.getProperty("hnm.preCoefNoise"));
analysisParams.hpfBeforeNoiseAnalysis = Boolean.parseBoolean(props.getProperty("hnm.hpfBeforeNoiseAnalysis"));
analysisParams.numPeriodsHarmonicsExtraction = Float.parseFloat(props.getProperty("hnm.harmNumPer"));
}
private void ensurePresent(Properties props, String key) throws MaryConfigurationException {
if (!props.containsKey(key))
throw new MaryConfigurationException("Processing header does not contain required field '" + key + "'");
}
/**
* {@inheritDoc}
*
* @param bb
* bb
* @return d
*/
@Override
protected Datagram getNextDatagram(ByteBuffer bb) {
Datagram d = null;
/* If the end of the datagram zone is reached, refuse to read */
if (bb.position() == bb.limit()) {
// throw new IndexOutOfBoundsException( "Time out of bounds: you are trying to read a datagram at" +
// " a time which is bigger than the total timeline duration." );
return null;
}
/* Else, pop the datagram out of the file */
try {
d = new HnmDatagram(bb, analysisParams.noiseModel);
}
/* Detect a possible EOF encounter */
catch (IOException e) {
return null;
}
return d;
}
private void testSynthesizeFromDatagrams(LinkedList datagrams, int startIndex, int endIndex,
DataOutputStream output) throws IOException {
HntmSynthesizer s = new HntmSynthesizer();
// TODO: These should come from timeline and user choices...
HntmAnalyzerParams hnmAnalysisParams = new HntmAnalyzerParams();
HntmSynthesizerParams synthesisParams = new HntmSynthesizerParams();
BasicProsodyModifierParams pmodParams = new BasicProsodyModifierParams();
int samplingRateInHz = this.getSampleRate();
int totalFrm = 0;
int i;
float originalDurationInSeconds = 0.0f;
float deltaTimeInSeconds;
for (i = startIndex; i <= endIndex; i++) {
HnmDatagram datagram;
try {
datagram = datagrams.get(i);
} catch (IndexOutOfBoundsException e) {
throw e;
}
if (datagram != null && datagram instanceof HnmDatagram) {
totalFrm++;
// deltaTimeInSeconds = SignalProcUtils.sample2time(((HnmDatagram)datagrams.get(i)).getDuration(),
// samplingRateInHz);
deltaTimeInSeconds = datagram.frame.deltaAnalysisTimeInSeconds;
originalDurationInSeconds += deltaTimeInSeconds;
}
}
HntmSpeechSignal hnmSignal = new HntmSpeechSignal(totalFrm, samplingRateInHz, originalDurationInSeconds);
int frameCount = 0;
float tAnalysisInSeconds = 0.0f;
for (i = startIndex; i <= endIndex; i++) {
HnmDatagram datagram;
try {
datagram = datagrams.get(i);
} catch (IndexOutOfBoundsException e) {
throw e;
}
if (datagram != null && datagram instanceof HnmDatagram) {
// tAnalysisInSeconds += SignalProcUtils.sample2time(((HnmDatagram)datagrams.get(i)).getDuration(),
// samplingRateInHz);
tAnalysisInSeconds += datagram.getFrame().deltaAnalysisTimeInSeconds;
if (frameCount < totalFrm) {
hnmSignal.frames[frameCount] = new HntmSpeechFrame(datagram.getFrame());
hnmSignal.frames[frameCount].tAnalysisInSeconds = tAnalysisInSeconds;
frameCount++;
}
}
}
HntmSynthesizedSignal ss = null;
if (totalFrm > 0) {
ss = s.synthesize(hnmSignal, null, null, pmodParams, null, hnmAnalysisParams, synthesisParams);
FileUtils.writeBinaryFile(ArrayUtils.copyDouble2Short(ss.output), output);
if (ss.output != null) {
ss.output = MathUtils.multiply(ss.output, 1.0 / 32768.0); // why is this done here?
}
}
return;
}
/**
* Dump audio from HNM timeline to a series big-endian raw audio files in chunks of Datagrams (clusterSize). Run this
* with
*
*
* -ea - Xmx2gb
*
*
* @param args
*
* - path to timeline_hnm.mry file
* - path to dump output files
*
* @throws UnsupportedAudioFileException
* UnsupportedAudioFileException
* @throws IOException
* IOException
* @throws MaryConfigurationException
* MaryConfigurationException
*/
public static void main(String[] args) throws UnsupportedAudioFileException, IOException, MaryConfigurationException {
HnmTimelineReader h = new HnmTimelineReader(args[0]);
LinkedList datagrams = new LinkedList();
int count = 0;
long startDatagramTime = 0;
int numDatagrams = (int) h.numDatagrams;
// long numDatagrams = 2000;
Datagram[] rawDatagrams = h.getDatagrams(0l, numDatagrams, h.getSampleRate(), null);
for (int i = 0; i < rawDatagrams.length; i++) {
HnmDatagram d = (HnmDatagram) rawDatagrams[i];
datagrams.add(d);
count++;
System.out.println("Datagram " + String.valueOf(count) + "Noise waveform size="
+ ((FrameNoisePartWaveform) (((HnmDatagram) d).frame.n)).waveform().length);
}
int clusterSize = 1000;
int numClusters = (int) Math.floor((numDatagrams) / ((double) clusterSize) + 0.5);
int startIndex, endIndex;
for (int i = 0; i < numClusters; i++) {
DataOutputStream output = new DataOutputStream(new FileOutputStream(
new File(String.format("%s_%06d.bin", args[1], i))));
startIndex = (int) (i * clusterSize);
endIndex = (int) Math.min((i + 1) * clusterSize - 1, numDatagrams - 1);
h.testSynthesizeFromDatagrams(datagrams, startIndex, endIndex, output);
System.out.println("Timeline cluster " + String.valueOf(i + 1) + " of " + String.valueOf(numClusters)
+ " synthesized...");
output.close();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy