marytts.tools.voiceimport.vocalizations.VocalizationF0PolyFeatureFileWriter Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.tools.voiceimport.vocalizations;
import java.awt.Color;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import javax.sound.sampled.AudioFormat;
import javax.swing.JFrame;
import marytts.exceptions.MaryConfigurationException;
import marytts.features.FeatureDefinition;
import marytts.features.FeatureVector;
import marytts.signalproc.analysis.F0TrackerAutocorrelationHeuristic;
import marytts.signalproc.analysis.PitchFileHeader;
import marytts.signalproc.display.FunctionGraph;
import marytts.tools.voiceimport.DatabaseLayout;
import marytts.tools.voiceimport.VoiceImportComponent;
import marytts.unitselection.data.FeatureFileReader;
import marytts.unitselection.data.TimelineReader;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.Datagram;
import marytts.util.data.DatagramDoubleDataSource;
import marytts.util.data.MaryHeader;
import marytts.util.data.audio.AudioPlayer;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.ArrayUtils;
import marytts.util.math.Polynomial;
import marytts.util.signal.SignalProcUtils;
import marytts.vocalizations.VocalizationUnitFileReader;
/**
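* Voice import component that writes a feature file of F0 polynomial coefficients for vocalization units.
* NOTE: this component is NOT COMPLETED; it is kept because it may be useful in the future.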
*
* @author sathish
*
*/
public class VocalizationF0PolyFeatureFileWriter extends VoiceImportComponent {
/** Output directory; resolved from the database layout's FILEDIR setting in compute(). */
protected File maryDir;
/** Reader supplying the per-unit feature vectors consumed by writeUnitFeaturesTo(). */
protected FeatureFileReader features;
/** Feature definition of the input features (presumably the one belonging to {@link #features} -- TODO confirm). */
protected FeatureDefinition inFeatureDefinition;
/** The feature file written by compute(); its path comes from the F0FEATUREFILE setting. */
protected File outFeatureFile;
/** Definition of the output features: one continuous feature per polynomial coefficient, built in compute(). */
protected FeatureDefinition outFeatureDefinition;
/** Unit file reader opened from the UNITFILE setting; provides unit count, start times and durations. */
protected VocalizationUnitFileReader listenerUnits;
/** Timeline reader opened from the WAVETIMELINE setting; provides the audio datagrams to analyse. */
protected TimelineReader audio;
/** Database layout; stored when getDefaultProps(DatabaseLayout) is called. */
protected DatabaseLayout db = null;
/** Progress in percent; updated once per unit while the unit features are written. */
protected int percent = 0;
/** Component name; also the prefix of every property key below. */
private final String name = "F0PolynomialFeatureFileWriter";
/** Property key: path of the unit file to read. */
public final String UNITFILE = name + ".unitFile";
/** Property key: path of the waveform timeline to read. */
public final String WAVETIMELINE = name + ".waveTimeLine";
/** Property key: path of the input feature file. */
public final String FEATUREFILE = name + ".featureFile";
/** Property key: path of the F0 polynomial feature file to write. */
public final String F0FEATUREFILE = name + ".f0FeatureFile";
/** Property key: order of the fitted polynomials (parsed as an int). */
public final String POLYNOMORDER = name + ".polynomOrder";
/** Property key: whether to display F0 graphs while processing (parsed as a boolean). */
public final String SHOWGRAPH = name + ".showGraph";
/** Property key: whether to interpolate F0 across frames without a valid value (parsed as a boolean). */
public final String INTERPOLATE = name + ".interpolate";
/** Property key: minimum F0 handed to the pitch tracker (parsed as a double). */
public final String MINPITCH = name + ".minPitch";
/** Property key: maximum F0 handed to the pitch tracker (parsed as a double). */
public final String MAXPITCH = name + ".maxPitch";
/**
 * Reports the name of this voice import component.
 *
 * @return the component name, which is also the prefix of all its property keys
 */
public String getName() {
    return this.name;
}
/**
 * Returns the settings of this component, creating the defaults on first call.
 * <p>
 * The given database layout is always remembered in {@link #db}. The default map is only built while
 * {@code props} is still {@code null}; later calls return the existing map unchanged.
 *
 * @param db
 *            the database layout providing the file directory, the MARY file extension and the gender
 * @return the property map of this component (key to value, both strings)
 */
public SortedMap<String, String> getDefaultProps(DatabaseLayout db) {
    this.db = db;
    if (props == null) {
        props = new TreeMap<String, String>();
        String fileDir = db.getProp(db.FILEDIR);
        String maryExt = db.getProp(db.MARYEXT);
        props.put(UNITFILE, fileDir + "halfphoneUnits" + maryExt);
        props.put(WAVETIMELINE, fileDir + "timeline_waveforms" + maryExt);
        props.put(FEATUREFILE, fileDir + "halfphoneFeatures" + maryExt);
        props.put(F0FEATUREFILE, fileDir + "vocalizationF0Polynomials" + maryExt);
        props.put(POLYNOMORDER, "3");
        props.put(SHOWGRAPH, "false");
        props.put(INTERPOLATE, "true");
        // Constant-first comparison: a missing gender setting selects the non-female range
        // instead of failing with a NullPointerException.
        if ("female".equals(db.getProp(db.GENDER))) {
            props.put(MINPITCH, "100");
            props.put(MAXPITCH, "600");
        } else {
            props.put(MINPITCH, "60");
            props.put(MAXPITCH, "400");
        }
    }
    return props;
}
/**
 * Builds the help texts for all settings of this component, once.
 * <p>
 * Nothing happens when {@code props2Help} has already been created. The pitch defaults quoted in the
 * help texts are the values installed by {@code getDefaultProps}: 100-600 Hz for female voices,
 * 60-400 Hz otherwise.
 */
protected void setupHelp() {
    if (props2Help != null) {
        return;
    }
    props2Help = new TreeMap<String, String>();
    props2Help.put(UNITFILE, "file containing all halfphone units");
    props2Help.put(WAVETIMELINE, "file containing all waveforms or models that can genarate them");
    props2Help.put(FEATUREFILE, "file containing all halfphone units and their target cost features");
    props2Help.put(F0FEATUREFILE, "file containing syllable-based polynom coefficients on vowels");
    props2Help.put(POLYNOMORDER, "order of the polynoms used to approximate syllable F0 curves");
    props2Help.put(SHOWGRAPH, "whether to show a graph with f0 aproximations for each sentence");
    props2Help.put(INTERPOLATE, "whether to interpolate F0 across unvoiced regions");
    // The quoted defaults previously read "female 100, male 75" / "female 500, male 300",
    // which did not match the defaults actually applied.
    props2Help.put(MINPITCH, "minimum value for the pitch (in Hz). Default: female 100, male 60");
    props2Help.put(MAXPITCH, "maximum value for the pitch (in Hz). Default: female 600, male 400");
}
/**
 * Writes the F0 polynomial feature file and verifies that it can be read back.
 * <p>
 * Steps: make sure the output directory exists, open the unit file and the waveform timeline, make sure
 * the input features are loaded, build the output feature definition (one continuous feature
 * {@code f0contour_a<i>} per polynomial coefficient, highest order first), write header and unit
 * features, then re-open the written file and compare its unit count with the unit file.
 *
 * @return {@code true} if the written file reports the same number of units as the unit file,
 *         {@code false} otherwise
 * @throws IOException
 *             if the output directory cannot be created or reading/writing a file fails
 * @throws MaryConfigurationException
 *             if one of the readers cannot be set up
 */
@Override
public boolean compute() throws IOException, MaryConfigurationException {
    logger.info("F0 polynomial feature file writer started.");

    maryDir = new File(db.getProp(db.FILEDIR));
    if (!maryDir.exists()) {
        // Only report success when the directory really exists afterwards.
        if (!maryDir.mkdirs() && !maryDir.isDirectory()) {
            throw new IOException("Cannot create the output directory [" + maryDir.getPath() + "]");
        }
        System.out.println("Created the output directory [" + (db.getProp(db.FILEDIR)) + "] to store the feature file.");
    }

    listenerUnits = new VocalizationUnitFileReader(getProp(UNITFILE));
    audio = new TimelineReader(getProp(WAVETIMELINE));

    // writeUnitFeaturesTo() reads the input features; load them here unless they were
    // already provided, so that it does not fail on a null reference after the output
    // file has been created.
    if (features == null) {
        features = FeatureFileReader.getFeatureFileReader(getProp(FEATUREFILE));
        inFeatureDefinition = features.getFeatureDefinition();
    }

    // Build the textual feature definition: no byte features, no short features,
    // and one continuous feature per polynomial coefficient.
    StringWriter sw = new StringWriter();
    PrintWriter pw = new PrintWriter(sw);
    pw.println(FeatureDefinition.BYTEFEATURES); // no byte features
    pw.println(FeatureDefinition.SHORTFEATURES); // no short features
    pw.println(FeatureDefinition.CONTINUOUSFEATURES);
    int polynomOrder = Integer.parseInt(getProp(POLYNOMORDER));
    for (int i = polynomOrder; i >= 0; i--) {
        pw.println("0 linear | f0contour_a" + i);
    }
    pw.close();
    String fd = sw.toString();
    logger.debug("Generated the following feature definition:");
    logger.debug(fd);
    outFeatureDefinition = new FeatureDefinition(new BufferedReader(new StringReader(fd)), true);

    outFeatureFile = new File(getProp(F0FEATUREFILE));
    DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outFeatureFile)));
    try {
        writeHeaderTo(out);
        writeUnitFeaturesTo(out);
    } finally {
        // Release the file handle even when writing fails.
        out.close();
    }
    logger.debug("Number of processed units: " + listenerUnits.getNumberOfUnits());

    // Sanity check: read the file back and compare the unit counts.
    FeatureFileReader tester = FeatureFileReader.getFeatureFileReader(getProp(F0FEATUREFILE));
    int unitsOnDisk = tester.getNumberOfUnits();
    if (unitsOnDisk == listenerUnits.getNumberOfUnits()) {
        System.out.println("Can read right number of units");
        return true;
    } else {
        System.out.println("Read wrong number of units: " + unitsOnDisk);
        return false;
    }
}
/**
* @param out
* out
* @throws IOException
* IOException
* @throws UnsupportedEncodingException
* UnsupportedEncodingException
* @throws FileNotFoundException
* FileNotFoundException
*/
protected void writeUnitFeaturesTo(DataOutput out) throws IOException, UnsupportedEncodingException, FileNotFoundException {
int numUnits = listenerUnits.getNumberOfUnits();
int unitSampleRate = listenerUnits.getSampleRate();
int audioSampleRate = audio.getSampleRate();
boolean showGraph = Boolean.parseBoolean(getProp(SHOWGRAPH));
boolean interpolate = Boolean.parseBoolean(getProp(INTERPOLATE));
int polynomOrder = Integer.parseInt(getProp(POLYNOMORDER));
float[] zeros = new float[polynomOrder + 1];
int unitIndex = 0;
out.writeInt(numUnits);
logger.debug("Number of units : " + numUnits);
FeatureDefinition featureDefinition = features.getFeatureDefinition();
int fiPhoneme = featureDefinition.getFeatureIndex("phone");
byte fvPhoneme_0 = featureDefinition.getFeatureValueAsByte(fiPhoneme, "0");
byte fvPhoneme_Silence = featureDefinition.getFeatureValueAsByte(fiPhoneme, "_");
int fiLR = featureDefinition.getFeatureIndex("halfphone_lr");
byte fvLR_L = featureDefinition.getFeatureValueAsByte(fiLR, "L");
byte fvLR_R = featureDefinition.getFeatureValueAsByte(fiLR, "R");
int fiSylStart = featureDefinition.getFeatureIndex("segs_from_syl_start");
int fiSylEnd = featureDefinition.getFeatureIndex("segs_from_syl_end");
int fiSentenceStart = featureDefinition.getFeatureIndex("words_from_sentence_start");
int fiSentenceEnd = featureDefinition.getFeatureIndex("words_from_sentence_end");
int fiWordStart = featureDefinition.getFeatureIndex("segs_from_word_start");
int fiWordEnd = featureDefinition.getFeatureIndex("segs_from_word_end");
int fiVowel = featureDefinition.getFeatureIndex("ph_vc");
byte fvVowel_Plus = featureDefinition.getFeatureValueAsByte(fiVowel, "+");
boolean haveUnitLogF0 = false;
int fiUnitLogF0 = -1;
int fiUnitLogF0delta = -1;
if (featureDefinition.hasFeature("unit_logf0") && featureDefinition.hasFeature("unit_logf0delta")) {
haveUnitLogF0 = true;
fiUnitLogF0 = featureDefinition.getFeatureIndex("unit_logf0");
fiUnitLogF0delta = featureDefinition.getFeatureIndex("unit_logf0delta");
}
FunctionGraph f0Graph = null;
JFrame jf = null;
int iSentenceStart = -1;
int iSentenceEnd = -1;
List iSylStarts = new ArrayList();
List iSylEnds = new ArrayList();
List iSylVowels = new ArrayList();
if (showGraph) {
f0Graph = new FunctionGraph(0, 1, new double[1]);
f0Graph.setYMinMax(50, 300);
f0Graph.setPrimaryDataSeriesStyle(Color.BLUE, FunctionGraph.DRAW_DOTS, FunctionGraph.DOT_FULLCIRCLE);
jf = f0Graph.showInJFrame("Sentence", false, true);
}
for (int i = 0; i < numUnits; i++) {
percent = 100 * i / numUnits;
FeatureVector fv = features.getFeatureVector(i);
// System.out.print(featureDefinition.getFeatureValueAsString("phone", fv));
// if (fv.getByteFeature(fiPhoneme) == fvPhoneme_0
// || fv.getByteFeature(fiPhoneme) == fvPhoneme_Silence) continue;
if (iSentenceStart == -1 && fv.getByteFeature(fiSentenceStart) == 0 && fv.getByteFeature(fiWordStart) == 0
&& fv.getByteFeature(fiLR) == fvLR_L) { // first unit in sentence
iSentenceStart = i;
iSylStarts.clear();
iSylEnds.clear();
iSylVowels.clear();
// System.out.print(", is sentence start");
}
// Silence and edge units cannot be part of syllables, but they can
// mark start/end of sentence:
if (fv.getByteFeature(fiPhoneme) != fvPhoneme_0 && fv.getByteFeature(fiPhoneme) != fvPhoneme_Silence) {
if (fv.getByteFeature(fiSylStart) == 0 && fv.getByteFeature(fiLR) == fvLR_L) { // first segment in syllable
if (iSylStarts.size() > iSylEnds.size()) {
System.err.println("Syllable ends before other syllable starts!");
}
iSylStarts.add(i);
// System.out.print(", is syl start");
}
if (fv.getByteFeature(fiVowel) == fvVowel_Plus && iSylVowels.size() < iSylStarts.size()) { // first vowel unit in
// syllable
iSylVowels.add(i);
// System.out.print(", is vowel");
}
if (fv.getByteFeature(fiSylEnd) == 0 && fv.getByteFeature(fiLR) == fvLR_R) { // last segment in syllable
iSylEnds.add(i);
// System.out.print(", is syl end");
assert iSylStarts.size() == iSylEnds.size();
if (iSylVowels.size() < iSylEnds.size()) {
// System.err.println("Syllable contains no vowel -- skipping");
iSylStarts.remove(iSylStarts.size() - 1);
iSylEnds.remove(iSylEnds.size() - 1);
}
}
}
if (iSentenceStart != -1 && fv.getByteFeature(fiSentenceEnd) == 0 && fv.getByteFeature(fiWordEnd) == 0
&& fv.getByteFeature(fiLR) == fvLR_R) { // last unit in sentence
iSentenceEnd = i;
// System.out.print(", is sentence end");
if (iSylEnds.size() < iSylStarts.size()) {
System.err.println("Last syllable in sentence is not properly closed");
iSylEnds.add(i);
}
}
// System.out.println();
if (iSentenceStart >= 0 && iSentenceEnd >= iSentenceStart && iSylVowels.size() > 0) {
assert iSylStarts.size() == iSylEnds.size() : "Have " + iSylStarts.size() + " syllable starts, but "
+ iSylEnds.size() + " syllable ends!";
assert iSylStarts.size() == iSylVowels.size();
long tsSentenceStart = listenerUnits.getUnit(iSentenceStart).startTime;
long tsSentenceEnd = listenerUnits.getUnit(iSentenceEnd).startTime + listenerUnits.getUnit(iSentenceEnd).duration;
long tsSentenceDuration = tsSentenceEnd - tsSentenceStart;
Datagram[] sentenceData = audio.getDatagrams(tsSentenceStart, tsSentenceDuration);
DatagramDoubleDataSource ddds = new DatagramDoubleDataSource(sentenceData);
double[] sentenceAudio = ddds.getAllData();
AudioPlayer ap = null;
if (showGraph) {
ap = new AudioPlayer(new DDSAudioInputStream(new BufferedDoubleDataSource(sentenceAudio), new AudioFormat(
AudioFormat.Encoding.PCM_SIGNED, audioSampleRate, // samples per second
16, // bits per sample
1, // mono
2, // nr. of bytes per frame
audioSampleRate, // nr. of frames per second
true))); // big-endian;))
ap.start();
}
PitchFileHeader params = new PitchFileHeader();
params.fs = audioSampleRate;
params.minimumF0 = Double.parseDouble(getProp(MINPITCH));
params.maximumF0 = Double.parseDouble(getProp(MAXPITCH));
F0TrackerAutocorrelationHeuristic tracker = new F0TrackerAutocorrelationHeuristic(params);
tracker.pitchAnalyze(new BufferedDoubleDataSource(sentenceAudio));
double frameShiftTime = tracker.getSkipSizeInSeconds();
double[] f0Array = tracker.getF0Contour();
if (f0Array != null) {
for (int j = 0; j < f0Array.length; j++) {
if (f0Array[j] == 0) {
f0Array[j] = Double.NaN;
}
}
if (f0Array.length >= 3) {
f0Array = SignalProcUtils.medianFilter(f0Array, 5);
}
if (showGraph) {
f0Graph.updateData(0, tsSentenceDuration / (double) audioSampleRate / f0Array.length, f0Array);
jf.repaint();
}
double[] f0AndInterpol;
if (interpolate) {
double[] interpol = new double[f0Array.length];
Arrays.fill(interpol, Double.NaN);
f0AndInterpol = new double[f0Array.length];
int iLastValid = -1;
for (int j = 0; j < f0Array.length; j++) {
if (!Double.isNaN(f0Array[j])) { // a valid value
if (iLastValid == j - 1) {
// no need to interpolate
f0AndInterpol[j] = f0Array[j];
} else {
// need to interpolate
double prevF0;
if (iLastValid < 0) { // we don't have a previous value -- use current one
prevF0 = f0Array[j];
} else {
prevF0 = f0Array[iLastValid];
}
double delta = (f0Array[j] - prevF0) / (j - iLastValid);
double f0 = prevF0;
for (int k = iLastValid + 1; k < j; k++) {
f0 += delta;
interpol[k] = f0;
f0AndInterpol[k] = f0;
}
}
iLastValid = j;
}
}
if (showGraph) {
f0Graph.addDataSeries(interpol, Color.GREEN, FunctionGraph.DRAW_DOTS, FunctionGraph.DOT_EMPTYCIRCLE);
jf.repaint();
}
} else {
f0AndInterpol = f0Array.clone();
}
for (int j = 0; j < f0AndInterpol.length; j++) {
if (f0AndInterpol[j] == 0)
f0AndInterpol[j] = Double.NaN;
else
f0AndInterpol[j] = Math.log(f0AndInterpol[j]);
}
double[] approx = new double[f0Array.length];
Arrays.fill(approx, Double.NaN);
for (int s = 0; s < iSylStarts.size(); s++) {
long tsSylStart = listenerUnits.getUnit(iSylStarts.get(s)).startTime;
long tsSylEnd = listenerUnits.getUnit(iSylEnds.get(s)).startTime
+ listenerUnits.getUnit(iSylEnds.get(s)).duration;
long tsSylDuration = tsSylEnd - tsSylStart;
int iSylVowel = iSylVowels.get(s);
// now map time to position in f0AndInterpol array:
int iSylStart = (int) (((double) (tsSylStart - tsSentenceStart) / tsSentenceDuration) * f0AndInterpol.length);
assert iSylStart >= 0;
int iSylEnd = iSylStart + (int) ((double) tsSylDuration / tsSentenceDuration * f0AndInterpol.length) + 1;
if (iSylEnd > f0AndInterpol.length)
iSylEnd = f0AndInterpol.length;
// System.out.println("Syl "+s+" from "+iSylStart+" to "+iSylEnd+" out of "+f0AndInterpol.length);
double[] sylF0 = new double[iSylEnd - iSylStart];
System.arraycopy(f0AndInterpol, iSylStart, sylF0, 0, sylF0.length);
double[] coeffs = Polynomial.fitPolynomial(sylF0, polynomOrder);
if (coeffs != null) {
if (showGraph) {
double[] sylPred = Polynomial.generatePolynomialValues(coeffs, sylF0.length, 0, 1);
System.arraycopy(sylPred, 0, approx, iSylStart, sylPred.length);
}
// Write coefficients to file
while (unitIndex < iSylVowel) {
FeatureVector outFV = outFeatureDefinition.toFeatureVector(unitIndex, null, null, zeros);
outFV.writeTo(out);
unitIndex++;
}
float[] fcoeffs = ArrayUtils.copyDouble2Float(coeffs);
// System.out.print("Polynomial values (unit "+unitIndex+") ");
// for (int p=0; p
© 2015 - 2025 Weber Informatics LLC | Privacy Policy