marytts.tools.dbselection.SimpleCoverageComputer Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2011 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.tools.dbselection;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import marytts.features.FeatureDefinition;
import marytts.features.FeatureRegistry;
import marytts.util.MaryUtils;
/**
 * Command-line tool that takes a text file containing one sentence per line, and computes the phone, diphone and prosody
 * coverage of the corpus.
 * <p>
 * Blank lines in the input are skipped. Both the input and the output file are treated as UTF-8.
 *
 * @author marc
 *
 */
public class SimpleCoverageComputer {

    /** Space-separated feature names used for both sentence analysis and the feature definition lookup. */
    private static final String FEATURE_NAMES = "phone next_phone selection_prosody";

    /**
     * Reads the corpus, computes the coverage features of every sentence and writes the corpus statistics to a file.
     *
     * @param args
     *            exactly three values are used: <code>args[0]</code> is the input text file (one sentence per line),
     *            <code>args[1]</code> is the file the statistics are written to, and <code>args[2]</code> is the locale
     *            string handed to {@link MaryUtils#string2locale(String)}
     * @throws IllegalArgumentException
     *             if fewer than three arguments are given
     * @throws IOException
     *             if the input cannot be read or the statistics cannot be written
     * @throws Exception
     *             if feature computation or coverage initialisation fails
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 3) {
            throw new IllegalArgumentException("Usage: java " + SimpleCoverageComputer.class.getName()
                    + " <input text file> <output statistics file> <locale>");
        }
        String inputFile = args[0];
        String outputFile = args[1];

        List<String> lines = readSentences(inputFile);

        // Open the output before the expensive feature computation so that an unwritable path fails early.
        PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"));
        try {
            Locale locale = MaryUtils.string2locale(args[2]);

            System.out.println("Computing coverage features for " + lines.size() + " sentences from " + inputFile + "...");
            byte[][] coverageFeatures = new byte[lines.size()][];
            for (int i = 0, max = lines.size(); i < max; i++) {
                coverageFeatures[i] = CoverageUtils.sentenceToFeatures(lines.get(i), locale, FEATURE_NAMES, false);
                // Progress indicator, refreshed in place every ten sentences.
                if (i % 10 == 0) {
                    System.out.print("\r" + i + "/" + max);
                }
            }
            System.out.println();

            System.out.println("Computing coverage...");
            CoverageFeatureProvider cfProvider = new InMemoryCFProvider(coverageFeatures, null);
            FeatureDefinition featDef = FeatureRegistry.getTargetFeatureComputer(locale, FEATURE_NAMES).getFeatureDefinition();
            CoverageDefinition coverageDefinition = new CoverageDefinition(featDef, cfProvider, null);
            coverageDefinition.initialiseCoverage();
            coverageDefinition.printTextCorpusStatistics(out);

            // PrintWriter records write failures instead of throwing them; surface them explicitly.
            if (out.checkError()) {
                throw new IOException("Could not write coverage statistics to " + outputFile);
            }
        } finally {
            out.close();
        }
        System.out.println("done -- see " + outputFile + " for results.");
    }

    /**
     * Reads all non-blank lines of a UTF-8 text file; the reader is closed even if reading fails.
     *
     * @param path
     *            name of the file to read
     * @return the lines of the file in their original order, without those that are empty after trimming
     * @throws IOException
     *             if the file cannot be opened or read
     */
    private static List<String> readSentences(String path) throws IOException {
        List<String> sentences = new ArrayList<String>();
        BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                sentences.add(line);
            }
        } finally {
            in.close();
        }
        return sentences;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy