All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.dbselection.SimpleCoverageComputer Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2011 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.dbselection;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Locale;

import marytts.features.FeatureDefinition;
import marytts.features.FeatureRegistry;
import marytts.util.MaryUtils;

/**
 * This class takes a text file containing one sentence per line, and computes the phone, diphone and prosody coverage of the
 * corpus.
 * 
 * @author marc
 * 
 */
public class SimpleCoverageComputer {

	/**
	 * Command-line entry point: reads a sentence file, computes coverage features for every non-blank line, and writes the
	 * corpus statistics to an output file.
	 * 
	 * @param args
	 *            exactly three values are used:
	 *            <ol>
	 *            <li>path of the input text file, read as UTF-8; lines that are empty after trimming are skipped</li>
	 *            <li>path of the output file, written as UTF-8</li>
	 *            <li>locale string, converted with {@link MaryUtils#string2locale(String)}</li>
	 *            </ol>
	 * @throws IllegalArgumentException
	 *             if fewer than three arguments are given
	 * @throws Exception
	 *             if reading the input, computing the features or writing the output fails
	 */
	public static void main(String[] args) throws Exception {
		if (args == null || args.length < 3) {
			throw new IllegalArgumentException("Usage: SimpleCoverageComputer <sentence-file> <output-file> <locale>");
		}

		// Both streams are opened up front (as before) so that an unreadable input or unwritable output is
		// reported before the potentially long feature computation starts; try-with-resources guarantees that
		// they are closed again even when a later step throws.
		try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF-8"));
				PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(args[1]), "UTF-8"))) {
			Locale locale = MaryUtils.string2locale(args[2]);

			// The same feature names are used to compute the per-sentence features and to look up the
			// feature definition below.
			String featureNames = "phone next_phone selection_prosody";

			// Collect all non-blank lines; each one is treated as one sentence.
			ArrayList<String> lines = new ArrayList<String>();
			String line;
			while ((line = in.readLine()) != null) {
				if (line.trim().isEmpty())
					continue;
				lines.add(line);
			}

			System.out.println("Computing coverage features for " + lines.size() + " sentences from " + args[0] + "...");
			byte[][] coverageFeatures = new byte[lines.size()][];
			for (int i = 0, max = lines.size(); i < max; i++) {
				coverageFeatures[i] = CoverageUtils.sentenceToFeatures(lines.get(i), locale, featureNames, false);
				// Progress indicator, refreshed in place every tenth sentence.
				if (i % 10 == 0) {
					System.out.print("\r" + i + "/" + max);
				}
			}
			System.out.println();

			System.out.println("Computing coverage...");
			// NOTE(review): the meaning of the null arguments passed to InMemoryCFProvider and
			// CoverageDefinition is not visible from this file -- confirm against those classes.
			CoverageFeatureProvider cfProvider = new InMemoryCFProvider(coverageFeatures, null);
			FeatureDefinition featDef = FeatureRegistry.getTargetFeatureComputer(locale, featureNames).getFeatureDefinition();
			CoverageDefinition coverageDefinition = new CoverageDefinition(featDef, cfProvider, null);
			coverageDefinition.initialiseCoverage();
			coverageDefinition.printTextCorpusStatistics(out);
		}
		// Reported only after the output file has been flushed and closed.
		System.out.println("done -- see " + args[1] + " for results.");

	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy