All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.voiceimport.UnknownWordsFrequencyComputer Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2000-2009 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.voiceimport;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Locale;
import java.util.SortedMap;
import java.util.TreeMap;

import marytts.client.MaryClient;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.util.MaryUtils;
import marytts.util.dom.NameNodeFilter;
import marytts.util.http.Address;
import marytts.util.io.FileUtils;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.NodeIterator;

/**
 * Voice-import component that sends every utterance of the database to a MARY server, stores the returned
 * {@code PHONEMES} output in {@link #FEATUREDIR}, and removes from the basename list every utterance that contains at
 * least one token whose {@code g2p_method} attribute is {@code "rules"} (presumably words that were not found in the
 * lexicon and had to be transcribed by letter-to-sound rules -- confirm against the MARY phonemiser).
 * <p>
 * After all utterances have been processed, the remaining basename list is written to {@code newbaselist.txt} below the
 * database root directory.
 * 
 * @author schroed
 *
 */
public class UnknownWordsFrequencyComputer extends VoiceImportComponent {
	protected File textDir;
	protected File unitfeatureDir;
	protected String featsExt = ".pfeats";
	protected String locale;
	protected MaryClient mary;
	protected String maryInputType;
	protected String maryOutputType;

	protected DatabaseLayout db = null;
	protected int percent = 0;

	/** Property key: directory receiving the phonemised output files. */
	public String FEATUREDIR = "UnknownWordsFrequencyComputer.featureDir";
	/** Property key: host name of the MARY server. */
	public String MARYSERVERHOST = "UnknownWordsFrequencyComputer.maryServerHost";
	/** Property key: port of the MARY server. */
	public String MARYSERVERPORT = "UnknownWordsFrequencyComputer.maryServerPort";

	/**
	 * @return the name of this component, which is also the prefix of its property keys.
	 */
	public String getName() {
		return "UnknownWordsFrequencyComputer";
	}

	/**
	 * Build the opening part of a RAWMARYXML document: XML declaration, {@code maryxml} root element carrying the given
	 * language, and an initial {@code boundary} element. The caller has to append the text and the closing
	 * {@code maryxml} tag.
	 * 
	 * @param locale
	 *            value written into the {@code xml:lang} attribute of the root element
	 * @return the document prefix, ending in a newline
	 */
	public static String getMaryXMLHeaderWithInitialBoundary(String locale) {
		// NOTE(review): schema version and boundary attributes are the values conventionally used by the MARY
		// voice-import tools -- confirm against the MaryXML schema of the targeted server.
		return "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" + "<maryxml version=\"0.4\"\n"
				+ "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" + "xmlns=\"http://mary.dfki.de/2002/MaryXML\"\n"
				+ "xml:lang=\"" + locale + "\">\n" + "<boundary  breakindex=\"2\" duration=\"100\"/>\n";
	}

	/**
	 * Read the locale from the database layout, make sure the output directory exists and fix the MARY input and output
	 * types. The MARY client itself is created lazily by {@link #getMaryClient()}.
	 */
	@Override
	protected void initialiseComp() {
		locale = db.getProp(db.LOCALE);

		mary = null; // initialised only if needed
		unitfeatureDir = new File(getProp(FEATUREDIR));
		if (!unitfeatureDir.exists()) {
			System.out.print(FEATUREDIR + " " + getProp(FEATUREDIR) + " does not exist; ");
			if (!unitfeatureDir.mkdir()) {
				throw new Error("Could not create FEATUREDIR");
			}
			System.out.print("Created successfully.\n");
		}

		maryInputType = "RAWMARYXML";
		maryOutputType = "PHONEMES";
	}

	/**
	 * Remember the database layout and, on first call, fill in the default property values of this component.
	 * 
	 * @param db
	 *            the database layout this component works on
	 * @return the property map of this component
	 */
	public SortedMap getDefaultProps(DatabaseLayout db) {
		this.db = db;
		if (props == null) {
			props = new TreeMap();
			props.put(FEATUREDIR, db.getProp(db.ROOTDIR) + "phonemisedXML" + System.getProperty("file.separator"));
			props.put(MARYSERVERHOST, "localhost");
			props.put(MARYSERVERPORT, "59125");
		}
		return props;
	}

	/**
	 * Fill the help text for every property of this component.
	 */
	protected void setupHelp() {
		props2Help = new TreeMap();
		props2Help.put(FEATUREDIR, "directory containing the phone features." + "Will be created if it does not exist");
		props2Help.put(MARYSERVERHOST, "the host were the Mary server is running, default: \"localhost\"");
		props2Help.put(MARYSERVERPORT, "the port were the Mary server is listening, default: \"59125\"");
	}

	/**
	 * Return the client connected to the configured MARY server, creating it on first use.
	 * 
	 * @return the shared MARY client
	 * @throws IOException
	 *             if no connection to the configured host and port can be established; the original failure is kept
	 *             as the cause
	 */
	public MaryClient getMaryClient() throws IOException {
		if (mary == null) {
			try {
				mary = MaryClient.getMaryClient(new Address(getProp(MARYSERVERHOST), Integer.parseInt(getProp(MARYSERVERPORT))));
			} catch (IOException e) {
				// Chain the cause so the real reason (refused, timeout, unknown host, ...) is not lost.
				throw new IOException("Could not connect to Maryserver at " + getProp(MARYSERVERHOST) + " "
						+ getProp(MARYSERVERPORT), e);
			}
		}
		return mary;
	}

	/**
	 * Process every basename of the basename list and write the filtered list to {@code newbaselist.txt} below the
	 * database root directory.
	 * 
	 * @return always {@code true}
	 * @throws IOException
	 *             if reading, writing or talking to the MARY server fails
	 * @throws Exception
	 *             if processing an utterance fails for any other reason
	 */
	public boolean compute() throws IOException, Exception {
		textDir = new File(db.getProp(db.TEXTDIR));

		// computeFeaturesFor() removes entries from bnl. Iterating bnl by index while it shrinks would skip the
		// entry following each removed one, so work on a snapshot taken up front.
		int total = bnl.getLength();
		String[] basenames = new String[total];
		for (int i = 0; i < total; i++) {
			basenames[i] = bnl.getName(i);
		}

		System.out.println("Computing unit features for " + total + " files");
		for (int i = 0; i < total; i++) {
			percent = 100 * i / total;
			computeFeaturesFor(basenames[i]);
		}
		percent = 100;
		bnl.write(db.getProp(db.ROOTDIR) + File.separator + "newbaselist.txt");
		System.out.println("Finished computing the unit features.");
		return true;
	}

	/**
	 * Phonemise one utterance and remove its basename from the basename list if any token was transcribed with
	 * {@code g2p_method="rules"}.
	 * 
	 * @param basename
	 *            name of the utterance, without directory and extension
	 * @throws IOException
	 *             if reading, writing or talking to the MARY server fails
	 * @throws Exception
	 *             if the server output cannot be processed
	 */
	public void computeFeaturesFor(String basename) throws IOException, Exception {
		String text = loadRawMaryXml(basename);
		File pfeatFile = new File(unitfeatureDir, basename + featsExt);
		phonemiseToFile(text, pfeatFile);
		Document doc = parsePhonemisedFile(pfeatFile);

		NodeIterator it = ((DocumentTraversal) doc).createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, new NameNodeFilter(
				MaryXML.TOKEN), false);

		Element t = null;
		while ((t = (Element) it.nextNode()) != null) {
			// getAttribute() yields "" for a missing attribute, so no separate hasAttribute() test is needed.
			if ("rules".equals(t.getAttribute("g2p_method"))) {
				String nodeText = t.getTextContent().trim();
				System.out.print(basename + " ----> " + nodeText);
				if (bnl.contains(basename)) {
					bnl.remove(basename);
				}
				System.out.println(" SO removing basename: " + basename);
			}
		}
	}

	/**
	 * Return the RAWMARYXML input for one utterance: the content of its raw MaryXML file if that file exists, otherwise
	 * the plain text file wrapped into a minimal MaryXML document.
	 */
	private String loadRawMaryXml(String basename) throws Exception {
		File rawmaryxmlFile = new File(db.getProp(db.MARYXMLDIR) + basename + db.getProp(db.MARYXMLEXT));
		if (rawmaryxmlFile.exists()) {
			return FileUtils.getFileAsString(rawmaryxmlFile, "UTF-8");
		}
		File textFile = new File(db.getProp(db.TEXTDIR) + basename + db.getProp(db.TEXTEXT));
		return getMaryXMLHeaderWithInitialBoundary(locale) + FileUtils.getFileAsString(textFile, "UTF-8") + "</maryxml>";
	}

	/**
	 * Send the given input to the MARY server and store the server output in the given file. The file stream is closed
	 * even when the request fails.
	 */
	private void phonemiseToFile(String text, File pfeatFile) throws Exception {
		OutputStream os = new BufferedOutputStream(new FileOutputStream(pfeatFile));
		try {
			MaryClient maryClient = getMaryClient();
			maryClient.process(text, maryInputType, maryOutputType, locale, null, "slt-arctic", os);
			os.flush();
		} finally {
			os.close();
		}
	}

	/**
	 * Parse a file previously written by the MARY server into a DOM document. The reader is closed in all cases.
	 */
	private Document parsePhonemisedFile(File pfeatFile) throws Exception {
		MaryData d = new MaryData(MaryDataType.get("PHONEMISED_EN"), Locale.US);
		FileReader reader = new FileReader(pfeatFile);
		try {
			d.readFrom(reader);
		} finally {
			reader.close();
		}
		return d.getDocument();
	}

	/**
	 * Provide the progress of computation, in percent, or -1 if that feature is not implemented.
	 * 
	 * @return -1 if not implemented, or an integer between 0 and 100.
	 */
	public int getProgress() {
		return percent;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy