marytts.tools.voiceimport.UnknownWordsFrequencyComputer Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2000-2009 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.tools.voiceimport;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Locale;
import java.util.SortedMap;
import java.util.TreeMap;
import marytts.client.MaryClient;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.util.MaryUtils;
import marytts.util.dom.NameNodeFilter;
import marytts.util.http.Address;
import marytts.util.io.FileUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.NodeIterator;
/**
 * Sends every utterance of the voice database to a MARY server for phonemisation, stores the server output in
 * {@link #FEATUREDIR}, and removes from the basename list every utterance that contains at least one token whose
 * <code>g2p_method</code> attribute is <code>"rules"</code> (presumably: a word that was not found in the lexicon and
 * had to be transcribed by letter-to-sound rules). The reduced basename list is written to
 * <code>newbaselist.txt</code> below the database root directory.
 *
 * @author schroed
 *
 */
public class UnknownWordsFrequencyComputer extends VoiceImportComponent {
    protected File textDir;
    /** Directory receiving one phonemised XML file per basename (value of {@link #FEATUREDIR}). */
    protected File unitfeatureDir;
    /** File extension appended to the basename for the phonemised output files. */
    protected String featsExt = ".pfeats";
    protected String locale;
    /** Lazily created client; see {@link #getMaryClient()}. */
    protected MaryClient mary;
    protected String maryInputType;
    protected String maryOutputType;
    protected DatabaseLayout db = null;
    /** Progress of {@link #compute()} in percent. */
    protected int percent = 0;

    // Property keys of this component.
    public String FEATUREDIR = "UnknownWordsFrequencyComputer.featureDir";
    public String MARYSERVERHOST = "UnknownWordsFrequencyComputer.maryServerHost";
    public String MARYSERVERPORT = "UnknownWordsFrequencyComputer.maryServerPort";

    /**
     * @return the name of this voice import component.
     */
    public String getName() {
        return "UnknownWordsFrequencyComputer";
    }

    /**
     * Build the opening part of a RAWMARYXML document: XML declaration, the opening <code>maryxml</code> root element
     * for the given locale, and an initial boundary. The caller must append the text and the closing
     * <code>&lt;/maryxml&gt;</code> tag.
     * <p>
     * NOTE(review): namespace and attribute values follow the MaryXML 0.4 conventions - confirm against the schema of
     * the server version in use.
     *
     * @param locale
     *            value for the <code>xml:lang</code> attribute of the root element
     * @return the document header, ending with a newline
     */
    public static String getMaryXMLHeaderWithInitialBoundary(String locale) {
        return "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" + "<maryxml version=\"0.4\"\n"
                + "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
                + "xmlns=\"http://mary.dfki.de/2002/MaryXML\"\n" + "xml:lang=\"" + locale + "\">\n"
                + "<boundary  breakindex=\"2\" duration=\"100\"/>\n";
    }

    /**
     * Read the locale from the database layout, make sure the output directory exists (creating it if necessary) and
     * set the MARY input and output types.
     *
     * @throws Error
     *             if the output directory does not exist and cannot be created
     */
    @Override
    protected void initialiseComp() {
        locale = db.getProp(db.LOCALE);
        mary = null; // initialised only if needed
        unitfeatureDir = new File(getProp(FEATUREDIR));
        if (!unitfeatureDir.exists()) {
            System.out.print(FEATUREDIR + " " + getProp(FEATUREDIR) + " does not exist; ");
            if (!unitfeatureDir.mkdir()) {
                throw new Error("Could not create FEATUREDIR");
            }
            System.out.print("Created successfully.\n");
        }
        maryInputType = "RAWMARYXML";
        maryOutputType = "PHONEMES";
    }

    /**
     * Remember the database layout and, on first call, create the default property values of this component.
     *
     * @param db
     *            the database layout providing the root directory
     * @return the property map of this component
     */
    public SortedMap getDefaultProps(DatabaseLayout db) {
        this.db = db;
        if (props == null) {
            props = new TreeMap();
            props.put(FEATUREDIR, db.getProp(db.ROOTDIR) + "phonemisedXML" + System.getProperty("file.separator"));
            props.put(MARYSERVERHOST, "localhost");
            props.put(MARYSERVERPORT, "59125");
        }
        return props;
    }

    /**
     * Fill the map of help texts, one entry per property key.
     */
    protected void setupHelp() {
        props2Help = new TreeMap();
        props2Help.put(FEATUREDIR, "directory containing the phone features." + "Will be created if it does not exist");
        props2Help.put(MARYSERVERHOST, "the host were the Mary server is running, default: \"localhost\"");
        props2Help.put(MARYSERVERPORT, "the port were the Mary server is listening, default: \"59125\"");
    }

    /**
     * Return the client used to talk to the MARY server, creating it on first use from the configured host and port.
     *
     * @return the (cached) client
     * @throws IOException
     *             if the connection cannot be established; the original failure is attached as the cause
     */
    public MaryClient getMaryClient() throws IOException {
        if (mary == null) {
            String host = getProp(MARYSERVERHOST);
            String port = getProp(MARYSERVERPORT);
            try {
                mary = MaryClient.getMaryClient(new Address(host, Integer.parseInt(port)));
            } catch (IOException e) {
                // Keep the underlying failure so that the real reason (refused, unknown host, ...) stays visible.
                throw new IOException("Could not connect to Maryserver at " + host + " " + port, e);
            }
        }
        return mary;
    }

    /**
     * Phonemise every utterance in the basename list and write the list that remains after removal of the utterances
     * containing rule-transcribed tokens to <code>newbaselist.txt</code>.
     *
     * @return true when all utterances have been processed
     * @throws IOException
     *             if a file cannot be read or written, or the server cannot be reached
     * @throws Exception
     *             if processing of an utterance fails
     */
    public boolean compute() throws IOException, Exception {
        textDir = new File(db.getProp(db.TEXTDIR));
        int total = bnl.getLength();
        System.out.println("Computing unit features for " + total + " files");

        // computeFeaturesFor() may remove the current basename from bnl. Iterating over bnl by index while it
        // shrinks would skip the entry following each removed one, so work on a snapshot of the names instead.
        String[] basenames = new String[total];
        for (int i = 0; i < total; i++) {
            basenames[i] = bnl.getName(i);
        }
        for (int i = 0; i < total; i++) {
            percent = 100 * i / total;
            computeFeaturesFor(basenames[i]);
        }

        bnl.write(db.getProp(db.ROOTDIR) + File.separator + "newbaselist.txt");
        percent = 100;
        System.out.println("Finished computing the unit features.");
        return true;
    }

    /**
     * Phonemise one utterance, store the result in the output directory, and remove the utterance from the basename
     * list if any of its tokens carries <code>g2p_method="rules"</code>. Each such token is reported on standard
     * output.
     *
     * @param basename
     *            name of the utterance, without directory and extension
     * @throws IOException
     *             if the input cannot be read, the output cannot be written, or the server cannot be reached
     * @throws Exception
     *             if the server request or the parsing of its output fails
     */
    public void computeFeaturesFor(String basename) throws IOException, Exception {
        String text;
        // Prefer a hand-prepared RAWMARYXML file; otherwise wrap the plain text into a minimal MaryXML document.
        File rawmaryxmlFile = new File(db.getProp(db.MARYXMLDIR) + basename + db.getProp(db.MARYXMLEXT));
        if (rawmaryxmlFile.exists()) {
            text = FileUtils.getFileAsString(rawmaryxmlFile, "UTF-8");
        } else {
            text = getMaryXMLHeaderWithInitialBoundary(locale)
                    + FileUtils.getFileAsString(new File(db.getProp(db.TEXTDIR) + basename + db.getProp(db.TEXTEXT)), "UTF-8")
                    + "</maryxml>";
        }

        File pfeatFile = new File(unitfeatureDir, basename + featsExt);
        phonemiseToFile(text, pfeatFile);
        Document doc = readPhonemisedDocument(pfeatFile);

        NodeIterator it = ((DocumentTraversal) doc).createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, new NameNodeFilter(
                MaryXML.TOKEN), false);
        Element token;
        while ((token = (Element) it.nextNode()) != null) {
            if (!token.hasAttribute("g2p_method") || !"rules".equals(token.getAttribute("g2p_method"))) {
                continue;
            }
            String nodeText = token.getTextContent().trim();
            System.out.print(basename + " ----> " + nodeText);
            // The basename is removed at the first offending token; later tokens are still reported.
            if (bnl.contains(basename)) {
                bnl.remove(basename);
            }
            System.out.println(" SO removing basename: " + basename);
        }
    }

    /**
     * Ask the MARY server to convert the given input and write the server output to the given file.
     */
    private void phonemiseToFile(String text, File target) throws Exception {
        // Obtain the client first so that a connection failure does not leave an empty output file behind.
        MaryClient maryClient = getMaryClient();
        OutputStream os = new BufferedOutputStream(new FileOutputStream(target));
        try {
            // NOTE(review): the voice name is hard-coded although the locale is configurable - confirm this is intended.
            maryClient.process(text, maryInputType, maryOutputType, locale, null, "slt-arctic", os);
            os.flush();
        } finally {
            os.close();
        }
    }

    /**
     * Parse a file written by {@link #phonemiseToFile(String, File)} into a DOM document.
     */
    private Document readPhonemisedDocument(File pfeatFile) throws Exception {
        // NOTE(review): data type and locale are hard-coded to English although maryOutputType and locale are
        // available - left unchanged, confirm against the server version in use.
        MaryData d = new MaryData(MaryDataType.get("PHONEMISED_EN"), Locale.US);
        FileInputStream in = new FileInputStream(pfeatFile);
        try {
            // Decode explicitly as UTF-8 (the encoding used for all other files here) instead of relying on the
            // platform default charset.
            d.readFrom(new InputStreamReader(in, "UTF-8"));
        } finally {
            in.close();
        }
        return d.getDocument();
    }

    /**
     * Provide the progress of computation, in percent, or -1 if that feature is not implemented.
     *
     * @return -1 if not implemented, or an integer between 0 and 100.
     */
    public int getProgress() {
        return percent;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy