// marytts.modules.PronunciationModel Maven / Gradle / Ivy
// The newest version!
/**
* Copyright 2008 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.modules;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import marytts.cart.StringPredictionTree;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.features.FeatureDefinition;
import marytts.features.FeatureProcessorManager;
import marytts.features.TargetFeatureComputer;
import marytts.modules.phonemiser.Allophone;
import marytts.modules.phonemiser.AllophoneSet;
import marytts.server.MaryProperties;
import marytts.unitselection.select.Target;
import marytts.util.MaryRuntimeUtils;
import marytts.util.dom.MaryDomUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.TreeWalker;
/**
*
* This module serves as a post-lexical pronunciation model. Its appropriate place in the module chain is after intonisation. The
* target features are taken and fed into decision trees that predict the new pronunciation. A new mary xml is output, with the
* difference being that the old pronunciation is replaced by the newly predicted one, and a finer grained xml structure.
*
* @author ben
*
*/
public class PronunciationModel extends InternalModule {
// for prediction, core of the model - maps phones to decision trees
private Map treeMap;
// used in startup() and later for convenience
private FeatureDefinition featDef;
private TargetFeatureComputer featureComputer;
/**
* Constructor, stating that the input is of type INTONATION, the output of type ALLOPHONES.
*
*/
public PronunciationModel() {
this(null);
}
public PronunciationModel(Locale locale) {
super("PronunciationModel", MaryDataType.INTONATION, MaryDataType.ALLOPHONES, locale);
}
public void startup() throws Exception {
super.startup();
// TODO: pronunciation model tree and feature definition should be voice-specific
// get featureDefinition used for trees - just to tell the tree that the
// features are discrete
String fdFilename = null;
if (getLocale() != null) {
fdFilename = MaryProperties
.getFilename(MaryProperties.localePrefix(getLocale()) + ".pronunciation.featuredefinition");
}
if (fdFilename != null) {
File fdFile = new File(fdFilename);
// reader for file, readweights = false
featDef = new FeatureDefinition(new BufferedReader(new FileReader(fdFile)), false);
// get path where the prediction trees lie
File treePath = new File(MaryProperties.needFilename(MaryProperties.localePrefix(getLocale())
+ ".pronunciation.treepath"));
// valid predicion tree files are named prediction_.tree
Pattern treeFilePattern = Pattern.compile("^prediction_(.*)\\.tree$");
// initialize the map that contains the trees
this.treeMap = new HashMap();
// iterate through potential prediction tree files
File[] fileArray = treePath.listFiles();
for (int fileIndex = 0; fileIndex < fileArray.length; fileIndex++) {
File f = fileArray[fileIndex];
// is file name valid?
Matcher filePatternMatcher = treeFilePattern.matcher(f.getName());
if (filePatternMatcher.matches()) {
// phone of file name is a group in the regex
String phoneId = filePatternMatcher.group(1);
// construct tree from file and map phone to it
StringPredictionTree predictionTree = new StringPredictionTree(new BufferedReader(new FileReader(f)), featDef);
// back mapping from short id
int index = this.featDef.getFeatureIndex("phone");
this.treeMap.put(this.featDef.getFeatureValueAsString(index, Short.parseShort(phoneId)), predictionTree);
// logger.debug("Read in tree for " + PhoneNameConverter.normForm2phone(phone));
}
}
logger.debug("Reading in feature definition and decision trees finished.");
// TODO: change property name to german.pronunciation.featuremanager/features
String managerClass = MaryProperties.needProperty(MaryProperties.localePrefix(getLocale())
+ ".pronunciation.targetfeaturelister.featuremanager");
FeatureProcessorManager manager = (FeatureProcessorManager) Class.forName(managerClass).newInstance();
String features = MaryProperties.needProperty(MaryProperties.localePrefix(getLocale())
+ ".pronunciation.targetfeaturelister.features");
this.featureComputer = new TargetFeatureComputer(manager, features);
}
logger.debug("Building feature computer finished.");
}
/**
* Optionally, a language-specific subclass can implement any postlexical rules on the document.
*
* @param token
* a <t> element with a syllable and <ph> substructure.
* @param allophoneSet
* allophoneSet
* @return true if something was changed, false otherwise
*/
protected boolean postlexicalRules(Element token, AllophoneSet allophoneSet) {
return false;
}
/**
* This computes a new pronunciation for the elements of some MaryData, that is phonemised.
*
* @param d
* d
* @throws Exception
* Exception
*/
public MaryData process(MaryData d) throws Exception {
// get the xml document
Document doc = d.getDocument();
logger.debug("Getting xml-data from document finished.");
TreeWalker tw = MaryDomUtils.createTreeWalker(doc, doc, MaryXML.TOKEN);
Element t;
AllophoneSet allophoneSet = null;
while ((t = (Element) tw.nextNode()) != null) {
// First, create the substructure of elements: and .
if (allophoneSet == null) { // need to determine it once, then assume it is the same for all
allophoneSet = MaryRuntimeUtils.determineAllophoneSet(t);
}
createSubStructure(t, allophoneSet);
// Modify by rule:
boolean changedSomething = postlexicalRules(t, allophoneSet);
if (changedSomething) {
updatePhAttributesFromPhElements(t);
}
if (treeMap == null)
continue;
// Modify by trained model:
assert featureComputer != null;
// Now, predict modified pronunciations, adapt elements accordingly,
// and update ph for syllable and t elements where necessary
StringBuilder tPh = new StringBuilder();
TreeWalker sylWalker = MaryDomUtils.createTreeWalker(doc, t, MaryXML.SYLLABLE);
Element syllable;
while ((syllable = (Element) sylWalker.nextNode()) != null) {
StringBuilder sylPh = new StringBuilder();
String stressed = syllable.getAttribute("stress");
if (stressed.equals("1")) {
sylPh.append("'");
} else if (stressed.equals("2")) {
sylPh.append(",");
}
TreeWalker segWalker = MaryDomUtils.createTreeWalker(doc, syllable, MaryXML.PHONE);
Element seg;
// Cannot use tree walker directly, because we concurrently modify the tree:
List originalSegments = new ArrayList();
while ((seg = (Element) segWalker.nextNode()) != null) {
originalSegments.add(seg);
}
for (Element s : originalSegments) {
String phoneString = s.getAttribute("p");
String[] predicted;
// in case we have a decision tree for phone, predict - otherwise leave unchanged
if (treeMap.containsKey(phoneString)) {
Target tgt = new Target(phoneString, s);
tgt.setFeatureVector(featureComputer.computeFeatureVector(tgt));
StringPredictionTree tree = (StringPredictionTree) treeMap.get(phoneString);
String predictStr = tree.getMostProbableString(tgt);
if (sylPh.length() > 0)
sylPh.append(" ");
sylPh.append(predictStr);
// if phone is deleted:
if (predictStr.equals("")) {
predicted = null;
} else {
// predictStr contains whitespace between phones
predicted = predictStr.split(" ");
}
} else {
logger.debug("didn't find decision tree for phone (" + phoneString + "). Just keeping it.");
predicted = new String[] { phoneString };
}
logger.debug(" Predicted phone in sequence of " + predicted.length + " phones.");
// deletions:
if (predicted == null || predicted.length == 0) {
syllable.removeChild(s);
continue; // skip what follows
}
assert predicted != null && predicted.length > 0;
// insertions: for each but the last predicted phone, make a new element
for (int lc = 0; lc < predicted.length - 1; lc++) {
Element newPh = MaryXML.createElement(doc, MaryXML.PHONE);
newPh.setAttribute("p", predicted[lc]);
syllable.insertBefore(newPh, s);
}
// for the last (or only) predicted segment, just update the phone label
if (!phoneString.equals(predicted[predicted.length - 1])) {
s.setAttribute("p", predicted[predicted.length - 1]);
}
} // for each segment in syllable
String newSylPh = sylPh.toString();
syllable.setAttribute("ph", newSylPh);
if (tPh.length() > 0)
tPh.append(" -"); // syllable boundary
tPh.append(newSylPh);
} // for each syllable in token
t.setAttribute("ph", tPh.toString());
} // for each token in document
// return new MaryData with changed phonology
MaryData result = new MaryData(outputType(), d.getLocale());
result.setDocument(doc);
logger.debug("Setting the changed xml document finished.");
return result;
}
private void createSubStructure(Element token, AllophoneSet allophoneSet) {
String phone = token.getAttribute("ph");
if (phone.equals(""))
return; // nothing to do
if (token.getElementsByTagName(MaryXML.SYLLABLE).getLength() > 0) {
return; // there is already a substructure under this token; nothing to do
}
StringTokenizer tok = new StringTokenizer(phone, "-");
Document document = token.getOwnerDocument();
Element prosody = (Element) MaryDomUtils.getAncestor(token, MaryXML.PROSODY);
String vq = null; // voice quality
if (prosody != null) {
// Ignore any effects of ancestor prosody tags for now:
String volumeString = prosody.getAttribute("volume");
int volume = -1;
try {
volume = Integer.parseInt(volumeString);
} catch (NumberFormatException e) {
}
if (volume >= 0) {
if (volume >= 60) {
vq = "loud";
} else if (volume <= 40) {
vq = "soft";
} else {
vq = null;
}
}
}
while (tok.hasMoreTokens()) {
String sylString = tok.nextToken();
if (sylString.trim().isEmpty()) {
continue;
}
Allophone[] allophones = allophoneSet.splitIntoAllophones(sylString);
Element syllable = MaryXML.createElement(document, MaryXML.SYLLABLE);
token.appendChild(syllable);
String syllableText = "";
for (int i = 0; i < allophones.length; i++) {
if (allophones[i].isTone()) {
syllable.setAttribute("tone", allophones[i].name());
continue;
}
if (i == 0) {
syllableText = allophones[i].name();
} else {
syllableText = syllableText + " " + allophones[i].name();
}
}
// Check for stress signs:
String first = sylString.trim().substring(0, 1);
if (first.equals("'")) {
syllable.setAttribute("stress", "1");
// The primary stressed syllable of a word
// inherits the accent:
if (token.hasAttribute("accent")) {
syllable.setAttribute("accent", token.getAttribute("accent"));
}
} else if (first.equals(",")) {
syllable.setAttribute("stress", "2");
}
// Remember transcription in ph attribute:
syllable.setAttribute("ph", syllableText);
// Now identify the composing segments:
for (int i = 0; i < allophones.length; i++) {
if (allophones[i].isTone()) {
continue;
}
Element segment = MaryXML.createElement(document, MaryXML.PHONE);
syllable.appendChild(segment);
segment.setAttribute("p", allophones[i].name());
if (vq != null && !(allophones[i].name().equals("_") || allophones[i].name().equals("?"))) {
segment.setAttribute("vq", vq);
}
}
}
}
protected void updatePhAttributesFromPhElements(Element token) {
if (token == null)
throw new NullPointerException("Got null token");
if (!token.getTagName().equals(MaryXML.TOKEN)) {
throw new IllegalArgumentException("Argument should be a <" + MaryXML.TOKEN + ">, not a <" + token.getTagName() + ">");
}
StringBuilder tPh = new StringBuilder();
TreeWalker sylWalker = MaryDomUtils.createTreeWalker(token, MaryXML.SYLLABLE);
Element syl;
while ((syl = (Element) sylWalker.nextNode()) != null) {
StringBuilder sylPh = new StringBuilder();
String stress = syl.getAttribute("stress");
if (stress.equals("1"))
sylPh.append("'");
else if (stress.equals("2"))
sylPh.append(",");
TreeWalker phWalker = MaryDomUtils.createTreeWalker(syl, MaryXML.PHONE);
Element ph;
while ((ph = (Element) phWalker.nextNode()) != null) {
if (sylPh.length() > 0)
sylPh.append(" ");
sylPh.append(ph.getAttribute("p"));
}
String sylPhString = sylPh.toString();
syl.setAttribute("ph", sylPhString);
if (tPh.length() > 0)
tPh.append(" - ");
tPh.append(sylPhString);
if (syl.hasAttribute("tone")) {
tPh.append(" " + syl.getAttribute("tone"));
}
}
token.setAttribute("ph", tPh.toString());
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy