All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.modules.HMMDurationF0Modeller Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2010 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.modules;

import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.exceptions.SynthesisException;
import marytts.features.FeatureDefinition;
import marytts.features.FeatureProcessorManager;
import marytts.features.FeatureRegistry;
import marytts.features.FeatureVector;
import marytts.features.TargetFeatureComputer;
import marytts.htsengine.CartTreeSet;
import marytts.htsengine.HMMData;
import marytts.htsengine.HMMVoice;
import marytts.htsengine.HTSModel;
import marytts.htsengine.HTSPStream;
import marytts.htsengine.HTSParameterGeneration;
import marytts.htsengine.HTSUttModel;
import marytts.modules.synthesis.Voice;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;
import marytts.util.math.Polynomial;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;
import org.w3c.dom.traversal.NodeIterator;
import org.w3c.dom.traversal.TreeWalker;

/***
 * This modeller uses the HMMs of the provided hmmVoice. This modeller can be set as preferred module in the configuration file,
 * for example:
 * 
 * voice.unitSelection.preferredModules = \ marytts.modules.HMMDurationF0Modeller(local,hmmVoice)
 *
 * @author marcela
 * @deprecated
 */
public class HMMDurationF0Modeller extends InternalModule {

	/** Name of the HMM voice whose models are used; resolved through Voice.getVoice() in process(). */
	private String hmmVoiceName;
	/** Locale this module was constructed for (also passed to the superclass constructor). */
	private Locale locale;
	/** Feature processor manager handed in at construction time. */
	private FeatureProcessorManager featureProcessorManager;
	/** Target feature lister obtained or created in startup(); used by process() to list the target features. */
	private TargetFeatureLister targetFeatureLister;
	/** Feature computer for the HMM voice; (re)assigned in process() whenever the voice is found. */
	protected TargetFeatureComputer featureComputer;

	/**
	 * Create the module from a locale given as a string. The string is converted with
	 * MaryUtils.string2locale and the feature processor manager registered for that locale is used.
	 *
	 * @param locale
	 *            locale as a string
	 * @param hmmVoiceName
	 *            name of the HMM voice providing the duration and F0 models
	 * @throws Exception
	 *             Exception
	 */
	public HMMDurationF0Modeller(String locale, String hmmVoiceName) throws Exception {
		this(
				MaryUtils.string2locale(locale),
				hmmVoiceName,
				FeatureRegistry.getFeatureProcessorManager(MaryUtils.string2locale(locale)));
	}

	/**
	 * Create the module, converting ALLOPHONES input into ACOUSTPARAMS output for the given locale.
	 *
	 * @param locale
	 *            locale of this module
	 * @param hmmVoiceName
	 *            name of the HMM voice providing the duration and F0 models
	 * @param featureProcessorManager
	 *            feature processor manager to keep for this module
	 */
	public HMMDurationF0Modeller(Locale locale, String hmmVoiceName, FeatureProcessorManager featureProcessorManager) {
		super("HMMDurationF0Modeller", MaryDataType.ALLOPHONES, MaryDataType.ACOUSTPARAMS, locale);
		this.featureProcessorManager = featureProcessorManager;
		this.locale = locale;
		this.hmmVoiceName = hmmVoiceName;
	}

	/**
	 * Start this module and make sure a started TargetFeatureLister is available: the one registered in the
	 * ModuleRegistry is reused (and started if it is offline); if none can be obtained, a private one is created and
	 * started.
	 *
	 * @throws Exception
	 *             Exception
	 */
	public void startup() throws Exception {
		super.startup();

		try {
			targetFeatureLister = (TargetFeatureLister) ModuleRegistry.getModule(TargetFeatureLister.class);
		} catch (NullPointerException npe) {
			// the registry lookup failed; fall back to a private lister below
			targetFeatureLister = null;
		}

		boolean needsStartup;
		if (targetFeatureLister != null) {
			needsStartup = targetFeatureLister.getState() == MaryModule.MODULE_OFFLINE;
		} else {
			logger.info("Starting my own TargetFeatureLister");
			targetFeatureLister = new TargetFeatureLister();
			needsStartup = true;
		}
		if (needsStartup) {
			targetFeatureLister.startup();
		}
	}

	/**
	 * Predict durations and F0 values with the configured HMM voice, write them into the phone and boundary elements
	 * of the document, and finally apply the 'prosody' element specifications.
	 *
	 * @param d
	 *            input data; its document is modified in place
	 * @return the same MaryData instance that was passed in
	 * @throws Exception
	 *             Exception
	 */
	public MaryData process(MaryData d) throws Exception {

		// Utterance model: receives one model per line of the target feature listing.
		HTSUttModel uttModel = new HTSUttModel();

		/*
		 * The HMM voice should have been trained with the same data as the unit selection voice this module is used
		 * with; e.g. for the unit selection voice en_US-cmu-slt, load the HMMs from en_US-cmu-slt-hsmm.
		 */
		HMMVoice hmmVoice = (HMMVoice) Voice.getVoice(hmmVoiceName);
		String features = d.getOutputParams();
		if (hmmVoice != null) {
			featureComputer = FeatureRegistry.getTargetFeatureComputer(hmmVoice, features);
		}
		assert featureComputer != null : "Cannot get a feature computer!";

		// Collect the segments and boundaries of the current document
		Document doc = d.getDocument();
		TreeWalker tw = MaryDomUtils.createTreeWalker(doc, doc, MaryXML.PHONE, MaryXML.BOUNDARY);
		List segmentsAndBoundaries = new ArrayList();
		for (Element node = (Element) tw.nextNode(); node != null; node = (Element) tw.nextNode()) {
			segmentsAndBoundaries.add(node);
		}
		TargetFeatureComputer comp = FeatureRegistry.getTargetFeatureComputer(hmmVoice, features);
		String targetFeatureString = targetFeatureLister.listTargetFeatures(comp, segmentsAndBoundaries);

		if (hmmVoice == null) {
			logger.debug("No HMM voice called " + hmmVoiceName);
		} else {
			/* Process the listing of Mary context features and fill the utterance model */
			Scanner labels = new Scanner(targetFeatureString);
			try {
				// Build the list of models and their durations (the returned duration string is not needed here)
				processUtt(labels, uttModel, hmmVoice.getHMMData(), hmmVoice.getHMMData().getCartTreeSet());

				// Given the list of models, generate the F0 parameters and write everything into the document
				String realisedDurF0s = HmmF0Generation(uttModel, hmmVoice.getHMMData());
				setActualDurationsAndF0s(tw, realisedDurF0s);
			} finally {
				labels.close();
			}
		}

		// processing 'prosody' tags
		ByteArrayOutputStream dummy = new ByteArrayOutputStream();
		d.writeTo(dummy);

		applyProsodySpecifications(doc);

		// the result is already in d
		return d;
	}

	/**
	 * Walk the document and, for every 'prosody' element, apply its contour/pitch and rate specifications to the
	 * 'ph' elements below it.
	 * 
	 * @param doc
	 *            document to modify in place
	 */
	private void applyProsodySpecifications(Document doc) {

		TreeWalker walker = MaryDomUtils.createTreeWalker(doc, doc, MaryXML.PHONE, MaryXML.BOUNDARY, MaryXML.PROSODY);

		// TODO: read prosody tags recursively
		for (Element node = (Element) walker.nextNode(); node != null; node = (Element) walker.nextNode()) {
			if (!"prosody".equals(node.getNodeName())) {
				continue;
			}
			NodeList phones = node.getElementsByTagName("ph");
			applyNewContourSpecifications(phones, node);
			applySpeechRateSpecifications(phones, node);
		}
	}

	/**
	 * Apply the 'rate' attribute of a prosody element to the durations of the given phones.
	 * <p>
	 * Only relative percentages of the form "+N%" or "-N%" are handled; anything else is ignored. A positive rate
	 * shortens the durations, a negative rate lengthens them.
	 * 
	 * @param nl
	 *            the 'ph' elements below the prosody element
	 * @param prosodyElement
	 *            the prosody element carrying the 'rate' attribute
	 */
	private void applySpeechRateSpecifications(NodeList nl, Element prosodyElement) {

		if (!prosodyElement.hasAttribute("rate")) {
			return;
		}

		String rateAttribute = prosodyElement.getAttribute("rate");

		// Sign and number are taken from the match itself, not from fixed positions in the attribute, so
		// surrounding text can neither flip the direction nor break number parsing.
		Matcher m = Pattern.compile("([+-])(\\d+)%").matcher(rateAttribute);
		if (!m.find()) {
			return;
		}

		double percentage = Double.parseDouble(m.group(2));
		// a faster rate ("+") means shorter durations, hence the negative factor
		double direction = "+".equals(m.group(1)) ? -1.0 : +1.0;
		setSpeechRateSpecifications(nl, percentage, direction);
	}

	/**
	 * Scale the 'd' (duration) attribute of the given phones and then recompute the 'end' attribute of every phone
	 * and boundary in the owning document from the accumulated durations.
	 * 
	 * @param nl
	 *            phones whose durations are to be changed; nothing happens if the list is empty
	 * @param percentage
	 *            size of the change, in percent of the current duration
	 * @param incriment
	 *            direction of the change: +1.0 lengthens, -1.0 shortens
	 */
	private void setSpeechRateSpecifications(NodeList nl, double percentage, double incriment) {

		// Without any phone there is nothing to scale and no document to re-time.
		if (nl.getLength() == 0) {
			return;
		}

		for (int i = 0; i < nl.getLength(); i++) {
			Element e = (Element) nl.item(i);
			if (!e.hasAttribute("d")) {
				continue;
			}
			double durAttribute = Double.parseDouble(e.getAttribute("d"));
			double newDurAttribute = durAttribute + (incriment * percentage * durAttribute / 100);
			e.setAttribute("d", newDurAttribute + "");
		}

		// Durations changed, so the end times of all phones and boundaries of the document are recomputed.
		Element rootElement = ((Element) nl.item(0)).getOwnerDocument().getDocumentElement();
		NodeIterator nit = MaryDomUtils.createNodeIterator(rootElement, MaryXML.PHONE, MaryXML.BOUNDARY);
		double duration = 0.0;
		Element nd;
		while ((nd = (Element) nit.nextNode()) != null) {
			// boundaries keep their length in 'duration', phones in 'd'
			String durationAttribute = "boundary".equals(nd.getNodeName()) ? "duration" : "d";
			if (nd.hasAttribute(durationAttribute)) {
				duration += Double.parseDouble(nd.getAttribute(durationAttribute));
			}
			// the accumulated duration is multiplied by 0.001 before being stored as 'end'
			double endTime = 0.001 * duration;
			nd.setAttribute("end", endTime + "");
		}

	}

	/**
	 * Apply the 'contour' and/or 'pitch' attribute of a prosody element to the F0 values of the given phones.
	 * <p>
	 * The F0 values of the phones are turned into a continuous contour, a first order polynomial is fitted to it as
	 * the base contour, the requested modifications are applied to that base contour, and the residual (original
	 * minus base) is added back before the result is written into the 'f0' attributes.
	 * 
	 * @param nl
	 *            the 'ph' elements below the prosody element; nothing happens if the list is empty
	 * @param prosodyElement
	 *            the prosody element carrying the 'contour' and/or 'pitch' attribute
	 */
	private void applyNewContourSpecifications(NodeList nl, Element prosodyElement) {

		String contourAttribute = prosodyElement.hasAttribute("contour") ? prosodyElement.getAttribute("contour") : null;
		String pitchAttribute = prosodyElement.hasAttribute("pitch") ? prosodyElement.getAttribute("pitch") : null;

		if (contourAttribute == null && pitchAttribute == null) {
			return;
		}

		// A prosody element without any phone has no contour to modify (and no first/last phone to time it by).
		if (nl.getLength() == 0) {
			return;
		}

		double[] contour = getContiniousContour(nl);
		contour = interpolateNonZeroValues(contour);
		double[] coeffs = Polynomial.fitPolynomial(contour, 1);
		double[] polyValues = Polynomial.generatePolynomialValues(coeffs, 100, 0, 1);
		double[] diffValues = new double[contour.length];

		// Extract base contour from original contour
		for (int i = 0; i < contour.length; i++) {
			diffValues[i] = contour[i] - polyValues[i];
		}

		polyValues = setBaseContourModifications(polyValues, contourAttribute, pitchAttribute);

		// Now, imposing back the diff. contour
		for (int i = 0; i < contour.length; i++) {
			contour[i] = diffValues[i] + polyValues[i];
		}

		setModifiedContour(nl, contour);
	}

	/**
	 * Write a modified contour back into the 'f0' attributes of the given phones.
	 * <p>
	 * Every "(percent,value)" pair of an existing 'f0' attribute keeps its percent position; its value is replaced
	 * by the contour value at the matching place of the time span covered by the phones. Phones without an 'f0'
	 * attribute are left untouched.
	 * 
	 * @param nl
	 *            the phones covered by the contour; they are expected to carry 'end' and 'd' attributes
	 * @param contour
	 *            the modified contour, spread evenly over the time span of the phones
	 */
	private void setModifiedContour(NodeList nl, double[] contour) {

		if (nl.getLength() == 0 || contour.length == 0) {
			return;
		}

		Element firstElement = (Element) nl.item(0);
		Element lastElement = (Element) nl.item(nl.getLength() - 1);

		double fEnd = Double.parseDouble(firstElement.getAttribute("end"));
		double fDuration = 0.001 * Double.parseDouble(firstElement.getAttribute("d"));
		double lEnd = Double.parseDouble(lastElement.getAttribute("end"));
		double fStart = fEnd - fDuration; // 'prosody' tag starting point
		double duration = lEnd - fStart; // duration of 'prosody' modification request

		// compiled once for all phones
		Pattern f0Pair = Pattern.compile("(\\d+),(\\d+)");

		for (int i = 0; i < nl.getLength(); i++) {

			Element e = (Element) nl.item(i);
			String f0Attribute = e.getAttribute("f0");

			if (f0Attribute == null || "".equals(f0Attribute)) {
				continue;
			}

			double phoneEndTime = Double.parseDouble(e.getAttribute("end"));
			double phoneDuration = 0.001 * Double.parseDouble(e.getAttribute("d"));

			Matcher m = f0Pair.matcher(f0Attribute);
			StringBuilder setF0String = new StringBuilder();
			while (m.find()) {
				int percent = Integer.parseInt(m.group(1));
				double partPhone = phoneDuration * (percent / 100.0);

				int placeIndex = (int) Math.floor(((((phoneEndTime - phoneDuration) - fStart) + partPhone) * contour.length)
						/ duration);
				// keep the index inside the contour, whatever the timing attributes say
				placeIndex = Math.max(0, Math.min(contour.length - 1, placeIndex));
				setF0String.append('(').append(percent).append(',').append((int) contour[placeIndex]).append(')');
			}

			e.setAttribute("f0", setF0String.toString());
		}
	}

	/**
	 * Apply the requested modifications to the base contour (the first order polynomial fit): first the 'pitch'
	 * specification, then the 'contour' specification. A null or empty attribute is skipped.
	 * 
	 * @param polyValues
	 *            values of the base contour
	 * @param contourAttribute
	 *            value of the 'contour' attribute, may be null
	 * @param pitchAttribute
	 *            value of the 'pitch' attribute, may be null
	 * @return the modified base contour
	 */
	private double[] setBaseContourModifications(double[] polyValues, String contourAttribute, String pitchAttribute) {

		double[] modified = polyValues;

		boolean pitchRequested = pitchAttribute != null && !"".equals(pitchAttribute);
		if (pitchRequested) {
			modified = setPitchSpecifications(modified, pitchAttribute);
		}

		boolean contourRequested = contourAttribute != null && !"".equals(contourAttribute);
		if (contourRequested) {
			modified = setContourSpecifications(modified, contourAttribute);
		}

		return modified;
	}

	/**
	 * Apply a 'contour' specification to the base contour.
	 * <p>
	 * The first and last value of the base contour are kept, every "(position%,change)" pair of the specification
	 * sets the value at its position, and all positions in between are filled in by interpolateNonZeroValues.
	 * 
	 * @param polyValues
	 *            values of the base contour; must have exactly 100 entries
	 * @param contourAttribute
	 *            value of the 'contour' attribute
	 * @return modifiedF0Values the modified contour, a new array of 100 entries
	 * @throws RuntimeException
	 *             if polyValues does not have 100 entries
	 */
	private double[] setContourSpecifications(double[] polyValues, String contourAttribute) {

		Map<String, String> f0Specifications = getContourSpecifications(contourAttribute);
		// a value of 0.0 marks "not specified"; those entries are interpolated at the end
		double[] modifiedF0Values = new double[100];

		if (polyValues.length != modifiedF0Values.length) {
			throw new RuntimeException("The lengths of two arrays are not same!");
		}

		int last = modifiedF0Values.length - 1;
		modifiedF0Values[0] = polyValues[0];
		modifiedF0Values[last] = polyValues[last];

		for (Map.Entry<String, String> specification : f0Specifications.entrySet()) {

			String percent = specification.getKey();
			String f0Value = specification.getValue();

			// "100%" addresses the end of the contour, i.e. the last entry, not one past it
			int percentDuration = Math.min(Integer.parseInt(percent.substring(0, percent.length() - 1)), last);

			double sign;
			if (f0Value.startsWith("+")) {
				sign = 1.0;
			} else if (f0Value.startsWith("-")) {
				sign = -1.0;
			} else {
				continue;
			}

			if (f0Value.endsWith("%")) {
				double f0Mod = Double.parseDouble(f0Value.substring(1, f0Value.length() - 1));
				modifiedF0Values[percentDuration] = polyValues[percentDuration]
						+ sign * (polyValues[percentDuration] * (f0Mod / 100.0));
			} else if (f0Value.endsWith("Hz")) {
				int f0Mod = Integer.parseInt(f0Value.substring(1, f0Value.length() - 2));
				modifiedF0Values[percentDuration] = polyValues[percentDuration] + sign * f0Mod;
			}
		}

		return interpolateNonZeroValues(modifiedF0Values);

	}

	/**
	 * Apply a 'pitch' specification to all values of the base contour, in place. Ex: pitch="+20%" or pitch="+50Hz"
	 * <p>
	 * Only a signed whole number followed by "%" or "Hz" is supported; any other value leaves the contour
	 * unchanged.
	 * 
	 * @param polyValues
	 *            values of the base contour; modified in place
	 * @param pitchAttribute
	 *            value of the 'pitch' attribute
	 * @return polyValues the same array that was passed in
	 */
	private double[] setPitchSpecifications(double[] polyValues, String pitchAttribute) {

		// The unit is captured separately so that the number is parsed without it, whether the unit has one
		// character ("%") or two ("Hz").
		Matcher m = Pattern.compile("([+-])(\\d+)(%|Hz)").matcher(pitchAttribute.trim());
		if (!m.matches()) {
			return polyValues;
		}

		double sign = "+".equals(m.group(1)) ? 1.0 : -1.0;
		double modificationPitch = Double.parseDouble(m.group(2));
		boolean relative = "%".equals(m.group(3));

		for (int i = 0; i < polyValues.length; i++) {
			double change = relative ? polyValues[i] * (modificationPitch / 100.0) : modificationPitch;
			polyValues[i] = polyValues[i] + sign * change;
		}

		return polyValues;

	}

	/**
	 * Parse a 'contour' attribute into a map from position to change.
	 * <p>
	 * Every occurrence of "P%,C" is collected, where P is a whole number and C is a signed whole number followed by
	 * "%" or "Hz"; e.g. "(0%,+20Hz)(50%,-10%)" yields {"0%" -> "+20Hz", "50%" -> "-10%"}. If a position occurs more
	 * than once, the last occurrence wins.
	 * 
	 * @param attribute
	 *            value of the 'contour' attribute
	 * @return f0Map map from position (with its "%" sign) to change (with sign and unit)
	 */
	private Map<String, String> getContourSpecifications(String attribute) {

		Map<String, String> f0Map = new HashMap<String, String>();
		// the unit is an alternation: a character class would only ever match one character of "Hz"
		Pattern p = Pattern.compile("(\\d+%),([+-]\\d+(?:%|Hz))");

		Matcher m = p.matcher(attribute);
		while (m.find()) {
			f0Map.put(m.group(1), m.group(2));
		}
		return f0Map;
	}

	/**
	 * Replace the zero entries of a contour, in place, by values derived from the neighbouring non-zero entries.
	 * <p>
	 * Leading zeros take the first non-zero value, trailing zeros take the last non-zero value, and a run of zeros
	 * between two non-zero values is filled with a constant step. A contour without any non-zero value is returned
	 * unchanged.
	 * 
	 * @param contour
	 *            contour to fill; modified in place
	 * @return contour the same array that was passed in
	 */
	private double[] interpolateNonZeroValues(double[] contour) {

		for (int i = 0; i < contour.length; i++) {
			if (contour[i] != 0) {
				continue;
			}
			int index = findNextIndexNonZero(contour, i);
			if (index == -1) {
				// No non-zero value follows. If none precedes either (i == 0) the contour is all zeros and there
				// is nothing to copy from; otherwise hold the last value until the end.
				if (i > 0) {
					Arrays.fill(contour, i, contour.length, contour[i - 1]);
				}
				break;
			}
			if (i == 0) {
				Arrays.fill(contour, 0, index, contour[index]);
			} else {
				// NOTE(review): the step divides by (index - i), so the last filled entry already equals
				// contour[index]; an evenly spaced ramp would divide by (index - i + 1). Kept as is - confirm.
				double step = (contour[index] - contour[i - 1]) / (index - i);
				for (int j = i; j < index; j++) {
					contour[j] = contour[j - 1] + step;
				}
			}
			i = index - 1;
		}

		return contour;
	}

	/**
	 * Find the next non-zero entry of a contour after a given position.
	 * 
	 * @param contour
	 *            contour to search
	 * @param current
	 *            position after which the search starts (the position itself is not examined)
	 * @return index of the first non-zero entry after current, or -1 if there is none
	 */
	private int findNextIndexNonZero(double[] contour, int current) {
		int candidate = current + 1;
		while (candidate < contour.length) {
			if (contour[candidate] != 0) {
				return candidate;
			}
			candidate++;
		}
		return -1;
	}

	/**
	 * Build a contour of 100 frames from the 'f0' attributes of a "ph" node list.
	 * <p>
	 * The contour covers the time span from the start of the first phone to the end of the last phone. Every
	 * "(percent,value)" pair of a phone is placed at the frame matching its time; frames without a value stay 0.0.
	 * 
	 * @param nl
	 *            the phones; those carrying 'f0' (and the first and last one) are expected to have 'end' and 'd'
	 *            attributes
	 * @return contour array of 100 frames; all zeros if the list is empty
	 */
	private double[] getContiniousContour(NodeList nl) {

		double[] contour = new double[100]; // Assume contour has 100 frames

		if (nl.getLength() == 0) {
			return contour;
		}

		Element firstElement = (Element) nl.item(0);
		Element lastElement = (Element) nl.item(nl.getLength() - 1);

		double fEnd = Double.parseDouble(firstElement.getAttribute("end"));
		double fDuration = 0.001 * Double.parseDouble(firstElement.getAttribute("d"));
		double lEnd = Double.parseDouble(lastElement.getAttribute("end"));
		double fStart = fEnd - fDuration; // 'prosody' tag starting point
		double duration = lEnd - fStart; // duration of 'prosody' modification request

		for (int i = 0; i < nl.getLength(); i++) {
			Element e = (Element) nl.item(i);
			String f0Attribute = e.getAttribute("f0");

			if (f0Attribute == null || "".equals(f0Attribute)) {
				continue;
			}

			double phoneEndTime = Double.parseDouble(e.getAttribute("end"));
			double phoneDuration = 0.001 * Double.parseDouble(e.getAttribute("d"));

			Map<Integer, Integer> f0Map = getPhoneF0Data(f0Attribute);

			for (Map.Entry<Integer, Integer> f0Point : f0Map.entrySet()) {
				double partPhone = phoneDuration * (f0Point.getKey().doubleValue() / 100.0);
				int placeIndex = (int) Math.floor(((((phoneEndTime - phoneDuration) - fStart) + partPhone) * 100)
						/ duration);
				// keep the index inside the contour, whatever the timing attributes say
				placeIndex = Math.max(0, Math.min(contour.length - 1, placeIndex));
				contour[placeIndex] = f0Point.getValue().doubleValue();
			}
		}

		return contour;
	}

	/**
	 * Parse the 'f0' attribute of a phone into a map from percent position to F0 value.
	 * <p>
	 * Every occurrence of "X,Y" with whole numbers X and Y is collected, e.g. "(10,120)(50,130)" yields
	 * {10 -> 120, 50 -> 130}. If a position occurs more than once, the last occurrence wins.
	 * 
	 * @param attribute
	 *            value of the 'f0' attribute
	 * @return f0Map map from percent position to F0 value
	 */
	private Map<Integer, Integer> getPhoneF0Data(String attribute) {

		Map<Integer, Integer> f0Map = new HashMap<Integer, Integer>();
		Pattern p = Pattern.compile("(\\d+),(\\d+)");

		Matcher m = p.matcher(attribute);
		while (m.find()) {
			f0Map.put(Integer.valueOf(m.group(1)), Integer.valueOf(m.group(2)));
		}

		return f0Map;

	}

	/**
	 * Parse Mary context features and fill the utterance model.
	 * <p>
	 * The input is read in three parts separated by empty lines: the first two parts are skipped, and every line of
	 * the third part is converted into a feature vector. For each such line a new model is appended to um, its phone
	 * name is set from the "phone" feature, its state durations are looked up with cart.searchDurInCartTree, its
	 * total duration in milliseconds is set, a "phone duration" line is appended to the realised acoustic parameters
	 * of um, and its LF0 pdfs are looked up with cart.searchLf0InCartTree. Finally the number of frames lying in
	 * voiced states is accumulated into the LF0 frame count of um.
	 * 
	 * @param s
	 *            scanner over the target feature listing
	 * @param um
	 *            utterance model to fill; modified in place
	 * @param htsData
	 *            HMM data providing the frame period, rate, feature definition and voicing threshold
	 * @param cart
	 *            tree set used to look up durations and LF0 pdfs
	 * @return the realised acoustic parameters collected in um (one "phone duration" line per model)
	 * @throws Exception
	 *             Exception
	 */
	private String processUtt(Scanner s, HTSUttModel um, HMMData htsData, CartTreeSet cart) throws Exception {
		// NOTE(review): k, statesDuration, newStateDuration, durSec, numLab and numLines are never read.
		int i, mstate, frame, k, statesDuration, newStateDuration;
		;
		HTSModel m; /* current model, corresponds to a line in label file */
		String nextLine;
		double diffdurOld = 0.0;
		double diffdurNew = 0.0;
		// frame period divided by rate, in milliseconds and in seconds
		// (presumably frame shift in samples over sampling rate - TODO confirm)
		float fperiodmillisec = ((float) htsData.getFperiod() / (float) htsData.getRate()) * 1000;
		float fperiodsec = ((float) htsData.getFperiod() / (float) htsData.getRate());
		Integer dur;
		boolean firstPh = true;
		boolean lastPh = false;

		Float durSec;
		Integer numLab = 0;
		FeatureVector fv;
		FeatureDefinition feaDef = htsData.getFeatureDefinition();

		/* Skip mary context features definition (everything up to the first empty line) */
		while (s.hasNext()) {
			nextLine = s.nextLine();
			if (nextLine.trim().equals(""))
				break;
		}
		/* skip until byte values (everything up to the second empty line) */
		int numLines = 0;
		while (s.hasNext()) {
			nextLine = s.nextLine();
			if (nextLine.trim().equals(""))
				break;
			numLines++;
		}

		/* Parse byte values: one model per remaining line */
		i = 0;
		while (s.hasNext()) {
			nextLine = s.nextLine();
			// System.out.println("STR: " + nextLine);

			fv = feaDef.toFeatureVector(0, nextLine);
			um.addUttModel(new HTSModel(cart.getNumStates()));
			m = um.getUttModel(i);
			/* this function also sets the phone name, the phone between - and + */
			m.setPhoneName(fv.getFeatureAsString(feaDef.getFeatureIndex("phone"), feaDef));

			// the line just read was the last one
			if (!(s.hasNext()))
				lastPh = true;

			// Determine state-level duration
			// Estimate state duration from state duration model (Gaussian)
			// the value returned for this model is handed to the lookup of the next model (diffdurOld)
			diffdurNew = cart.searchDurInCartTree(m, fv, htsData, firstPh, lastPh, diffdurOld);
			um.setTotalFrame(um.getTotalFrame() + m.getTotalDur());

			// Set realised durations in model
			m.setTotalDurMillisec((int) (fperiodmillisec * m.getTotalDur()));
			diffdurOld = diffdurNew;
			durSec = um.getTotalFrame() * fperiodsec;

			numLab++;
			dur = m.getTotalDurMillisec();
			// one "phone duration" line per model
			um.concatRealisedAcoustParams(m.getPhoneName() + " " + dur.toString() + "\n");
			// System.out.println("phone=" + m.getPhoneName() + " dur=" + m.getTotalDur() +" durTotal=" + um.getTotalFrame() );

			/*
			 * Find pdf for LF0, this function sets the pdf for each state. here the model (phone) is defined as voiced or
			 * unvoiced.
			 */
			cart.searchLf0InCartTree(m, fv, feaDef, htsData.getUV());

			/* increment number of models in utterance model */
			um.setNumModel(um.getNumModel() + 1);
			/* update number of states */
			um.setNumState(um.getNumState() + cart.getNumStates());
			i++;

			if (firstPh)
				firstPh = false;
		}

		/* count the frames that lie in voiced states; this is the number of LF0 frames */
		for (i = 0; i < um.getNumUttModel(); i++) {
			m = um.getUttModel(i);
			for (mstate = 0; mstate < cart.getNumStates(); mstate++)
				for (frame = 0; frame < m.getDur(mstate); frame++)
					if (m.getVoiced(mstate))
						um.setLf0Frame(um.getLf0Frame() + 1);
			// System.out.println("Vector m[" + i + "]=" + m.getPhoneName() );
		}

		return um.getRealisedAcoustParams();

	} /* method _ProcessUtt */

	/***
	 * Generate F0 values for voiced frames out of HMMs.
	 * <p>
	 * The LF0 means and inverse variances of all voiced frames are copied into a parameter stream, the parameters
	 * are generated with mlpg, and the exponential of each generated value is taken as F0. The result is returned as
	 * text with one line per model: "phone durationInMillis f0", where f0 is a sequence of "(percent,value)" pairs
	 * for models whose states are mostly voiced, and "0" otherwise.
	 * 
	 * @param um
	 *            HTSUttModel, linked list of model objects
	 * @param htsData
	 *            HMMData
	 * @return f0Values one line per model, as described above
	 * @throws Exception
	 *             Exception
	 */
	public String HmmF0Generation(HTSUttModel um, HMMData htsData) throws Exception {

		int frame, uttFrame, lf0Frame;
		int hmmState, k, n, i;
		boolean nobound;
		HTSModel m;
		HTSPStream lf0Pst = null;
		boolean voiced[];
		CartTreeSet ms = htsData.getCartTreeSet();

		/* for lf0 count just the number of lf0frames that are voiced or non-zero */
		// NOTE(review): the meaning of the last argument (200) is not visible in this file
		lf0Pst = new HTSPStream(ms.getLf0Stream(), um.getLf0Frame(), HMMData.FeatureType.LF0, 200);

		uttFrame = lf0Frame = 0;
		voiced = new boolean[um.getTotalFrame()];

		/* mark every frame of the utterance as voiced or unvoiced, according to the state it belongs to */
		for (i = 0; i < um.getNumUttModel(); i++) {
			m = um.getUttModel(i);
			for (hmmState = 0; hmmState < ms.getNumStates(); hmmState++)
				for (frame = 0; frame < m.getDur(hmmState); frame++) {
					voiced[uttFrame] = m.getVoiced(hmmState);
					uttFrame++;
					if (m.getVoiced(hmmState))
						lf0Frame++;
				}
		}

		uttFrame = 0;
		lf0Frame = 0;
		/* copy pdfs */
		for (i = 0; i < um.getNumUttModel(); i++) {
			m = um.getUttModel(i);
			for (hmmState = 0; hmmState < ms.getNumStates(); hmmState++) {
				for (frame = 0; frame < m.getDur(hmmState); frame++) {

					// System.out.println("uttFrame=" + uttFrame + "  phone frame=" + frame + "  phone hmmState=" + hmmState);
					/* copy pdfs for lf0 */
					for (k = 0; k < ms.getLf0Stream(); k++) {
						int lw = lf0Pst.getDWLeftBoundary(k);
						int rw = lf0Pst.getDWRightBoundary(k);
						nobound = true;
						/* check if current frame is voiced/unvoiced boundary or not */
						for (n = lw; n <= rw; n++)
							if ((uttFrame + n) <= 0 || um.getTotalFrame() <= (uttFrame + n))
								nobound = false;
							else
								nobound = (nobound && voiced[uttFrame + n]);
						/* copy pdfs */
						if (voiced[uttFrame]) {
							lf0Pst.setMseq(lf0Frame, k, m.getLf0Mean(hmmState, k));
							if (nobound || k == 0)
								lf0Pst.setIvseq(lf0Frame, k, HTSParameterGeneration.finv(m.getLf0Variance(hmmState, k)));
							else
								/* the variances for dynamic feature are set to inf on v/uv boundary */
								lf0Pst.setIvseq(lf0Frame, k, 0.0);
						}
					}
					/* lf0Frame indexes voiced frames only, uttFrame indexes all frames */
					if (voiced[uttFrame])
						lf0Frame++;
					uttFrame++;
				} /* for each frame in this hmmState */
			} /* for each hmmState in this model */
		} /* for each model in this utterance */

		// System.out.println("After copying pdfs to PStreams uttFrame=" + uttFrame + " lf0frame=" + lf0Frame);
		// System.out.println("mseq[" + uttFrame + "][" + k + "]=" + mceppst.get_mseq(uttFrame, k) + "   " +
		// m.get_mcepmean(hmmState, k));

		/* one F0 value per frame of the utterance; unvoiced frames get 0.0 */
		double f0s[] = new double[voiced.length];
		i = 0;
		if (lf0Frame > 0) {
			logger.info("Parameter generation for LF0: ");
			lf0Pst.mlpg(htsData, htsData.getUseGV());
			for (int t = 0; t < voiced.length; t++) {
				if (voiced[t]) {
					/* the generated parameter is a log value, hence the exponential */
					f0s[t] = Math.exp(lf0Pst.getPar(i, 0));
					// f0s[t] = lf0Pst.getPar(i,0);
					i++;
				} else
					f0s[t] = 0.0;
				// System.out.println("GEN f0s[" + t + "]=" + f0s[t]);
			}
		}

		double totalDur;
		int totalFrames;
		String f0Values = "";
		int t = 0; // total number of frames voiced and unvoiced
		for (i = 0; i < um.getNumUttModel(); i++) {
			m = um.getUttModel(i);
			f0Values += m.getPhoneName() + " " + m.getTotalDurMillisec() + " ";
			// System.out.println(m.getPhoneName() + " dur=" + m.getTotalDurMillisec() + " No. frames=" + m.getTotalDur());
			totalDur = m.getTotalDur();
			totalFrames = 0;
			/**
			 * Here I need to check if the phone, or model is voiced or not. A model has five states and each state can be voiced
			 * or unvoiced, normally if the phone is voiced the majority of the states should be voiced
			 */
			if (checkModelVoiced(m, ms.getNumStates())) // if the majority of the model states are voiced
			{
				for (int j = 0; j < ms.getNumStates(); j++) {
					// System.out.print("  state=" + j);

					for (frame = 0; frame < m.getDur(j); frame++) {
						totalFrames++;
						// System.out.format("(%d frame=%d=%.2f ) %.2f ", t, totalFrames, (totalFrames/totalDur)*100, f0s[t]);
						/* position of the frame within the model in percent, and the F0 value truncated to an integer */
						if (f0s[t] > 0.0) // there are some phoneme states that might contain voiced and unvoiced frames, the
											// unvoiced frames have f0=0.0
							f0Values += "(" + Integer.toString((int) ((totalFrames / totalDur) * 100)) + ","
									+ Integer.toString((int) f0s[t]) + ")";
						t++;
					} // for each frame in this hmmState
						// System.out.println();
				} // for each hmmState in this model

			} else { // if the majority of the model states are unvoiced
				/* skip the frames of this model and mark it with a single "0" */
				t = t + m.getTotalDur();
				f0Values += "0";
			}

			f0Values += "\n";

		} // for each model in this utterance

		// System.out.println(f0Values);
		return (f0Values);

	} /* method HmmF0Generation */

	/***
	 * Set durations.
	 * <p>
	 * The durations are matched to the phones and boundaries of the document in document order: every phone takes
	 * the first unused line with its phone name, every boundary with a 'duration' attribute or a breakindex of at
	 * least 3 takes the first unused "_" line. Phones get 'd' (milliseconds) and 'end' (seconds), boundaries get
	 * 'duration' (milliseconds).
	 * 
	 * @param tw
	 *            tree walker over the phones and boundaries; it is reset to its root first
	 * @param durations
	 *            one "phone durationInMillis" line per model
	 * @throws SynthesisException
	 *             SynthesisException
	 */
	public void setActualDurations(TreeWalker tw, String durations) throws SynthesisException {
		Vector<String> ph = new Vector<String>();
		Vector<Integer> dur = new Vector<Integer>(); // individual durations, in millis
		float totalDur = 0f; // total duration, in seconds

		Scanner s = new Scanner(durations).useDelimiter("\n");
		try {
			while (s.hasNext()) {
				String[] str = s.next().split(" ");
				ph.add(str[0]);
				dur.add(Integer.valueOf(str[1]));
			}
		} finally {
			s.close();
		}
		/* the duration of the first phone includes the duration of the initial pause */
		if (dur.size() > 1 && ph.get(0).contentEquals("_")) {
			dur.set(1, dur.get(1) + dur.get(0));
			/* blank this entry, otherwise the first boundary would find it again */
			ph.set(0, "");
		}

		// NOTE(review): a phone or pause that has no line left in 'durations' makes indexOf return -1 and the
		// lookup below fail with an ArrayIndexOutOfBoundsException.
		Element e;
		tw.setCurrentNode(tw.getRoot());
		while ((e = (Element) tw.nextNode()) != null) {
			if (e.getTagName().equals(MaryXML.PHONE)) {
				int index = ph.indexOf(e.getAttribute("p"));
				int currentDur = dur.elementAt(index);
				totalDur += currentDur * 0.001f;
				e.setAttribute("d", String.valueOf(currentDur));
				e.setAttribute("end", String.valueOf(totalDur));
				// blank this entry, otherwise the next phone with the same name would find it again
				ph.set(index, "");
			} else if (e.getTagName().contentEquals(MaryXML.BOUNDARY)) {
				int breakindex = 0;
				try {
					breakindex = Integer.parseInt(e.getAttribute("breakindex"));
				} catch (NumberFormatException ignored) {
					// a missing or non-numeric breakindex counts as 0
				}
				if (e.hasAttribute("duration") || breakindex >= 3) {
					int index = ph.indexOf("_");
					int currentDur = dur.elementAt(index);
					totalDur += currentDur * 0.001f;
					e.setAttribute("duration", String.valueOf(currentDur));
					// blank this entry, otherwise the next boundary would find it again
					ph.set(index, "");
				}
			} // else ignore whatever other label...

		}
	}

	/***
	 * Set durations and f0 values. The meaning of f0="(X,Y)" is: at X% of the phone duration, the F0 value is Y Hz.
	 * <p>
	 * The lines are matched to the phones and boundaries of the document in document order: every phone takes the
	 * first unused line with its phone name, every boundary with a 'duration' attribute or a breakindex of at least
	 * 3 takes the first unused "_" line. Phones get 'd' (milliseconds), 'end' (seconds) and, unless their F0 field
	 * is "0" or missing, 'f0'; boundaries get 'duration' (milliseconds).
	 * 
	 * @param tw
	 *            treewalker over the phones and boundaries; it is reset to its root first
	 * @param durF0s
	 *            String containing in each line one phoneme its duration and its F0 values if it is voiced or 0 if it is unvoiced
	 * @throws SynthesisException
	 *             SynthesisException
	 */
	public void setActualDurationsAndF0s(TreeWalker tw, String durF0s) throws SynthesisException {
		Vector<String> ph = new Vector<String>();
		Vector<Integer> dur = new Vector<Integer>(); // individual durations, in millis
		Vector<String> f0 = new Vector<String>();
		float totalDur = 0f; // total duration, in seconds

		Scanner s = new Scanner(durF0s).useDelimiter("\n");
		try {
			while (s.hasNext()) {
				String[] str = s.next().split(" ");
				ph.add(str[0]);
				dur.add(Integer.valueOf(str[1]));
				// split() drops a trailing empty field, so a line without F0 values has only two fields;
				// such a phone is treated as unvoiced
				f0.add(str.length > 2 ? str[2] : "0");
			}
		} finally {
			s.close();
		}
		/* the duration of the first phone includes the duration of the initial pause */
		if (dur.size() > 1 && ph.get(0).contentEquals("_")) {
			dur.set(1, dur.get(1) + dur.get(0));
			/* blank this entry, otherwise the first boundary would find it again */
			ph.set(0, "");
		}

		// NOTE(review): a phone or pause that has no line left in 'durF0s' makes indexOf return -1 and the
		// lookup below fail with an ArrayIndexOutOfBoundsException.
		Element e;
		tw.setCurrentNode(tw.getRoot());
		while ((e = (Element) tw.nextNode()) != null) {
			if (e.getTagName().equals(MaryXML.PHONE)) {
				int index = ph.indexOf(e.getAttribute("p"));
				int currentDur = dur.elementAt(index);
				String currentF0 = f0.elementAt(index);
				totalDur += currentDur * 0.001f;
				e.setAttribute("d", String.valueOf(currentDur));
				e.setAttribute("end", String.valueOf(totalDur));
				if (!currentF0.contentEquals("0"))
					e.setAttribute("f0", currentF0);
				// blank this entry, otherwise the next phone with the same name would find it again
				ph.set(index, "");
			} else if (e.getTagName().contentEquals(MaryXML.BOUNDARY)) {
				int breakindex = 0;
				try {
					breakindex = Integer.parseInt(e.getAttribute("breakindex"));
				} catch (NumberFormatException ignored) {
					// a missing or non-numeric breakindex counts as 0
				}
				if (e.hasAttribute("duration") || breakindex >= 3) {
					int index = ph.indexOf("_");
					int currentDur = dur.elementAt(index);
					totalDur += currentDur * 0.001f;
					e.setAttribute("duration", String.valueOf(currentDur));
					// blank this entry, otherwise the next boundary would find it again
					ph.set(index, "");
				}
			} // else ignore whatever other label...

		}
	}

	/**
	 * Decide whether a model counts as voiced: it does when at least as many of its states are voiced as unvoiced.
	 * 
	 * @param m
	 *            model to examine
	 * @param numStates
	 *            number of states to look at, starting with state 0
	 * @return true if the number of voiced states is greater than or equal to the number of unvoiced states
	 */
	private boolean checkModelVoiced(HTSModel m, int numStates) {
		int voicedStates = 0;
		int unvoicedStates = 0;
		for (int state = 0; state < numStates; state++) {
			if (m.getVoiced(state)) {
				voicedStates++;
			} else {
				unvoicedStates++;
			}
		}
		// a tie counts as voiced
		return voicedStates >= unvoicedStates;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy