All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.unitselection.UnitSelectionSynthesizer Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.unitselection;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;

import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.datatypes.MaryXML;
import marytts.exceptions.SynthesisException;
import marytts.modules.synthesis.Voice;
import marytts.modules.synthesis.WaveformSynthesizer;
import marytts.modules.synthesis.Voice.Gender;
import marytts.server.MaryProperties;
import marytts.unitselection.concat.UnitConcatenator;
import marytts.unitselection.concat.BaseUnitConcatenator.UnitData;
import marytts.unitselection.data.Unit;
import marytts.unitselection.data.UnitDatabase;
import marytts.unitselection.select.HalfPhoneTarget;
import marytts.unitselection.select.SelectedUnit;
import marytts.unitselection.select.Target;
import marytts.unitselection.select.UnitSelector;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryNormalisedWriter;
import marytts.util.dom.NameNodeFilter;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.w3c.dom.Element;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.TreeWalker;

/**
 * Builds and synthesizes unit selection voices.
 * <p>
 * {@link #startup()} registers one {@link UnitSelectionVoice} per entry of the
 * <code>unitselection.voices.list</code> property; {@link #synthesize(List, Voice, String)} selects units for the given
 * tokens and boundaries, concatenates them into audio and writes the realised durations back into the MaryXML elements.
 * 
 * @author Marc Schröder, Anna Hunecke
 *
 */

public class UnitSelectionSynthesizer implements WaveformSynthesizer {
	/** Name of the MaryXML attribute that is added for unit selection diagnostics. */
	private static final String UNIT_ATTRIBUTE_NAME = "units";

	/**
	 * Logger used by this synthesizer. It is only assigned in {@link #startup()}, so the other methods must not be called
	 * before startup.
	 */
	private Logger logger;

	public UnitSelectionSynthesizer() {
	}

	/**
	 * Start up the waveform synthesizer. This must be called once before calling synthesize().
	 * <p>
	 * Creates and registers a {@link UnitSelectionVoice} for every voice name listed in the
	 * <code>unitselection.voices.list</code> property and logs how long each voice took to load.
	 * 
	 * @throws Exception
	 *             if a voice cannot be created or registered
	 */
	public void startup() throws Exception {
		logger = MaryUtils.getLogger("UnitSelectionSynthesizer");
		// Register UnitSelection voices:
		logger.debug("Register UnitSelection voices:");
		List<String> voiceNames = MaryProperties.getList("unitselection.voices.list");
		for (String voiceName : voiceNames) {
			long startTime = System.currentTimeMillis();
			Voice unitSelVoice = new UnitSelectionVoice(voiceName, this);
			logger.debug("Voice '" + unitSelVoice + "'");
			Voice.registerVoice(unitSelVoice);
			long loadingTime = System.currentTimeMillis() - startTime;
			logger.info("Loading of voice " + voiceName + " took " + loadingTime + " milliseconds");
		}
		logger.info("started.");
	}

	/**
	 * Perform a power-on self test by processing some example input data.
	 * <p>
	 * The test synthesizes the ACOUSTPARAMS example text with the first voice available for this synthesizer. It is skipped
	 * when no voice is available, when that voice is not a "general" domain voice, or when there is no example text for the
	 * voice's locale.
	 * 
	 * @throws Error
	 *             if the module does not work properly.
	 */
	public void powerOnSelfTest() throws Error {
		try {
			Collection<Voice> myVoices = Voice.getAvailableVoices(this);
			if (myVoices.isEmpty()) {
				return;
			}
			UnitSelectionVoice unitSelVoice = (UnitSelectionVoice) myVoices.iterator().next();
			assert unitSelVoice != null;
			if (!unitSelVoice.getDomain().equals("general")) {
				logger.info("Cannot perform power-on self test using limited-domain voice '" + unitSelVoice.getName()
						+ "' - skipping.");
				return;
			}
			String exampleText = MaryDataType.ACOUSTPARAMS.exampleText(unitSelVoice.getLocale());
			if (exampleText == null) {
				logger.debug("No example text -- no power-on self test!");
			} else {
				// Only build the input data once we know that the test will actually run.
				MaryData in = new MaryData(MaryDataType.get("ACOUSTPARAMS"), unitSelVoice.getLocale());
				in.readFrom(new StringReader(exampleText));
				in.setDefaultVoice(unitSelVoice);

				// Collect all token and boundary elements in document order.
				List<Element> tokensAndBoundaries = new ArrayList<Element>();
				TreeWalker tw = ((DocumentTraversal) in.getDocument()).createTreeWalker(in.getDocument(),
						NodeFilter.SHOW_ELEMENT, new NameNodeFilter(new String[] { MaryXML.TOKEN, MaryXML.BOUNDARY }), false);
				Element el;
				while ((el = (Element) tw.nextNode()) != null) {
					tokensAndBoundaries.add(el);
				}
				AudioInputStream ais = synthesize(tokensAndBoundaries, unitSelVoice, null);
				assert ais != null;
			}
		} catch (Throwable t) {
			// The cause is attached to the Error, so the stack trace is not lost.
			throw new Error("Module " + toString() + ": Power-on self test failed.", t);
		}
		logger.info("Power-on self test complete.");
	}

	/**
	 * {@inheritDoc}
	 * <p>
	 * Selects units for the given elements, concatenates them into audio, and then updates the MaryXML elements of the
	 * selected units with the realised durations and a diagnostic <code>units</code> attribute.
	 * 
	 * @param tokensAndBoundaries
	 *            the MaryXML token and boundary elements to synthesize
	 * @param voice
	 *            the voice to use; must be a {@link UnitSelectionVoice}
	 * @param outputParams
	 *            optional output parameters, may be null; if it contains "MODIFICATION", the voice's modification
	 *            concatenator is used instead of its default concatenator
	 * @throws SynthesisException
	 *             if the concatenator fails with an I/O problem while generating the audio
	 * @throws IllegalStateException
	 *             if a phone element of a selected unit lacks the "d" or "end" attribute
	 * @return audio
	 */
	public AudioInputStream synthesize(List<Element> tokensAndBoundaries, Voice voice, String outputParams)
			throws SynthesisException {
		assert voice instanceof UnitSelectionVoice;
		UnitSelectionVoice v = (UnitSelectionVoice) voice;
		UnitDatabase database = v.getDatabase();
		// Select:
		UnitSelector unitSel = v.getUnitSelector();
		UnitConcatenator unitConcatenator;
		if (outputParams != null && outputParams.contains("MODIFICATION")) {
			unitConcatenator = v.getModificationConcatenator();
		} else {
			unitConcatenator = v.getConcatenator();
		}
		logger.debug("Selecting units with a " + unitSel.getClass().getName() + " from a " + database.getClass().getName());
		List<SelectedUnit> selectedUnits = unitSel.selectUnits(tokensAndBoundaries, voice);

		// Concatenate:
		logger.debug("Now creating audio with a " + unitConcatenator.getClass().getName());
		AudioInputStream audio;
		try {
			audio = unitConcatenator.getAudio(selectedUnits);
		} catch (IOException ioe) {
			throw new SynthesisException("Problems generating audio for unit chain: " + describeUnitChain(selectedUnits),
					ioe);
		}

		propagateUnitDurations(selectedUnits, database);
		if (logger.isDebugEnabled()) {
			logUpdatedDocument(tokensAndBoundaries);
		}

		return audio;
	}

	/**
	 * Build a textual listing of the selected units, one unit per line, for use in error messages.
	 * 
	 * @param selectedUnits
	 *            the unit chain to describe
	 * @return the string representations of the units, each followed by a line separator
	 */
	private static String describeUnitChain(List<SelectedUnit> selectedUnits) {
		StringWriter sw = new StringWriter();
		PrintWriter pw = new PrintWriter(sw);
		for (SelectedUnit selectedUnit : selectedUnits) {
			pw.println(selectedUnit);
		}
		pw.flush();
		return sw.toString();
	}

	/**
	 * Propagate the realised unit durations to the XML tree, and record which units were selected in the diagnostic
	 * <code>units</code> attribute of each unit's MaryXML element.
	 * 
	 * @param selectedUnits
	 *            the units that were selected and concatenated
	 * @param database
	 *            the unit database the units were taken from; provides the sample rate and the unit file names
	 * @throws IllegalStateException
	 *             if a phone element lacks the "d" or "end" attribute
	 */
	private void propagateUnitDurations(List<SelectedUnit> selectedUnits, UnitDatabase database) {
		// The sample rate does not depend on the unit, so look it up only once.
		float sampleRate = (float) database.getUnitFileReader().getSampleRate();
		float endInSeconds = 0;
		float durLeftHalfInSeconds = 0;
		for (SelectedUnit su : selectedUnits) {
			Target t = su.getTarget();
			Object concatenationData = su.getConcatenationData();
			assert concatenationData instanceof UnitData;
			UnitData unitData = (UnitData) concatenationData;
			Unit unit = su.getUnit();

			// For the unit durations, keep record in floats because of precision;
			// convert to millis only at export time, and re-compute duration in millis
			// from the end in millis, to avoid discrepancies due to rounding
			int unitDurationInSamples = unitData.getUnitDuration();
			float unitDurationInSeconds = unitDurationInSamples / sampleRate;
			int prevEndInMillis = (int) (1000 * endInSeconds);
			endInSeconds += unitDurationInSeconds;
			int endInMillis = (int) (1000 * endInSeconds);
			int unitDurationInMillis = endInMillis - prevEndInMillis;
			String unitString = t.getName() + " " + database.getFilename(unit) + " " + unit.index + " "
					+ unitDurationInSeconds;
			if (t instanceof HalfPhoneTarget) {
				if (((HalfPhoneTarget) t).isLeftHalf()) {
					// remember the left half so that the right half can report the duration of the whole phone
					durLeftHalfInSeconds = unitDurationInSeconds;
				} else { // right half
					// re-compute unit duration from both halves
					float totalUnitDurInSeconds = durLeftHalfInSeconds + unitDurationInSeconds;
					float prevEndInSeconds = endInSeconds - totalUnitDurInSeconds;
					prevEndInMillis = (int) (1000 * prevEndInSeconds);
					unitDurationInMillis = endInMillis - prevEndInMillis;
					durLeftHalfInSeconds = 0;
				}
			}

			Element maryxmlElement = t.getMaryxmlElement();
			if (maryxmlElement == null) {
				logger.debug("Unit " + t.getName() + " of length " + unitDurationInMillis + " ms has no maryxml element.");
				continue;
			}
			if (maryxmlElement.getNodeName().equals(MaryXML.PHONE)) {
				if (!maryxmlElement.hasAttribute("d") || !maryxmlElement.hasAttribute("end")) {
					throw new IllegalStateException("No duration information in MaryXML -- check log file"
							+ " for messages warning about unloadable acoustic models"
							+ " instead of voice-specific acoustic feature predictors");
				}
				// "d" is written in milliseconds, "end" in seconds. The running end time is deliberately never
				// restarted based on the previous attribute values: doing so messes up all end values.
				maryxmlElement.setAttribute("d", String.valueOf(unitDurationInMillis));
				maryxmlElement.setAttribute("end", String.valueOf(endInSeconds));
			} else { // not a PHONE
				assert maryxmlElement.getNodeName().equals(MaryXML.BOUNDARY);
				maryxmlElement.setAttribute("duration", String.valueOf(unitDurationInMillis));
			}
			// Several units (e.g. both halves of a phone) can share one element: append rather than overwrite.
			if (maryxmlElement.hasAttribute(UNIT_ATTRIBUTE_NAME)) {
				String prevUnitString = maryxmlElement.getAttribute(UNIT_ATTRIBUTE_NAME);
				maryxmlElement.setAttribute(UNIT_ATTRIBUTE_NAME, prevUnitString + "; " + unitString);
			} else {
				maryxmlElement.setAttribute(UNIT_ATTRIBUTE_NAME, unitString);
			}
		}
	}

	/**
	 * Write the document owning the given elements to the debug log. Problems while serialising the document are logged as
	 * warnings and never propagated, since this is diagnostic output only.
	 * 
	 * @param tokensAndBoundaries
	 *            the elements whose owner document is to be logged; nothing is logged if this is null or empty
	 */
	private void logUpdatedDocument(List<Element> tokensAndBoundaries) {
		if (tokensAndBoundaries == null || tokensAndBoundaries.isEmpty()) {
			return; // no element from which the owner document could be obtained
		}
		try {
			MaryNormalisedWriter writer = new MaryNormalisedWriter();
			ByteArrayOutputStream debugOut = new ByteArrayOutputStream();
			writer.output(tokensAndBoundaries.get(0).getOwnerDocument(), debugOut);
			logger.debug("Propagating the realised unit durations to the XML tree: \n" + debugOut.toString());
		} catch (Exception e) {
			logger.warn("Problem writing XML to logfile: " + e, e);
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy