All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.voiceimport.HalfPhoneUnitLabelComputer Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2000-2009 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.voiceimport;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;

import marytts.modules.phonemiser.Allophone;
import marytts.signalproc.analysis.EnergyContourRms;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;

import org.apache.commons.lang.ArrayUtils;

/**
 * Compute unit labels from phone labels.
 * 
 * @author schroed
 *
 */
public class HalfPhoneUnitLabelComputer extends PhoneUnitLabelComputer {

	private String ENERGYBASEDTRANSIENTSPLITTING = getName() + ".energyBasedTransientSplitting";
	private boolean energyBasedTransientSplitting;
	private String energyExt = ".energy";
	// these could be user configurable properties, but at this stage, it's too easy to screw up:
	private double windowSizeInSeconds = 0.005;
	private double skipSizeInSeconds = 0.0025;

	public String getName() {
		return "HalfPhoneUnitLabelComputer";
	}

	public HalfPhoneUnitLabelComputer() {
	}

	@Override
	protected void initialiseComp() throws Exception {
		super.initialiseComp();
		unitlabelDir = new File(db.getProp(DatabaseLayout.HALFPHONELABDIR));
		unitlabelExt = db.getProp(DatabaseLayout.HALFPHONELABEXT);
	}

	public SortedMap getDefaultProps(DatabaseLayout db) {
		this.db = db;
		if (props == null) {
			props = new TreeMap();
			props.put(ENERGYBASEDTRANSIENTSPLITTING, "false");
		}
		return props;
	}

	protected void setupHelp() {
		props2Help = new TreeMap();
		props2Help.put(ENERGYBASEDTRANSIENTSPLITTING,
				"Whether to analyze energy in the speech signal to determine midpoints of transient phones (plosives).");
	}

	@Override
	public boolean compute() throws Exception {
		energyBasedTransientSplitting = Boolean.parseBoolean(db.getProperty(ENERGYBASEDTRANSIENTSPLITTING));
		return super.compute();
	}

	@Override
	protected List getMidTimes(List labels, List endTimes) {
		assert labels.size() == endTimes.size();

		List midTimes = new ArrayList(endTimes.size());
		double startTime = 0;
		for (int i = 0; i < labels.size(); i++) {
			String label = labels.get(i);
			double endTime = endTimes.get(i);

			boolean isTransient = false;
			double peakTime = Double.NaN;
			if (energyBasedTransientSplitting) {
				try {
					Allophone allophone = db.getAllophoneSet().getAllophone(label);
					isTransient = allophone.isPlosive() || allophone.isAffricate();
					if (isTransient) {
						peakTime = getEnergyPeak(startTime, endTime);
					}
				} catch (NullPointerException e) {
					// ignore for now
				} catch (IOException e) {
					// ignore for now
				}
			}

			double midTime;
			if (isTransient && !Double.isNaN(peakTime)) {
				midTime = peakTime;
			} else {
				midTime = (startTime + endTime) / 2;
			}
			midTimes.add(midTime);
			startTime = endTime;
		}
		return midTimes;
	}

	/**
	 * Get time of energy peak difference between startTime and endTime, based on energy analysis of the wav file for the current
	 * baseName.
	 * 

* The energy analysis (based on the provided parameters {@link #windowSizeInSeconds} and {@link #skipSizeInSeconds}) is saved * to a binary file, which is reused if present (and if the parameter values match those encountered in the file header). * * @param startTime * of energy analysis * @param endTime * of energy analysis * @return the time of the greatest increase in energy between startTime and endTime, or {@link Double#NaN} if no such time * can be determined from the signal (this is then handled in {@link #getMidTimes(List, List)}) * @throws IOException * if the energy analysis file cannot be read or (initially) created * @see EnergyContourRms#WriteEnergyFile(EnergyContourRms, String) */ private double getEnergyPeak(double startTime, double endTime) throws IOException { // determine wav file name and energy analysis file name: String wavDir = db.getProperty(DatabaseLayout.WAVDIR); String baseName = bnl.getName(basenameIndex); String wavExt = db.getProperty(DatabaseLayout.WAVEXT); File wavFile = new File(wavDir, baseName + wavExt); File energyFile = new File(unitlabelDir, baseName + energyExt); // load or create energy analysis file: EnergyContourRms energyContourRMS; try { energyContourRMS = EnergyContourRms.ReadEnergyFile(energyFile.getAbsolutePath()); if (energyContourRMS.header.windowSizeInSeconds != windowSizeInSeconds || energyContourRMS.header.skipSizeInSeconds != skipSizeInSeconds) { logger.debug("File header of " + energyFile.getAbsolutePath() + " has unexpected parameter values! 
Will re-analyze..."); throw new IOException(); } } catch (IOException e) { logger.info("Analyzing " + wavFile.getAbsolutePath() + " and saving result to " + energyFile.getAbsolutePath()); energyContourRMS = new EnergyContourRms(wavFile.getAbsolutePath(), energyFile.getAbsolutePath(), windowSizeInSeconds, skipSizeInSeconds); } // get energy analysis frames between startTime and endTime from energy contour: double[] energyContour = energyContourRMS.contour; int startFrame = SignalProcUtils.time2frameIndex(startTime, windowSizeInSeconds, skipSizeInSeconds); int endFrame = SignalProcUtils.time2frameIndex(endTime, windowSizeInSeconds, skipSizeInSeconds); double[] energyLocalContour = ArrayUtils.subarray(energyContour, startFrame, endFrame); // get framewise differences: double[] energyDiffs = MathUtils.diff(energyLocalContour); // we need more than one diff frame: if (energyDiffs.length < 2) { return Double.NaN; } // find frame index of peak diff: int peakLocalFrame = MathUtils.findGlobalPeakLocation(energyDiffs); int peakGlobalFrame = startFrame + peakLocalFrame; // convert frame index to time, adding half a window because diffs are between frames: double peakTime = SignalProcUtils.frameIndex2Time(peakGlobalFrame, windowSizeInSeconds, skipSizeInSeconds) + windowSizeInSeconds / 2; // adjust peak diff time to lie inside time range: if (peakTime < startTime) { peakTime = startTime; } else if (peakTime > endTime) { peakTime = endTime; } return peakTime; } @Override @Deprecated protected String[] toUnitLabels(String[] phoneLabels) { // We will create exactly two half phones for every phone: String[] halfPhoneLabels = new String[2 * phoneLabels.length]; float startTime = 0; int unitIndex = 0; for (int i = 0; i < phoneLabels.length; i++) { unitIndex++; StringTokenizer st = new StringTokenizer(phoneLabels[i]); String endTimeString = st.nextToken(); String dummyNumber = st.nextToken(); String phone = st.nextToken(); assert !st.hasMoreTokens(); float endTime = 
Float.parseFloat(endTimeString); float duration = endTime - startTime; assert duration > 0 : "Duration is not > 0 for phone " + i + " (" + phone + ")"; float midTime = startTime + duration / 2; String leftUnitLine = midTime + " " + unitIndex + " " + phone + "_L"; unitIndex++; String rightUnitLine = endTime + " " + unitIndex + " " + phone + "_R"; halfPhoneLabels[2 * i] = leftUnitLine; halfPhoneLabels[2 * i + 1] = rightUnitLine; startTime = endTime; } return halfPhoneLabels; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy