marytts.tools.analysis.MaryTranscriptionAligner Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of marytts-builder Show documentation
The newest version!
/**
 * 
 */
package marytts.tools.analysis;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;

import marytts.datatypes.MaryXML;
import marytts.exceptions.InvalidDataException;
import marytts.modules.phonemiser.AllophoneSet;
import marytts.util.dom.MaryDomUtils;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.traversal.NodeIterator;
import org.w3c.dom.traversal.TreeWalker;

/**
 * @author marc
 * 
 */
public class MaryTranscriptionAligner extends TranscriptionAligner {

	private boolean insertDummyDurations = false;

	public MaryTranscriptionAligner() {
		super(null);
	}

	/**
	 * @param allophoneSet
	 *            allophoneSet
	 */
	public MaryTranscriptionAligner(AllophoneSet allophoneSet) {
		super(allophoneSet);
	}

	/**
	 * @param allophoneSet
	 *            allophoneSet
	 * @param insertDummyDurations
	 *            if true, in any inserted items, a duration of 1 millisecond will be set.
	 */
	public MaryTranscriptionAligner(AllophoneSet allophoneSet, boolean insertDummyDurations) {
		super(allophoneSet);
		this.insertDummyDurations = insertDummyDurations;
	}

	/**
	 * 
	 * This changes the transcription of a MARYXML document in ALLOPHONES format to match the label sequence given as the "labels"
	 * parameter. The symbols of the original transcription are aligned to corrected ones, with which they are replaced in turn.
	 * 
	 * @param allophones
	 *            the MARYXML document, in ALLOPHONES format
	 * @param labels
	 *            the sequence of label symbols to use, separated by the entry separator as provided by getEntrySeparator().
	 * @throws InvalidDataException
	 *             if a manual label is encountered that is not in the AllophoneSet
	 */
	public void alignXmlTranscriptions(Document allophones, String labels) throws InvalidDataException {
		// get all t and boundary elements
		NodeIterator tokenIt = MaryDomUtils.createNodeIterator(allophones, MaryXML.TOKEN, MaryXML.BOUNDARY);
		List tokens = new ArrayList();
		Element e;
		while ((e = (Element) tokenIt.nextNode()) != null) {
			tokens.add(e);
		}

		String orig = this.collectTranscription(allophones);

		System.err.println("Orig   : " + orig);
		System.err.println("Correct: " + labels);

		// now we align the transcriptions and split it at the delimiters
		String al = this.distanceAlign(orig.trim(), labels.trim()) + " ";

		System.err.println("Alignments: " + al);
		String[] alignments = al.split("#");

		// change the transcription in xml according to the aligned one
		changeTranscriptions(allophones, alignments);

		if (allophoneSet == null) { // cannot verify
			return;
		}
		// assert that all alignments should be in the AllophoneSet for this locale:
		HashSet manualLabelSet = new HashSet(Arrays.asList(al.trim().split("[#\\s]+")));
		try {
			for (String label : manualLabelSet) {
				allophoneSet.getAllophone(label);
			}
		} catch (IllegalArgumentException iae) {
			throw new InvalidDataException(iae.getMessage());
		}
	}

	/**
	 * 
	 * This computes a string of phonetic symbols out of an allophones xml: - standard phones are taken from "ph" elements in the
	 * document - after each token-element (except those followed by a "boundary"-element), a "bnd" symbol is inserted (standing
	 * for a possible pause). Entries are separated by the entrySeparator character.
	 * 
	 * @param doc
	 *            the document to analyse
	 * @return orig, converted into string
	 */
	private String collectTranscription(Document doc) {
		// String storing the original transcription begins with a pause
		StringBuilder orig = new StringBuilder();

		NodeIterator ni = MaryDomUtils.createNodeIterator(doc, MaryXML.PHONE, MaryXML.BOUNDARY);
		Element e;
		Element prevToken = null;
		boolean prevWasBoundary = false;
		while ((e = (Element) ni.nextNode()) != null) {
			if (e.getTagName().equals(MaryXML.PHONE)) {
				Element token = (Element) MaryDomUtils.getAncestor(e, MaryXML.TOKEN);
				if (token != prevToken && !prevWasBoundary) {
					if (orig.length() > 0)
						orig.append(entrySeparator);
					orig.append(possibleBnd);
				}
				if (orig.length() > 0)
					orig.append(entrySeparator);
				orig.append(e.getAttribute("p"));
				prevToken = token;
				prevWasBoundary = false;
			} else { // boundary
				if (orig.length() > 0)
					orig.append(entrySeparator);
				orig.append(possibleBnd);
				prevWasBoundary = true;
			}
		}

		return orig.toString();
	}

	/**
	 * 
	 * This changes the transcription according to a given sequence of phonetic symbols (including boundaries and pauses).
	 * Boundaries in doc are added or deleted as necessary to match the pause symbols in alignments.
	 * 
	 * @param doc
	 *            the document in which to change the transcriptions
	 * @param alignments
	 *            the aligned symbols to use in the update.
	 */
	private void changeTranscriptions(Document doc, String[] alignments) {
		// Algorithm:
		// * Go through  and  elements in doc on the one hand,
		// and through alignments on the other hand.
		// - Special steps for the first  in a token:
		// -> if the  is the first  in the current token,
		// and alignment is a pause symbol,
		// insert a new boundary before the token, and skip the alignment entry;
		// -> if the  is the first  in the current token,
		// and the alignment entry is empty, skip the alignment entry.
		// - for  elements:
		// -> if the alignment entry is empty, delete the  and,
		// if it was the only  in the current , also
		// delete the syllable;
		// -> else, use the current alignment entry, adding any 
		// elements as necessary.
		// - for  elements:
		// -> if symbol is pause, keep boundary;
		// -> if symbol is word separator, delete boundary.

		NodeIterator ni = MaryDomUtils.createNodeIterator(doc, MaryXML.PHONE, MaryXML.BOUNDARY);
		List origPhonesAndBoundaries = new ArrayList();
		// We make a copy of the list of original entries, because when
		// we add/remove entries later, that get the node iterator confused.
		Element elt;
		while ((elt = (Element) ni.nextNode()) != null) {
			origPhonesAndBoundaries.add(elt);
		}
		int iAlign = 0;
		Element prevToken = null;
		boolean prevWasBoundary = false;
		for (Element e : origPhonesAndBoundaries) {
			if (e.getTagName().equals(MaryXML.PHONE)) {
				boolean betweenTokens = false;
				Element token = (Element) MaryDomUtils.getAncestor(e, MaryXML.TOKEN);
				if (token != prevToken && !prevWasBoundary) {
					betweenTokens = true;
				}
				if (betweenTokens) {
					assert !prevWasBoundary;
					if (alignments[iAlign].trim().equals(possibleBnd)) {
						// Need to insert a boundary before token
						System.err.println("  inserted boundary in xml");
						Element b = MaryXML.createElement(doc, MaryXML.BOUNDARY);
						b.setAttribute("breakindex", "3");
						if (insertDummyDurations) {
							b.setAttribute("duration", "1");
						}
						token.getParentNode().insertBefore(b, token);
					} else if (!alignments[iAlign].trim().equals("")) {
						// one or more phones were inserted into the transcription
						// -- treat them as word-final, i.e. insert them into the last syllable in prevToken
						Element syllable = null;
						Element ref = null; // insert before null = insert at the end
						NodeList prevSyllables = null;
						// if there is an insertion at the beginning, we don't have a prevToken!
						if (prevToken != null) {
							prevSyllables = prevToken.getElementsByTagNameNS(MaryXML.getNamespace(), MaryXML.SYLLABLE);
						}
						if (prevSyllables != null && prevSyllables.getLength() > 0) { // insert at end of previous token
							syllable = (Element) prevSyllables.item(prevSyllables.getLength() - 1);
							ref = null;
						} else { // insert at beginning of current token
							syllable = (Element) e.getParentNode();
							ref = e; // insert before current phone
						}
						String[] newPh = alignments[iAlign].trim().split("\\s+");
						for (int i = 0; i < newPh.length; i++) {
							Element newPhElement = MaryXML.createElement(doc, MaryXML.PHONE);
							newPhElement.setAttribute("p", newPh[i]);
							syllable.insertBefore(newPhElement, ref);
							System.err.println(" inserted phone from transcription: " + newPh[i]);
							if (insertDummyDurations) {
								newPhElement.setAttribute("d", "1");
							}
						}
					} // else it is an empty word boundary marker
					iAlign++; // move beyond the marker between tokens
				}
				prevToken = token;
				prevWasBoundary = false;
				System.err.println("Ph = " + e.getAttribute("p") + ", align = " + alignments[iAlign]);
				if (alignments[iAlign].trim().equals("")) {
					// Need to delete the current  element
					Element syllable = (Element) e.getParentNode();
					assert syllable != null;
					assert syllable.getTagName().equals(MaryXML.SYLLABLE);
					syllable.removeChild(e);
					if (MaryDomUtils.getFirstElementByTagName(syllable, MaryXML.PHONE) == null) {
						// Syllable is now empty, need to delete it as well
						syllable.getParentNode().removeChild(syllable);
					}
				} else {
					// Replace , add siblings if necessary
					String[] newPh = alignments[iAlign].trim().split("\\s+");
					e.setAttribute("p", newPh[0]);
					if (newPh.length > 1) {
						// any ph to be added
						Element syllable = (Element) e.getParentNode();
						assert syllable != null;
						assert syllable.getTagName().equals(MaryXML.SYLLABLE);
						Node rightNeighbor = e.getNextSibling(); // can be null
						for (int i = 1; i < newPh.length; i++) {
							Element newPhElement = MaryXML.createElement(doc, MaryXML.PHONE);
							newPhElement.setAttribute("p", newPh[i]);
							syllable.insertBefore(newPhElement, rightNeighbor);
						}
					}
				}
			} else { // boundary
				System.err.println("Boundary, align = " + alignments[iAlign]);
				if (alignments[iAlign].trim().equals(possibleBnd)) {
					// keep boundary
				} else {
					// delete boundary
					System.err.println("  deleted boundary from xml");
					e.getParentNode().removeChild(e);
				}
				prevWasBoundary = true;
			}
			iAlign++;
		}
		updatePhAttributesFromPhElements(doc);
	}

	private void updatePhAttributesFromPhElements(Document doc) {
		NodeIterator ni = MaryDomUtils.createNodeIterator(doc, MaryXML.TOKEN);
		Element t;
		while ((t = (Element) ni.nextNode()) != null) {
			updatePhAttributesFromPhElements(t);
		}
	}

	private void updatePhAttributesFromPhElements(Element token) {
		if (token == null)
			throw new NullPointerException("Got null token");
		if (!token.getTagName().equals(MaryXML.TOKEN)) {
			throw new IllegalArgumentException("Argument should be a <" + MaryXML.TOKEN + ">, not a <" + token.getTagName() + ">");
		}
		StringBuilder tPh = new StringBuilder();
		TreeWalker sylWalker = MaryDomUtils.createTreeWalker(token, MaryXML.SYLLABLE);
		Element syl;
		while ((syl = (Element) sylWalker.nextNode()) != null) {
			StringBuilder sylPh = new StringBuilder();
			String stress = syl.getAttribute("stress");
			if (stress.equals("1"))
				sylPh.append("'");
			else if (stress.equals("2"))
				sylPh.append(",");
			TreeWalker phWalker = MaryDomUtils.createTreeWalker(syl, MaryXML.PHONE);
			Element ph;
			while ((ph = (Element) phWalker.nextNode()) != null) {
				if (sylPh.length() > 0)
					sylPh.append(" ");
				sylPh.append(ph.getAttribute("p"));
			}
			String sylPhString = sylPh.toString();
			syl.setAttribute("ph", sylPhString);
			if (tPh.length() > 0)
				tPh.append(" - ");
			tPh.append(sylPhString);
			if (syl.hasAttribute("tone")) {
				tPh.append(" " + syl.getAttribute("tone"));
			}
		}
		if (tPh.toString().length() > 0) {
			token.setAttribute("ph", tPh.toString());
		}
	}

}