marytts.modules.phonemiser.Syllabifier Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of marytts-runtime Show documentation
The newest version!
/**
 * Copyright 2000-2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.modules.phonemiser;

import java.util.LinkedList;
import java.util.ListIterator;

/**
 * Rule-based syllabifier for phone strings. Phones are looked up in an {@link AllophoneSet}; phones with a sonority of 4 or
 * higher are treated as vowels, and syllable boundaries are placed according to the sonority profile between vowels.
 * <p>
 * Syllable boundaries are represented by the token "-", and the stressed syllable is marked by a leading "'" token.
 * 
 * @deprecated Use {@link AllophoneSet#syllabify(String)} instead.
 */
@Deprecated
public class Syllabifier {
	/** The allophone inventory used to look up phone symbols and their sonority. */
	protected AllophoneSet allophoneSet;
	/** Whether a trailing "1" (stress mark) is stripped from phone symbols before lookup and in the output. */
	protected boolean removeTrailingOneFromPhones = true;

	/**
	 * Create a syllabifier with explicit control over the handling of trailing "1" stress marks.
	 * 
	 * @param allophoneSet
	 *            the allophone set used to look up phones
	 * @param removeTrailingOneFromPhones
	 *            if true, a trailing "1" is stripped from phone symbols before lookup and removed from the output
	 */
	public Syllabifier(AllophoneSet allophoneSet, boolean removeTrailingOneFromPhones) {
		this.allophoneSet = allophoneSet;
		this.removeTrailingOneFromPhones = removeTrailingOneFromPhones;
	}

	/**
	 * Create a syllabifier which strips trailing "1" stress marks from phone symbols (the default).
	 * 
	 * @param allophoneSet
	 *            the allophone set used to look up phones
	 */
	public Syllabifier(AllophoneSet allophoneSet) {
		this.allophoneSet = allophoneSet;
	}

	/**
	 * Syllabify a phonetic string, marking syllable boundaries with dash characters in the output. If the input marks stressed
	 * vowels with a suffix "1", these marks are removed, and single quotes (') are inserted at the beginning of the corresponding
	 * syllable.
	 * 
	 * @param phoneString
	 *            the phone string to syllabify.
	 * @return a syllabified phone string, with space characters inserted between individual phone symbols
	 */
	public String syllabify(String phoneString) {
		LinkedList<String> phoneList = splitIntoAllophones(phoneString);
		syllabify(phoneList);
		// Join all tokens (phones, boundaries, stress marker) with single spaces.
		StringBuilder sb = new StringBuilder();
		for (String p : phoneList) {
			if (sb.length() > 0) {
				sb.append(" ");
			}
			sb.append(p);
		}
		return sb.toString();
	}

	/**
	 * Syllabify a LinkedList of phones. This is an implementation of the syllabification rules by Jürgen Trouvain.
	 * <p>
	 * The list is modified in place: "-" tokens are inserted at syllable boundaries and a "'" token is inserted at the start of
	 * the stressed syllable (see {@link #correctStressSymbol(LinkedList)}).
	 * 
	 * @param phoneList
	 *            the list of phone symbols to syllabify; modified in place
	 * @return the same list instance, with inserted "-" strings at syllable boundaries.
	 * @throws IllegalArgumentException
	 *             if phoneList is null
	 */
	public LinkedList<String> syllabify(LinkedList<String> phoneList) {
		// Rule (1a)
		// Every boundary of a morphological root constitutes a syllable boundary.
		// Rule (1b)
		// Every boundary of a prefix constitutes a syllable boundary.
		// Where a linking suffix or a suffix begins, there is no
		// morphological syllable boundary.
		// E.g.: Lebens-gefaehrte and not Leben-s-gefaehrte
		// E.g.: Mei-nung and not Mein-ung

		// Note: We don't have morpheme boundaries! (=> so ignore rule 1)

		// 2.: find non-morphological syllable boundaries:
		// split words which have a morphologically conditioned syllable
		// boundary into their syllable parts, in order to search these later
		// for further, non-morphologically conditioned syllable boundaries.

		// Only one such component as long as we don't have morpheme boundaries

		if (phoneList == null) {
			throw new IllegalArgumentException("Cannot syllabify null string");
		}
		ListIterator<String> it = phoneList.listIterator(0);
		if (!it.hasNext()) {
			// Nothing to do for an empty list.
			return phoneList;
		}
		Allophone previous = getAllophone(it.next());
		boolean previousIsVowel = (previous != null && previous.sonority() >= 4);
		while (it.hasNext()) {
			Allophone next = getAllophone(it.next());
			boolean nextIsVowel = (next != null && next.sonority() >= 4);

			// Rule (5)
			// If there are no further phones between two vowels,
			// put the syllable boundary before the second vowel.
			// (No boundary is inserted when the second vowel is named "6".)
			if (previousIsVowel && nextIsVowel && !next.name().equals("6")) {
				// Insert a syllable boundary between the two.
				it.previous(); // one step back
				it.add("-"); // insert syllable boundary
				it.next(); // and forward again
			}
			previousIsVowel = nextIsVowel;
		}

		// Rule (4)
		// Find the "valley" (smallest level < 4) between two neighbouring
		// vowels, provided the vowels are not separated by a morphologically
		// conditioned boundary. The valley is marked with a temporary "."
		// token, which rule (6) below converts into a real boundary.
		it = phoneList.listIterator(0);
		int minSonority = 7; // one higher than possible maximum.
		int minIndex = -1; // position of the sonority minimum
		int syllableStart = -1; // index of the most recent "-" token, or -1 at word start
		while (it.hasNext()) {
			String s = it.next();
			if (s.equals("-")) {
				// Forget about all valleys:
				minSonority = 7;
				minIndex = -1;
				syllableStart = it.previousIndex();
			} else {
				Allophone ph = getAllophone(s);
				if (ph != null && ph.sonority() < minSonority) {
					minSonority = ph.sonority();
					minIndex = it.previousIndex();
				} else if (ph != null && ph.sonority() >= 4) {
					// Found a vowel. Now, if there is a (non-initial) sonority
					// valley before this vowel, insert a valley marker:
					if (minIndex > syllableStart + 1) {
						// Walk back to the valley, insert the marker in front of it,
						// then walk forward again to where we were.
						int steps = 0;
						while (it.nextIndex() > minIndex) {
							steps++;
							it.previous();
						}
						it.add(".");
						while (steps > 0) {
							it.next();
							steps--;
						}
					}
					minSonority = 7;
					minIndex = -1;
				}
			}
		}

		// Rule (6a)
		// If between a lax vowel (level 5) and the following vowel
		// (level 4, 5 or 6) there is only *one* consonant of level 2 or 3,
		// replace the valley marker by an ambisyllabic syllable boundary
		// (symbol "_").
		// semi-formally:
		// ([v5]).([k2,3])([v4,5,6])
		// --> ([v5])_([k2,3])([v4,5,6])
		// (This implementation writes "-" instead of "_", see below.)

		// Rule (6b)
		// If between a lax vowel (level 5) and the following vowel
		// (level 4, 5 or 6) there is more than one consonant
		// (levels 1, 2 or 3), and the level-5 vowel is at the same time
		// followed by a valley marker, move the valley marker one phone
		// further and replace it by a normal syllable boundary.
		// semi-formally:
		// ([v5]).([k1,2,3])([k1,2,3]+)([v4,5,6])
		// --> ([v5])([k1,2,3])-([k1,2,3]+)([v4,5,6])

		// Rule (6c)
		// In all other cases replace the valley marker by a normal
		// syllable boundary.
		it = phoneList.listIterator(0);
		while (it.hasNext()) {
			String s = it.next();
			if (s.equals(".")) {
				it.previous(); // skip . backwards
				Allophone ph = getAllophone(it.previous());
				it.next();
				it.next(); // skip ph and . forwards
				if (ph != null && ph.sonority() == 5) {
					// The phone just after the marker:
					ph = getAllophone(it.next());
					if (ph != null && ph.sonority() <= 3) {
						// Now the big question: another consonant or not?
						ph = getAllophone(it.next());
						if (ph != null && ph.sonority() <= 3) {
							// (6b) remove ., go one further, insert -
							// two ph back, and the .:
							it.previous();
							it.previous();
							it.previous();
							it.remove(); // remove the .
							it.next(); // skip one ph
							it.add("-");
						} else {
							// (6a) replace the valley marker
							// two ph back, and the .:
							it.previous();
							it.previous();
							it.previous();

							// only use minuses, because underscores denote also pauses
							it.set("-"); // replace . with -
						}
					} else {
						// unlikely case: no consonant after a 5
						it.previous();
						it.previous();
						it.set("-");
					}
				} else {
					// (6c) simply replace . with -
					it.set("-");
				}
			}
		}

		// Rule (7)
		// If a phone /N/ which is immediately preceded by an ambisyllabic
		// syllable boundary is followed by a full vowel (level 5 or 6), move
		// the syllable boundary by one phone (namely behind the /N/) and
		// replace it by a normal syllable boundary.
		// semi-formally:
		// _N([v5,6])
		// --> N-([v5,6])
		// Only minuses are used here, because underscores also denote pauses;
		// so this rule is applied to every "-" boundary.
		it = phoneList.listIterator(0);
		while (it.hasNext()) {
			String s = it.next();
			// hasNext() guards: a boundary at the very end of the list (or
			// followed only by N) has nothing to be moved behind.
			if (s.equals("-") && it.hasNext()) {
				Allophone ph = getAllophone(it.next());
				if (ph != null && ph.name().equals("N") && it.hasNext()) {
					ph = getAllophone(it.next());
					if (ph != null && ph.sonority() >= 5) {
						// (7) remove the boundary, put a - after the N
						// skip vowel, N, and boundary backwards:
						it.previous();
						it.previous();
						it.previous();
						it.remove(); // remove boundary
						it.next(); // skip N forwards
						it.add("-"); // insert -
					} // else, just leave it
				}
			}
		}
		correctStressSymbol(phoneList);
		return phoneList;
	}

	/**
	 * Place the stress marker ' at the beginning of the stressed syllable.
	 * <p>
	 * Every phone ending in "1" has that "1" removed if {@link #removeTrailingOneFromPhones} is set; a stress marker is inserted
	 * only for the first such phone. If no phone ends in "1", the syllable containing the first phone with a sonority of 5 or
	 * higher (non-schwa vowel) is stressed instead; if there is no such phone either, no marker is inserted.
	 * 
	 * @param phoneList
	 *            the syllabified phone list; modified in place
	 */
	protected void correctStressSymbol(LinkedList<String> phoneList) {
		boolean stressFound = false;
		ListIterator<String> it = phoneList.listIterator(0);
		while (it.hasNext()) {
			String s = it.next();
			if (s.endsWith("1")) {
				if (this.removeTrailingOneFromPhones) {
					it.set(s.substring(0, s.length() - 1)); // delete "1"
				}
				if (!stressFound) {
					// Only add a stress marker for first occurrence of "1":
					insertStressMarkerAtSyllableStart(it);
					stressFound = true;
				}
			}
		}
		// No stressed vowel in word?
		if (!stressFound) {
			// Stress first non-schwa syllable
			it = phoneList.listIterator(0);
			while (it.hasNext()) {
				String s = it.next();
				Allophone ph = allophoneSet.getAllophone(s);
				// Null guard, consistent with the other lookups in this class:
				// tokens without an Allophone are skipped.
				if (ph != null && ph.sonority() >= 5) { // non-schwa vowel
					insertStressMarkerAtSyllableStart(it);
					break; // OK, that's it.
				}
			}
		}
	}

	/**
	 * Insert the stress marker ' at the start of the syllable containing the element most recently returned by the iterator,
	 * i.e. directly after the closest preceding "-" or "_" token, or at the very beginning of the list if there is none.
	 * Afterwards the iterator is moved forward again so that it points just behind the element it pointed behind before.
	 * 
	 * @param it
	 *            list iterator positioned just after a phone of the syllable to be stressed
	 */
	private static void insertStressMarkerAtSyllableStart(ListIterator<String> it) {
		// Search backwards for syllable boundary or beginning of word:
		int steps = 0;
		while (it.hasPrevious()) {
			steps++;
			String t = it.previous();
			if (t.equals("-") || t.equals("_")) { // syllable boundary
				// Step forward over the boundary again so the marker goes behind it.
				it.next();
				steps--;
				break;
			}
		}
		it.add("'");
		// Return to the original position.
		while (steps > 0) {
			it.next();
			steps--;
		}
	}

	/**
	 * Convert a phone string into a list of string representations of individual phones. The input can use the suffix "1" to
	 * indicate stressed vowels.
	 * <p>
	 * At each position, the longest substring of up to three characters that the allophone set accepts is taken as the next
	 * phone. Characters for which no substring is accepted are silently skipped.
	 * 
	 * @param phoneString
	 *            the phone string to split
	 * @return a linked list of strings, each string representing an individual phone
	 * @deprecated This duplicates (badly) {@link AllophoneSet#splitAllophoneString(String)}; use that method instead.
	 */
	@Deprecated
	protected LinkedList<String> splitIntoAllophones(String phoneString) {
		LinkedList<String> phoneList = new LinkedList<String>();
		for (int i = 0; i < phoneString.length(); i++) {
			// Try to cut off individual segments,
			// starting with the longest prefixes,
			// and allowing for a suffix "1" marking stress:
			String name = null;
			for (int j = 3; j >= 1; j--) {
				if (i + j <= phoneString.length()) {
					String candidate = phoneString.substring(i, i + j);
					try {
						// Only the lookup's success matters here; it is expected to
						// throw IllegalArgumentException for unknown symbols.
						allophoneSet.getAllophone(candidate);
						name = candidate;
						i += j - 1; // so that the next i++ goes beyond current phone
						break;
					} catch (IllegalArgumentException ignored) {
						// Not a known phone symbol: try a shorter candidate.
					}
				}
			}
			if (name != null) {
				phoneList.add(name);
			}
		}
		return phoneList;
	}

	/**
	 * Get the Allophone object named phone; if phone ends with "1", discard the "1" and use the rest of the string as the phone
	 * symbol.
	 * 
	 * @param phone
	 *            the phone symbol, optionally carrying a trailing "1"
	 * @deprecated Use {@link AllophoneSet#getAllophone(String)} instead
	 * @return {@code allophoneSet.getAllophone(phone.substring(0, phone.length() - 1))} if
	 *         {@code this.removeTrailingOneFromPhones} is set and {@code phone.endsWith("1")};
	 *         {@code allophoneSet.getAllophone(phone)} otherwise
	 */
	@Deprecated
	protected Allophone getAllophone(String phone) {
		if (this.removeTrailingOneFromPhones && phone.endsWith("1")) {
			return allophoneSet.getAllophone(phone.substring(0, phone.length() - 1));
		} else {
			return allophoneSet.getAllophone(phone);
		}
	}

}