All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.tools.voiceimport.PhoneLabelFeatureAligner Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2000-2009 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.voiceimport;

import java.awt.Dimension;
import java.awt.FlowLayout;
import java.awt.GridBagConstraints;
import java.awt.GridBagLayout;
import java.awt.HeadlessException;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;

import javax.swing.JButton;
import javax.swing.JEditorPane;
import javax.swing.JFrame;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;

import marytts.modules.phonemiser.AllophoneSet;
import marytts.util.io.FileUtils;

/**
 * Compare unit label and unit feature files. If they don't align, flag a problem; let the user decide how to fix it -- either by
 * editing the unit label file or by editing a rawmaryxml file and recomputing the features file.
 * 
 * @author schroed
 *
 */
public class PhoneLabelFeatureAligner extends VoiceImportComponent {

	// Collaborating voice import components; looked up from the database layout in customInitialisation().
	protected PhoneUnitFeatureComputer featureComputer;
	protected AllophonesExtractor allophoneExtractor;
	protected PhoneUnitLabelComputer labelComputer;
	protected TranscriptionAligner transcriptionAligner;
	// Name of the silence allophone; set in initialiseComp() from the database's allophone set.
	protected String pauseSymbol;

	// The database layout; set in getDefaultProps().
	protected DatabaseLayout db = null;
	// Progress indicator, updated while looping over the basename list.
	protected int percent = 0;
	// Maps the basename of each misaligned utterance to its error message.
	protected Map problems;
	// Set to true as soon as correctPauses() has been run.
	protected boolean correctedPauses = false;
	// protected boolean wait =false;

	// File name extensions and directories of the feature and label files; set in customInitialisation().
	protected String featsExt;
	protected String labExt;
	protected String labDir;
	protected String featsDir;

	// Return codes of letUserCorrect(), evaluated in compute().
	protected static final int TRYAGAIN = 0;
	protected static final int SKIP = 1;
	protected static final int SKIPALL = 2;
	protected static final int REMOVE = 3;
	protected static final int REMOVEALL = 4;

	/**
	 * Returns the name of this component.
	 * 
	 * @return the constant string "PhoneLabelFeatureAligner"
	 */
	public String getName() {
		final String componentName = "PhoneLabelFeatureAligner";
		return componentName;
	}

	/**
	 * Fetches the collaborating components from the database layout and sets the extensions and directories of the feature
	 * and label files this aligner works on.
	 */
	protected void customInitialisation() {
		// fixed file name extensions
		featsExt = ".pfeats";
		labExt = ".lab";

		// collaborating components, looked up by name
		Object component = db.getComponent("PhoneUnitFeatureComputer");
		featureComputer = (PhoneUnitFeatureComputer) component;
		component = db.getComponent("AllophonesExtractor");
		allophoneExtractor = (AllophonesExtractor) component;
		component = db.getComponent("PhoneUnitLabelComputer");
		labelComputer = (PhoneUnitLabelComputer) component;
		component = db.getComponent("TranscriptionAligner");
		transcriptionAligner = (TranscriptionAligner) component;

		// directories as configured in the database layout
		featsDir = db.getProp(db.PHONEFEATUREDIR);
		labDir = db.getProp(db.PHONELABDIR);
	}

	/**
	 * Initialises this component: runs {@link #customInitialisation()}, initialises the feature computer, reads the pause
	 * symbol from the allophone set and makes sure that the feature and label directories exist.
	 * 
	 * @throws Exception
	 *             if the initialisation of the feature computer fails
	 */
	@Override
	protected final void initialiseComp() throws Exception {
		customInitialisation();
		db.initialiseComponent(featureComputer);

		pauseSymbol = db.getAllophoneSet().getSilence().name();
		createDirIfMissing("Feature", featsDir, "FEATUREDIR");
		createDirIfMissing("Label", labDir, "LABELDIR");
	}

	/**
	 * Creates the given directory if it does not exist yet, reporting this on a single line of standard output.
	 * 
	 * @param description
	 *            human-readable kind of directory, used in the console message
	 * @param path
	 *            path of the directory
	 * @param propertyName
	 *            name used in the error message if the directory cannot be created
	 */
	private void createDirIfMissing(String description, String path, String propertyName) {
		File dir = new File(path);
		if (dir.exists()) {
			return;
		}
		// print (not println), so that "Created successfully." ends up on the same line
		System.out.print(description + " directory " + path + " does not exist; ");
		if (!dir.mkdir()) {
			throw new Error("Could not create " + propertyName);
		}
		System.out.println("Created successfully.");
	}

	/**
	 * Remembers the database layout and returns the property map of this component, creating an empty one on first use.
	 * 
	 * @param theDb
	 *            the database layout to work with
	 * @return the property map of this component
	 */
	public SortedMap getDefaultProps(DatabaseLayout theDb) {
		db = theDb;
		if (props != null) {
			return props;
		}
		props = new TreeMap();
		return props;
	}

	/**
	 * Sets up the help texts of the properties; this component installs an empty map.
	 */
	protected void setupHelp() {
		props2Help = new TreeMap();
	}

	/**
	 * Align labels and features. For each utterance in the basename list, verify whether the chain of units given in its
	 * label file (extension .lab) is identical to the chain of units in the corresponding unit feature file. If problems
	 * are found, pauses are corrected automatically first; for the files that are still not aligned, the user gets the
	 * opportunity to correct the alignment, skip the utterance or remove it from the list.
	 * 
	 * @return a boolean indicating whether or not the database is fully aligned.
	 * @throws Exception
	 *             Exception
	 */
	public boolean compute() throws Exception {
		int bnlLengthIn = bnl.getLength();
		System.out.println("Verifying feature-label alignment for " + bnlLengthIn + " utterances.");
		problems = new TreeMap();

		for (int i = 0; i < bnl.getLength(); i++) {
			percent = 100 * i / bnl.getLength();
			// first alignment test
			String errorMessage = verifyAlignment(bnl.getName(i));
			System.out.print("    " + bnl.getName(i));
			if (errorMessage == null) {
				System.out.println(" OK");
			} else {
				problems.put(bnl.getName(i), errorMessage);
				System.out.println(" " + errorMessage);
			}
		}
		System.out.println("Found " + problems.size() + " problems");
		int remainingProblems = problems.keySet().size();

		if (remainingProblems > 0) {
			// automatically correct pauses; this rebuilds the map of problems
			remainingProblems = correctPauses();
		}

		int guiReturn = SKIP;
		boolean removeAll = false;
		boolean skipAll = false;
		boolean tryAgain = true;
		// Utterances dropped because of "remove all". Only their files may be offered for deletion afterwards:
		// the map of problems also contains utterances that were fixed or skipped before and are still in the list.
		Map removedProblems = new TreeMap();
		for (Iterator it = problems.keySet().iterator(); it.hasNext();) {
			String basename = (String) it.next();
			String errorMessage;
			if (!(removeAll || skipAll)) { // These may be set true after a previous call to letUserCorrect()
				do {
					errorMessage = (String) problems.get(basename);
					System.out.println("    " + basename + ": " + errorMessage);
					/* Let the user make a first correction */
					guiReturn = letUserCorrect(basename, errorMessage);
					/* Check if an error remains */
					errorMessage = verifyAlignment(basename);
					/* If there is no error, proceed with the next file. */
					if (errorMessage == null) {
						System.out.println(" -> OK");
						remainingProblems--;
						tryAgain = false;
					}
					/* If the error message is (still) not null, manage the GUI return code: */
					else {
						// replacing the value of an existing key does not invalidate the key iterator
						problems.put(basename, errorMessage);
						/* Manage the error according to the GUI return: */
						switch (guiReturn) {

						case TRYAGAIN:
							tryAgain = true;
							break;

						case SKIP:
							tryAgain = false;
							System.out.println(" -> Skipped this utterance ! This problem remains.");
							break;

						case SKIPALL:
							tryAgain = false;
							skipAll = true;
							System.out.println(" -> Skipping all utterances ! The problems remain.");
							break;

						case REMOVE:
							tryAgain = false;
							bnl.remove(basename);
							deleteProblemsYesNo(null, basename);
							remainingProblems--;
							System.out.println(" -> Removed from the utterance list. OK");
							break;

						case REMOVEALL:
							tryAgain = false;
							removeAll = true;
							System.out.println(" -> Removing all problematic utterances. OK");
							break;

						default:
							throw new RuntimeException("The letUserCorrect() GUI returned an unknown return code.");
						}
					}
				} while (tryAgain);

			}

			/* Additional management for the removeAll option: */
			if (removeAll) {
				bnl.remove(basename);
				removedProblems.put(basename, problems.get(basename));
				remainingProblems--;
			}
		}
		if (removeAll) {
			// ask user if the files associated with the removed utterances should be deleted
			deleteProblemsYesNo(removedProblems, null);
		}

		System.out.println("Removed [" + (bnlLengthIn - bnl.getLength()) + "/" + bnlLengthIn + "] utterances from the list, ["
				+ bnl.getLength() + "] utterances remain," + " among which [" + remainingProblems + "/" + bnl.getLength()
				+ "] still have problems.");

		return remainingProblems == 0; // true exactly if all problems have been solved
	}

	/**
	 * Ask the user whether the automatic correction of pauses should be run, and run it if the answer is "Yes".
	 * 
	 * @param numProblems
	 *            the number of problems found so far
	 * @throws IOException
	 *             IOException
	 * @return the number of problems remaining after the correction, or numProblems if no correction was run
	 */
	protected int correctPausesYesNo(int numProblems) throws IOException {
		final String[] answers = { "Yes", "No" };
		final String question = "Found " + numProblems + " problems. Automatically correct pauses?";
		final int selected = JOptionPane.showOptionDialog(null, question, "Automatic pause correction",
				JOptionPane.YES_NO_CANCEL_OPTION, JOptionPane.QUESTION_MESSAGE, null, answers, null);

		// index 0 is the "Yes" button; anything else leaves the number of problems untouched
		return selected == 0 ? correctPauses() : numProblems;
	}

	/**
	 * Ask the user whether the label and feature files of utterances that were removed from the list should be deleted
	 * as well. If the answer is "Yes", the files are deleted and the basename list is written out.
	 * 
	 * @param someProblems
	 *            a map whose keys are the basenames of the utterances to delete, or null
	 * @param basename
	 *            a single basename to delete, or null
	 * @throws IOException
	 *             IOException
	 */
	protected void deleteProblemsYesNo(Map someProblems, String basename) throws IOException {
		int choice = JOptionPane.showOptionDialog(null, "Removed problematic utterance(s) from List. Also delete file(s)?",
				"Delete problematic file(s)", JOptionPane.YES_NO_CANCEL_OPTION, JOptionPane.QUESTION_MESSAGE, null, new String[] {
						"Yes", "No" }, null);

		if (choice != 0) {
			return; // anything but "Yes": keep the files
		}
		if (someProblems != null) {
			// we have a map basenames->problems
			for (Iterator it = someProblems.keySet().iterator(); it.hasNext();) {
				deleteLabelAndFeatureFile((String) it.next());
			}
		}
		if (basename != null) {
			// there is just one basename
			deleteLabelAndFeatureFile(basename);
		}
		// before anything else happens, ensure that deleted files will not be missed later
		// by forcing basename file to be written/updated; this is just a q&d fix!
		String basenameFilename = db.getProp("db.basenameFile");
		bnl.write(basenameFilename);
	}

	/**
	 * Delete the label file and the feature file of the given utterance; a file that exists but cannot be deleted is
	 * reported on standard output.
	 * 
	 * @param basename
	 *            basename of the utterance
	 */
	private void deleteLabelAndFeatureFile(String basename) {
		File[] files = { new File(labDir + basename + labExt), new File(featsDir + basename + featsExt) };
		for (int f = 0; f < files.length; f++) {
			if (files[f].exists() && !files[f].delete()) {
				System.out.println("Warning: could not delete " + files[f].getPath());
			}
		}
	}

	/**
	 * Open a window in which the user can define phone replacements, and wait until the window has been closed through
	 * one of its buttons. "Apply to problems" hands the edited text to defineReplacements(); "Cancel" just hides the
	 * window. The method polls the visibility of the window, so it blocks the calling thread while the window is open.
	 */
	protected void defineReplacementWindow() {

		final JFrame frame = new JFrame("Define Replacements");
		GridBagLayout gridBagLayout = new GridBagLayout();
		GridBagConstraints gridC = new GridBagConstraints();
		frame.getContentPane().setLayout(gridBagLayout);

		final JEditorPane editPane = new JEditorPane();
		editPane.setPreferredSize(new Dimension(500, 500));
		editPane.setText("#Whenever a problem occurs, the problematic phone in the label file\n"
				+ "#will be replaced by the phone you define here.\n\n" + "#Define replacements like this:\n"
				+ "#labelPhone newLabelPhone\n");

		JButton saveButton = new JButton("Apply to problems");
		saveButton.addActionListener(new ActionListener() {
			public void actionPerformed(ActionEvent e) {
				frame.setVisible(false);
				try {
					defineReplacements(editPane.getText());
				} catch (Exception ex) {
					ex.printStackTrace();
					// keep the original exception as the cause
					throw new Error("Error defining replacements", ex);
				}
			}
		});
		JButton cancelButton = new JButton("Cancel");
		cancelButton.addActionListener(new ActionListener() {
			public void actionPerformed(ActionEvent e) {
				frame.setVisible(false);
			}
		});

		gridC.gridx = 0;
		gridC.gridy = 0;
		// resize scroll pane:
		gridC.weightx = 1;
		gridC.weighty = 1;
		gridC.fill = GridBagConstraints.HORIZONTAL;
		JScrollPane scrollPane = new JScrollPane(editPane);
		scrollPane.setPreferredSize(editPane.getPreferredSize());
		gridBagLayout.setConstraints(scrollPane, gridC);
		frame.getContentPane().add(scrollPane);
		gridC.gridy = 1;
		// do not resize buttons:
		gridC.weightx = 0;
		gridC.weighty = 0;
		JPanel buttonPanel = new JPanel();
		buttonPanel.setLayout(new FlowLayout());
		buttonPanel.add(saveButton);
		buttonPanel.add(cancelButton);
		gridBagLayout.setConstraints(buttonPanel, gridC);
		frame.getContentPane().add(buttonPanel);
		frame.pack();
		frame.setVisible(true);

		// wait until one of the buttons has hidden the window
		do {
			try {
				Thread.sleep(10);
			} catch (InterruptedException e) {
				// Do not swallow the interruption: restore the interrupt status and stop waiting,
				// treating the interruption like "Cancel".
				Thread.currentThread().interrupt();
				frame.setVisible(false);
			}
		} while (frame.isVisible());
		frame.dispose();
	}

	/**
	 * Tell the user that a line of the replacement definition could not be parsed, and reopen the definition window if
	 * the user chooses "Correct".
	 * 
	 * @param text
	 *            the line containing the syntax error
	 */
	protected void defineReplacementInfo(String text) {
		final String[] buttons = { "Correct", "Cancel" };
		final String message = "Error applying replacements: Syntax error in line \"" + text + "\"";
		final int selected = JOptionPane.showOptionDialog(null, message, "Error in replacement definition",
				JOptionPane.YES_NO_CANCEL_OPTION, JOptionPane.QUESTION_MESSAGE, null, buttons, null);
		if (selected != 0) {
			return; // "Cancel" or dialog closed
		}
		defineReplacementWindow();
	}

	/**
	 * Try to automatically correct misalignment caused by pauses: If there is a pause in the label file and not in the feature
	 * file, it is removed in the label file. If there is a pause in the feature file and not in the label file, a pause of length
	 * zero is inserted in the label file.
	 * <p>
	 * The map of problems is rebuilt from scratch. Every label file that could be read together with its feature file is
	 * rewritten (in UTF-8, the encoding it is read with) with renumbered unit indices. An utterance whose label or feature
	 * file is missing is recorded as a problem.
	 * 
	 * @return the number of problems remaining
	 * @throws IOException
	 *             if a file cannot be read or written, or if a label file has no header
	 */
	protected int correctPauses() throws IOException {
		correctedPauses = true;
		// clear the list of problems
		problems = new TreeMap();
		// go through all files
		for (int l = 0; l < bnl.getLength(); l++) {
			percent = 100 * l / bnl.getLength();
			String basename = bnl.getName(l);
			System.out.print("    " + basename);
			String labFilename = labDir + basename + labExt;
			String featsFilename = featsDir + basename + featsExt;
			String line;

			BufferedReader labels;
			try {
				labels = new BufferedReader(new InputStreamReader(new FileInputStream(new File(labFilename)), "UTF-8"));
			} catch (FileNotFoundException fnfe) {
				// a missing file is an unsolved problem; do not silently drop the utterance
				String missing = "No label file " + labFilename;
				problems.put(basename, missing);
				System.out.println(" " + missing);
				continue;
			}
			// header of the label file (everything up to and including the "#" line) and its unit lines
			StringBuilder labelFileHeader = new StringBuilder();
			List<String> labelUnits = new ArrayList<String>();
			try {
				boolean foundHeader = false;
				while ((line = labels.readLine()) != null) {
					labelFileHeader.append(line + "\n");
					if (line.startsWith("#")) {
						foundHeader = true;
						break; // line starting with "#" marks end of header
					}
				}

				if (!foundHeader) {
					throw new IOException("File " + labFilename + " does not contain a file header!");
				}

				while ((line = labels.readLine()) != null) {
					labelUnits.add(line + "\n");
				}
			} finally {
				labels.close();
			}

			BufferedReader features;
			try {
				features = new BufferedReader(new InputStreamReader(new FileInputStream(new File(featsFilename)), "UTF-8"));
			} catch (FileNotFoundException fnfe) {
				String missing = "No feature file " + featsFilename;
				problems.put(basename, missing);
				System.out.println(" " + missing);
				continue;
			}
			// text units of the feature file
			List<String> featureUnits = new ArrayList<String>();
			try {
				while ((line = features.readLine()) != null) {
					if (line.trim().equals(""))
						break; // empty line marks end of header
				}
				while ((line = features.readLine()) != null) {
					if (line.trim().equals(""))
						break; // empty line marks end of section
					featureUnits.add(line);
				}
			} finally {
				features.close();
			}

			ArrayList labelUnitData;
			String labelUnit;
			String featureUnit;
			String returnString = null;

			int numLabelUnits = labelUnits.size();
			int numFeatureUnits = featureUnits.size();

			int i = 0, j = 0;
			while (i < numLabelUnits && j < numFeatureUnits) {
				labelUnitData = getLabelUnitData(labelUnits.get(i));
				labelUnit = (String) labelUnitData.get(2);
				featureUnit = getFeatureUnit(featureUnits.get(j));

				if (!featureUnit.equals(labelUnit)) {

					if (featureUnit.equals("_")) {
						// add pause in labels
						System.out.println(" Adding pause unit in labels before unit " + i);
						String pauseUnit;
						if (i - 1 >= 0) {
							// zero-length pause: it ends where the previous unit ends
							ArrayList previousUnitData = getLabelUnitData(labelUnits.get(i - 1));
							pauseUnit = (String) previousUnitData.get(0) + " " + (String) labelUnitData.get(1) + " _\n";
						} else {
							pauseUnit = "0.00000 " + (String) labelUnitData.get(1) + " _\n";
						}
						labelUnits.add(i, pauseUnit);
						i++;
						j++;
						numLabelUnits = labelUnits.size();
						continue;
					} else if (featureUnit.equals("__L")) {
						// add two pause units in labels
						System.out.println(" Adding pause units in labels before unit " + i);
						String pauseUnit;
						if (i - 1 >= 0) {
							ArrayList previousUnitData = getLabelUnitData(labelUnits.get(i - 1));
							pauseUnit = (String) previousUnitData.get(0) + " " + (String) labelUnitData.get(1) + " __L\n";
							labelUnits.add(i, pauseUnit);
							i++;
							pauseUnit = (String) previousUnitData.get(0) + " " + (String) labelUnitData.get(1) + " __R\n";
							labelUnits.add(i, pauseUnit);
						} else {
							// this is the first label-unit
							pauseUnit = "0.00000 " + (String) labelUnitData.get(1) + " __L\n";
							labelUnits.add(i, pauseUnit);
							i++;
							pauseUnit = "0.00000 " + (String) labelUnitData.get(1) + " __R\n";
							labelUnits.add(i, pauseUnit);
						}
						i++;
						j += 2;
						numLabelUnits = labelUnits.size();
						continue;
					} else if (labelUnit.equals("_")) {
						// remove pause in labels
						System.out.println(" Removing pause unit in labels at index " + i);
						labelUnits.remove(i);
						numLabelUnits = labelUnits.size();
						continue;
					} else if (labelUnit.equals("__L")) {
						// remove two pause units in labels
						System.out.println(" Removing pause units in labels at index " + i);
						if (i - 1 >= 0) {
							// lengthen the unit before the pause
							ArrayList previousUnitData = getLabelUnitData(labelUnits.get(i - 1));
							labelUnits.set(i - 1, (String) labelUnitData.get(0) + " " + (String) previousUnitData.get(1) + " "
									+ (String) previousUnitData.get(2) + "\n");
						}
						// remove the pauses; the second half may be absent if "__L" is the last unit
						labelUnits.remove(i);
						if (i < labelUnits.size()) {
							labelUnits.remove(i);
						}
						numLabelUnits = labelUnits.size();
						continue;
					} else {
						// truly not matching
						if (returnString == null) {
							// only remember the first mismatch; i is the position of the unit in the label file
							returnString = " Non-matching units found: feature file '" + featureUnit + "' vs. label file '"
									+ labelUnit + "' (Unit " + i + ")";
						}
					}
				}
				// increase both counters if you did not delete a pause
				i++;
				j++;
			}
			if (numLabelUnits < numFeatureUnits) {
				// check if the final pause is missing in the label file
				featureUnit = getFeatureUnit(featureUnits.get(numFeatureUnits - 1));
				// without any label unit there is nothing a final pause could be appended to
				boolean hasLabelUnits = numLabelUnits > 0;
				if (hasLabelUnits && featureUnit.equals("_") && numLabelUnits + 1 == numFeatureUnits) {
					labelUnitData = getLabelUnitData(labelUnits.get(numLabelUnits - 1));
					labelUnit = (String) labelUnitData.get(2);
					String lastFeatureUnit = getFeatureUnit(featureUnits.get(numFeatureUnits - 2));
					if (lastFeatureUnit.equals(labelUnit)) {
						// add pause at the end
						System.out.println(" Adding pause unit in labels after last unit");
						String pauseUnit = (String) labelUnitData.get(0) + " " + numLabelUnits + " _\n";
						labelUnits.add(pauseUnit);
						numLabelUnits = labelUnits.size();
					}
				} else if (hasLabelUnits && featureUnit.equals("__R") && numLabelUnits + 2 == numFeatureUnits) {
					labelUnitData = getLabelUnitData(labelUnits.get(numLabelUnits - 1));
					labelUnit = (String) labelUnitData.get(2);
					String lastFeatureUnit = getFeatureUnit(featureUnits.get(numFeatureUnits - 3));
					if (lastFeatureUnit.equals(labelUnit)) {
						// add two pause units at the end of label file
						System.out.println(" Adding pause units in labels after last unit");
						int unitIndex = numLabelUnits - 1;
						String pauseUnit = (String) labelUnitData.get(0) + " " + unitIndex + " __L\n";
						labelUnits.add(pauseUnit);

						pauseUnit = (String) labelUnitData.get(0) + " " + numLabelUnits + " __R\n";
						labelUnits.add(pauseUnit);

						numLabelUnits = labelUnits.size();
					}
				} else { // feature file is truly longer than label file
					if (returnString == null) {
						returnString = " Feature file is longer than label file: " + " unit " + numLabelUnits
								+ " and greater do not exist in label file";
					}
				}
			}
			// return an error if label file is longer than feature file
			if (returnString == null && numLabelUnits > numFeatureUnits) {
				returnString = " Label file is longer than feature file: " + " unit " + numFeatureUnits
						+ " and greater do not exist in feature file";
			}

			// now overwrite the label file, in the same encoding it was read with
			PrintWriter labelFileWriter = new PrintWriter(new OutputStreamWriter(
					new FileOutputStream(new File(labFilename)), "UTF-8"));
			try {
				// print header
				labelFileWriter.print(labelFileHeader.toString());
				// print units
				numLabelUnits = labelUnits.size();
				for (int k = 0; k < numLabelUnits; k++) {
					String nextUnit = labelUnits.get(k);
					if (nextUnit != null) {
						// correct the unit index
						ArrayList nextUnitData = getLabelUnitData(nextUnit);
						labelFileWriter.println(nextUnitData.get(0) + " " + k + " " + nextUnitData.get(2));
					}
				}
				labelFileWriter.flush();
			} finally {
				labelFileWriter.close();
			}

			// returnString is null if all units matched,
			// otherwise the first error is given back
			if (returnString == null) {
				System.out.println(" OK");
			} else {
				problems.put(basename, returnString);
				System.out.println(returnString);
			}
		}
		System.out.println("Remaining problems: " + problems.size());
		return problems.size();
	}

	/**
	 * Parse the replacement definitions entered by the user and apply them to the label files: wherever a label unit does
	 * not match the feature unit at the same position, and replacing the label according to the definitions makes them
	 * match, the label is replaced. Label files that were altered are rewritten (in UTF-8, the encoding they are read
	 * with), and the map of problems is rebuilt from scratch.
	 * <p>
	 * Each definition line consists of two whitespace-separated tokens, "labelPhone newLabelPhone". Blank lines and lines
	 * starting with "#" are ignored. If a line cannot be parsed, nothing is applied and the user is informed through
	 * defineReplacementInfo().
	 * 
	 * @param text
	 *            the replacement definitions, one per line
	 * @throws Exception
	 *             if a label or feature file cannot be read or written
	 */
	protected void defineReplacements(String text) throws Exception {
		/* read the replacements into a map */
		Map<String, String> phone2Replace = new HashMap<String, String>();
		String error = null;
		String[] textlines = text.split("\n");
		for (int i = 0; i < textlines.length; i++) {
			String entry = textlines[i].trim();
			// whitespace-only lines and (possibly indented) comments carry no definition
			if (entry.length() == 0 || entry.startsWith("#")) {
				continue;
			}
			StringTokenizer tok = new StringTokenizer(entry);
			try {
				phone2Replace.put(tok.nextToken(), tok.nextToken());
			} catch (NoSuchElementException nsee) {
				error = textlines[i];
				break;
			}
		}
		if (error != null) {
			// TODO: Does not work properly
			defineReplacementInfo(error);
			return;
		}

		/*
		 * go through the problems and try to replace the phones in the labels with the specified replacements
		 */

		// clear the list of problems
		problems = new TreeMap();
		// go through all files
		for (int l = 0; l < bnl.getLength(); l++) {
			percent = 100 * l / bnl.getLength();
			String basename = bnl.getName(l);
			System.out.print("    " + basename);
			String labFilename = labDir + basename + labExt;
			String featsFilename = featsDir + basename + featsExt;
			String line;

			BufferedReader labels;
			try {
				labels = new BufferedReader(new InputStreamReader(new FileInputStream(new File(labFilename)), "UTF-8"));
			} catch (FileNotFoundException fnfe) {
				// a missing file is an unsolved problem; do not silently drop the utterance
				String missing = "No label file " + labFilename;
				problems.put(basename, missing);
				System.out.println(" " + missing);
				continue;
			}
			// header of the label file (everything up to and including the "#" line) and its unit lines
			StringBuilder labelFileHeader = new StringBuilder();
			List<String> labelUnits = new ArrayList<String>();
			try {
				while ((line = labels.readLine()) != null) {
					labelFileHeader.append(line + "\n");
					if (line.startsWith("#"))
						break; // line starting with "#" marks end of header
				}
				while ((line = labels.readLine()) != null) {
					labelUnits.add(line + "\n");
				}
			} finally {
				labels.close();
			}

			BufferedReader features;
			try {
				features = new BufferedReader(new InputStreamReader(new FileInputStream(new File(featsFilename)), "UTF-8"));
			} catch (FileNotFoundException fnfe) {
				String missing = "No feature file " + featsFilename;
				problems.put(basename, missing);
				System.out.println(" " + missing);
				continue;
			}
			// text units of the feature file
			List<String> featureUnits = new ArrayList<String>();
			try {
				while ((line = features.readLine()) != null) {
					if (line.trim().equals(""))
						break; // empty line marks end of header
				}
				while ((line = features.readLine()) != null) {
					if (line.trim().equals(""))
						break; // empty line marks end of section
					featureUnits.add(line);
				}
			} finally {
				features.close();
			}

			ArrayList labelUnitData;
			String labelUnit;
			String featureUnit;
			String returnString = null;

			int numLabelUnits = labelUnits.size();
			int numFeatureUnits = featureUnits.size();

			int i = 0, j = 0;
			boolean alteredLabel = false;
			while (i < numLabelUnits && j < numFeatureUnits) {
				labelUnitData = getLabelUnitData(labelUnits.get(i));
				labelUnit = (String) labelUnitData.get(2);
				featureUnit = getFeatureUnit(featureUnits.get(j));

				if (!featureUnit.equals(labelUnit)) {
					// try to replace label Unit
					if (phone2Replace.containsKey(labelUnit)) {
						System.out.print(" Replacing " + labelUnit);
						String newlabelUnit = phone2Replace.get(labelUnit);
						System.out.print(" with " + newlabelUnit + "... ");
						if (featureUnit.equals(newlabelUnit)) {
							labelUnits.set(i, (String) labelUnitData.get(0) + " " + (String) labelUnitData.get(1) + " "
									+ newlabelUnit);
							alteredLabel = true;
							System.out.print("successful!\n");
							i++;
							j++;
							continue;
						}
						System.out.print("failed!\n");
					}
					// else we have a problem
					if (returnString == null) {
						// only remember the first mismatch
						returnString = " Non-matching units found: feature file '" + featureUnit + "' vs. label file '"
								+ labelUnit + "' (Unit " + i + ")";
					}

				}
				i++;
				j++;
			}
			// report an error if one of the files is longer than the other
			if (returnString == null && numLabelUnits > numFeatureUnits) {
				returnString = " Label file is longer than feature file: " + " unit " + numFeatureUnits
						+ " and greater do not exist in feature file";
			} else if (returnString == null && numLabelUnits < numFeatureUnits) {
				returnString = " Feature file is longer than label file: " + " unit " + numLabelUnits
						+ " and greater do not exist in label file";
			}

			if (alteredLabel) {
				// overwrite the label file, in the same encoding it was read with
				PrintWriter labelFileWriter = new PrintWriter(new OutputStreamWriter(new FileOutputStream(new File(
						labFilename)), "UTF-8"));
				try {
					// print header
					labelFileWriter.print(labelFileHeader.toString());
					// print units
					numLabelUnits = labelUnits.size();
					for (int k = 0; k < numLabelUnits; k++) {
						String nextUnit = labelUnits.get(k);
						if (nextUnit != null) {
							// correct the unit index
							ArrayList nextUnitData = getLabelUnitData(nextUnit);
							labelFileWriter.print((String) nextUnitData.get(0) + " " + k + " " + (String) nextUnitData.get(2)
									+ "\n");
						}
					}
					labelFileWriter.flush();
				} finally {
					labelFileWriter.close();
				}
			}
			// returnString is null if all units matched,
			// otherwise the first error is given back
			if (returnString == null) {
				System.out.println(" OK");
			} else {
				problems.put(basename, returnString);
				System.out.println(returnString);
			}
		}
		System.out.println("Remaining problems: " + problems.size());
	}

	/**
	 * Check whether the units in the label file of an utterance are the same as the units in its feature file, in the
	 * same order. Both files are read unit by unit after skipping their headers; the first discrepancy ends the check.
	 * 
	 * @param basename
	 *            basename of the utterance
	 * @return null if the alignment was OK, or a String containing an error message.
	 * @throws IOException
	 *             if a file cannot be read, or if the feature file ends before the end of its unit section
	 */
	protected String verifyAlignment(String basename) throws IOException {
		final String labelPath = labDir + basename + labExt;
		final String featurePath = featsDir + basename + featsExt;
		BufferedReader labelReader = null;
		BufferedReader featureReader = null;
		try {
			try {
				labelReader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(labelPath)), "UTF-8"));
			} catch (FileNotFoundException noLabels) {
				return "No label file " + labelPath;
			}
			try {
				featureReader = new BufferedReader(
						new InputStreamReader(new FileInputStream(new File(featurePath)), "UTF-8"));
			} catch (FileNotFoundException noFeatures) {
				return "No feature file " + featurePath;
			}

			String current;
			// the label header ends with the first line starting with "#"
			do {
				current = labelReader.readLine();
			} while (current != null && !current.startsWith("#"));
			// the feature header ends with the first empty line
			do {
				current = featureReader.readLine();
			} while (current != null && !current.trim().equals(""));

			// walk through both unit lists in parallel until one of them ends or a mismatch shows up
			int unitIndex = 0;
			for (;;) {
				String labelUnit = null;
				current = labelReader.readLine();
				if (current != null) {
					List fields = getLabelUnitData(current);
					labelUnit = (String) fields.get(2);
					unitIndex = Integer.parseInt((String) fields.get(1));
				}

				String featureUnit = getFeatureUnit(featureReader);
				if (featureUnit == null) {
					throw new IOException("Incomplete feature file: " + basename);
				}

				// the empty string stands for an empty line, i.e. the end of the feature section
				boolean featuresEnded = "".equals(featureUnit);
				boolean labelsEnded = labelUnit == null;
				if (featuresEnded && labelsEnded) {
					return null; // both files end here: success
				}
				if (featuresEnded) {
					return "Label file is longer than feature file: " + " unit " + unitIndex
							+ " and greater do not exist in feature file";
				}
				if (labelsEnded) {
					unitIndex++;
					return "Feature file is longer than label file: " + " unit " + unitIndex
							+ " and greater do not exist in label file";
				}
				if (!featureUnit.equals(labelUnit)) {
					return "Non-matching units found: feature file '" + featureUnit + "' vs. label file '" + labelUnit
							+ "' (Unit " + unitIndex + ")";
				}
			}
		} finally {
			if (labelReader != null) {
				labelReader.close();
			}
			if (featureReader != null) {
				featureReader.close();
			}
		}
	}

	/**
	 * Split a line of a label file into its first three whitespace-separated tokens.
	 * 
	 * @param line
	 *            a unit line of a label file, or null
	 * @return a list containing the time, the unit index and the phone, in this order; null if line is null
	 * @throws IOException
	 *             declared for callers; not thrown by this implementation
	 */
	private ArrayList getLabelUnitData(String line) throws IOException {
		if (line == null) {
			return null;
		}
		final StringTokenizer fields = new StringTokenizer(line.trim());
		final ArrayList unitData = new ArrayList();
		// token 0: time, token 1: unit index, token 2: phone
		for (int n = 0; n < 3; n++) {
			unitData.add(fields.nextToken());
		}
		return unitData;
	}

	/**
	 * Extract the unit name from a line of the feature file.
	 * 
	 * @param line
	 *            a line of the feature file, or null
	 * @return the first token of the line; the empty string if the line is blank (end of section); null if line is null
	 * @throws IOException
	 *             declared for callers; not thrown by this implementation
	 */
	private String getFeatureUnit(String line) throws IOException {
		if (line == null) {
			return null;
		}
		final String content = line.trim();
		if (content.equals("")) {
			// empty line -- signal end of section
			return "";
		}
		// we expect that the first token in each line is the label
		return new StringTokenizer(content).nextToken();
	}

	/**
	 * Read the next line from a label file and extract the unit name from it.
	 * 
	 * @param labelReader
	 *            reader positioned at a unit line of a label file
	 * @return the third token of the line, or null at the end of the file
	 * @throws IOException
	 *             if reading fails
	 */
	private String getLabelUnit(BufferedReader labelReader) throws IOException {
		final String line = labelReader.readLine();
		if (line == null) {
			return null;
		}
		final StringTokenizer fields = new StringTokenizer(line.trim());
		// skip the time and the unit index; the third token in each line is the label
		for (int skipped = 0; skipped < 2; skipped++) {
			fields.nextToken();
		}
		return fields.nextToken();
	}

	/**
	 * Read the next line from a feature file and extract the unit name from it.
	 * 
	 * @param featureReader
	 *            reader positioned in the unit section of a feature file
	 * @return the first token of the line; the empty string if the line is blank (end of section); null at the end of the
	 *         file
	 * @throws IOException
	 *             if reading fails
	 */
	private String getFeatureUnit(BufferedReader featureReader) throws IOException {
		// the String variant implements exactly the same null / blank / first-token rules
		final String nextLine = featureReader.readLine();
		return getFeatureUnit(nextLine);
	}

	/**
	 * Show the misalignment problem of one utterance to the user and let him choose how to deal with it. For the two
	 * editing options, the respective editor is run before this method returns.
	 * 
	 * @param basename
	 *            basename of the problematic utterance
	 * @param errorMessage
	 *            description of the problem
	 * @return TRYAGAIN after editing; REMOVE, REMOVEALL, SKIP or SKIPALL according to the button chosen; SKIP if the
	 *         dialog was closed or cannot be shown because there is no display
	 * @throws Exception
	 *             if one of the editing steps fails
	 */
	protected int letUserCorrect(String basename, String errorMessage) throws Exception {
		// The same buttons are offered whether or not pauses have been corrected automatically; the additional
		// "Replace labels in unit file" / "Define replacements" options are currently disabled.
		String[] options = { "Edit RAWMARYXML", "Edit unit labels", "Remove from list", "Remove all problems", "Skip",
				"Skip all" };

		int choice;
		try {
			String message = "Misalignment problem for " + basename + ":\n" + errorMessage;
			String title = "Correct alignment for " + basename;
			choice = JOptionPane.showOptionDialog(null, message, title, JOptionPane.YES_NO_CANCEL_OPTION,
					JOptionPane.QUESTION_MESSAGE, null, options, null);
		} catch (HeadlessException e) {
			return SKIP;
		}

		// the choice is the index of the button in the options array
		if (choice == 0) {
			editMaryXML(basename);
			return TRYAGAIN;
		}
		if (choice == 1) {
			editUnitLabels(basename);
			return TRYAGAIN;
		}
		if (choice == 2) {
			return REMOVE;
		}
		if (choice == 3) {
			return REMOVEALL;
		}
		if (choice == 5) {
			return SKIPALL;
		}
		// "Skip" (4) or JOptionPane.CLOSED_OPTION: don't verify again.
		return SKIP;
	}

	/**
	 * Replace all label units which do not match the feature units with the feature units. This method should only be called
	 * after automatic pause alignment.
	 * <p>
	 * Label and feature units are paired by position; label units beyond the last feature unit keep their own phone. Every
	 * unit line is rewritten as "time index phone", with the index renumbered from 0. The complete new file content is
	 * assembled in memory before the label file is opened for writing, so that a malformed line cannot leave a truncated
	 * label file behind. The label file is both read and written as UTF-8.
	 * 
	 * @param basename
	 *            the filename of the label/feature file
	 * @throws IOException
	 *             if one of the files cannot be read, or the label file cannot be written
	 * @throws NoSuchElementException
	 *             if a unit line of the label file holds fewer than three tokens; the label file is left untouched then
	 */
	private void replaceUnitLabels(String basename) throws IOException {
		final File labelFile = new File(labDir + basename + labExt);
		String line;

		// read the label file: the header first, then one list entry per unit line
		StringBuilder labelFileHeader = new StringBuilder();
		List labelUnits = new ArrayList();
		BufferedReader labels = new BufferedReader(new InputStreamReader(new FileInputStream(labelFile), "UTF-8"));
		try {
			while ((line = labels.readLine()) != null) {
				labelFileHeader.append(line).append("\n");
				if (line.startsWith("#"))
					break; // line starting with "#" marks end of header
			}
			while ((line = labels.readLine()) != null) {
				labelUnits.add(line);
			}
		} finally {
			labels.close();
		}

		// read the first section after the header of the feature file: one list entry per unit
		List featureTextUnits = new ArrayList();
		BufferedReader features = new BufferedReader(new InputStreamReader(new FileInputStream(new File(featsDir + basename
				+ featsExt)), "UTF-8"));
		try {
			while ((line = features.readLine()) != null) {
				if (line.trim().equals(""))
					break; // empty line marks end of header
			}
			while ((line = features.readLine()) != null) {
				if (line.trim().equals(""))
					break; // empty line marks end of section
				featureTextUnits.add(line);
			}
		} finally {
			features.close();
		}

		// build the new file content; all parsing happens here, before the existing file is touched
		StringBuilder newContent = new StringBuilder(labelFileHeader.toString());
		int numLabelUnits = labelUnits.size();
		int numFeatureUnits = featureTextUnits.size();
		for (int k = 0; k < numLabelUnits; k++) {
			ArrayList labelUnitData = getLabelUnitData((String) labelUnits.get(k));
			String phone;
			if (k < numFeatureUnits) {
				// take over label of feature file (a no-op where both already agree)
				phone = getFeatureUnit((String) featureTextUnits.get(k));
			} else {
				// no feature unit at this position: keep the label as it is
				phone = (String) labelUnitData.get(2);
			}
			// k corrects the unit index
			newContent.append((String) labelUnitData.get(0)).append(" ").append(k).append(" ").append(phone).append("\n");
		}

		// now overwrite the label file, in the same encoding it was read with
		PrintWriter labelFileWriter = new PrintWriter(new OutputStreamWriter(new FileOutputStream(labelFile), "UTF-8"));
		try {
			labelFileWriter.print(newContent.toString());
			// PrintWriter never throws on write errors; checkError() flushes and reports them
			if (labelFileWriter.checkError()) {
				throw new IOException("Could not write label file " + labelFile.getPath());
			}
		} finally {
			labelFileWriter.close();
		}
	}

	/**
	 * Let the user edit the MaryXML file of one utterance, creating that file from the utterance text first if it does not
	 * exist yet. If the user saves the edit, the allophones file, the transcription alignment, the phone labels and the
	 * features of the utterance are recomputed, in this order.
	 * 
	 * @param basename
	 *            name of the utterance whose MaryXML file is to be edited
	 * @throws Exception
	 *             if the file cannot be created or shown, or if one of the recomputation steps other than the transcription
	 *             alignment fails
	 */
	private void editMaryXML(String basename) throws Exception {
		final File maryxmlFile = new File(db.getProp(db.MARYXMLDIR) + basename + db.getProp(db.MARYXMLEXT));
		if (!maryxmlFile.exists()) {
			// need to create it
			// Collect all content before opening the file: a failure while reading the text or building the header
			// must not leave an empty file behind, because an existing file is never regenerated.
			String text = FileUtils
					.getFileAsString(new File(db.getProp(db.TEXTDIR) + basename + db.getProp(db.TEXTEXT)), "UTF-8");
			String header = PhoneUnitFeatureComputer.getMaryXMLHeaderWithInitialBoundary(db.getProp(db.LOCALE));
			PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(maryxmlFile), "UTF-8"));
			try {
				pw.println(header);
				pw.println(text);
				// NOTE(review): only an empty line is written after the text. If the header opens an XML element,
				// a closing tag may be missing here -- confirm against PhoneUnitFeatureComputer.
				pw.println("");
			} finally {
				pw.close();
			}
		}
		boolean edited = new EditFrameShower(maryxmlFile).display();
		if (edited) {
			allophoneExtractor.generateAllophonesFile(basename);
			try {
				transcriptionAligner.alignTranscription(basename);
			} catch (Exception e) {
				// best effort: a failed alignment is reported, but labels and features are still recomputed
				e.printStackTrace();
			}
			labelComputer.computePhoneLabel(basename);
			featureComputer.computeFeaturesFor(basename);
		}
	}

	/**
	 * Show the label file of one utterance in an edit window. Whether the user saved or cancelled is not evaluated here.
	 * 
	 * @param basename
	 *            name of the utterance whose label file is to be edited
	 * @throws IOException
	 *             if the label file cannot be read into the edit window
	 */
	private void editUnitLabels(String basename) throws IOException {
		final File labelFile = new File(labDir + basename + labExt);
		final EditFrameShower editor = new EditFrameShower(labelFile);
		editor.display();
	}

	/**
	 * Command line entry point: run the aligner and print whether the database is perfectly aligned.
	 * 
	 * @param args
	 *            not evaluated here
	 * @throws Exception
	 *             if setting up or running the aligner fails
	 */
	public static void main(String[] args) throws Exception {
		PhoneLabelFeatureAligner lfa = new PhoneLabelFeatureAligner();
		// the layout object is not kept; presumably its constructor configures the component -- TODO confirm
		new DatabaseLayout(lfa);
		boolean isAligned = lfa.compute();
		// the optional word carries its own trailing blank, so the positive message has no double space
		System.out.println("The database is " + (isAligned ? "" : "NOT ") + "perfectly aligned");
	}

	/**
	 * Shows a file in a simple edit window with a "Save &amp; Exit" and a "Cancel" button, and reports whether the user saved
	 * it. The file is read and written as UTF-8.
	 */
	public static class EditFrameShower {
		protected final File file;
		// written by the button listeners on the event dispatch thread, read by the thread waiting in display()
		protected volatile boolean saved;

		/**
		 * @param file
		 *            the file to be edited
		 */
		public EditFrameShower(File file) {
			this.file = file;
			this.saved = false;
		}

		/**
		 * Show a frame allowing the user to edit the file. The call blocks, polling every 10 ms, until the frame is no longer
		 * visible, i.e. until one of the two buttons was used or the frame was hidden in some other way.
		 * 
		 * @return a boolean indicating whether the file was saved.
		 * @throws IOException
		 *             IOException
		 * @throws UnsupportedEncodingException
		 *             UnsupportedEncodingException
		 * @throws FileNotFoundException
		 *             FileNotFoundException
		 */
		public boolean display() throws IOException, UnsupportedEncodingException, FileNotFoundException {
			// start from a clean state, so that a second call on the same object cannot report an earlier save
			setSaved(false);

			final JFrame frame = new JFrame("Edit " + file.getName());
			GridBagLayout gridBagLayout = new GridBagLayout();
			GridBagConstraints gridC = new GridBagConstraints();
			frame.getContentPane().setLayout(gridBagLayout);

			final JEditorPane editPane = new JEditorPane();
			editPane.setPreferredSize(new Dimension(500, 500));
			// read() is not documented to close the reader it is given, so release the file handle here
			InputStreamReader fileReader = new InputStreamReader(new FileInputStream(file), "UTF-8");
			try {
				editPane.read(fileReader, null);
			} finally {
				fileReader.close();
			}
			JButton saveButton = new JButton("Save & Exit");
			saveButton.addActionListener(new ActionListener() {
				public void actionPerformed(ActionEvent e) {
					try {
						PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
						try {
							editPane.write(pw);
							// PrintWriter never throws on write errors; checkError() flushes and reports them
							if (pw.checkError()) {
								throw new IOException("Could not write " + file.getPath());
							}
						} finally {
							pw.close();
						}
						// publish the result before hiding the frame: hiding the frame releases the waiting thread
						setSaved(true);
						frame.setVisible(false);
					} catch (IOException ioe) {
						// the frame stays open, so the user can try again or cancel
						ioe.printStackTrace();
					}
				}
			});
			JButton cancelButton = new JButton("Cancel");
			cancelButton.addActionListener(new ActionListener() {
				public void actionPerformed(ActionEvent e) {
					// same order as for saving: result first, then release the waiting thread
					setSaved(false);
					frame.setVisible(false);
				}
			});

			gridC.gridx = 0;
			gridC.gridy = 0;
			// resize scroll pane:
			gridC.weightx = 1;
			gridC.weighty = 1;
			// NOTE(review): with HORIZONTAL fill the scroll pane is only stretched horizontally although weighty is 1;
			// GridBagConstraints.BOTH may have been intended -- confirm before changing the layout.
			gridC.fill = GridBagConstraints.HORIZONTAL;
			JScrollPane scrollPane = new JScrollPane(editPane);
			scrollPane.setPreferredSize(editPane.getPreferredSize());
			gridBagLayout.setConstraints(scrollPane, gridC);
			frame.getContentPane().add(scrollPane);
			gridC.gridy = 1;
			// do not resize buttons:
			gridC.weightx = 0;
			gridC.weighty = 0;
			JPanel buttonPanel = new JPanel();
			buttonPanel.setLayout(new FlowLayout());
			buttonPanel.add(saveButton);
			buttonPanel.add(cancelButton);
			gridBagLayout.setConstraints(buttonPanel, gridC);
			frame.getContentPane().add(buttonPanel);
			frame.pack();
			frame.setVisible(true);

			// Poll until the frame has been hidden. An interrupt does not end the wait, but it is remembered and
			// re-asserted afterwards so that the caller can still see it.
			boolean interrupted = false;
			do {
				try {
					Thread.sleep(10); // OK, this is ugly, but I don't mind today...
				} catch (InterruptedException e) {
					interrupted = true;
				}
			} while (frame.isVisible());
			if (interrupted) {
				Thread.currentThread().interrupt();
			}
			frame.dispose();
			return saved;
		}

		/**
		 * Record whether the user saved the file.
		 * 
		 * @param saved
		 *            true if the file was saved, false otherwise
		 */
		protected void setSaved(boolean saved) {
			this.saved = saved;
		}

	}

	/**
	 * Report how far the computation has progressed.
	 * 
	 * @return the current value of the percent counter; by contract an integer between 0 and 100, or -1 if progress
	 *         reporting is not implemented.
	 */
	public int getProgress() {
		final int currentPercent = percent;
		return currentPercent;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy