marytts.tools.voiceimport.PhoneLabelFeatureAligner Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2000-2009 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.tools.voiceimport;
import java.awt.Dimension;
import java.awt.FlowLayout;
import java.awt.GridBagConstraints;
import java.awt.GridBagLayout;
import java.awt.HeadlessException;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;
import javax.swing.JButton;
import javax.swing.JEditorPane;
import javax.swing.JFrame;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import marytts.modules.phonemiser.AllophoneSet;
import marytts.util.io.FileUtils;
/**
* Compare unit label and unit feature files. If they don't align, flag a problem; let the user decide how to fix it -- either by
* editing the unit label file or by editing a rawmaryxml file and recomputing the features file.
*
* @author schroed
*
*/
public class PhoneLabelFeatureAligner extends VoiceImportComponent {
// Other voice-import components this aligner delegates to; they are looked up from the
// DatabaseLayout in customInitialisation().
protected PhoneUnitFeatureComputer featureComputer;
protected AllophonesExtractor allophoneExtractor;
protected PhoneUnitLabelComputer labelComputer;
protected TranscriptionAligner transcriptionAligner;
// Name of the silence allophone, read from the database's allophone set in initialiseComp().
protected String pauseSymbol;
// Database layout; assigned in getDefaultProps().
protected DatabaseLayout db = null;
// Progress indicator (0..100) updated while iterating over the basename list.
protected int percent = 0;
// Maps a basename to the error message describing its misalignment.
protected Map problems;
// Set to true once correctPauses() has been run.
protected boolean correctedPauses = false;
// protected boolean wait =false;
// File extensions and directories of the feature and label files; set in customInitialisation().
protected String featsExt;
protected String labExt;
protected String labDir;
protected String featsDir;
// Return codes of letUserCorrect(), interpreted by compute().
protected static final int TRYAGAIN = 0;
protected static final int SKIP = 1;
protected static final int SKIPALL = 2;
protected static final int REMOVE = 3;
protected static final int REMOVEALL = 4;
/**
 * Gives the name of this voice import component.
 *
 * @return the constant string "PhoneLabelFeatureAligner"
 */
public String getName() {
    final String componentName = "PhoneLabelFeatureAligner";
    return componentName;
}
/**
 * Sets the file extensions and directories used by this aligner and looks up the collaborating components from the
 * database layout.
 */
protected void customInitialisation() {
    // fixed file extensions for feature and label files
    featsExt = ".pfeats";
    labExt = ".lab";

    // collaborating components, as registered in the database layout
    featureComputer = (PhoneUnitFeatureComputer) db.getComponent("PhoneUnitFeatureComputer");
    allophoneExtractor = (AllophonesExtractor) db.getComponent("AllophonesExtractor");
    labelComputer = (PhoneUnitLabelComputer) db.getComponent("PhoneUnitLabelComputer");
    transcriptionAligner = (TranscriptionAligner) db.getComponent("TranscriptionAligner");

    // directories are taken from the database properties
    featsDir = db.getProp(db.PHONEFEATUREDIR);
    labDir = db.getProp(db.PHONELABDIR);
}
/**
 * Initialises this component: runs {@link #customInitialisation()}, initialises the feature computer, reads the pause
 * symbol from the allophone set and makes sure the feature and label directories exist.
 *
 * @throws Exception
 *             if initialisation of a collaborating component fails
 */
@Override
protected final void initialiseComp() throws Exception {
    customInitialisation();
    db.initialiseComponent(featureComputer);
    pauseSymbol = db.getAllophoneSet().getSilence().name();

    // make sure the feature directory is there
    final File featureDirectory = new File(featsDir);
    if (!featureDirectory.exists()) {
        System.out.println("Feature directory " + featsDir + " does not exist; ");
        final boolean featureDirCreated = featureDirectory.mkdir();
        if (!featureDirCreated) {
            throw new Error("Could not create FEATUREDIR");
        }
        System.out.println("Created successfully.");
    }

    // make sure the label directory is there
    final File labelDirectory = new File(labDir);
    if (!labelDirectory.exists()) {
        System.out.print("Label directory " + labDir + " does not exist; ");
        final boolean labelDirCreated = labelDirectory.mkdir();
        if (!labelDirCreated) {
            throw new Error("Could not create LABELDIR");
        }
        System.out.println("Created successfully.");
    }
}
/**
 * Remembers the database layout and returns this component's property map, creating an empty one on first use.
 *
 * @param theDb
 *            the database layout this component belongs to
 * @return the (possibly empty) property map of this component
 */
public SortedMap getDefaultProps(DatabaseLayout theDb) {
    this.db = theDb;
    if (props != null) {
        return props;
    }
    // this component defines no properties of its own
    props = new TreeMap();
    return props;
}
/**
 * Creates the help-text map for this component's properties; it is left empty.
 */
protected void setupHelp() {
    // nothing to describe: start with an empty help map
    props2Help = new TreeMap();
}
/**
 * Align labels and features. For each basename in the basename list, verify whether the chain of units in the label file
 * is identical to the chain of units in the corresponding unit feature file. If problems are found, an automatic pause
 * correction is run first; for the problems that remain, the user is given the opportunity to correct the alignment,
 * skip the utterance or remove it from the list.
 *
 * @return a boolean indicating whether or not the database is fully aligned.
 * @throws Exception
 *             if reading or writing one of the files fails, or if the correction dialog returns an unknown code
 */
public boolean compute() throws Exception {
    int bnlLengthIn = bnl.getLength();
    System.out.println("Verifying feature-label alignment for " + bnlLengthIn + " utterances.");
    problems = new TreeMap<String, String>();
    for (int i = 0; i < bnl.getLength(); i++) {
        percent = 100 * i / bnl.getLength();
        // first alignment test
        String errorMessage = verifyAlignment(bnl.getName(i));
        System.out.print(" " + bnl.getName(i));
        if (errorMessage == null) {
            System.out.println(" OK");
        } else {
            problems.put(bnl.getName(i), errorMessage);
            System.out.println(" " + errorMessage);
        }
    }
    System.out.println("Found " + problems.size() + " problems");
    int remainingProblems = problems.keySet().size();
    if (remainingProblems > 0) {
        // automatically correct pauses; this rebuilds the problems map
        // remainingProblems = correctPausesYesNo(remainingProblems);
        remainingProblems = correctPauses();
    }
    int guiReturn = SKIP;
    boolean removeAll = false;
    boolean skipAll = false;
    boolean tryAgain = true;
    // Only the utterances that "Remove all" actually took off the list. The full problems map must not be
    // used for deleting files: it also contains utterances that were fixed or skipped earlier and are
    // still part of the basename list.
    Map<String, String> removedByRemoveAll = new TreeMap<String, String>();
    for (Iterator<?> it = problems.keySet().iterator(); it.hasNext();) {
        String basename = (String) it.next();
        String errorMessage;
        if (!(removeAll || skipAll)) { // These may be set true after a previous call to letUserCorrect()
            do {
                errorMessage = (String) problems.get(basename);
                System.out.println(" " + basename + ": " + errorMessage);
                /* Let the user make a first correction */
                guiReturn = letUserCorrect(basename, errorMessage);
                /* Check if an error remains */
                errorMessage = verifyAlignment(basename);
                /* If there is no error, proceed with the next file. */
                if (errorMessage == null) {
                    System.out.println(" -> OK");
                    remainingProblems--;
                    tryAgain = false;
                }
                /* If the error message is (still) not null, manage the GUI return code: */
                else {
                    // replacing the value of an existing key does not invalidate the running iterator
                    problems.put(basename, errorMessage);
                    /* Manage the error according to the GUI return: */
                    switch (guiReturn) {
                    case TRYAGAIN:
                        tryAgain = true;
                        break;
                    case SKIP:
                        tryAgain = false;
                        System.out.println(" -> Skipped this utterance ! This problem remains.");
                        break;
                    case SKIPALL:
                        tryAgain = false;
                        skipAll = true;
                        System.out.println(" -> Skipping all utterances ! The problems remain.");
                        break;
                    case REMOVE:
                        tryAgain = false;
                        bnl.remove(basename);
                        deleteProblemsYesNo(null, basename);
                        remainingProblems--;
                        System.out.println(" -> Removed from the utterance list. OK");
                        break;
                    case REMOVEALL:
                        tryAgain = false;
                        removeAll = true;
                        System.out.println(" -> Removing all problematic utterances. OK");
                        break;
                    default:
                        throw new RuntimeException("The letUserCorrect() GUI returned an unknown return code.");
                    }
                }
            } while (tryAgain);
        }
        /* Additional management for the removeAll option: */
        if (removeAll) {
            bnl.remove(basename);
            removedByRemoveAll.put(basename, (String) problems.get(basename));
            remainingProblems--;
        }
    }
    if (removeAll) {
        // ask user if the files of the removed utterances should be deleted
        deleteProblemsYesNo(removedByRemoveAll, null);
    }
    System.out.println("Removed [" + (bnlLengthIn - bnl.getLength()) + "/" + bnlLengthIn + "] utterances from the list, ["
            + bnl.getLength() + "] utterances remain," + " among which [" + remainingProblems + "/" + bnl.getLength()
            + "] still have problems.");
    return remainingProblems == 0; // true exactly if all problems have been solved
}
/**
 * Ask the user whether the automatic correction of pauses should be run, and run it on confirmation.
 *
 * @param numProblems
 *            the number of problems found so far
 * @throws IOException
 *             if the pause correction fails to read or write a file
 * @return the number of problems remaining: the result of the pause correction if the first option ("Yes") was chosen,
 *         otherwise <code>numProblems</code> unchanged
 */
protected int correctPausesYesNo(int numProblems) throws IOException {
    final String question = "Found " + numProblems + " problems. Automatically correct pauses?";
    final String[] answers = new String[] { "Yes", "No" };
    final int selected = JOptionPane.showOptionDialog(null, question, "Automatic pause correction",
            JOptionPane.YES_NO_CANCEL_OPTION, JOptionPane.QUESTION_MESSAGE, null, answers, null);
    // index 0 is the "Yes" button
    return (selected == 0) ? correctPauses() : numProblems;
}
/**
 * Ask the user whether the label and feature files of utterances that were removed from the basename list should also be
 * deleted from disk. If the user confirms, the files are deleted and the basename list is written to the basename file.
 *
 * @param someProblems
 *            a map whose keys are the basenames whose files are to be deleted; may be null
 * @param basename
 *            a single basename whose files are to be deleted; may be null
 * @throws IOException
 *             if the basename list cannot be written
 */
protected void deleteProblemsYesNo(Map someProblems, String basename) throws IOException {
    int choice = JOptionPane.showOptionDialog(null, "Removed problematic utterance(s) from List. Also delete file(s)?",
            "Delete problematic file(s)", JOptionPane.YES_NO_CANCEL_OPTION, JOptionPane.QUESTION_MESSAGE, null, new String[] {
                    "Yes", "No" }, null);
    if (choice != 0) {
        // anything but the first option ("Yes") leaves the files alone
        return;
    }
    if (someProblems != null) {
        // we have a map basenames->problems
        for (Object key : someProblems.keySet()) {
            deleteLabelAndFeatureFile((String) key);
        }
    }
    if (basename != null) {
        // there is just one basename
        deleteLabelAndFeatureFile(basename);
    }
    // before anything else happens, ensure that deleted files will not be missed later
    // by forcing basename file to be written/updated; this is just a q&d fix!
    String basenameFilename = db.getProp("db.basenameFile");
    bnl.write(basenameFilename);
}

/**
 * Delete the label file and the feature file of one utterance, reporting files that exist but cannot be deleted.
 *
 * @param basename
 *            the basename of the utterance
 */
private void deleteLabelAndFeatureFile(String basename) {
    File[] candidates = { new File(labDir + basename + labExt), new File(featsDir + basename + featsExt) };
    for (File candidate : candidates) {
        // a file that is not there needs no warning; one that survives delete() does
        if (candidate.exists() && !candidate.delete()) {
            System.out.println("Warning: could not delete " + candidate.getPath());
        }
    }
}
/**
 * Show a window in which the user can type phone replacements (one "labelPhone newLabelPhone" pair per line). "Apply to
 * problems" hides the window and passes the text to {@link #defineReplacements(String)}; "Cancel" just hides the window.
 * This method blocks, polling every 10 ms, until the window is no longer visible, and then disposes it.
 */
protected void defineReplacementWindow() {
    final JFrame frame = new JFrame("Define Replacements");
    GridBagLayout gridBagLayout = new GridBagLayout();
    GridBagConstraints gridC = new GridBagConstraints();
    frame.getContentPane().setLayout(gridBagLayout);
    final JEditorPane editPane = new JEditorPane();
    editPane.setPreferredSize(new Dimension(500, 500));
    editPane.setText("#Whenever a problem occurs, the problematic phone in the label file\n"
            + "#will be replaced by the phone you define here.\n\n" + "#Define replacements like this:\n"
            + "#labelPhone newLabelPhone\n");
    JButton saveButton = new JButton("Apply to problems");
    saveButton.addActionListener(new ActionListener() {
        public void actionPerformed(ActionEvent e) {
            frame.setVisible(false);
            try {
                defineReplacements(editPane.getText());
            } catch (Exception ex) {
                ex.printStackTrace();
                // keep the original failure attached as the cause
                throw new Error("Error defining replacements", ex);
            }
        }
    });
    JButton cancelButton = new JButton("Cancel");
    cancelButton.addActionListener(new ActionListener() {
        public void actionPerformed(ActionEvent e) {
            frame.setVisible(false);
        }
    });
    gridC.gridx = 0;
    gridC.gridy = 0;
    // resize scroll pane:
    gridC.weightx = 1;
    gridC.weighty = 1;
    gridC.fill = GridBagConstraints.HORIZONTAL;
    JScrollPane scrollPane = new JScrollPane(editPane);
    scrollPane.setPreferredSize(editPane.getPreferredSize());
    gridBagLayout.setConstraints(scrollPane, gridC);
    frame.getContentPane().add(scrollPane);
    gridC.gridy = 1;
    // do not resize buttons:
    gridC.weightx = 0;
    gridC.weighty = 0;
    JPanel buttonPanel = new JPanel();
    buttonPanel.setLayout(new FlowLayout());
    buttonPanel.add(saveButton);
    buttonPanel.add(cancelButton);
    gridBagLayout.setConstraints(buttonPanel, gridC);
    frame.getContentPane().add(buttonPanel);
    frame.pack();
    frame.setVisible(true);
    // wait until one of the buttons has hidden the frame
    do {
        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            // stop waiting, but keep the interrupt visible to the caller
            Thread.currentThread().interrupt();
            frame.setVisible(false);
            break;
        }
    } while (frame.isVisible());
    frame.dispose();
}
/**
 * Tell the user that a line of the replacement definition could not be parsed, and reopen the replacement window if the
 * user chooses to correct it.
 *
 * @param text
 *            the offending line of the replacement definition
 */
protected void defineReplacementInfo(String text) {
    final String message = "Error applying replacements: Syntax error in line \"" + text + "\"";
    final String[] buttons = new String[] { "Correct", "Cancel" };
    final int selected = JOptionPane.showOptionDialog(null, message, "Error in replacement definition",
            JOptionPane.YES_NO_CANCEL_OPTION, JOptionPane.QUESTION_MESSAGE, null, buttons, null);
    final boolean wantsToCorrect = (selected == 0);
    if (wantsToCorrect) {
        defineReplacementWindow();
    }
}
/**
 * Try to automatically correct misalignment caused by pauses: If there is a pause in the label file and not in the feature
 * file, it is removed in the label file. If there is a pause in the feature file and not in the label file, a pause of length
 * zero is inserted in the label file. Pause units are recognised by the symbols "_", "__L" and "__R".
 * <p>
 * The problems map is rebuilt from scratch. Every label file that could be read is rewritten (UTF-8) with renumbered unit
 * indices. An utterance whose label or feature file is missing is recorded as a problem.
 *
 * @return the number of problems remaining
 * @throws IOException
 *             if a label or feature file cannot be read or written, or if a label file has no header
 */
protected int correctPauses() throws IOException {
    correctedPauses = true;
    // clear the list of problems
    problems = new TreeMap<String, String>();
    // go through all files
    for (int l = 0; l < bnl.getLength(); l++) {
        percent = 100 * l / bnl.getLength();
        String basename = bnl.getName(l);
        System.out.print(" " + basename);
        String labelPath = labDir + basename + labExt;
        String featurePath = featsDir + basename + featsExt;
        String line;

        // read the label file: header (up to and including the line starting with "#"), then units
        StringBuilder labelFileHeader = new StringBuilder();
        List<String> labelUnits = new ArrayList<String>();
        BufferedReader labels;
        try {
            labels = new BufferedReader(new InputStreamReader(new FileInputStream(new File(labelPath)), "UTF-8"));
        } catch (FileNotFoundException fnfe) {
            // a missing file is still a problem; do not let it vanish from the rebuilt map
            String missing = "No label file " + labelPath;
            problems.put(basename, missing);
            System.out.println(" " + missing);
            continue;
        }
        try {
            boolean foundHeader = false;
            while ((line = labels.readLine()) != null) {
                labelFileHeader.append(line + "\n");
                if (line.startsWith("#")) {
                    foundHeader = true;
                    break; // line starting with "#" marks end of header
                }
            }
            if (!foundHeader) {
                throw new IOException("File " + labelPath + " does not contain a file header!");
            }
            while ((line = labels.readLine()) != null) {
                labelUnits.add(line + "\n");
            }
        } finally {
            labels.close();
        }

        // read the first section of the feature file that follows the header
        List<String> featureUnits = new ArrayList<String>();
        BufferedReader features;
        try {
            features = new BufferedReader(new InputStreamReader(new FileInputStream(new File(featurePath)), "UTF-8"));
        } catch (FileNotFoundException fnfe) {
            String missing = "No feature file " + featurePath;
            problems.put(basename, missing);
            System.out.println(" " + missing);
            continue;
        }
        try {
            while ((line = features.readLine()) != null) {
                if (line.trim().equals(""))
                    break; // empty line marks end of header
            }
            while ((line = features.readLine()) != null) {
                if (line.trim().equals(""))
                    break; // empty line marks end of section
                featureUnits.add(line);
            }
        } finally {
            features.close();
        }

        List<String> labelUnitData;
        String labelUnit;
        String featureUnit;
        String returnString = null;
        int numLabelUnits = labelUnits.size();
        int numFeatureUnits = featureUnits.size();
        int i = 0, j = 0;
        while (i < numLabelUnits && j < numFeatureUnits) {
            labelUnitData = getLabelUnitData(labelUnits.get(i));
            labelUnit = labelUnitData.get(2);
            featureUnit = getFeatureUnit(featureUnits.get(j));
            if (!featureUnit.equals(labelUnit)) {
                if (featureUnit.equals("_")) {
                    // add pause in labels
                    System.out.println(" Adding pause unit in labels before unit " + i);
                    String pauseUnit;
                    if (i - 1 >= 0) {
                        // zero-length pause: it ends where the previous unit ends
                        List<String> previousUnitData = getLabelUnitData(labelUnits.get(i - 1));
                        pauseUnit = previousUnitData.get(0) + " " + labelUnitData.get(1) + " _\n";
                    } else {
                        pauseUnit = "0.00000 " + labelUnitData.get(1) + " _\n";
                    }
                    labelUnits.add(i, pauseUnit);
                    i++;
                    j++;
                    numLabelUnits = labelUnits.size();
                    continue;
                } else if (featureUnit.equals("__L")) {
                    // add two pause units in labels
                    System.out.println(" Adding pause units in labels before unit " + i);
                    String pauseUnit;
                    if (i - 1 >= 0) {
                        List<String> previousUnitData = getLabelUnitData(labelUnits.get(i - 1));
                        pauseUnit = previousUnitData.get(0) + " " + labelUnitData.get(1) + " __L\n";
                        labelUnits.add(i, pauseUnit);
                        i++;
                        pauseUnit = previousUnitData.get(0) + " " + labelUnitData.get(1) + " __R\n";
                        labelUnits.add(i, pauseUnit);
                    } else {
                        // this is the first label-unit
                        pauseUnit = "0.00000 " + labelUnitData.get(1) + " __L\n";
                        labelUnits.add(i, pauseUnit);
                        i++;
                        pauseUnit = "0.00000 " + labelUnitData.get(1) + " __R\n";
                        labelUnits.add(i, pauseUnit);
                    }
                    i++;
                    j += 2;
                    numLabelUnits = labelUnits.size();
                    continue;
                } else if (labelUnit.equals("_")) {
                    // remove pause in labels; i stays put so the next label is compared with the same feature
                    System.out.println(" Removing pause unit in labels at index " + i);
                    labelUnits.remove(i);
                    numLabelUnits = labelUnits.size();
                    continue;
                } else if (labelUnit.equals("__L")) {
                    // remove two pause units in labels
                    System.out.println(" Removing pause units in labels at index " + i);
                    if (i - 1 >= 0) {
                        // lengthen the unit before the pause
                        List<String> previousUnitData = getLabelUnitData(labelUnits.get(i - 1));
                        labelUnits.set(i - 1, labelUnitData.get(0) + " " + previousUnitData.get(1) + " "
                                + previousUnitData.get(2) + "\n");
                    }
                    // remove the pauses
                    labelUnits.remove(i);
                    if (i < labelUnits.size()) {
                        // the second half of the pause may be missing at the very end of the file
                        labelUnits.remove(i);
                    }
                    numLabelUnits = labelUnits.size();
                    continue;
                } else {
                    // truly not matching
                    if (returnString == null) {
                        // only remember the first mismatch
                        int unitIndex = i - 1;
                        returnString = " Non-matching units found: feature file '" + featureUnit + "' vs. label file '"
                                + labelUnit + "' (Unit " + unitIndex + ")";
                    }
                }
            }
            // increase both counters if you did not delete a pause
            i++;
            j++;
        }
        if (numLabelUnits < numFeatureUnits) {
            // check if the final pause is missing in the label file
            featureUnit = getFeatureUnit(featureUnits.get(numFeatureUnits - 1));
            // the label list can be empty here (empty file, or all units removed above)
            boolean haveLabels = numLabelUnits > 0;
            labelUnitData = null;
            labelUnit = null;
            if (haveLabels) {
                labelUnitData = getLabelUnitData(labelUnits.get(numLabelUnits - 1));
                labelUnit = labelUnitData.get(2);
            }
            // add a pause at the end of label file
            if (haveLabels && featureUnit.equals("_") && numLabelUnits + 1 == numFeatureUnits) {
                String lastFeatureUnit = getFeatureUnit(featureUnits.get(numFeatureUnits - 2));
                if (lastFeatureUnit.equals(labelUnit)) {
                    // add pause at the end
                    System.out.println(" Adding pause unit in labels after last unit");
                    String pauseUnit = labelUnitData.get(0) + " " + numLabelUnits + " _\n";
                    labelUnits.add(pauseUnit);
                    numLabelUnits = labelUnits.size();
                }
            } else if (haveLabels && featureUnit.equals("__R") && numLabelUnits + 2 == numFeatureUnits) {
                String lastFeatureUnit = getFeatureUnit(featureUnits.get(numFeatureUnits - 3));
                if (lastFeatureUnit.equals(labelUnit)) {
                    // add two pause units at the end of label file
                    System.out.println(" Adding pause units in labels after last unit");
                    int unitIndex = numLabelUnits - 1;
                    String pauseUnit = labelUnitData.get(0) + " " + unitIndex + " __L\n";
                    labelUnits.add(pauseUnit);
                    pauseUnit = labelUnitData.get(0) + " " + numLabelUnits + " __R\n";
                    labelUnits.add(pauseUnit);
                    numLabelUnits = labelUnits.size();
                }
            } else { // feature file is truly longer than label file
                if (returnString == null) {
                    returnString = " Feature file is longer than label file: " + " unit " + numLabelUnits
                            + " and greater do not exist in label file";
                }
            }
        }
        // return an error if label file is longer than feature file
        if (returnString == null && numLabelUnits > numFeatureUnits) {
            returnString = " Label file is longer than feature file: " + " unit " + numFeatureUnits
                    + " and greater do not exist in feature file";
        }
        // now overwrite the label file, in the same encoding it was read with
        PrintWriter labelFileWriter = new PrintWriter(new OutputStreamWriter(new FileOutputStream(new File(labelPath)),
                "UTF-8"));
        try {
            // print header
            labelFileWriter.print(labelFileHeader.toString());
            // print units
            numLabelUnits = labelUnits.size();
            for (int k = 0; k < numLabelUnits; k++) {
                String nextUnit = labelUnits.get(k);
                if (nextUnit != null) {
                    // correct the unit index
                    List<String> nextUnitData = getLabelUnitData(nextUnit);
                    labelFileWriter.println(nextUnitData.get(0) + " " + k + " " + nextUnitData.get(2));
                }
            }
            labelFileWriter.flush();
        } finally {
            labelFileWriter.close();
        }
        // returnString is null if all units matched,
        // otherwise the first error is given back
        if (returnString == null) {
            System.out.println(" OK");
        } else {
            problems.put(basename, returnString);
            System.out.println(returnString);
        }
    }
    System.out.println("Remaining problems: " + problems.size());
    return problems.size();
}
/**
 * Parse a replacement definition and apply it to the label files. Each line of <code>text</code> that is not empty and
 * does not start with "#" must contain two whitespace-separated tokens: the label phone and its replacement. If a line
 * cannot be parsed, the user is informed and nothing is changed. Otherwise, for every utterance, a label unit that
 * differs from the feature unit at the same position is replaced if the replacement equals the feature unit. Altered
 * label files are rewritten (UTF-8), and the problems map is rebuilt.
 *
 * @param text
 *            the replacement definition, one replacement per line
 * @throws Exception
 *             if a label or feature file cannot be read or written
 */
protected void defineReplacements(String text) throws Exception {
    /* read the replacements into a map */
    Map<String, String> phone2Replace = new HashMap<String, String>();
    String error = null;
    String[] textlines = text.split("\n");
    for (int i = 0; i < textlines.length; i++) {
        if (!textlines[i].startsWith("#") && !textlines[i].equals("")) {
            StringTokenizer tok = new StringTokenizer(textlines[i].trim());
            try {
                String labelPhone = tok.nextToken();
                String newLabelPhone = tok.nextToken();
                phone2Replace.put(labelPhone, newLabelPhone);
            } catch (NoSuchElementException nsee) {
                // fewer than two tokens: remember the offending line and stop parsing
                error = textlines[i];
                break;
            }
        }
    }
    if (error != null) {
        // TODO: Does not work properly
        defineReplacementInfo(error);
        return;
    }
    /*
     * go through the problems and try to replace the phones in the labels with the specified replacements
     */
    // clear the list of problems
    problems = new TreeMap<String, String>();
    // go through all files
    for (int l = 0; l < bnl.getLength(); l++) {
        percent = 100 * l / bnl.getLength();
        String basename = bnl.getName(l);
        System.out.print(" " + basename);
        String labelPath = labDir + basename + labExt;
        String featurePath = featsDir + basename + featsExt;
        String line;

        // read the label file: header (up to and including the line starting with "#"), then units
        StringBuilder labelFileHeader = new StringBuilder();
        List<String> labelUnits = new ArrayList<String>();
        BufferedReader labels;
        try {
            labels = new BufferedReader(new InputStreamReader(new FileInputStream(new File(labelPath)), "UTF-8"));
        } catch (FileNotFoundException fnfe) {
            // a missing file is still a problem; do not let it vanish from the rebuilt map
            String missing = "No label file " + labelPath;
            problems.put(basename, missing);
            System.out.println(" " + missing);
            continue;
        }
        try {
            while ((line = labels.readLine()) != null) {
                labelFileHeader.append(line + "\n");
                if (line.startsWith("#"))
                    break; // line starting with "#" marks end of header
            }
            while ((line = labels.readLine()) != null) {
                labelUnits.add(line + "\n");
            }
        } finally {
            labels.close();
        }

        // read the first section of the feature file that follows the header
        List<String> featureUnits = new ArrayList<String>();
        BufferedReader features;
        try {
            features = new BufferedReader(new InputStreamReader(new FileInputStream(new File(featurePath)), "UTF-8"));
        } catch (FileNotFoundException fnfe) {
            String missing = "No feature file " + featurePath;
            problems.put(basename, missing);
            System.out.println(" " + missing);
            continue;
        }
        try {
            while ((line = features.readLine()) != null) {
                if (line.trim().equals(""))
                    break; // empty line marks end of header
            }
            while ((line = features.readLine()) != null) {
                if (line.trim().equals(""))
                    break; // empty line marks end of section
                featureUnits.add(line);
            }
        } finally {
            features.close();
        }

        List<String> labelUnitData;
        String labelUnit;
        String featureUnit;
        String returnString = null;
        int numLabelUnits = labelUnits.size();
        int numFeatureUnits = featureUnits.size();
        int i = 0, j = 0;
        boolean alteredLabel = false;
        while (i < numLabelUnits && j < numFeatureUnits) {
            labelUnitData = getLabelUnitData(labelUnits.get(i));
            labelUnit = labelUnitData.get(2);
            featureUnit = getFeatureUnit(featureUnits.get(j));
            if (!featureUnit.equals(labelUnit)) {
                // try to replace label Unit
                if (phone2Replace.containsKey(labelUnit)) {
                    System.out.print(" Replacing " + labelUnit);
                    String newlabelUnit = phone2Replace.get(labelUnit);
                    System.out.print(" with " + newlabelUnit + "... ");
                    if (featureUnit.equals(newlabelUnit)) {
                        labelUnits.set(i, labelUnitData.get(0) + " " + labelUnitData.get(1) + " " + newlabelUnit);
                        alteredLabel = true;
                        System.out.print("successful!\n");
                        i++;
                        j++;
                        continue;
                    }
                    System.out.print("failed!\n");
                }
                // else we have a problem
                if (returnString == null) {
                    // only remember the first mismatch
                    int unitIndex = i;
                    returnString = " Non-matching units found: feature file '" + featureUnit + "' vs. label file '"
                            + labelUnit + "' (Unit " + unitIndex + ")";
                }
            }
            i++;
            j++;
        }
        // return an error if label file is longer than feature file
        if (returnString == null && numLabelUnits > numFeatureUnits) {
            returnString = " Label file is longer than feature file: " + " unit " + numFeatureUnits
                    + " and greater do not exist in feature file";
        }
        if (alteredLabel) {
            // overwrite the label file, in the same encoding it was read with
            PrintWriter labelFileWriter = new PrintWriter(new OutputStreamWriter(new FileOutputStream(
                    new File(labelPath)), "UTF-8"));
            try {
                // print header
                labelFileWriter.print(labelFileHeader.toString());
                // print units
                numLabelUnits = labelUnits.size();
                for (int k = 0; k < numLabelUnits; k++) {
                    String nextUnit = labelUnits.get(k);
                    if (nextUnit != null) {
                        // correct the unit index
                        List<String> nextUnitData = getLabelUnitData(nextUnit);
                        labelFileWriter.print(nextUnitData.get(0) + " " + k + " " + nextUnitData.get(2) + "\n");
                    }
                }
                labelFileWriter.flush();
            } finally {
                labelFileWriter.close();
            }
        }
        // returnString is null if all units matched,
        // otherwise the first error is given back
        if (returnString == null) {
            System.out.println(" OK");
        } else {
            problems.put(basename, returnString);
            System.out.println(returnString);
        }
    }
    System.out.println("Remaining problems: " + problems.size());
}
/**
 * Check whether the label file and the feature file of one utterance contain the same sequence of units. The label file
 * header ends with a line starting with "#"; the feature file header ends with an empty line, and the unit section that
 * follows it ends with another empty line.
 *
 * @param basename
 *            basename of the utterance to check
 * @return null if the alignment was OK, or a String containing an error message.
 * @throws IOException
 *             if a file cannot be read, or if the feature file ends before the empty line that closes its unit section
 */
protected String verifyAlignment(String basename) throws IOException {
    final String labelPath = labDir + basename + labExt;
    final String featurePath = featsDir + basename + featsExt;
    BufferedReader labelReader = null;
    BufferedReader featureReader = null;
    try {
        try {
            labelReader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(labelPath)), "UTF-8"));
        } catch (FileNotFoundException missingLabels) {
            return "No label file " + labelPath;
        }
        try {
            featureReader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(featurePath)),
                    "UTF-8"));
        } catch (FileNotFoundException missingFeatures) {
            return "No feature file " + featurePath;
        }

        // Skip the label file header: it ends with the first line starting with "#"
        String headerLine = labelReader.readLine();
        while (headerLine != null && !headerLine.startsWith("#")) {
            headerLine = labelReader.readLine();
        }
        // Skip the feature file header: it ends with the first empty line
        headerLine = featureReader.readLine();
        while (headerLine != null && !headerLine.trim().equals("")) {
            headerLine = featureReader.readLine();
        }

        // Walk through both unit sequences in parallel
        int unitIndex = 0;
        for (;;) {
            String labelUnit = null;
            final String labelLine = labelReader.readLine();
            if (labelLine != null) {
                List fields = getLabelUnitData(labelLine);
                labelUnit = (String) fields.get(2);
                unitIndex = Integer.parseInt((String) fields.get(1));
            }
            final String featureUnit = getFeatureUnit(featureReader);
            if (featureUnit == null) {
                throw new IOException("Incomplete feature file: " + basename);
            }
            // an empty string signals the empty line that ends the feature section
            final boolean featuresExhausted = "".equals(featureUnit);
            if (featuresExhausted && labelUnit == null) {
                // both files ended at the same unit: success
                return null;
            }
            if (featuresExhausted) {
                return "Label file is longer than feature file: " + " unit " + unitIndex
                        + " and greater do not exist in feature file";
            }
            if (labelUnit == null) {
                unitIndex++;
                return "Feature file is longer than label file: " + " unit " + unitIndex
                        + " and greater do not exist in label file";
            }
            if (!featureUnit.equals(labelUnit)) {
                return "Non-matching units found: feature file '" + featureUnit + "' vs. label file '" + labelUnit
                        + "' (Unit " + unitIndex + ")";
            }
        }
    } finally {
        if (labelReader != null)
            labelReader.close();
        if (featureReader != null)
            featureReader.close();
    }
}
/**
 * Split one unit line of a label file into its three fields.
 *
 * @param line
 *            a label file line of the form "time index phone"; may be null
 * @return null if <code>line</code> is null; otherwise a list holding the time, the unit index and the phone, in that
 *         order
 * @throws IOException
 *             if the line has fewer than three whitespace-separated fields
 */
private ArrayList<String> getLabelUnitData(String line) throws IOException {
    if (line == null)
        return null;
    StringTokenizer st = new StringTokenizer(line.trim());
    if (st.countTokens() < 3) {
        // report a broken label file as an I/O problem instead of an unchecked NoSuchElementException
        throw new IOException("Malformed label line (expected 'time index phone'): '" + line.trim() + "'");
    }
    ArrayList<String> unitData = new ArrayList<String>(3);
    // the first token is the time
    unitData.add(st.nextToken());
    // the second token is the unit index
    unitData.add(st.nextToken());
    // the third token is the phone
    unitData.add(st.nextToken());
    return unitData;
}
/**
 * Extract the unit label from one line of a feature file.
 *
 * @param line
 *            a feature file line; may be null
 * @return null if <code>line</code> is null, the empty string if the line is blank (end of section), otherwise the
 *         first whitespace-separated token of the line
 * @throws IOException
 *             declared for symmetry with the reader-based variant
 */
private String getFeatureUnit(String line) throws IOException {
    if (line == null) {
        return null;
    }
    final String content = line.trim();
    if (content.equals("")) {
        // blank line -- signals the end of a section
        return "";
    }
    // the label is expected to be the first token of the line
    final StringTokenizer fields = new StringTokenizer(content);
    return fields.nextToken();
}
/**
 * Read the next line from a label file and return its phone label.
 *
 * @param labelReader
 *            reader positioned at a unit line of a label file
 * @return null at end of input; otherwise the third whitespace-separated token of the line read
 * @throws IOException
 *             if reading fails, or if the line has fewer than three whitespace-separated fields
 */
private String getLabelUnit(BufferedReader labelReader) throws IOException {
    String line = labelReader.readLine();
    if (line == null)
        return null;
    StringTokenizer st = new StringTokenizer(line.trim());
    if (st.countTokens() < 3) {
        // report a broken label file as an I/O problem instead of an unchecked NoSuchElementException
        throw new IOException("Malformed label line (expected 'time index phone'): '" + line.trim() + "'");
    }
    // The third token in each line is the label
    st.nextToken();
    st.nextToken();
    return st.nextToken();
}
/**
 * Read the next line from a feature file and extract its unit label.
 *
 * @param featureReader
 *            reader positioned inside a feature file
 * @return null at end of input, the empty string if the line read is blank (end of section), otherwise the first
 *         whitespace-separated token of the line read
 * @throws IOException
 *             if reading fails
 */
private String getFeatureUnit(BufferedReader featureReader) throws IOException {
    final String rawLine = featureReader.readLine();
    if (rawLine == null) {
        return null;
    }
    final String content = rawLine.trim();
    if (content.equals("")) {
        // blank line -- signals the end of a section
        return "";
    }
    // the label is expected to be the first token of the line
    final StringTokenizer fields = new StringTokenizer(content);
    return fields.nextToken();
}
/**
 * Show a dialog describing the misalignment of one utterance and let the user decide what to do. The two editing options
 * open the corresponding editor before returning.
 *
 * @param basename
 *            basename of the misaligned utterance
 * @param errorMessage
 *            description of the misalignment
 * @return TRYAGAIN after one of the editors was opened; REMOVE, REMOVEALL, SKIP or SKIPALL for the buttons of the same
 *         name; SKIP if the dialog was closed or cannot be shown in a headless environment
 * @throws Exception
 *             if one of the editing steps fails
 */
protected int letUserCorrect(String basename, String errorMessage) throws Exception {
    // The same buttons are offered whether or not the automatic pause correction has run.
    // (Further options "Replace labels in unit file" / "Define replacements" are currently disabled.)
    final String[] buttons = { "Edit RAWMARYXML", "Edit unit labels", "Remove from list", "Remove all problems", "Skip",
            "Skip all" };
    final int selected;
    try {
        selected = JOptionPane.showOptionDialog(null, "Misalignment problem for " + basename + ":\n" + errorMessage,
                "Correct alignment for " + basename, JOptionPane.YES_NO_CANCEL_OPTION, JOptionPane.QUESTION_MESSAGE, null,
                buttons, null);
    } catch (HeadlessException noDisplay) {
        return SKIP;
    }
    if (selected == 0) {
        editMaryXML(basename);
        return TRYAGAIN;
    }
    if (selected == 1) {
        editUnitLabels(basename);
        return TRYAGAIN;
    }
    if (selected == 2) {
        return REMOVE;
    }
    if (selected == 3) {
        return REMOVEALL;
    }
    if (selected == 5) {
        return SKIPALL;
    }
    // button 4 ("Skip") or JOptionPane.CLOSED_OPTION: don't verify again.
    return SKIP;
}
/**
 * Replace all label units which do not match the feature unit at the same position with that feature unit, and rewrite
 * the label file (UTF-8) with renumbered unit indices. This method should only be called after automatic pause
 * alignment.
 *
 * @param basename
 *            the filename of the label/feature file
 * @throws IOException
 *             if the label or feature file cannot be read, or the label file cannot be written
 */
private void replaceUnitLabels(String basename) throws IOException {
    String labelPath = labDir + basename + labExt;
    String featurePath = featsDir + basename + featsExt;
    String line;

    // read the label file: header (up to and including the line starting with "#"), then units
    StringBuilder labelFileHeader = new StringBuilder();
    List<String> labelUnits = new ArrayList<String>();
    BufferedReader labels = new BufferedReader(new InputStreamReader(new FileInputStream(new File(labelPath)), "UTF-8"));
    try {
        while ((line = labels.readLine()) != null) {
            labelFileHeader.append(line + "\n");
            if (line.startsWith("#"))
                break; // line starting with "#" marks end of header
        }
        while ((line = labels.readLine()) != null) {
            labelUnits.add(line + "\n");
        }
    } finally {
        labels.close();
    }

    // read the first section of the feature file that follows the header
    List<String> featureTextUnits = new ArrayList<String>();
    BufferedReader features = new BufferedReader(new InputStreamReader(new FileInputStream(new File(featurePath)),
            "UTF-8"));
    try {
        while ((line = features.readLine()) != null) {
            if (line.trim().equals(""))
                break; // empty line marks end of header
        }
        while ((line = features.readLine()) != null) {
            if (line.trim().equals(""))
                break; // empty line marks end of section
            featureTextUnits.add(line);
        }
    } finally {
        features.close();
    }

    // compare position by position, over the part both files have in common
    int numCommonUnits = Math.min(labelUnits.size(), featureTextUnits.size());
    for (int i = 0; i < numCommonUnits; i++) {
        List<String> labelUnitData = getLabelUnitData(labelUnits.get(i));
        String labelUnit = labelUnitData.get(2);
        String featureUnit = getFeatureUnit(featureTextUnits.get(i));
        if (!featureUnit.equals(labelUnit)) {
            // take over label of feature file
            labelUnits.set(i, labelUnitData.get(0) + " " + labelUnitData.get(1) + " " + featureUnit + "\n");
        }
    }

    // now overwrite the label file, in the same encoding it was read with
    PrintWriter labelFileWriter = new PrintWriter(new OutputStreamWriter(new FileOutputStream(new File(labelPath)),
            "UTF-8"));
    try {
        // print header
        labelFileWriter.print(labelFileHeader.toString());
        // print units
        int numLabelUnits = labelUnits.size();
        for (int k = 0; k < numLabelUnits; k++) {
            String nextUnit = labelUnits.get(k);
            if (nextUnit != null) {
                // correct the unit index
                List<String> nextUnitData = getLabelUnitData(nextUnit);
                labelFileWriter.print(nextUnitData.get(0) + " " + k + " " + nextUnitData.get(2) + "\n");
            }
        }
        labelFileWriter.flush();
    } finally {
        labelFileWriter.close();
    }
}
/**
 * Let the user edit the MaryXML file belonging to the given basename. If the file does not exist yet, it is first created
 * from the MaryXML header followed by the content of the corresponding text file. If the user saves the edited file, the
 * allophones, transcription alignment, phone labels and features for this basename are recomputed.
 *
 * @param basename
 *            the basename of the utterance whose MaryXML file is to be edited
 * @throws Exception
 *             if the file cannot be created or displayed, or if one of the recomputation steps fails (a failure of the
 *             transcription alignment alone is only printed, not propagated)
 */
private void editMaryXML(String basename) throws Exception {
    final File maryxmlFile = new File(db.getProp(db.MARYXMLDIR) + basename + db.getProp(db.MARYXMLEXT));
    if (!maryxmlFile.exists()) {
        // need to create it
        String text = FileUtils
                .getFileAsString(new File(db.getProp(db.TEXTDIR) + basename + db.getProp(db.TEXTEXT)), "UTF-8");
        PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(maryxmlFile), "UTF-8"));
        try {
            pw.println(PhoneUnitFeatureComputer.getMaryXMLHeaderWithInitialBoundary(db.getProp(db.LOCALE)));
            pw.println(text);
            // NOTE(review): the header helper is expected to open the <maryxml> root element, which must be closed
            // here; previously an empty line was written instead, leaving the document unterminated. Confirm against
            // PhoneUnitFeatureComputer.getMaryXMLHeaderWithInitialBoundary.
            pw.println("</maryxml>");
        } finally {
            pw.close();
        }
    }
    boolean edited = new EditFrameShower(maryxmlFile).display();
    if (edited) {
        allophoneExtractor.generateAllophonesFile(basename);
        try {
            transcriptionAligner.alignTranscription(basename);
        } catch (Exception e) {
            // best effort: an alignment failure must not prevent recomputing labels and features
            e.printStackTrace();
        }
        labelComputer.computePhoneLabel(basename);
        featureComputer.computeFeaturesFor(basename);
    }
}
/**
 * Open the label file belonging to the given basename in an edit frame and wait until the user closes it. Whether the
 * user saved or cancelled is not evaluated here.
 *
 * @param basename
 *            the basename of the label file to edit
 * @throws IOException
 *             if the label file cannot be read for display
 */
private void editUnitLabels(String basename) throws IOException {
    final File labelFile = new File(labDir + basename + labExt);
    final EditFrameShower labelEditor = new EditFrameShower(labelFile);
    labelEditor.display();
}
/**
 * Command-line entry point: create an aligner, attach it to a database layout, run the alignment check and print whether
 * the database is perfectly aligned.
 *
 * @param args
 *            command-line arguments (not evaluated in this method)
 * @throws Exception
 *             if setting up the database layout or running the computation fails
 */
public static void main(String[] args) throws Exception {
    PhoneLabelFeatureAligner lfa = new PhoneLabelFeatureAligner();
    // NOTE(review): the layout object itself is not used; presumably its constructor initialises lfa -- confirm.
    new DatabaseLayout(lfa);
    boolean isAligned = lfa.compute();
    // Put the separating space with "NOT" so that the aligned case does not print a double space.
    System.out.println("The database is " + (isAligned ? "" : "NOT ") + "perfectly aligned");
}
/**
 * Small helper which shows a file in an editable text frame with "Save &amp; Exit" and "Cancel" buttons and blocks the
 * calling thread until the frame has been closed.
 */
public static class EditFrameShower {
    protected final File file;
    // Written by the button listeners and read by the thread waiting in display(), hence volatile.
    protected volatile boolean saved;

    /**
     * @param file
     *            the file to be shown and edited
     */
    public EditFrameShower(File file) {
        this.file = file;
        this.saved = false;
    }

    /**
     * Show a frame allowing the user to edit the file. The file is read and written as UTF-8. This method blocks, polling
     * the frame's visibility, until the frame is no longer visible; it should therefore not be called from the thread
     * that delivers the button events, since that thread would be blocked by the polling loop.
     *
     * @return a boolean indicating whether the file was saved.
     * @throws IOException
     *             IOException
     * @throws UnsupportedEncodingException
     *             UnsupportedEncodingException
     * @throws FileNotFoundException
     *             FileNotFoundException
     */
    public boolean display() throws IOException, UnsupportedEncodingException, FileNotFoundException {
        final JFrame frame = new JFrame("Edit " + file.getName());
        GridBagLayout gridBagLayout = new GridBagLayout();
        GridBagConstraints gridC = new GridBagConstraints();
        frame.getContentPane().setLayout(gridBagLayout);
        final JEditorPane editPane = new JEditorPane();
        editPane.setPreferredSize(new Dimension(500, 500));
        // Load the file content; the reader is closed here so the file handle is not leaked.
        InputStreamReader reader = new InputStreamReader(new FileInputStream(file), "UTF-8");
        try {
            editPane.read(reader, null);
        } finally {
            reader.close();
        }
        JButton saveButton = new JButton("Save & Exit");
        saveButton.addActionListener(new ActionListener() {
            public void actionPerformed(ActionEvent e) {
                try {
                    PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));
                    try {
                        editPane.write(pw);
                        pw.flush();
                    } finally {
                        pw.close();
                    }
                    // Record the result BEFORE hiding the frame: the waiting thread returns as soon as it sees the
                    // frame invisible and must not read a stale flag.
                    setSaved(true);
                    frame.setVisible(false);
                } catch (IOException ioe) {
                    // Saving failed: report it and keep the frame open so the user can retry or cancel.
                    ioe.printStackTrace();
                }
            }
        });
        JButton cancelButton = new JButton("Cancel");
        cancelButton.addActionListener(new ActionListener() {
            public void actionPerformed(ActionEvent e) {
                setSaved(false);
                frame.setVisible(false);
            }
        });
        gridC.gridx = 0;
        gridC.gridy = 0;
        // resize scroll pane:
        gridC.weightx = 1;
        gridC.weighty = 1;
        gridC.fill = GridBagConstraints.HORIZONTAL;
        JScrollPane scrollPane = new JScrollPane(editPane);
        scrollPane.setPreferredSize(editPane.getPreferredSize());
        gridBagLayout.setConstraints(scrollPane, gridC);
        frame.getContentPane().add(scrollPane);
        gridC.gridy = 1;
        // do not resize buttons:
        gridC.weightx = 0;
        gridC.weighty = 0;
        JPanel buttonPanel = new JPanel();
        buttonPanel.setLayout(new FlowLayout());
        buttonPanel.add(saveButton);
        buttonPanel.add(cancelButton);
        gridBagLayout.setConstraints(buttonPanel, gridC);
        frame.getContentPane().add(buttonPanel);
        frame.pack();
        frame.setVisible(true);
        // Poll until the frame has been hidden by one of the buttons or by closing the window.
        boolean interrupted = false;
        do {
            try {
                Thread.sleep(10);
            } catch (InterruptedException e) {
                // Keep waiting for the user, but remember the request so it can be restored afterwards.
                interrupted = true;
            }
        } while (frame.isVisible());
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
        frame.dispose();
        return saved;
    }

    /**
     * Record whether the file was saved by the user.
     *
     * @param saved
     *            true if the file was written, false otherwise
     */
    protected void setSaved(boolean saved) {
        this.saved = saved;
    }
}
/**
 * Report how far the computation has progressed.
 *
 * @return the current value of the progress counter: an integer between 0 and 100, or -1 if progress reporting is not
 *         implemented.
 */
public int getProgress() {
    final int currentPercent = this.percent;
    return currentPercent;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy