marytts.util.data.text.BasenameClassificationDefinitionFileReader Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2010 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.util.data.text;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.LinkedHashMap;
import org.apache.oro.text.GlobCompiler;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.Perl5Matcher;
/**
 * Reader for a basename classification definition file, which allows mapping of basenames to classification strings in a
 * lazy way, using glob expressions.
 * <p>
 * Definition file format:
 * <ul>
 * <li>lines with leading <code>#</code> are comments and are ignored</li>
 * <li>empty lines are ignored</li>
 * <li>every other line must conform to the format
 *
 * <pre>
 * GLOB = class
 * </pre>
 *
 * where <code>GLOB</code> is a glob expression and <code>class</code> is a classification string. The line is split at
 * the <em>first</em> <code>=</code>, so the classification string may itself contain <code>=</code>. Within the glob, the
 * following characters have special meaning:
 * <dl>
 * <dt><code>*</code></dt>
 * <dd>zero or more characters</dd>
 * <dt><code>?</code></dt>
 * <dd>any one character</dd>
 * </dl>
 * </li>
 * </ul>
 * The idea is that a line like
 *
 * <pre>
 * foo_* = bar
 * </pre>
 *
 * in the classification definition file will cause all basenames starting with <code>foo_</code> to be handled as
 * belonging to the "bar" class. One scenario where this is useful is the classification of prompts by speaking style in a
 * multi-style voice database.
 * <p>
 * Definitions are kept in file order; when several globs match a basename, the one that appears first in the file wins.
 *
 * @author steiner
 *
 */
public class BasenameClassificationDefinitionFileReader {
    /** Buffered view of the definition source; fully consumed by the time a constructor returns. */
    protected BufferedReader reader;

    /** <code>true</code> until a non-comment, non-empty line fails to parse; then <code>false</code>. */
    public boolean fileOK = true;

    /** Compiled glob &rarr; classification string, in the order the definitions appeared in the file. */
    protected LinkedHashMap<Pattern, String> styleDefinitions = new LinkedHashMap<Pattern, String>();

    /**
     * constructor to call main constructor with a filename String. The file is read with the platform's default charset
     * and is closed again once it has been parsed.
     *
     * @param filename
     *            as a String
     * @throws IOException
     *             if the file cannot be opened or read
     */
    public BasenameClassificationDefinitionFileReader(String filename) throws IOException {
        // we open the FileReader ourselves, so we are responsible for closing it
        this(new FileReader(filename), true);
    }

    /**
     * main constructor. The supplied reader is read until end of input but is <em>not</em> closed; closing it remains the
     * caller's responsibility.
     *
     * @param reader
     *            as a Reader
     * @throws IOException
     *             if reading from <code>reader</code> fails
     */
    public BasenameClassificationDefinitionFileReader(Reader reader) throws IOException {
        this(reader, false);
    }

    /**
     * shared constructor implementation: wraps the reader, parses it, and optionally closes it afterwards.
     *
     * @param reader
     *            source of the definition file
     * @param closeWhenDone
     *            whether this object owns <code>reader</code> and must close it after parsing (also on failure)
     * @throws IOException
     *             if reading fails
     */
    private BasenameClassificationDefinitionFileReader(Reader reader, boolean closeWhenDone) throws IOException {
        this.reader = new BufferedReader(reader);
        try {
            parseDefinitionFile();
        } finally {
            if (closeWhenDone) {
                this.reader.close();
            }
        }
    }

    /**
     * parse style definition file (see class documentation above for format), putting (glob expression, style string)
     * pairs in styleDefinitions. Lines that cannot be parsed are reported on <code>System.err</code>, set
     * {@link #fileOK} to <code>false</code>, and are otherwise skipped.
     *
     * @throws IOException
     *             if reading from the underlying reader fails
     */
    private void parseDefinitionFile() throws IOException {
        GlobCompiler glob = new GlobCompiler();
        String line;
        // read lines...
        while ((line = reader.readLine()) != null) {
            // ...trimming whitespace:
            line = line.trim();
            // ignore lines that are empty or start with #:
            if (line.length() == 0 || line.startsWith("#")) {
                continue;
            }

            // split at the FIRST '=' only, so that a style string containing '=' is not silently truncated:
            int separator = line.indexOf('=');
            String styleString = separator < 0 ? "" : line.substring(separator + 1).trim();
            if (separator < 0 || styleString.length() == 0) {
                // no separator at all, or nothing after it
                System.err.println("Warning: could not parse line: " + line);
                fileOK = false;
                continue;
            }
            String globString = line.substring(0, separator).trim();

            // compile the glob expression:
            Pattern globPattern;
            try {
                globPattern = glob.compile(globString);
            } catch (MalformedPatternException mpe) {
                System.err.println("Warning: could not parse line: " + line + " (" + mpe.getMessage() + ")");
                fileOK = false;
                continue;
            }

            // put (glob expression, style string) pair in styleDefinitions:
            styleDefinitions.put(globPattern, styleString);
        }
        if (styleDefinitions.isEmpty()) {
            System.err.println("Warning: no style definitions were found!");
        }
    }

    /**
     * match basename against the glob expressions in styleDefinitions, in the order in which they were defined
     *
     * @param basename
     *            basename; <code>null</code> is treated as matching nothing
     * @return style String of first matching glob expression, or empty String if no glob matches
     */
    public String getValue(String basename) {
        if (basename == null) {
            return "";
        }
        // a fresh matcher per call keeps this method free of shared mutable matcher state
        Perl5Matcher globMatcher = new Perl5Matcher();
        for (java.util.Map.Entry<Pattern, String> definition : styleDefinitions.entrySet()) {
            if (globMatcher.matches(basename, definition.getKey())) {
                // first match in file order wins
                return definition.getValue();
            }
        }
        // no globPattern in styleDefinitions matched... return empty string:
        return "";
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy