All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.util.data.text.BasenameClassificationDefinitionFileReader Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2010 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

package marytts.util.data.text;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.LinkedHashMap;

import org.apache.oro.text.GlobCompiler;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.Perl5Matcher;

/**
 * Reader for basename classification definition file, which allows mapping of basenames to classification strings in a lazy way,
 * using glob expressions.
 * 

* Definition file format: *

    *
  1. lines with leading # are comments and are ignored *
  2. empty lines are ignored *
  3. every other line must conform to the format * *
     * GLOB = class
     * 
    * * where GLOB is a glob expression and class is a classification string. Within the glob, the following * characters have special meaning: *
    *
    *
    *
    zero or more characters
    *
    ?
    *
    any one character
    *
    *
* The idea is that a line like * *
 *   foo_* = bar
 * 
* * in the classification definition file will cause all basenames starting with foo_ to be handled as belonging to the * "bar" class. One scenario where this is useful is the classification of prompts by speaking style in a multi-style voice * database. * * @author steiner * */ public class BasenameClassificationDefinitionFileReader { protected BufferedReader reader; public boolean fileOK = true; protected LinkedHashMap styleDefinitions = new LinkedHashMap(); /** * constructor to call main constructor with a filename String * * @param filename * as a String * @throws IOException * IOException */ public BasenameClassificationDefinitionFileReader(String filename) throws IOException { this(new FileReader(filename)); } /** * main constructor * * @param reader * as a Reader * @throws IOException * IOException */ public BasenameClassificationDefinitionFileReader(Reader reader) throws IOException { this.reader = new BufferedReader(reader); parseDefinitionFile(); } /** * parse style definition file (see class documentation above for format), putting <glob expression, style string> pairs * in styleDefinitions * * @throws IOException * IOException */ private void parseDefinitionFile() throws IOException { String line; String globString; String styleString; GlobCompiler glob = new GlobCompiler(); Pattern globPattern; // read lines... 
while ((line = reader.readLine()) != null) { // ...trimming whitespace: line = line.trim(); // ignore lines that are empty or start with #: if (line.equals("") || line.startsWith("#")) { continue; } else { // split lines into fields String[] fields = line.split("="); try { globString = fields[0].trim(); styleString = fields[1].trim(); } catch (IndexOutOfBoundsException iob) { System.err.println("Warning: could not parse line: " + line); fileOK = false; continue; } // create GlobCompiler for glob expression: try { globPattern = glob.compile(globString); } catch (MalformedPatternException mpe) { System.err.println("Warning: could not parse line: "); fileOK = false; continue; } // put (glob expression, style string) pair in styleDefinions: styleDefinitions.put(globPattern, styleString); } } if (styleDefinitions.isEmpty()) { System.err.println("Warning: no style definitions were found!"); } } /** * match basename against the glob expressions in styleDefinitions * * @param basename * basename * @return style String of first matching glob expression, or empty String if no glob matches */ public String getValue(String basename) { Perl5Matcher globMatcher = new Perl5Matcher(); String style = ""; for (Pattern globPattern : styleDefinitions.keySet()) { if (globMatcher.matches(basename, globPattern)) { style = styleDefinitions.get(globPattern); break; // enable this line to change behavior to return style of *first* matching glob expr // return style; // enable this line to change behavior to return style of *last* matching glob expr } } // no globPattern in styleDefinitions matched... return empty string: return style; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy