marytts.datatypes.MaryData Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2000-2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.datatypes;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.Arrays;
import java.util.Locale;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import marytts.modules.synthesis.Voice;
import marytts.server.MaryProperties;
import marytts.util.MaryUtils;
import marytts.util.data.audio.AppendableSequenceAudioInputStream;
import marytts.util.data.audio.SequenceAudioInputStream;
import marytts.util.dom.DomUtils;
import marytts.util.dom.MaryNormalisedWriter;
import marytts.util.io.FileUtils;
import marytts.util.io.ReaderSplitter;
import marytts.util.string.StringUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.google.common.base.Objects;
/**
* A representation of any type of mary data, be it input, intermediate or output data. The "technical" representation of the read
* data is hidden from the caller, but can be accessed on request. Internally, the data is appropriately represented according to
* this data's type, i.e. as a String containing plain text, an XML DOM tree, or an input stream containing audio data.
*
* @author Marc Schröder
*/
public class MaryData {
    /** The data type of this object; checked to be non-null in the constructor. */
    private final MaryDataType type;
    /** The locale given at construction time; may be null. */
    private final Locale locale;
    private String outputParams = null;
    // Only one of the following data representations should be non-null
    // for a given instance; which one depends on our type.
    private Document xmlDocument = null;
    private String plainText = null;
    private AudioInputStream audio = null;
    private AudioFileFormat audioFileFormat = null;
    private final Logger logger = MaryUtils.getLogger("IO");
    // for plainText, allow additional information:
    private Voice defaultVoice = null;
    private String defaultStyle = "";
    private String defaultEffects = "";
    // The following XML I/O helper is only initialised
    // if actually needed (see writeTo(OutputStream)).
    private MaryNormalisedWriter writer = null;
    private boolean doValidate;
    // Never written after initialisation: setWarnClient() is a no-op.
    private boolean doWarnClient = false;

    /**
     * Create an empty data object of the given type, without a stub XML document.
     *
     * @param type
     *            the data type; must not be null.
     * @param locale
     *            the locale of the data.
     * @throws NullPointerException
     *             if type is null.
     */
    public MaryData(MaryDataType type, Locale locale) {
        this(type, locale, false);
    }

    /**
     * Create an empty data object of the given type.
     *
     * @param type
     *            the data type; must not be null.
     * @param locale
     *            the locale of the data.
     * @param createStubDocument
     *            if true and the type is a MaryXML type, initialise the XML document with {@code MaryXML.newDocument()}.
     * @throws NullPointerException
     *             if type is null.
     */
    public MaryData(MaryDataType type, Locale locale, boolean createStubDocument) {
        if (type == null)
            throw new NullPointerException("Received null type for MaryData");
        this.type = type;
        this.locale = locale;
        // The following is the default setting for module output (we suppose
        // that for the input data, setValidating() is called as appropriate):
        doValidate = MaryProperties.getBoolean("maryxml.validate.modules", false);
        if (createStubDocument && type.isMaryXML()) {
            xmlDocument = MaryXML.newDocument();
        }
    }

    /**
     * @return whether XML input passed to {@link #setData(String)} is parsed with validation.
     */
    public boolean getValidating() {
        return doValidate;
    }

    /**
     * Set whether XML input passed to {@link #setData(String)} is parsed with validation.
     *
     * @param doValidate
     *            the new validation flag.
     * @throws ParserConfigurationException
     *             declared for compatibility with existing callers; the current implementation does not throw it.
     */
    public void setValidating(boolean doValidate) throws ParserConfigurationException {
        this.doValidate = doValidate;
    }

    /**
     * @return the warn-client flag. Since {@link #setWarnClient(boolean)} discards its argument, this is always false.
     */
    @Deprecated
    public boolean getWarnClient() {
        return doWarnClient;
    }

    /**
     * Deprecated no-op: the argument is discarded and the flag keeps its initial value.
     *
     * @param doWarnClient
     *            ignored.
     */
    @Deprecated
    public void setWarnClient(boolean doWarnClient) {
    }

    /**
     * @return the data type of this object.
     */
    public MaryDataType getType() {
        return type;
    }

    /**
     * @return the locale given at construction time; may be null.
     */
    public Locale getLocale() {
        return locale;
    }

    /**
     * Read data from input stream {@code is}, in the appropriate way as determined by our {@code type}. Equivalent to
     * {@code readFrom(is, null)}, i.e. reads until end-of-file.
     *
     * @param is
     *            the InputStream from which to read.
     * @throws ParserConfigurationException
     *             if the XML parser cannot be set up.
     * @throws SAXException
     *             if XML input cannot be parsed.
     * @throws IOException
     *             if reading from the stream fails.
     * @throws TransformerConfigurationException
     *             declared for compatibility with existing callers.
     * @throws TransformerException
     *             declared for compatibility with existing callers.
     */
    public void readFrom(InputStream is) throws ParserConfigurationException, SAXException, IOException,
            TransformerConfigurationException, TransformerException {
        readFrom(is, null);
    }

    /**
     * Read data from input stream {@code is}, in the appropriate way as determined by our {@code type}. XML and text data
     * are decoded as UTF-8; for any other type the stream is taken to be the audio data itself.
     *
     * @param is
     *            the InputStream from which to read. For audio types this must already be an {@link AudioInputStream}.
     * @param endMarker
     *            a string marking end of file. If this is null, read until end-of-file; if it is non-null, read up to (and
     *            including) the first line containing the end marker string. This will be ignored for audio data.
     * @throws ParserConfigurationException
     *             if the XML parser cannot be set up.
     * @throws SAXException
     *             if XML input cannot be parsed.
     * @throws IOException
     *             if reading from the stream fails.
     * @throws TransformerConfigurationException
     *             declared for compatibility with existing callers.
     * @throws TransformerException
     *             declared for compatibility with existing callers.
     * @throws ClassCastException
     *             if the type is an audio type and {@code is} is not an {@link AudioInputStream}.
     */
    public void readFrom(InputStream is, String endMarker) throws ParserConfigurationException, SAXException, IOException,
            TransformerConfigurationException, TransformerException {
        if (type.isXMLType() || type.isTextType())
            readFrom(new InputStreamReader(is, "UTF-8"), endMarker);
        else { // audio
            // ignore endMarker; the stream is stored, not consumed
            setAudio((AudioInputStream) is);
        }
    }

    /**
     * Read data from reader {@code from} in the appropriate way as determined by our {@code type}. Only XML and text data
     * can be read from a reader, audio data cannot. The reader is read completely into a String, which is then passed to
     * {@link #setData(String)}.
     *
     * @param from
     *            the Reader from which to read.
     * @throws ParserConfigurationException
     *             if the XML parser cannot be set up.
     * @throws SAXException
     *             if XML input cannot be parsed.
     * @throws IOException
     *             if reading from the reader fails.
     */
    public void readFrom(Reader from) throws ParserConfigurationException, SAXException, IOException {
        String inputData = FileUtils.getReaderAsString(from);
        setData(inputData);
    }

    /**
     * Read data from reader {@code from} in the appropriate way as determined by our {@code type}. Only XML and text data
     * can be read from a reader, audio data cannot.
     *
     * @param from
     *            the Reader from which to read.
     * @param endMarker
     *            a string marking end of file. If this is null, read until end-of-file; if it is non-null, read up to (and
     *            including) the first line containing the end marker string.
     * @throws ParserConfigurationException
     *             if the XML parser cannot be set up.
     * @throws SAXException
     *             if XML input cannot be parsed.
     * @throws IOException
     *             if reading from the reader fails.
     */
    public void readFrom(Reader from, String endMarker) throws ParserConfigurationException, SAXException, IOException {
        // For the case that the data to be read is not
        // followed by end-of-file, we use a ReaderSplitter which
        // provides a reader artificially "inserting" an end-of-file
        // after a line containing the pattern given in endMarker.
        Reader r = from;
        if (endMarker != null) {
            ReaderSplitter fromSplitter = new ReaderSplitter(from, endMarker);
            r = fromSplitter.nextReader();
        }
        readFrom(r);
    }

    /**
     * Set the content data of this MaryData object from the given String. For XML data ({@link MaryDataType#isXMLType()}), parse
     * the String representation of the data into a DOM tree.
     *
     * @param dataString
     *            string representation of the input data.
     * @throws ParserConfigurationException
     *             if the XML parser cannot be set up.
     * @throws IOException
     *             if reading the data fails.
     * @throws SAXException
     *             if XML input cannot be parsed.
     * @throws IllegalArgumentException
     *             if this method is called for MaryDataTypes that are neither text nor XML.
     */
    public void setData(String dataString) throws ParserConfigurationException, SAXException, IOException {
        // First, some data cleanup:
        dataString = StringUtils.purgeNonBreakingSpaces(dataString);
        // Now, deal with it. The XML check must come first: XML types are also text types.
        if (type.isXMLType()) {
            logger.debug("Parsing XML input (" + (doValidate ? "" : "non-") + "validating): " + dataString);
            xmlDocument = DomUtils.parseDocument(dataString, doValidate);
        } else if (type.isTextType()) {
            logger.debug("Setting text input: " + dataString);
            plainText = dataString;
        } else {
            throw new IllegalArgumentException("Cannot set data of type " + type + " from a string");
        }
    }

    /**
     * Write our internal representation to output stream {@code os}, in the appropriate way as determined by our
     * {@code type}. XML is written through a lazily created {@link MaryNormalisedWriter}; text is written as UTF-8; audio is
     * written in the file type of the configured audio file format, after which {@code os} is flushed and closed.
     *
     * @param os
     *            the OutputStream to write to. It is closed by this method only in the audio case.
     * @throws TransformerConfigurationException
     *             if the XML writer cannot be set up.
     * @throws FileNotFoundException
     *             declared for compatibility with existing callers.
     * @throws TransformerException
     *             if serialising the XML document fails.
     * @throws IOException
     *             if writing to the stream fails.
     * @throws Exception
     *             declared for compatibility with existing callers.
     */
    public void writeTo(OutputStream os) throws TransformerConfigurationException, FileNotFoundException, TransformerException,
            IOException, Exception {
        if (type.isXMLType()) {
            if (writer == null) {
                writer = new MaryNormalisedWriter();
            }
            // isDebugEnabled() also covers levels finer than DEBUG, unlike an equality test on the level.
            if (logger.isDebugEnabled()) {
                ByteArrayOutputStream debugOut = new ByteArrayOutputStream();
                writer.output(xmlDocument, debugOut);
                logger.debug(debugOut.toString());
            }
            BufferedOutputStream bufferedOut = new BufferedOutputStream(os);
            writer.output(xmlDocument, bufferedOut);
            // Make sure nothing stays behind in the local buffer, whether or not the writer flushed it.
            bufferedOut.flush();
        } else if (type.isTextType()) { // caution: XML types are text types!
            writeTo(new OutputStreamWriter(os, "UTF-8"));
        } else { // audio
            logger.debug("Writing audio output, frame length " + audio.getFrameLength());
            AudioSystem.write(audio, audioFileFormat.getType(), os);
            os.flush();
            os.close();
        }
    }

    /**
     * Write our internal representation to writer {@code w}, in the appropriate way as determined by our {@code type}.
     * Only plain text data can be written to a writer; XML data must be written to an OutputStream, and audio data cannot be
     * written to a character writer at all.
     *
     * @param w
     *            the Writer to write to; it is flushed but not closed.
     * @throws TransformerConfigurationException
     *             declared for compatibility with existing callers.
     * @throws FileNotFoundException
     *             declared for compatibility with existing callers.
     * @throws TransformerException
     *             declared for compatibility with existing callers.
     * @throws IOException
     *             if this object holds XML data, or if writing fails.
     * @throws Exception
     *             if this object holds audio data.
     */
    public void writeTo(Writer w) throws TransformerConfigurationException, FileNotFoundException, TransformerException,
            IOException, Exception {
        if (type.isXMLType()) {
            throw new IOException("Better write XML data to an OutputStream, not to a Writer");
        } else if (type.isTextType()) { // caution: XML types are text types!
            w.write(plainText);
            w.flush();
            logger.debug("Writing Text output:\n" + plainText);
        } else { // audio - cannot write this to a writer
            throw new Exception("Illegal attempt to write audio data to a character Writer");
        }
    }

    /**
     * @return the representation matching our type: the DOM document for XML types, the plain text for other text types,
     *         and the audio stream otherwise. May be null if no data has been set.
     */
    public Object getData() {
        if (type.isXMLType()) {
            return xmlDocument;
        } else if (type.isTextType()) {
            return plainText;
        } else { // audio
            return audio;
        }
    }

    /**
     * @return the plain text content, or null if none has been set.
     */
    public String getPlainText() {
        return plainText;
    }

    /**
     * @param plainText
     *            the plain text content to store; the type of this object is not checked.
     */
    public void setPlainText(String plainText) {
        this.plainText = plainText;
    }

    /**
     * @return the XML document, or null if none has been set.
     */
    public Document getDocument() {
        return xmlDocument;
    }

    /**
     * @param xmlDocument
     *            the XML document to store; the type of this object is not checked.
     */
    public void setDocument(Document xmlDocument) {
        this.xmlDocument = xmlDocument;
    }

    /**
     * @return the audio stream, or null if none has been set.
     */
    public AudioInputStream getAudio() {
        return audio;
    }

    /**
     * Set the audio data. This will discard any previously set audio data. If audio data is to be appended, consider
     * {@link #appendAudio(AudioInputStream)}.
     *
     * @param audio
     *            the new audio data.
     */
    public void setAudio(AudioInputStream audio) {
        this.audio = audio;
    }

    /**
     * Set the default voice. A null voice is ignored. If the voice's locale and the document locale are both known and
     * neither subsumes the other, a warning is logged.
     *
     * @param voice
     *            the voice to use by default, or null to leave the current setting untouched.
     */
    public void setDefaultVoice(Voice voice) {
        if (voice == null) {
            return;
        }
        // check that voice locale fits before accepting the voice:
        Locale voiceLocale = voice.getLocale();
        Locale docLocale = getLocale();
        // Without an explicit locale, fall back to the xml:lang attribute of the document root, if there is one.
        if (docLocale == null && getType().isXMLType() && getDocument() != null
                && getDocument().getDocumentElement().hasAttribute("xml:lang")) {
            docLocale = MaryUtils.string2locale(getDocument().getDocumentElement().getAttribute("xml:lang"));
        }
        if (docLocale != null && voiceLocale != null
                && !(MaryUtils.subsumes(docLocale, voiceLocale) || MaryUtils.subsumes(voiceLocale, docLocale))) {
            logger.warn("Voice `" + voice.getName() + "' does not match document locale `" + docLocale + "' -- ignoring!");
        }
        // NOTE(review): the warning above says "ignoring", yet the voice is still stored below.
        // Behaviour is kept as is because callers may rely on it -- confirm which one is intended.
        this.defaultVoice = voice;
    }

    /**
     * @return the default voice, or null if none has been set.
     */
    public Voice getDefaultVoice() {
        return defaultVoice;
    }

    /**
     * @param style
     *            the default style; initially the empty string.
     */
    public void setDefaultStyle(String style) {
        defaultStyle = style;
    }

    /**
     * @return the default style; initially the empty string.
     */
    public String getDefaultStyle() {
        return defaultStyle;
    }

    /**
     * @param effects
     *            the default effects; initially the empty string.
     */
    public void setDefaultEffects(String effects) {
        defaultEffects = effects;
    }

    /**
     * @return the default effects; initially the empty string.
     */
    public String getDefaultEffects() {
        return defaultEffects;
    }

    /**
     * The audio file format is required only for data types serving as input to modules producing AUDIO data (e.g., MBROLA data),
     * as well as for the AUDIO data itself. It should be set by the calling code before passing the data to the module producing
     * AUDIO data.
     *
     * @param audioFileFormat
     *            the audio file format; its type is used by {@link #writeTo(OutputStream)} for audio data.
     */
    public void setAudioFileFormat(AudioFileFormat audioFileFormat) {
        this.audioFileFormat = audioFileFormat;
    }

    /**
     * @return the audio file format, or null if none has been set.
     */
    public AudioFileFormat getAudioFileFormat() {
        return audioFileFormat;
    }

    /**
     * Append the content of another MaryData object of the same type to this one. For XML data, deep copies of the child
     * nodes of the other document's root element are appended to this document's root element. For plain text, the other
     * text is appended after an empty line. For AUDIO data, the other audio stream is appended to ours.
     *
     * @param md
     *            the data to append; must be non-null and of the same type as this object.
     * @throws NullPointerException
     *             if md is null.
     * @throws IllegalArgumentException
     *             if the type of md differs from the type of this object.
     * @throws UnsupportedOperationException
     *             if data of this type cannot be appended.
     */
    public void append(MaryData md) {
        if (md == null)
            throw new NullPointerException("Received null marydata");
        if (!md.getType().equals(this.getType()))
            throw new IllegalArgumentException("Cannot append mary data of type `" + md.getType().name()
                    + "' to mary data of type `" + this.getType().name() + "'");
        if (getType().isXMLType()) {
            NodeList kids = md.getDocument().getDocumentElement().getChildNodes();
            // Read the length once: the NodeList is live, so if md is this very object the list
            // would grow with every appended copy and the loop would never end.
            int numKids = kids.getLength();
            logger.debug("Appending " + numKids + " nodes to MaryXML structure");
            Document targetDoc = this.getDocument();
            Element docEl = targetDoc.getDocumentElement();
            for (int i = 0; i < numKids; i++) {
                docEl.appendChild(targetDoc.importNode(kids.item(i), true));
            }
        } else if (getType().isTextType()) {
            // Attention: XML type is a text type!
            String textToAppend = md.getPlainText();
            if (textToAppend == null) {
                // Nothing to append; concatenating would add the literal text "null".
            } else if (this.plainText == null) {
                this.plainText = textToAppend;
            } else {
                this.plainText = this.plainText + "\n\n" + textToAppend;
            }
        } else if (getType().equals(MaryDataType.get("AUDIO"))) {
            appendAudio(md.getAudio());
        } else {
            throw new UnsupportedOperationException("Cannot append two mary data items of type `" + getType() + "'");
        }
    }

    /**
     * For audio data, append more audio data to the one currently present. If no audio data is set yet, this call is equivalent
     * to {@link #setAudio(AudioInputStream)}. A null argument is ignored.
     *
     * @param audioToAppend
     *            the new audio data to append; may be null, in which case nothing changes.
     */
    public void appendAudio(AudioInputStream audioToAppend) {
        if (audioToAppend == null) {
            // Nothing to append; do not put a null element into the audio sequence.
            return;
        }
        if (this.audio == null) {
            setAudio(audioToAppend);
        } else if (this.audio instanceof AppendableSequenceAudioInputStream) {
            ((AppendableSequenceAudioInputStream) this.audio).append(audioToAppend);
        } else {
            // Combine the existing and the new stream into one sequence, using the format of the existing audio.
            this.audio = new SequenceAudioInputStream(this.audio.getFormat(), Arrays.asList(new AudioInputStream[] { this.audio,
                    audioToAppend }));
        }
    }

    /**
     * @param params
     *            the output parameters string to store.
     */
    public void setOutputParams(String params) {
        this.outputParams = params;
    }

    /**
     * @return the output parameters string, or null if none has been set.
     */
    public String getOutputParams() {
        return outputParams;
    }

    /**
     * @return a description listing type, locale, output parameters and all data representations. The XML document is
     *         serialised only if one is present.
     */
    @Override
    public String toString() {
        // Text and audio data carry no document; do not hand null to the serialiser.
        Document doc = getDocument();
        String serializedDocument = (doc == null) ? null : DomUtils.serializeToString(doc);
        return Objects.toStringHelper(this).add("type", getType()).add("locale", getLocale())
                .add("output parameters", getOutputParams()).add("data", getData())
                .add("document", serializedDocument).add("validating", getValidating())
                .add("plain text", getPlainText()).add("audio", getAudio()).add("audio file format", getAudioFileFormat())
                .toString();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy