All Downloads are FREE. Search and download functionalities are using the official Maven repository.

marytts.datatypes.MaryData Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2000-2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.datatypes;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.Arrays;
import java.util.Locale;

import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;

import marytts.modules.synthesis.Voice;
import marytts.server.MaryProperties;
import marytts.util.MaryUtils;
import marytts.util.data.audio.AppendableSequenceAudioInputStream;
import marytts.util.data.audio.SequenceAudioInputStream;
import marytts.util.dom.DomUtils;
import marytts.util.dom.MaryNormalisedWriter;
import marytts.util.io.FileUtils;
import marytts.util.io.ReaderSplitter;
import marytts.util.string.StringUtils;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import com.google.common.base.Objects;

/**
 * A representation of any type of mary data, be it input, intermediate or output data. The "technical" representation of the
 * data is hidden from the caller, but can be accessed on request. Internally, the data is represented according to this data's
 * type: as a String containing plain text, as an XML DOM tree, or as an input stream containing audio data.
 * <p>
 * Which representation is used by {@link #setData(String)}, {@link #getData()}, {@link #readFrom(InputStream)} and
 * {@link #writeTo(OutputStream)} is decided by {@link MaryDataType#isXMLType()} first and {@link MaryDataType#isTextType()}
 * second; everything else is treated as audio.
 * 
 * @author Marc Schröder
 */
public class MaryData {
	private final MaryDataType type;
	private final Locale locale;
	private String outputParams = null;
	// Only one of the following data representations should be non-null
	// for a given instance; which one depends on our type.
	private Document xmlDocument = null;
	private String plainText = null;
	private AudioInputStream audio = null;
	private AudioFileFormat audioFileFormat = null;
	private final Logger logger = MaryUtils.getLogger("IO");

	// for plainText, allow additional information:
	private Voice defaultVoice = null;
	private String defaultStyle = "";
	private String defaultEffects = "";

	// The XML writer is only created when XML data is written for the first time.
	private MaryNormalisedWriter writer = null;

	private boolean doValidate;
	// Never modified: setWarnClient() is a deprecated no-op, so this stays false.
	private boolean doWarnClient = false;

	/**
	 * Create an empty data container of the given type, without a stub XML document.
	 * 
	 * @param type
	 *            the data type; must not be null.
	 * @param locale
	 *            the locale of the data; may be null.
	 * @throws NullPointerException
	 *             if type is null.
	 */
	public MaryData(MaryDataType type, Locale locale) {
		this(type, locale, false);
	}

	/**
	 * Create an empty data container of the given type.
	 * 
	 * @param type
	 *            the data type; must not be null.
	 * @param locale
	 *            the locale of the data; may be null.
	 * @param createStubDocument
	 *            if true and the type is a MaryXML type, initialise the XML representation with a new MaryXML document.
	 * @throws NullPointerException
	 *             if type is null.
	 */
	public MaryData(MaryDataType type, Locale locale, boolean createStubDocument) {
		if (type == null)
			throw new NullPointerException("Received null type for MaryData");
		this.type = type;
		this.locale = locale;
		// The following is the default setting for module output (we suppose
		// that for the input data, setValidating() is called as appropriate):
		doValidate = MaryProperties.getBoolean("maryxml.validate.modules", false);
		if (createStubDocument && type.isMaryXML()) {
			xmlDocument = MaryXML.newDocument();
		}
	}

	/**
	 * @return whether XML input passed to {@link #setData(String)} is parsed in validating mode.
	 */
	public boolean getValidating() {
		return doValidate;
	}

	/**
	 * Choose whether XML input is parsed in validating mode.
	 * 
	 * @param doValidate
	 *            true to request validation.
	 * @throws ParserConfigurationException
	 *             never thrown by this implementation; the declaration is kept so that existing callers keep compiling.
	 */
	public void setValidating(boolean doValidate) throws ParserConfigurationException {
		this.doValidate = doValidate;
	}

	/**
	 * @return always false, since {@link #setWarnClient(boolean)} ignores its argument.
	 * @deprecated the warn-client flag no longer has any effect in this class.
	 */
	@Deprecated
	public boolean getWarnClient() {
		return doWarnClient;
	}

	/**
	 * Does nothing; retained only so that existing callers keep compiling.
	 * 
	 * @param doWarnClient
	 *            ignored.
	 * @deprecated the warn-client flag no longer has any effect in this class.
	 */
	@Deprecated
	public void setWarnClient(boolean doWarnClient) {
	}

	public MaryDataType getType() {
		return type;
	}

	public Locale getLocale() {
		return locale;
	}

	/**
	 * Read data from input stream is until end-of-file, in the appropriate way as determined by our type. Equivalent to
	 * {@link #readFrom(InputStream, String)} with a null end marker.
	 * 
	 * @param is
	 *            the InputStream from which to read.
	 * @throws ParserConfigurationException
	 *             ParserConfigurationException
	 * @throws SAXException
	 *             SAXException
	 * @throws IOException
	 *             IOException
	 * @throws TransformerConfigurationException
	 *             TransformerConfigurationException
	 * @throws TransformerException
	 *             TransformerException
	 */
	public void readFrom(InputStream is) throws ParserConfigurationException, SAXException, IOException,
			TransformerConfigurationException, TransformerException {
		readFrom(is, null);
	}

	/**
	 * Read data from input stream is, in the appropriate way as determined by our type. XML and text data are decoded as
	 * UTF-8. For audio data, an {@link AudioInputStream} is stored as is; any other stream is handed to
	 * {@link AudioSystem#getAudioInputStream(InputStream)} so that its audio file header is parsed.
	 * 
	 * @param is
	 *            the InputStream from which to read.
	 * @param endMarker
	 *            a string marking end of file. If this is null, read until end-of-file; if it is non-null, read up to (and
	 *            including) the first line containing the end marker string. This will be ignored for audio data.
	 * @throws ParserConfigurationException
	 *             ParserConfigurationException
	 * @throws SAXException
	 *             SAXException
	 * @throws IOException
	 *             if reading fails, or if a non-audio stream given for audio data is not in a supported audio file format.
	 * @throws TransformerConfigurationException
	 *             TransformerConfigurationException
	 * @throws TransformerException
	 *             TransformerException
	 */
	public void readFrom(InputStream is, String endMarker) throws ParserConfigurationException, SAXException, IOException,
			TransformerConfigurationException, TransformerException {
		if (type.isXMLType() || type.isTextType()) {
			readFrom(new InputStreamReader(is, "UTF-8"), endMarker);
		} else if (is == null || is instanceof AudioInputStream) {
			// audio, already decoded (or absent): ignore endMarker
			setAudio((AudioInputStream) is);
		} else {
			// audio from a raw byte stream: a blind cast would fail with a ClassCastException,
			// so let the audio system recognise the file format instead.
			setAudio(toAudioInputStream(is));
		}
	}

	/**
	 * Wrap a raw byte stream containing an audio file into an AudioInputStream.
	 */
	private static AudioInputStream toAudioInputStream(InputStream is) throws IOException {
		// Format detection needs mark/reset, which plain streams may not offer.
		InputStream markable = is.markSupported() ? is : new BufferedInputStream(is);
		try {
			return AudioSystem.getAudioInputStream(markable);
		} catch (UnsupportedAudioFileException e) {
			throw new IOException("Cannot read audio data from input stream: unsupported audio file format", e);
		}
	}

	/**
	 * Read all data from reader from and store it via {@link #setData(String)}. Only XML and Text data can be read from a
	 * reader, audio data cannot.
	 * 
	 * @param from
	 *            the Reader from which to read.
	 * @throws ParserConfigurationException
	 *             ParserConfigurationException
	 * @throws SAXException
	 *             SAXException
	 * @throws IOException
	 *             IOException
	 * @throws IllegalArgumentException
	 *             if this data's type is neither text nor XML.
	 */
	public void readFrom(Reader from) throws ParserConfigurationException, SAXException, IOException {
		String inputData = FileUtils.getReaderAsString(from);
		setData(inputData);
	}

	/**
	 * Read data from reader from, optionally stopping at an end marker, and store it via {@link #setData(String)}. Only XML
	 * and Text data can be read from a reader, audio data cannot.
	 * 
	 * @param from
	 *            the Reader from which to read.
	 * @param endMarker
	 *            a string marking end of file. If this is null, read until end-of-file; if it is non-null, read up to (and
	 *            including) the first line containing the end marker string.
	 * @throws ParserConfigurationException
	 *             ParserConfigurationException
	 * @throws SAXException
	 *             SAXException
	 * @throws IOException
	 *             IOException
	 * @throws IllegalArgumentException
	 *             if this data's type is neither text nor XML.
	 */
	public void readFrom(Reader from, String endMarker) throws ParserConfigurationException, SAXException, IOException {
		// For the case that the data to be read is not followed by
		// end-of-file, we use a ReaderSplitter which provides a reader
		// artificially "inserting" an end-of-file after a line
		// containing the pattern given in endMarker.
		Reader source = from;
		if (endMarker != null) {
			source = new ReaderSplitter(from, endMarker).nextReader();
		}
		readFrom(source);
	}

	/**
	 * Set the content data of this MaryData object from the given String. For XML data ({@link MaryDataType#isXMLType()}), parse
	 * the String representation of the data into a DOM tree; for text data, store the string as plain text.
	 * 
	 * @param dataString
	 *            string representation of the input data.
	 * @throws ParserConfigurationException
	 *             ParserConfigurationException
	 * @throws IOException
	 *             IOException
	 * @throws SAXException
	 *             SAXException
	 * @throws IllegalArgumentException
	 *             if this method is called for MaryDataTypes that are neither text nor XML.
	 */
	public void setData(String dataString) throws ParserConfigurationException, SAXException, IOException {
		// First, some data cleanup:
		dataString = StringUtils.purgeNonBreakingSpaces(dataString);
		// Now, deal with it. XML must be tested first because XML types are also text types.
		if (type.isXMLType()) {
			logger.debug("Parsing XML input (" + (doValidate ? "" : "non-") + "validating): " + dataString);
			xmlDocument = DomUtils.parseDocument(dataString, doValidate);
		} else if (type.isTextType()) {
			logger.debug("Setting text input: " + dataString);
			plainText = dataString;
		} else {
			throw new IllegalArgumentException("Cannot set data of type " + type + " from a string");
		}
	}

	/**
	 * Write our internal representation to output stream os, in the appropriate way as determined by our type. XML and text
	 * output is flushed but the stream is left open; after audio output the stream is flushed and closed.
	 * 
	 * @param os
	 *            the stream to write to.
	 * @throws TransformerConfigurationException
	 *             TransformerConfigurationException
	 * @throws FileNotFoundException
	 *             FileNotFoundException
	 * @throws TransformerException
	 *             TransformerException
	 * @throws IOException
	 *             IOException
	 * @throws Exception
	 *             Exception
	 */
	public void writeTo(OutputStream os) throws TransformerConfigurationException, FileNotFoundException, TransformerException,
			IOException, Exception {
		if (type.isXMLType()) {
			if (writer == null) {
				writer = new MaryNormalisedWriter();
			}
			// isDebugEnabled() also covers levels finer than DEBUG (e.g. TRACE),
			// which an equality test against Level.DEBUG would miss.
			if (logger.isDebugEnabled()) {
				ByteArrayOutputStream debugOut = new ByteArrayOutputStream();
				writer.output(xmlDocument, debugOut);
				// NOTE(review): assumes the normalised writer emits UTF-8, like the text
				// path below does explicitly -- confirm against MaryNormalisedWriter.
				logger.debug(debugOut.toString("UTF-8"));
			}
			BufferedOutputStream bufferedOut = new BufferedOutputStream(os);
			writer.output(xmlDocument, bufferedOut);
			// Do not rely on the writer to push out what is still sitting in our buffer.
			bufferedOut.flush();
		} else if (type.isTextType()) { // caution: XML types are text types!
			writeTo(new OutputStreamWriter(os, "UTF-8"));
		} else { // audio
			logger.debug("Writing audio output, frame length " + audio.getFrameLength());
			AudioSystem.write(audio, audioFileFormat.getType(), os);
			os.flush();
			os.close();
		}
	}

	/**
	 * Write our plain text to writer w and flush it. Only Text data can be written to a writer: XML data must be written to
	 * an OutputStream instead, and audio data cannot be written to a character writer at all.
	 * 
	 * @param w
	 *            the writer to write to.
	 * @throws TransformerConfigurationException
	 *             TransformerConfigurationException
	 * @throws FileNotFoundException
	 *             FileNotFoundException
	 * @throws TransformerException
	 *             TransformerException
	 * @throws IOException
	 *             if writing fails, or if this data is of an XML type.
	 * @throws Exception
	 *             if this data is audio data.
	 */
	public void writeTo(Writer w) throws TransformerConfigurationException, FileNotFoundException, TransformerException,
			IOException, Exception {
		if (type.isXMLType()) {
			throw new IOException("Better write XML data to an OutputStream, not to a Writer");
		}
		if (!type.isTextType()) { // audio - cannot write this to a writer
			throw new Exception("Illegal attempt to write audio data to a character Writer");
		}
		w.write(plainText);
		w.flush();
		logger.debug("Writing Text output:\n" + plainText);
	}

	/**
	 * Get the data in the representation that matches our type.
	 * 
	 * @return the DOM document for XML types, the plain text String for other text types, and the audio stream otherwise;
	 *         null if the respective representation has not been set.
	 */
	public Object getData() {
		if (type.isXMLType()) {
			return xmlDocument;
		} else if (type.isTextType()) {
			return plainText;
		} else { // audio
			return audio;
		}
	}

	public String getPlainText() {
		return plainText;
	}

	public void setPlainText(String plainText) {
		this.plainText = plainText;
	}

	public Document getDocument() {
		return xmlDocument;
	}

	public void setDocument(Document xmlDocument) {
		this.xmlDocument = xmlDocument;
	}

	public AudioInputStream getAudio() {
		return audio;
	}

	/**
	 * Set the audio data. This will discard any previously set audio data. If audio data is to be appended, consider
	 * appendAudio().
	 * 
	 * @param audio
	 *            the new audio data; may be null.
	 */
	public void setAudio(AudioInputStream audio) {
		this.audio = audio;
	}

	/**
	 * Set the default voice, provided its locale is compatible with the locale of this data. The data's locale is the one
	 * given at construction time or, if that is null and this is XML data with a document, the root element's
	 * <code>xml:lang</code> attribute. A null voice, or a voice whose locale neither subsumes nor is subsumed by the data's
	 * locale, is ignored and leaves the current default voice unchanged; the latter case is logged as a warning.
	 * 
	 * @param voice
	 *            the voice to use by default; may be null.
	 */
	public void setDefaultVoice(Voice voice) {
		if (voice == null) {
			return;
		}
		// check that voice locale fits before accepting the voice:
		Locale voiceLocale = voice.getLocale();
		Locale docLocale = getLocale();
		if (docLocale == null && getType().isXMLType() && getDocument() != null
				&& getDocument().getDocumentElement().hasAttribute("xml:lang")) {
			docLocale = MaryUtils.string2locale(getDocument().getDocumentElement().getAttribute("xml:lang"));
		}
		if (docLocale != null && voiceLocale != null
				&& !(MaryUtils.subsumes(docLocale, voiceLocale) || MaryUtils.subsumes(voiceLocale, docLocale))) {
			logger.warn("Voice `" + voice.getName() + "' does not match document locale `" + docLocale + "' -- ignoring!");
			// Actually ignore the voice, as the warning says.
			return;
		}
		this.defaultVoice = voice;
	}

	public Voice getDefaultVoice() {
		return defaultVoice;
	}

	public void setDefaultStyle(String style) {
		defaultStyle = style;
	}

	public String getDefaultStyle() {
		return defaultStyle;
	}

	public void setDefaultEffects(String effects) {
		defaultEffects = effects;
	}

	public String getDefaultEffects() {
		return defaultEffects;
	}

	/**
	 * The audio file format is required only for data types serving as input to modules producing AUDIO data (e.g., MBROLA data),
	 * as well as for the AUDIO data itself. It should be set by the calling code before passing the data to the module producing
	 * AUDIO data.
	 * 
	 * @param audioFileFormat
	 *            the audio file format; its type is used by {@link #writeTo(OutputStream)} when writing audio.
	 */
	public void setAudioFileFormat(AudioFileFormat audioFileFormat) {
		this.audioFileFormat = audioFileFormat;
	}

	public AudioFileFormat getAudioFileFormat() {
		return audioFileFormat;
	}

	/**
	 * Append the content of another MaryData object of the same type to this one. For XML data, deep copies of the children
	 * of the other document's root element are appended to this document's root element. For text data, the other text is
	 * appended after a blank line; a null text in the other object leaves this text unchanged. For AUDIO data, see
	 * {@link #appendAudio(AudioInputStream)}.
	 * 
	 * @param md
	 *            the data to append; must not be null and must have the same type as this object.
	 * @throws NullPointerException
	 *             if md is null.
	 * @throws IllegalArgumentException
	 *             if md has a different type than this object.
	 * @throws UnsupportedOperationException
	 *             if the type is neither XML, text nor AUDIO.
	 */
	public void append(MaryData md) {
		if (md == null)
			throw new NullPointerException("Received null marydata");
		if (!md.getType().equals(this.getType()))
			throw new IllegalArgumentException("Cannot append mary data of type `" + md.getType().name()
					+ "' to mary data of type `" + this.getType().name() + "'");
		if (getType().isXMLType()) {
			NodeList kids = md.getDocument().getDocumentElement().getChildNodes();
			logger.debug("Appending " + kids.getLength() + " nodes to MaryXML structure");
			Document ownDocument = this.getDocument();
			Element docEl = ownDocument.getDocumentElement();
			for (int i = 0; i < kids.getLength(); i++) {
				// importNode copies the node, so the source document is left intact.
				docEl.appendChild(ownDocument.importNode(kids.item(i), true));
			}
		} else if (getType().isTextType()) {
			// Attention: XML type is a text type!
			String textToAppend = md.getPlainText();
			if (textToAppend != null) {
				// A null text must not be concatenated, or the literal "null" would end up in our text.
				this.plainText = this.plainText == null ? textToAppend : this.plainText + "\n\n" + textToAppend;
			}
		} else if (getType().equals(MaryDataType.get("AUDIO"))) {
			appendAudio(md.getAudio());
		} else {
			throw new UnsupportedOperationException("Cannot append two mary data items of type `" + getType() + "'");
		}
	}

	/**
	 * For audio data, append more audio data to the one currently present. If no audio data is set yet, this call is equivalent
	 * to setAudio(). Appending null leaves the current audio data unchanged.
	 * 
	 * @param audioToAppend
	 *            the new audio data to append
	 */
	public void appendAudio(AudioInputStream audioToAppend) {
		if (audioToAppend == null) {
			// Nothing to append; in particular, do not put a null entry into a stream sequence.
			return;
		}
		if (this.audio == null) {
			setAudio(audioToAppend);
		} else if (this.audio instanceof AppendableSequenceAudioInputStream) {
			((AppendableSequenceAudioInputStream) this.audio).append(audioToAppend);
		} else {
			// Chain both streams, using the format of the audio we already have.
			this.audio = new SequenceAudioInputStream(this.audio.getFormat(), Arrays.asList(this.audio, audioToAppend));
		}
	}

	public void setOutputParams(String params) {
		this.outputParams = params;
	}

	public String getOutputParams() {
		return outputParams;
	}

	/**
	 * @return a description of this object listing its type, locale, output parameters and all data representations; the
	 *         document entry is null if no XML document is set.
	 */
	@Override
	public String toString() {
		Document document = getDocument();
		// Only serialize a document that exists, so that toString() is safe for text and audio data.
		String serializedDocument = document == null ? null : DomUtils.serializeToString(document);
		return Objects.toStringHelper(this).add("type", getType()).add("locale", getLocale())
				.add("output parameters", getOutputParams()).add("data", getData()).add("document", serializedDocument)
				.add("validating", getValidating()).add("plain text", getPlainText()).add("audio", getAudio())
				.add("audio file format", getAudioFileFormat()).toString();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy