
/*
 * io.tokra.audio.tts.Synthesis Maven / Gradle / Ivy
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of tts-slovak Show documentation
 * Show all versions of tts-slovak Show documentation
 * Slovak Text-to-Speech Synthesis
 * The newest version!
 */
package io.tokra.audio.tts;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import io.tokra.audio.wav.WavInfo;
import io.tokra.audio.wav.WavProcessing;
public abstract class Synthesis {
private static final Logger logger = LoggerFactory.getLogger(Synthesis.class);
/** all info about audio database */
private WavInfo audio;
/** all indexes for audio database */
private Map> phonemes;
private boolean isInitialised;
private static final String THREAD_NAME_READ_AUDIO_DB = "Read Audio Database";
private static final String THREAD_NAME_READ_PHONEMES = "Read Phonemes";
public Synthesis(){
StopWatch sw = new StopWatch();
sw.start();
logger.debug("Initialization... started !");
init();
sw.stop();
logger.debug("Initialization... finished ! ... Runtime: '{}' ms", sw.getTime());
}
/**
* Initializer
*
* @author Tomas Kramaric
*/
protected void init() {
try {
final File audiofile = getAudioDbFile() != null ? getAudioDbFile().toFile() : null;
final File indexFile = getAudioIndexFile() != null ? getAudioIndexFile().toFile() : null;
if (audiofile == null && indexFile == null) {
throw new InterruptedException("Some of data file not found !");
}
loadData(audiofile, indexFile);
isInitialised = true;
} catch (InterruptedException e) {
logger.error("{} : {}", e.getClass().getName(), ExceptionUtils.getStackTrace(e));
isInitialised = false;
}
}
protected void loadData(final File... files) throws InterruptedException {
final CountDownLatch latch = new CountDownLatch(2);
Thread readWav = new Thread(new Runnable() {
@Override
public void run() {
try {
audio = WavProcessing.readWavFileSamples(files[0]);
latch.countDown();
} catch (IOException e) {
logger.error("IOException", e);
} catch (URISyntaxException e) {
logger.error("URISyntaxException", e);
}
}
}, THREAD_NAME_READ_AUDIO_DB);
Thread readPhonemes = new Thread(new Runnable() {
@Override
public void run() {
try {
phonemes = readPhonemes(files[1]);
latch.countDown();
} catch (IOException e) {
logger.error("IOException", e);
}
}
}, THREAD_NAME_READ_PHONEMES);
readWav.start();
readPhonemes.start();
latch.await(); /* lock until init is not done */
}
/**
* @author Tomas Kramaric
* @param text input text to be synthesized
* @return {@link InputStream} wav representation
*/
public InputStream tts(String text) {
if (audio != null && phonemes != null) {
List foldedVoiceSamples = convertTextToVoiceSamples(text);
InputStream wasIS = WavProcessing.getWavInputStreamFromAudioSamples(foldedVoiceSamples);
return wasIS;
}
return null;
}
/**
* Refactored from 'convertTextToVoiceSamples()'
* @param sampedText input text in form of sampa
* @param audiodbSamples database of audio samples
* @param audiodbIndexes database of audio samples indexes
* @return voice samples in form of {@link List} of short's
*/
protected List getSamplesFromSampedText(Vector sampedText, short[] audiodbSamples, Map> audiodbIndexes) {
List synthSamples = new ArrayList();
for (int i = 0; i < sampedText.size() - 1; i++) {
String key1 = sampedText.elementAt(i);
String key2 = sampedText.elementAt(i + 1);
if (audiodbIndexes.containsKey(key1 + key2)) {
Vector pom3 = audiodbIndexes.get(key1 + key2); //hodnoty ktore prisluchaju dvojici sampa znakov v hashtable
Vector pom4 = audiodbIndexes.get(key1);
int offset = pom4.elementAt(0).intValue() - 1;
int start = pom3.elementAt(0).intValue() + offset - 1; // nacita prvu a druhu hodnotu Object(value)
int end = pom3.elementAt(1).intValue() + offset - 1; // odrata jedna lebo sample zacinaju od 1 nie od 0
if (start < 0) {
start = 0;
}
if (end > audiodbSamples.length) {
end = audiodbSamples.length - 1;
}
for (int j = start; j <= end; j++) {
synthSamples.add(audiodbSamples[j]);
}
} else {
logger.warn("Skipping: '{}' , '{}'", key1, key2);
}
}
return synthSamples;
}
/**
* @param indexFile file of db indexes
* @return db indexes as map
* @throws IOException exception
*/
protected Map> readPhonemes(File indexFile) throws IOException {
StopWatch sw = new StopWatch();
sw.start();
Map> audioMappings = new ConcurrentHashMap>();
logger.debug("Reading indexes for samples");
List lines = FileUtils.readLines(indexFile, "utf-8");
for (String line : lines) {
String[] mappingValues = line.split("\\t");
int start = 0;
int end = 0;
int middle = 0;
int offset = 0;
Vector mapping = new Vector(); // TODO migrate to List
switch (mappingValues.length) {
case 4: { /* difon */
start = Integer.valueOf(mappingValues[2]).intValue();
end = Integer.valueOf(mappingValues[3]).intValue();
mapping.addElement(new Integer(start));
mapping.addElement(new Integer(end));
audioMappings.put(mappingValues[0] + mappingValues[1], mapping);
break;
}
case 6: {
offset = Integer.valueOf(mappingValues[2]).intValue();
start = Integer.valueOf(mappingValues[3]).intValue();
end = Integer.valueOf(mappingValues[4]).intValue();
middle = Integer.valueOf(mappingValues[5]).intValue();
mapping.addElement(new Integer(offset));
mapping.addElement(new Integer(start));
mapping.addElement(new Integer(end));
mapping.addElement(new Integer(middle));
audioMappings.put(mappingValues[0], mapping);
break;
}
default: {
// skip anything else
}
}
}
sw.stop();
logger.debug("Reading indexes for samples...Runtime: '{}' ms", sw.getTime());
return audioMappings;
}
/*****************\
|* Abstract *|
\*****************/
/**
* @return audio db file path as string
*/
public abstract String getAudioDbFilePath();
/**
* @return audio db indexes file path as string
*/
public abstract String getAudioIndexFilePath();
/**
* @return audio db file {@link Path}
*/
public abstract Path getAudioDbFile();
/**
* @return audio db indexes file {@link Path}
*/
public abstract Path getAudioIndexFile();
/**
* @param text to be synthesized
* @return voice samples as {@link List} of short's
*/
public abstract List convertTextToVoiceSamples(String text);
/*****************\
|*Getters/Setters*|
\*****************/
/**
* @return isInitialised
*/
public boolean isInitialised() {
return isInitialised;
}
/**
* @return decoded samples as short array
*/
public short[] getDecodedSamples() {
return audio.getDecodedSamples();
}
public Map> getPhonemes() {
return phonemes;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy