/*
 * All downloads are free. Search and download functionality uses the official Maven repository.
 *
 * org.daisy.pipeline.tts.VoiceManager (Maven / Gradle / Ivy)
 *
 * The newest version!
 */
package org.daisy.pipeline.tts;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;

import com.google.common.collect.Iterables;

import cz.vutbr.web.css.Term;

import org.daisy.pipeline.css.speech.VoiceFamilyList;
import org.daisy.pipeline.css.speech.VoiceFamilyList.VoiceFamily;
import org.daisy.pipeline.tts.TTSService.SynthesisException;
import org.daisy.pipeline.tts.VoiceInfo.Gender;
import org.daisy.pipeline.tts.VoiceInfo.LanguageRange;
import static org.daisy.pipeline.tts.VoiceInfo.NO_DEFINITE_GENDER;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class VoiceManager {

	private Logger ServerLogger = LoggerFactory.getLogger(VoiceManager.class);

	private final Set engineNames;
	/**
	 * Map of the best services for each available voice, given that two different services can serve the same voice
	 */
	private final Map bestEngines;
	/**
	 * Map from voice name/engine to voice
	 */
	private final Map primaryVoices;
	/**
	 * Map from voice (name/engine) to a collection of fallback voices (sorted by matching engine,
	 * matching gender and priority).
	 */
	private final Map> secondaryVoices;
	/**
	 * Map from voice properties (language/gender/engine) to voices. Each combination of primary
	 * language, gender and engine is mapped to a collection of voices with corresponding language
	 * range, sorted by priority.
	 */
	private final Map> voiceIndex;

	/**
	 * Map from voice IDs to voices.
	 */
	private final Map voiceForID;

	/**
	 * Queries the given engines for their voices and builds the lookup tables used for voice
	 * selection: the best engine per voice, the voice per ID, the fallback voices per voice, and
	 * the index from language/gender/engine to voices.
	 *
	 * @param engines             the TTS engines to query; engines with a higher overall priority
	 *                            are queried first and win when several engines offer the same voice
	 * @param voiceInfoFromConfig voice properties coming from the configuration; only entries whose
	 *                            voice is provided by one of the engines end up in the index
	 */
	public VoiceManager(Collection<TTSEngine> engines, Collection<VoiceInfo> voiceInfoFromConfig) {
		engineNames = new HashSet<>();
		for (TTSEngine e : engines)
			engineNames.add(e.getProvider().getName().toLowerCase());

		// create a map of the best services for each available voice
		bestEngines = new LinkedHashMap<>(); // LinkedHashMap: iteration order = insertion order
		voiceForID = new HashMap<>();
		{
			TTSTimeout timeout = new TTSTimeout();
			int timeoutSecs = 30;
			try {
				// sort engines by engine priority, so that voice info from engines (see below) is
				// sorted by engine priority as well
				List<TTSEngine> sortedEngines = new ArrayList<>(engines);
				Collections.sort(sortedEngines, Comparator.comparingInt(TTSEngine::getOverallPriority).reversed());
				for (TTSEngine tts : sortedEngines) {
					timeout.enableForCurrentThread(timeoutSecs);
					try {
						Collection<Voice> voices = tts.getAvailableVoices();
						if (voices != null)
							for (Voice v : voices) {
								// first engine (= highest priority) offering a voice wins
								bestEngines.putIfAbsent(v, tts);
								voiceForID.putIfAbsent(v.getID(), v);
							}
					} catch (SynthesisException e) {
						ServerLogger.error("error while retrieving the voices of "
						                   + tts.getProvider().getName());
						ServerLogger.debug(tts.getProvider().getName()
						                   + " getAvailableVoices error: " + getStack(e));
					} catch (InterruptedException e) {
						ServerLogger.error("timeout while retrieving the voices of "
						                   + tts.getProvider().getName()
						                   + " (exceeded " + timeoutSecs + " seconds)");
					} finally {
						timeout.disable();
					}
				}
			} finally {
				// release the timeout even when an engine throws an unexpected runtime exception
				timeout.close();
			}
		}

		// get mappings from voice properties (language/gender) to voice (name/engine)
		List<VoiceInfo> voiceInfo = new ArrayList<>();

		// get info from engines (lowest priority)
		primaryVoices = new HashMap<>();
		for (Voice v : bestEngines.keySet()) {
			primaryVoices.put(new VoiceKey(v.getEngine(), v.getName()), v);
			if (!v.getLocale().isEmpty() && v.getGender().isPresent()) {
				Gender g = v.getGender().get();
				for (LanguageRange l : v.getLocale())
					voiceInfo.add(new VoiceInfo(v.getEngine(), v.getName(), l, g, 0));
			}
		}

		// voices also match less specific locales (without region tag)
		final float priorityVariantPenalty = 0.1f;
		List<VoiceInfo> derivedVoiceInfo = new ArrayList<>();
		for (VoiceInfo vi : voiceInfo) {
			Locale shortLang = vi.language.getPrimaryLanguageSubTag();
			if (shortLang != null && !shortLang.toString().equals(vi.language.toString()))
				derivedVoiceInfo.add(
					new VoiceInfo(vi.voiceEngine, vi.voiceName, shortLang, vi.gender,
					              vi.priority - priorityVariantPenalty));
		}
		voiceInfo.addAll(derivedVoiceInfo);

		// get info from configuration
		// the configuration is interpreted as-is. region and other subtags are significant
		// due to fuzzy voice selection, a voice can be applied to less specific locales, but it is
		// not an exact match
		voiceInfo.addAll(voiceInfoFromConfig);

		// sort by priority and language range specificity (descending)
		// voices with equal priority and specificity will not be reordered as a result of the sort
		Collections.sort(voiceInfo, sortByPriority.thenComparing(sortBySpecificity));

		// filter available voices
		List<VoiceInfo> availableVoiceInfo = new ArrayList<>();
		for (VoiceInfo i : voiceInfo)
			if (primaryVoices.containsKey(new VoiceKey(i.voiceEngine, i.voiceName)))
				availableVoiceInfo.add(i);

		// create map of the best fallback for each voice
		// engine is more important than gender and priority, gender is more important than priority
		// (the passes run from most to least strict, and each LinkedHashSet keeps insertion order)
		secondaryVoices = new HashMap<>();
		for (boolean sameEngine : new boolean[]{true, false})
			for (boolean sameGender : new boolean[]{true, false})
				for (VoiceInfo best : voiceInfo) {
					VoiceKey bestKey = new VoiceKey(best.voiceEngine, best.voiceName);
					for (VoiceInfo fallback : availableVoiceInfo) {
						VoiceKey fallbackKey = new VoiceKey(fallback.voiceEngine, fallback.voiceName);
						if (!fallbackKey.equals(bestKey)
						    && isFallbackCandidate(best, fallback, sameEngine, sameGender))
							secondaryVoices.computeIfAbsent(bestKey, x -> new LinkedHashSet<>())
							               .add(primaryVoices.get(fallbackKey));
					}
				}

		// create index of voices with language, engine and gender as keys
		Set<Locale> allLangs = new HashSet<>();
		Set<Gender> allGenders = new HashSet<>();
		for (VoiceInfo vi : availableVoiceInfo) {
			Locale shortLang = vi.language.getPrimaryLanguageSubTag();
			if (shortLang != null)
				allLangs.add(shortLang);
			allGenders.add(vi.gender);
		}
		allGenders.remove(NO_DEFINITE_GENDER);
		voiceIndex = new HashMap<>();
		for (VoiceInfo vi : availableVoiceInfo) {
			Locale primaryLang = vi.language.getPrimaryLanguageSubTag();
			if (primaryLang == null)
				// this is to make sure that a multi-lang voice wins from a regular voice if it has
				// a higher priority
				for (Locale shortLang : allLangs)
					for (boolean sameEngine : new boolean[]{true, false})
						indexVoice(vi, shortLang, sameEngine ? vi.voiceEngine : null, allGenders);
			Locale ownLang = primaryLang != null ? primaryLang : VoiceKey.MUL;
			for (boolean sameLanguage : new boolean[]{true, false})
				for (boolean sameEngine : new boolean[]{true, false})
					indexVoice(vi,
					           sameLanguage ? ownLang : null,
					           sameEngine ? vi.voiceEngine : null,
					           allGenders);
		}

		// log
		logVoices(availableVoiceInfo);
	}

	/**
	 * Whether {@code fallback} qualifies as a fallback for {@code best} in the pass described by
	 * {@code sameEngine} and {@code sameGender}.
	 */
	private static boolean isFallbackCandidate(VoiceInfo best, VoiceInfo fallback,
	                                           boolean sameEngine, boolean sameGender) {
		if (best.language.getPrimaryLanguageSubTag() == null)
			// when the language range has no primary language subtag, fallbacks are only
			// collected in the pass where gender and engine are not criteria, so as to prevent
			// the algo from choosing a multilang voice with the same engine over a regular voice
			// with a different engine.
			return !sameGender && !sameEngine;
		return fallback.language.equals(best.language)
		       && (!sameGender
		           // fallback voices with unknown gender are only considered when gender is not
		           // a criteria
		           || (fallback.gender != NO_DEFINITE_GENDER
		               && fallback.gender == best.gender))
		       && (!sameEngine
		           || fallback.voiceEngine.equals(best.voiceEngine));
	}

	/**
	 * Registers the voice of {@code vi} in {@code voiceIndex} under the given language and engine,
	 * for its own gender and for "any gender" ({@code null}). A voice with unknown gender is in
	 * addition registered under every known gender, to make sure that it wins from a regular voice
	 * if it has a higher priority.
	 */
	private void indexVoice(VoiceInfo vi, Locale lang, String engine, Set<Gender> allGenders) {
		if (vi.gender == NO_DEFINITE_GENDER)
			for (Gender g : allGenders)
				addToVoiceIndex(vi, lang, g, engine);
		for (boolean sameGender : new boolean[]{true, false})
			addToVoiceIndex(vi, lang, sameGender ? vi.gender : null, engine);
	}

	/**
	 * Appends the voice of {@code vi} to the {@code voiceIndex} entry for the given key parts,
	 * creating the entry if needed.
	 */
	private void addToVoiceIndex(VoiceInfo vi, Locale lang, Gender gender, String engine) {
		Voice v = primaryVoices.get(new VoiceKey(vi.voiceEngine, vi.voiceName));
		voiceIndex.computeIfAbsent(new VoiceKey(lang, gender, engine), x -> new ArrayList<>())
		          .add(new LanguageRangeVoiceTuple(vi.language, v));
	}

	/**
	 * Writes the available voices and the voice selection data to the debug log.
	 */
	private void logVoices(List<VoiceInfo> availableVoiceInfo) {
		StringBuilder sb = new StringBuilder("Available voices:");
		for (Voice v : bestEngines.keySet())
			sb.append("\n * ")
			  .append(v);
		ServerLogger.debug(sb.toString());
		sb = new StringBuilder("Voice selection data:");
		Collection<VoiceInfo> sortedAvailableVoiceInfo = new TreeSet<VoiceInfo>(VoiceManager::compareForLog);
		sortedAvailableVoiceInfo.addAll(availableVoiceInfo);
		for (VoiceInfo vi : sortedAvailableVoiceInfo)
			sb.append("\n * {")
			  .append("locale:").append(vi.language)
			  .append(", gender:").append(vi.gender)
			  .append("}")
			  .append(" -> ")
			  .append(primaryVoices.get(new VoiceKey(vi.voiceEngine, vi.voiceName)));
		ServerLogger.debug(sb.toString());
	}

	/**
	 * Ordering used for the log output only: grouped by locale (multi-lang voices last), then by
	 * gender, then by descending priority.
	 */
	private static int compareForLog(VoiceInfo vi1, VoiceInfo vi2) {
		// first group by locale
		// multi-lang voices last
		boolean multiLang1 = vi1.language.getPrimaryLanguageSubTag() == null;
		boolean multiLang2 = vi2.language.getPrimaryLanguageSubTag() == null;
		if (multiLang1 && !multiLang2)
			return 1;
		else if (!multiLang1 && multiLang2)
			return -1;
		int compare = vi1.language.toString().compareTo(vi2.language.toString());
		if (compare != 0) return compare;
		// then group by gender
		compare = vi1.gender.compareTo(vi2.gender);
		if (compare != 0) return compare;
		// remove duplicates
		// (Note that the duplicate with the highest priority is kept because availableVoiceInfo
		// is sorted by descending priority and because of the way TreeSet works)
		if (vi1.voiceEngine.equals(vi2.voiceEngine) && vi1.voiceName.equals(vi2.voiceName))
			return 0;
		// highest priority first
		compare = Float.compare(vi2.priority, vi1.priority);
		if (compare != 0)
			return compare;
		compare = vi2.voiceEngine.compareTo(vi1.voiceEngine);
		if (compare != 0)
			return compare;
		return vi2.voiceName.compareTo(vi1.voiceName);
	}

	/**
	 * Looks up the engine that was selected as the best one for a voice.
	 *
	 * @param voice an available voice.
	 * @return the best TTS engine for {@code voice}. The returned engine may belong to an OSGi
	 *         service that is no longer enabled.
	 */
	public TTSEngine getTTS(Voice voice) {
		final TTSEngine engine = bestEngines.get(voice);
		return engine;
	}

	/**
	 * Looks up a voice by its ID.
	 *
	 * @param id the voice ID
	 * @return the voice registered under {@code id}, or {@code null} if there is no voice with the
	 *         given ID.
	 */
	public Voice getVoiceForID(String id) {
		final Voice voice = voiceForID.get(id);
		return voice;
	}

	/**
	 * Returns the first of the voices that {@link #findAvailableVoices(String, String, Locale, Gender)}
	 * yields for the same arguments.
	 *
	 * @param voiceEngine or {@code null} if unknown
	 * @param voiceName   or {@code null} if unknown
	 * @param lang        or {@code null} if unknown
	 * @param gender      or {@code null} if unknown
	 *
	 * @return {@code null} if no voice is available for the given parameters.
	 */
	public Voice findAvailableVoice(String voiceEngine, String voiceName, Locale lang, Gender gender) {
		Iterable<Voice> candidates = findAvailableVoices(voiceEngine, voiceName, lang, gender);
		return Iterables.getFirst(candidates, null);
	}

	/**
	 * Finds the voices for a language and a list of voice families.
	 *
	 * <p>Every family in the list is resolved with
	 * {@link #findAvailableVoices(String, String, Locale, Gender)}. A family name that equals (case
	 * insensitively) the name of one of the engines is treated as an engine name, any other family
	 * name as a voice name. Voices that match a family exactly (and have the requested variant
	 * number, if any) come first, in family order; all the other candidates follow.</p>
	 *
	 * @param lang        or {@code null} if unknown
	 * @param voiceFamily or {@code null} to select on language only
	 * @return the candidate voices, best first, without duplicates
	 */
	public Iterable<Voice> findAvailableVoices(Locale lang, VoiceFamilyList voiceFamily) {
		if (voiceFamily == null)
			return findAvailableVoices(null, null, lang, null);
		Collection<Voice> voices = new LinkedHashSet<>();
		Collection<Voice> nonExactMatches = new LinkedHashSet<>();
		for (Term<?> term : voiceFamily) {
			VoiceFamily f = (VoiceFamily)term;
			String engineName = null;
			String voiceName = null;
			Gender gender = null;
			if (f.getFamilyName().isPresent()) {
				String name = f.getFamilyName().get();
				if (engineNames.contains(name.toLowerCase()))
					engineName = name;
				else
					voiceName = name;
			}
			if (f.getGender().isPresent())
				gender = Gender.of(f.getGender().get(), f.getAge());
			// the candidates are ordered best first, so as soon as one is not an exact match none
			// of the following ones is treated as exact either
			boolean exact = true;
			Integer variant = f.getVariant().orElse(null);
			// 1-based position of the candidate, compared with the requested variant number
			int k = 0;
			for (Voice v : findAvailableVoices(engineName, voiceName, lang, gender)) {
				k++;
				if ((exact = exact && matches(v, engineName, voiceName, lang, gender))
				    && (variant == null || k == variant))
					voices.add(v);
				else
					nonExactMatches.add(v);
			}
		}
		voices.addAll(nonExactMatches);
		return voices;
	}

	private final Map> cache = new HashMap<>();

	public Iterable findAvailableVoices(String voiceEngine, String voiceName, Locale lang, Gender gender) {
		VoiceKey cacheKey = new VoiceKey(voiceEngine, voiceName, lang, gender);
		Iterable fromCache = cache.get(cacheKey);
		if (fromCache != null)
			return fromCache;
		Collection voices = new LinkedHashSet<>();
		if (voiceEngine != null && !voiceEngine.isEmpty() && voiceName != null && !voiceName.isEmpty()) {
			VoiceKey preferred = new VoiceKey(voiceEngine, voiceName); // not necessarily equal to cacheKey
			                                                           // because of case normalization
			Voice primary = primaryVoices.get(preferred);
			if (primary != null)
				voices.add(primary);
			Collection fallback = secondaryVoices.get(preferred);
			if (fallback != null)
				voices.addAll(fallback);
		}
		if (lang != null || gender != null || voiceName == null) {
			Locale shortLang = lang != null ? new Locale(lang.getLanguage()) : lang;

			// engine is more important than gender, region and priority, gender is more important than region
			// and priority, region is more important than priority
			addExactMatches(voices, lang, shortLang, gender, voiceEngine);
			if (gender != null && gender != NO_DEFINITE_GENDER)
				addExactMatches(voices, lang, shortLang, NO_DEFINITE_GENDER, voiceEngine);
			if (lang != null) {
				if (!lang.equals(shortLang)) {
					addExactMatches(voices, shortLang, shortLang, gender, voiceEngine);
					if (gender != null && gender != NO_DEFINITE_GENDER)
						addExactMatches(voices, shortLang, shortLang, NO_DEFINITE_GENDER, voiceEngine);
				}
				if (!lang.equals(VoiceKey.MUL)) {
					addExactMatches(voices, VoiceKey.MUL, VoiceKey.MUL, gender, voiceEngine);
					if (gender != null && gender != NO_DEFINITE_GENDER)
						addExactMatches(voices, VoiceKey.MUL, VoiceKey.MUL, NO_DEFINITE_GENDER, voiceEngine);
				}
			}
			if (gender != null) {
				addExactMatches(voices, lang, shortLang, null, voiceEngine);
				if (lang != null) {
					if (!lang.equals(shortLang))
						addExactMatches(voices, shortLang, shortLang, null, voiceEngine);
					if (!lang.equals(VoiceKey.MUL))
						addExactMatches(voices, VoiceKey.MUL, VoiceKey.MUL, null, voiceEngine);
				}
			}
			if (voiceEngine != null) {
				addExactMatches(voices, lang, shortLang, gender, null);
				if (gender != null && gender != NO_DEFINITE_GENDER)
					addExactMatches(voices, lang, shortLang, NO_DEFINITE_GENDER, null);
				if (lang != null) {
					if (!lang.equals(shortLang)) {
						addExactMatches(voices, shortLang, shortLang, gender, null);
						if (gender != null && gender != NO_DEFINITE_GENDER)
							addExactMatches(voices, shortLang, shortLang, NO_DEFINITE_GENDER, null);
					}
					if (!lang.equals(VoiceKey.MUL)) {
						addExactMatches(voices, VoiceKey.MUL, VoiceKey.MUL, gender, null);
						if (gender != null && gender != NO_DEFINITE_GENDER)
							addExactMatches(voices, VoiceKey.MUL, VoiceKey.MUL, NO_DEFINITE_GENDER, null);
					}
				}
			}
			if (gender != null && voiceEngine != null) {
				addExactMatches(voices, lang, shortLang, null, null);
				if (lang != null) {
					if (!lang.equals(shortLang))
						addExactMatches(voices, shortLang, shortLang, null, null);
					if (!lang.equals(VoiceKey.MUL))
						addExactMatches(voices, VoiceKey.MUL, VoiceKey.MUL, null, null);
				}
			}
			if (voiceName != null) {
				// reorder so that voices with requested name come first
				// the relative order of two voices does not change if they have the same name, or
				// if they both have a name that does not equal the requested name
				voices = new ArrayList<>(voices);
				Collections.sort((List)voices,
				                 Comparator.comparing(x -> !x.getName().equals(voiceName)));
			}
		}
		cache.put(cacheKey, voices);
		return voices;
	}

	/**
	 * Adds to {@code collect} the voices that are indexed under the given primary language, gender
	 * and engine, and whose language range matches {@code lang}.
	 *
	 * @param collect     the collection to add the voices to
	 * @param lang        the language the language range of a voice must match, or {@code null}
	 *                    to accept any language range
	 * @param shortLang   the language part of the index key
	 * @param gender      the gender part of the index key
	 * @param voiceEngine the engine part of the index key
	 */
	private void addExactMatches(Collection<Voice> collect, Locale lang, Locale shortLang, Gender gender, String voiceEngine) {
		Collection<LanguageRangeVoiceTuple> vv = voiceIndex.get(new VoiceKey(shortLang, gender, voiceEngine));
		if (vv == null)
			return;
		for (LanguageRangeVoiceTuple v : vv)
			if (lang == null || v.language.matches(lang))
				collect.add(v.voice);
	}

	/**
	 * Tells whether a voice is an exact match for the given engine, name, language and/or gender.
	 * Criteria that are {@code null} are not checked. Engine and name are compared case
	 * insensitively; language and gender are checked against the index entries that have a
	 * language, a gender and an engine.
	 */
	public boolean matches(Voice voice, String voiceEngine, String voiceName, Locale lang, Gender gender) {
		if (!(voiceEngine == null || voiceEngine.equalsIgnoreCase(voice.getEngine()))
		    || !(voiceName == null || voiceName.equalsIgnoreCase(voice.getName())))
			return false;
		if (lang == null && gender == null)
			return true;
		for (Entry<VoiceKey,Collection<LanguageRangeVoiceTuple>> entry : voiceIndex.entrySet()) {
			VoiceKey key = entry.getKey();
			// skip the entries in which language, gender or engine is a wildcard
			if (key.lang == null || key.gender == null || key.engine == null)
				continue;
			// a voice with unknown gender is accepted for any requested gender
			if (gender != null && key.gender != NO_DEFINITE_GENDER && !key.gender.equals(gender))
				continue;
			for (LanguageRangeVoiceTuple tuple : entry.getValue())
				if (tuple.voice.equals(voice)
				    && (lang == null || tuple.language.matches(lang)))
					return true;
		}
		return false;
	}

	/**
	 * Tells whether a voice matches the given language and at least one of the families in the
	 * given list. Without a list only the language is checked.
	 */
	public boolean matches(Voice voice, Locale lang, VoiceFamilyList voiceFamily) {
		if (voiceFamily == null)
			return matches(voice, null, null, lang, null);
		for (Term<?> term : voiceFamily) {
			VoiceFamily family = (VoiceFamily)term;
			if (matches(voice, lang, family))
				return true;
		}
		return false;
	}

	/**
	 * Tells whether a voice matches the given language and voice family.
	 *
	 * <p>A family with a name is matched on that name only: as an engine name if it equals (case
	 * insensitively) the name of one of the engines, as a voice name otherwise. A family without
	 * a name is matched on its gender, if it has one.</p>
	 */
	private boolean matches(Voice voice, Locale lang, VoiceFamily voiceFamily) {
		if (voiceFamily == null)
			return matches(voice, null, null, lang, null);
		if (voiceFamily.getFamilyName().isPresent()) {
			String name = voiceFamily.getFamilyName().get();
			return engineNames.contains(name.toLowerCase())
				? matches(voice, name, null, lang, null)
				: matches(voice, null, name, lang, null);
		}
		// a family with neither name nor gender puts no constraint on the voice, rather than
		// failing on an empty Optional
		Gender gender = voiceFamily.getGender().isPresent()
			? Gender.of(voiceFamily.getGender().get())
			: null;
		return matches(voice, null, null, lang, gender);
	}

	/**
	 * Renders the stack trace of a throwable as a string.
	 */
	private static String getStack(Throwable t) {
		StringWriter buffer = new StringWriter();
		// closing the PrintWriter flushes it into the buffer
		try (PrintWriter out = new PrintWriter(buffer)) {
			t.printStackTrace(out);
		}
		return buffer.toString();
	}

	// sort VoiceInfo by priority (descending)
	private static final Comparator<VoiceInfo> sortByPriority
		= (vi1, vi2) -> Float.compare(vi2.priority, vi1.priority);

	// sort VoiceInfo by specificity of language range (descending)
	// (the explicit type argument is needed because the lambda can not be typed through the
	// chained call to reversed())
	private static final Comparator<VoiceInfo> sortBySpecificity
		= Comparator.<VoiceInfo>comparingInt(vi -> vi.language.getSpecificity()).reversed();

	/**
	 * Immutable key made of up to four parts (engine, name, language, gender), used in the
	 * various maps of {@link VoiceManager}. Parts that do not apply are {@code null}.
	 */
	private static class VoiceKey {

		private static final Locale MUL = new Locale("mul");

		private final String engine;
		private final String name;
		/**
		 * When used in {@code voiceIndex}, this is the primary language subtag or "mul". When used
		 * in {@code cache}, this is the {@code lang} argument passed to the {@link
		 * #findAvailableVoices} method.
		 */
		private final Locale lang;
		private final Gender gender;

		/**
		 * For use in {@code primaryVoices} and {@code secondaryVoices}. Engine and name are
		 * lower-cased so that lookups are case insensitive.
		 */
		public VoiceKey(String engine, String name) {
			this.engine = normalize(engine);
			this.name = normalize(name);
			this.lang = null;
			this.gender = null;
		}

		/**
		 * For use in {@code voiceIndex}. The engine is lower-cased so that lookups are case
		 * insensitive.
		 *
		 * @param primaryLang The primary language subtag or "mul"
		 */
		public VoiceKey(Locale primaryLang, Gender gender, String engine) {
			this.lang = primaryLang;
			this.gender = gender;
			this.engine = normalize(engine);
			this.name = null;
		}

		/**
		 * For use in {@code cache}. The arguments are stored as-is, without case normalization.
		 */
		public VoiceKey(String engine, String name, Locale lang, Gender gender) {
			this.engine = engine;
			this.name = name;
			this.lang = lang;
			this.gender = gender;
		}

		/**
		 * Lower-cases with {@link Locale#ROOT} so that the result does not depend on the default
		 * locale of the JVM (in a Turkish locale "I" would otherwise become a dotless "ı").
		 */
		private static String normalize(String s) {
			return s == null ? null : s.toLowerCase(Locale.ROOT);
		}

		@Override
		public int hashCode() {
			return Objects.hash(lang, gender, engine, name);
		}

		@Override
		public boolean equals(Object other) {
			if (this == other)
				return true;
			if (!(other instanceof VoiceKey))
				return false;
			VoiceKey o = (VoiceKey)other;
			return Objects.equals(lang, o.lang)
			        && Objects.equals(gender, o.gender)
			        && Objects.equals(engine, o.engine)
			        && Objects.equals(name, o.name);
		}

		@Override
		public String toString() {
			// only the parts that are set are listed
			List<String> parts = new ArrayList<>();
			if (engine != null)
				parts.add("engine: " + engine);
			if (name != null)
				parts.add("name: " + name);
			if (lang != null)
				parts.add("lang: " + lang);
			if (gender != null)
				parts.add("gender: " + gender);
			return "{" + String.join(", ", parts) + "}";
		}
	}

	/**
	 * Pair of a language range and a voice, as stored in {@code voiceIndex}.
	 */
	private static class LanguageRangeVoiceTuple {

		private final LanguageRange language;
		private final Voice voice;

		public LanguageRangeVoiceTuple(LanguageRange language, Voice voice) {
			this.voice = voice;
			this.language = language;
		}

		@Override
		public String toString() {
			return language + ": " + voice;
		}
	}
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy