All downloads are free. The search and download functionality uses the official Maven repository.

org.daisy.pipeline.braille.common.UnityBrailleTranslator Maven / Gradle / Ivy

The newest version!
package org.daisy.pipeline.braille.common;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import cz.vutbr.web.css.CSSProperty;
import cz.vutbr.web.css.TermString;

import org.daisy.dotify.api.table.BrailleConverter;

import org.daisy.braille.css.SimpleInlineStyle;
import org.daisy.braille.css.BrailleCSSProperty.BrailleCharset;
import org.daisy.braille.css.BrailleCSSProperty.Hyphens;
import org.daisy.braille.css.BrailleCSSProperty.TextTransform;
import org.daisy.braille.css.BrailleCSSProperty.WhiteSpace;
import org.daisy.pipeline.braille.common.AbstractBrailleTranslator.util.DefaultLineBreaker;
import static org.daisy.pipeline.braille.common.util.Strings.splitInclDelimiter;
import org.daisy.pipeline.braille.css.CSSStyledText;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link org.daisy.pipeline.braille.common.BrailleTranslator} that assumes input text consists of
 * only braille and white space characters. Supports CSS properties "word-spacing", "hyphens",
 * "hyphenate-character", "white-space", and "braille-charset".
 */
public class UnityBrailleTranslator extends AbstractBrailleTranslator implements BrailleTranslator {

	/** Runs of white space and break-control characters that are copied to the output unconverted. */
	private static final Pattern SPECIAL_CHARS = Pattern.compile("[\\x20\t\\n\\r\\u2800\\xA0\u00AD\u200B\u2028]+");

	/** SOFT HYPHEN and ZERO WIDTH SPACE, removed from the text when "hyphens: none" applies. */
	private static final Pattern SOFT_HYPHEN_OR_ZWSP = Pattern.compile("[\u00AD\u200B]");

	/** A run of blank characters (space, tab or blank braille pattern). */
	private static final Pattern BLANK_RUN = Pattern.compile("[\\x20\t\\u2800]+");

	/** A single blank character (space, tab or blank braille pattern). */
	private static final Pattern BLANK = Pattern.compile("[\\x20\t\\u2800]");

	/** A single line feed or carriage return. */
	private static final Pattern NEW_LINE = Pattern.compile("[\\n\\r]");

	private final BrailleConverter brailleCharset;
	private final boolean useBrailleCharsetForInput;

	/**
	 * @param brailleCharset            The character set of the output braille, and of the input in
	 *                                  case it is styled as braille-charset: custom or if
	 *                                  useBrailleCharsetForInput is true.
	 *                                  null means Unicode braille.
	 * @param useBrailleCharsetForInput Whether brailleCharset by default also applies to
	 *                                  the input (if it does not have a braille-charset
	 *                                  style).
	 */
	public UnityBrailleTranslator(BrailleConverter brailleCharset, boolean useBrailleCharsetForInput) {
		this.brailleCharset = brailleCharset;
		this.useBrailleCharsetForInput = useBrailleCharsetForInput;
	}

	/**
	 * The given hyphenator is ignored.
	 *
	 * @param hyphenator not used
	 * @return this translator, unchanged
	 */
	public UnityBrailleTranslator _withHyphenator(Hyphenator hyphenator) {
		return this;
	}

	private FromStyledTextToBraille fromStyledTextToBraille = null;

	/**
	 * Returns the (lazily created and cached) transformer that maps each input segment with index
	 * in [from, to) to a braille string. A negative "to" means there is no upper bound. Segments
	 * outside of the range produce no output.
	 */
	public FromStyledTextToBraille fromStyledTextToBraille() {
		if (fromStyledTextToBraille == null)
			fromStyledTextToBraille = new FromStyledTextToBraille() {
					public Iterable<String> transform(Iterable<CSSStyledText> input, int from, int to) {
						List<String> braille = new ArrayList<>();
						int i = 0;
						for (CSSStyledText styledText : input) {
							if (i >= from && (to < 0 || i < to))
								// "white-space" is consumed but has no effect here
								braille.add(unityTransform(styledText, false));
							i++;
						}
						return braille;
					}
				};
		return fromStyledTextToBraille;
	}

	private LineBreakingFromStyledText lineBreakingFromStyledText = null;

	/**
	 * Returns the (lazily created and cached) line breaker. The blank and hyphen characters handed
	 * to it are U+2800 and U+2824, converted through brailleCharset when that is not null.
	 */
	public LineBreakingFromStyledText lineBreakingFromStyledText() {
		if (lineBreakingFromStyledText == null) {
			final Character blankChar = brailleCharset == null
				? '\u2800'
				: brailleCharset.toText("\u2800").charAt(0);
			final Character hyphenChar = brailleCharset == null
				? '\u2824'
				: brailleCharset.toText("\u2824").charAt(0);
			lineBreakingFromStyledText = new DefaultLineBreaker(blankChar, hyphenChar, brailleCharset, logger) {
					protected BrailleStream translateAndHyphenate(java.lang.Iterable<CSSStyledText> input, int from, int to) {
						List<String> braille = new ArrayList<>();
						int i = 0;
						for (CSSStyledText styledText : input) {
							if (i >= from && (to < 0 || i < to))
								braille.add(unityTransform(styledText, true));
							else
								// not converting to braille character set because not part of final output and we're not even
								// sure that it is braille
								// FIXME: may not even be useful to pass it as context to DefaultLineBreaker.LineIterator
								braille.add(styledText.getText());
							i++;
						}
						// Join all segments and translate the segment indices "from" and "to" into
						// character offsets within the joined string.
						StringBuilder joined = new StringBuilder();
						int fromChar = 0;
						int toChar = to >= 0 ? 0 : -1;
						int segmentsSeen = 0;
						for (String s : braille) {
							joined.append(s);
							segmentsSeen++;
							if (segmentsSeen == from)
								fromChar = joined.length();
							if (segmentsSeen == to)
								toChar = joined.length();
						}
						return new FullyHyphenatedAndTranslatedString(joined.toString(), fromChar, toChar, hyphenChar);
					}
				};
		}
		return lineBreakingFromStyledText;
	}

	/**
	 * Process a single segment: consume the supported properties from its style (the style object
	 * is modified in place), warn about the properties that remain, and convert the text to the
	 * output character set if the input is Unicode braille and brailleCharset is not null.
	 *
	 * @param styledText      the segment to process
	 * @param applyWhiteSpace whether "white-space: pre-wrap" and "white-space: pre-line" should be
	 *                        expressed in the text; if false the property is only removed
	 * @return the resulting text
	 */
	private String unityTransform(CSSStyledText styledText, boolean applyWhiteSpace) {
		String text = styledText.getText();
		SimpleInlineStyle style = styledText.getStyle();
		boolean unicodeBraille = brailleCharset == null || !useBrailleCharsetForInput;
		if (style != null) {
			text = applyHyphensProperty(style, text);
			text = applyWhiteSpaceProperty(style, text, applyWhiteSpace);
			consumeTextTransformProperty(style);
			unicodeBraille = consumeBrailleCharsetProperty(style, unicodeBraille);
			warnAboutUnsupportedProperties(style, text);
		}
		if (unicodeBraille && brailleCharset != null)
			text = unicodeBrailleToCharset(text);
		return text;
	}

	/**
	 * Remove "hyphens: manual" or "hyphens: none" from the style. In the "none" case soft hyphens
	 * and zero width spaces are also stripped from the text.
	 */
	private static String applyHyphensProperty(SimpleInlineStyle style, String text) {
		CSSProperty val = style.getProperty("hyphens");
		if (val == Hyphens.MANUAL || val == Hyphens.NONE) {
			if (val == Hyphens.NONE)
				text = SOFT_HYPHEN_OR_ZWSP.matcher(text).replaceAll("");
			style.removeProperty("hyphens");
		}
		return text;
	}

	/**
	 * Remove "white-space" from the style and, if requested, express "pre-wrap" and "pre-line" in
	 * the text itself.
	 */
	private static String applyWhiteSpaceProperty(SimpleInlineStyle style, String text, boolean applyWhiteSpace) {
		CSSProperty val = style.getProperty("white-space");
		if (val != null) {
			if (applyWhiteSpace) {
				if (val == WhiteSpace.PRE_WRAP) {
					// the order matters: first mark the end of every run, then replace each blank
					text = BLANK_RUN.matcher(text).replaceAll("$0\u200B"); // ZERO WIDTH SPACE
					text = BLANK.matcher(text).replaceAll("\u00A0"); // NO-BREAK SPACE
				}
				if (val == WhiteSpace.PRE_WRAP || val == WhiteSpace.PRE_LINE)
					text = NEW_LINE.matcher(text).replaceAll("\u2028"); // LINE SEPARATOR
			}
			style.removeProperty("white-space");
		}
		return text;
	}

	/** Remove "text-transform: none" or "text-transform: auto" from the style. */
	private static void consumeTextTransformProperty(SimpleInlineStyle style) {
		CSSProperty val = style.getProperty("text-transform");
		if (val == TextTransform.NONE || val == TextTransform.AUTO)
			style.removeProperty("text-transform");
	}

	/**
	 * Remove "braille-charset" from the style.
	 *
	 * @param unicodeBraille the value to return if the property is absent or neither "custom" nor
	 *                       "unicode"
	 * @return false for "custom", true for "unicode"
	 */
	private static boolean consumeBrailleCharsetProperty(SimpleInlineStyle style, boolean unicodeBraille) {
		CSSProperty val = style.getProperty("braille-charset");
		if (val != null) {
			if (val == BrailleCharset.CUSTOM)
				unicodeBraille = false;
			else if (val == BrailleCharset.UNICODE)
				unicodeBraille = true;
			style.removeProperty("braille-charset");
		}
		return unicodeBraille;
	}

	/** Log a warning for every property that is still present in the style. */
	private static void warnAboutUnsupportedProperties(SimpleInlineStyle style, String text) {
		for (String prop : style.getPropertyNames()) {
			logger.warn("'{}: {}' not supported in combination with 'text-transform: none'",
			            prop, style.get(prop));
			logger.debug("(text was: '{}')", text);
		}
	}

	/**
	 * Convert text to brailleCharset, leaving runs of {@link #SPECIAL_CHARS} untouched. Must only
	 * be called when brailleCharset is not null.
	 */
	private String unicodeBrailleToCharset(String text) {
		StringBuilder b = new StringBuilder();
		// the parts alternate between text to convert and delimiter, starting with text to convert
		boolean special = false;
		for (String s : splitInclDelimiter(text, SPECIAL_CHARS)) {
			if (!s.isEmpty())
				b.append(special ? s : brailleCharset.toText(s));
			special = !special;
		}
		return b.toString();
	}

	private static final Logger logger = LoggerFactory.getLogger(UnityBrailleTranslator.class);
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy