All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.daisy.pipeline.braille.common.AbstractBrailleTranslator Maven / Gradle / Ivy

There is a newer version: 5.0.2
Show newest version
package org.daisy.pipeline.braille.common;

import static java.lang.Math.min;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.google.common.collect.Iterators.concat;
import static com.google.common.collect.Iterators.peekingIterator;
import com.google.common.collect.ImmutableList;
import static com.google.common.collect.Lists.charactersOf;
import com.google.common.collect.PeekingIterator;

import cz.vutbr.web.css.CSSProperty;
import cz.vutbr.web.css.TermInteger;
import cz.vutbr.web.css.TermString;

import org.daisy.braille.css.BrailleCSSProperty.HyphenateCharacter;
import org.daisy.braille.css.BrailleCSSProperty.Hyphens;
import org.daisy.braille.css.BrailleCSSProperty.WhiteSpace;
import org.daisy.braille.css.BrailleCSSProperty.WordSpacing;
import org.daisy.braille.css.SimpleInlineStyle;
import org.daisy.dotify.api.table.BrailleConverter;
import org.daisy.dotify.api.translator.UnsupportedMetricException;
import org.daisy.dotify.api.translator.BrailleTranslatorResult;
import static org.daisy.pipeline.braille.common.util.Strings.extractHyphens;
import static org.daisy.pipeline.braille.common.util.Tuple2;
import org.daisy.pipeline.braille.css.CSSStyledText;

import org.slf4j.Logger;

public abstract class AbstractBrailleTranslator extends AbstractTransform implements BrailleTranslator {
	
	private final BrailleConverter brailleCharset;

	protected AbstractBrailleTranslator() {
		this(null, null);
	}

	/**
	 * @param brailleCharset Used by default implementation of {@link #lineBreakingFromStyledText()}
	 *                       to encode hyphen character when specified through "hyphenate-character"
	 *                       property.
	 */
	protected AbstractBrailleTranslator(BrailleConverter brailleCharset) {
		this(null, brailleCharset);
	}

	protected AbstractBrailleTranslator(Hyphenator hyphenator, BrailleConverter brailleCharset) {
		this.brailleCharset = brailleCharset;
		withHyphenatorCache = new HashMap<>();
		withHyphenatorCache.put(hyphenator, this);
	}

	/**
	 * Create a new {@link AbstractBrailleTranslator} object that is based on the same underlying
	 * caches and that has the same braille charset.
	 */

	protected AbstractBrailleTranslator(AbstractBrailleTranslator from) {
		this.brailleCharset = from.brailleCharset;
		this.withHyphenatorCache = from.withHyphenatorCache;
	}

	/**
	 * Return a new {@link BrailleTranslator} that uses the given {@link Hyphenator} to perform
	 * hyphenation.
	 *
	 * The returned object should be selectable based on its identifier from the same {@link
	 * BrailleTranslatorProvider} that provided this {@link BrailleTranslator}.
	 */
	public BrailleTranslator _withHyphenator(Hyphenator hyphenator) throws UnsupportedOperationException {
		throw new UnsupportedOperationException();
	}

	private Map withHyphenatorCache;

	public final BrailleTranslator withHyphenator(Hyphenator hyphenator) throws UnsupportedOperationException {
		BrailleTranslator t = null;
		if (withHyphenatorCache == null)
			withHyphenatorCache = new HashMap<>();
		else
			t = withHyphenatorCache.get(hyphenator);
		if (t == null) {
			t = _withHyphenator(hyphenator);
			withHyphenatorCache.put(hyphenator, t);
		}
		return t;
	}

	public FromStyledTextToBraille fromStyledTextToBraille() throws UnsupportedOperationException {
		throw new UnsupportedOperationException();
	}
	
	private LineBreakingFromStyledText lineBreakingFromStyledText = null;
	
	public LineBreakingFromStyledText lineBreakingFromStyledText() throws UnsupportedOperationException {
		// default implementation based on fromStyledTextToBraille()
		if (lineBreakingFromStyledText == null) {
			FromStyledTextToBraille fromStyledTextToBraille = fromStyledTextToBraille();
			Character blankChar = brailleCharset == null
				? '\u2800'
				: brailleCharset.toText("\u2800").toCharArray()[0];
			Character defaultHyphenChar = brailleCharset == null
				? '\u2824'
				: brailleCharset.toText("\u2824").toCharArray()[0];
			lineBreakingFromStyledText = new StandardLineBreaker(fromStyledTextToBraille, blankChar, defaultHyphenChar, brailleCharset);
		}
		return lineBreakingFromStyledText;
	}
	
	/**
	 * {@link LineBreakingFromStyledText} implementation that is backed by a {@link
	 * FromStyledTextToBraille} and therefore can only support standard hyphenation.
	 */
	private static class StandardLineBreaker extends util.DefaultLineBreaker {

		private final FromStyledTextToBraille fromStyledTextToBraille;

		public StandardLineBreaker(FromStyledTextToBraille fromStyledTextToBraille, char blankChar, char defaultHyphenChar, BrailleConverter brailleCharset) {
			super(blankChar, defaultHyphenChar, brailleCharset, null);
			this.fromStyledTextToBraille = fromStyledTextToBraille;
		}

		protected BrailleStream translateAndHyphenate(Iterable styledText, int from, int to) {
			List braille = new ArrayList<>();
			Iterator style = styledText.iterator();
			for (String s : fromStyledTextToBraille.transform(styledText)) {
				SimpleInlineStyle st = style.next().getStyle();
				if (st != null) {
					if (st.getProperty("hyphens") == Hyphens.NONE) {
						s = s.replaceAll("[\u00AD\u200B]","");
						st.removeProperty("hyphens"); }
					CSSProperty ws = st.getProperty("white-space");
					if (ws != null) {
						if (ws == WhiteSpace.PRE_WRAP)
							s = s.replaceAll("[\\x20\t\\u2800]+", "$0\u200B")
								.replaceAll("[\\x20\t\\u2800]", "\u00A0");
						if (ws == WhiteSpace.PRE_WRAP || ws == WhiteSpace.PRE_LINE)
							s = s.replaceAll("[\\n\\r]", "\u2028");
						st.removeProperty("white-space"); }}
				braille.add(s);
			}
			StringBuilder brailleString = new StringBuilder();
			int fromChar = 0;
			int toChar = to >= 0 ? 0 : -1;
			for (String s : braille) {
				brailleString.append(s);
				if (--from == 0)
					fromChar = brailleString.length();
				if (--to == 0)
					toChar = brailleString.length();
			}
			return new FullyHyphenatedAndTranslatedString(brailleString.toString(), fromChar, toChar);
		}
	}

	/* ================== */
	/*       UTILS        */
	/* ================== */
	
	public static abstract class util {
		
		public static abstract class DefaultLineBreaker implements LineBreakingFromStyledText {
			
			private final static char SHY = '\u00ad';   // soft hyphen
			private final static char ZWSP = '\u200b';  // zero-width space
			private final static char SPACE = ' ';      // space
			private final static char CR = '\r';        // carriage return
			private final static char LF = '\n';        // line feed
			private final static char TAB = '\t';       // tab
			private final static char NBSP = '\u00a0';  // no-break space
			private final static char BLANK = '\u2800'; // blank braille pattern
			private final static char LS = '\u2028';    // line separator (preserved line breaks)
			private final static char RS = '\u001E';    // (for segmentation)
			
			private final char blankChar;
			
			private final char defaultHyphenChar;
			private final BrailleConverter brailleCharset;
			private final Logger logger;
			
			public DefaultLineBreaker() {
				this(null);
			}
			
			public DefaultLineBreaker(Logger logger) {
				this('\u2800', '\u2824', logger);
			}
			
			public DefaultLineBreaker(char blankChar, char defaultHyphenChar, Logger logger) {
				this(blankChar, defaultHyphenChar, null, logger);
			}

			/**
			 * @param blankChar         character to use as blank pattern. Must already be encoded in the correct
			 *                          braille charset.
			 * @param defaultHyphenChar hyphen character to use in case of "hyphenate-character: auto". Must
			 *                          already be encoded in the correct braille charset.
			 * @param brailleCharset    for encoding hyphen character when specified through "hyphenate-character"
			 *                          property.
			 */
			public DefaultLineBreaker(char blankChar, char defaultHyphenChar, BrailleConverter brailleCharset, Logger logger) {
				this.blankChar = blankChar;
				this.defaultHyphenChar = defaultHyphenChar;
				this.brailleCharset = brailleCharset;
				this.logger = logger;
			}
			
			/**
			 * This method MUST translate to braille (the result may contain only braille
			 * characters and white
			 * space) and perform line breaking within words (hyphenate). It MAY also collapse
			 * white space and perform line breaking outside or at the boundaries of words
			 * (according to the CSS rules), but it doesn't need to because the result will be
			 * passed though a white space processing and line breaking stage anyway. Preserved
			 * spaces MUST be converted to NBSP characters. Line breaking may be "explicit" by not
			 * providing more characters than those that may be put on the current line. The
			 * "allowHyphens" argument must be respected, and a hyphen character at the end the line
			 * MUST be inserted when hyphenating. If it's a soft hyphen (SHY) it will be substituted
			 * with a real hyphen automatically. If line breaking is "implicit", it is left up to
			 * the line breaking stage. Preserved line breaks MUST be converted to LS characters in
			 * this case, and other break characters (SHY, ZWSP) MUST be included for all break
			 * opportunities, including those within words, regardless of the "allowHyphens"
			 * argument. There is a break opportunity after each string returned by the {@link
			 * BrailleStream}.
			 */
			protected abstract BrailleStream translateAndHyphenate(Iterable text, int from, int to);
			
			protected interface BrailleStream extends Cloneable {
				public boolean hasNext();
				/**
				 * @param limit The available space left on the current line. The returned string
				 *              does not have to fit in this space, but normally does in case of
				 *              "explicit" line breaking. If the returned string is longer, it will
				 *              be broken.
				 */
				public String next(int limit, boolean force, boolean allowHyphens);
				public Character peek();
				public String remainder();
				public Object clone();
				/**
				 * Whether the already streamed text, or the preceding segment if we are at the
				 * beginning of the stream, ended with a space.
				 *
				 * This method is only mandatory when we are the beginning of the stream. After the
				 * {@link #next(int, boolean, boolean)} method has been called,
				 * hasPrecedingSpace may throw a {@link UnsupportedOperationException}.
				 */
				public boolean hasPrecedingSpace();
			}
			
			public BrailleTranslator.LineIterator transform(final Iterable text, int from, int to)
					throws TransformationException {
				List hyphenChars = new ArrayList<>();
				int wordSpacing; {
					wordSpacing = -1;
					for (CSSStyledText st : text) {
						SimpleInlineStyle style = st.getStyle();
						char hyphenChar = defaultHyphenChar;
						int spacing = 1;
						if (style != null) {
							CSSProperty val = style.getProperty("hyphenate-character");
							if (val != null) {
								if (val == HyphenateCharacter.braille_string) {
									String s = style.getValue(TermString.class, "hyphenate-character").getValue();
									if (s.length() == 1) {
										hyphenChar = s.charAt(0);
										if (brailleCharset != null)
											hyphenChar = brailleCharset.toText("" + hyphenChar).toCharArray()[0];
									} else
										logger.warn("The 'hyphenate-character' property must be a single character, "
										            + "but got {}", s); }
								style.removeProperty("hyphenate-character"); }
							val = style.getProperty("word-spacing");
							if (val != null) {
								if (val == WordSpacing.length) {
									spacing = style.getValue(TermInteger.class, "word-spacing").getIntValue();
									if (spacing < 0) {
										if (logger != null)
											logger.warn("word-spacing: {} not supported, must be non-negative", val);
										spacing = 1; }}
								
								// FIXME: assuming style is mutable and text.iterator() does not create copies
								style.removeProperty("word-spacing"); }}
						hyphenChars.add(hyphenChar);
						if (wordSpacing < 0)
							wordSpacing = spacing;
						else if (wordSpacing != spacing)
							throw new TransformationException("word-spacing must be constant, but both "
							                                  + wordSpacing + " and " + spacing + " specified"); }
					if (wordSpacing < 0) wordSpacing = 1;
				}
				List lineIterators = new ArrayList<>();
				Character hyphenChar = null;
				if (to < 0) to = hyphenChars.size();
				int i = from;
				while (i < to) {
					Character nextHyphenChar = hyphenChars.get(i);
					if (hyphenChar != nextHyphenChar) {
						if (i > from) {
							lineIterators.add(
								new LineIterator(translateAndHyphenate(text, from, i), blankChar, hyphenChar, wordSpacing));
							from = i; }
						hyphenChar = nextHyphenChar; }
					i++; }
				if (i > from)
					lineIterators.add(
						new LineIterator(translateAndHyphenate(text, from, i), blankChar, hyphenChar, wordSpacing));
				return CompoundBrailleTranslator.concatLineIterators(lineIterators);
			}
			
			protected static class FullyHyphenatedAndTranslatedString implements BrailleStream {
				private String next;
				private String lastWordPart; // if the last word continues in the next segment
				private String lastWordOtherPart;
				private final boolean precedingSpace;
				private final boolean alwaysEmpty;
				private final char hyphenChar;
				// SPACE, TAB, LF, CR, BLANK or LS
				private final static Pattern WORD_BOUNDARY = Pattern.compile("[\\x20\t\\n\\r\\u2800\u2028]");
				public FullyHyphenatedAndTranslatedString(String string) {
					this(string, 0, -1);
				}
				public FullyHyphenatedAndTranslatedString(String string, int from, int to) {
					this(string, from, to, SHY); // LineIterator converts SHY at the end of a line to a hyphen character
				}
				public FullyHyphenatedAndTranslatedString(String string, int from, int to, char hyphenChar) {
					// if there are no preceding segments, assume that we are at the beginning of a line,
					// so leading space can be stripped
					// FIXME: we can not make this assumption! see for example FormatterCoreContext,
					// which translates a space in order to obtain a value to use as margin
					// character, and it expects it to be a non-empty string
					//if (from == 0) precedingSpace = true; else
					precedingSpace = DefaultLineBreaker.hasPrecedingSpace(string, from);
					int len = string.length();
					if (from < 0 || from > len)
						throw new IllegalArgumentException();
					next = string.substring(from);
					lastWordPart = lastWordOtherPart = null;
					if (to >= 0 && to != len) {
						if (to > len || to < from)
							throw new IllegalArgumentException();
						to -= from;
						len -= from;
						int lastWordStart = lastIndexOf((" " + next.substring(0, to)), WORD_BOUNDARY);
						if (lastWordStart == to) {
							next = next.substring(0, to);
						} else {
							int lastWordEnd = to + indexOf((next.substring(to, len) + " "), WORD_BOUNDARY);
							if (lastWordEnd == to) {
								next = next.substring(0, to);
							} else {
								lastWordPart = next.substring(lastWordStart, to);
								lastWordOtherPart = next.substring(to, lastWordEnd);
								next = next.substring(0, lastWordStart);
								// ZWSP is not considered a WORD_BOUNDARY, but a word should not start with a ZWSP
								lastWordPart = lastWordPart.replaceAll("^\u200b*", "");
								if (lastWordPart.isEmpty())
									lastWordPart = lastWordOtherPart = null;
							}
						}
					} else {
						// remove SHY at end of stream because it would be converted to hyphen character
						next = next.replaceAll("\u00ad$", "");
					}
					if (next.replaceAll("[\u00ad\u200b]","").isEmpty())
						next = null;
					alwaysEmpty = (next == null);
					this.hyphenChar = hyphenChar;
				}
				public boolean hasNext() {
					return next != null || lastWordPart != null;
				}
				public String next(int limit, boolean force, boolean allowHyphens) {
					if (next != null) {
						String n = next;
						next = null;
						return n; }
					else if (lastWordPart != null) {
						if (force) {
							String n = lastWordPart;
							lastWordPart = null;
							return n; }
						Tuple2 t = extractHyphens(lastWordPart + RS + lastWordOtherPart, false, SHY, ZWSP, RS);
						String lastWord = t._1;
						// if last word fits
						if (lastWord.length() <= limit) {
							String n = lastWordPart;
							lastWordPart = null;
							return n; }
						// else if the word can be hyphenated
						// assuming that if allowHyphens is true for this segment it will be true for the next segment
						else if (allowHyphens) {
							byte[] hyphens = t._2;
							boolean inFirstPart = false;
							String nextLastWordPart = null;
							for (int i = limit; i > 0; i--) {
								// FIXME: don't hard-code the number 4
								if ((hyphens[i - 1] & 4) == 4) {
									inFirstPart = true;
									nextLastWordPart = lastWord.substring(0, i); }
								// FIXME: don't hard-code these numbers
								if ((((hyphens[i - 1] & 1) == 1) && (i < limit)) || ((hyphens[i - 1] & 2) == 2)) {
									if (!inFirstPart) {
										// break point in second part of word, or in first part and first part does not fit on line
										// in both cases the implicit line breaking will break correctly
										String n = lastWordPart;
										lastWordPart = null;
										return n; }
									else {
										// break point in first part of word and first part fits on line
										// need to switch to "explicit" mode because otherwise the first part would not be broken
										String n = lastWord.substring(0, i);
										// FIXME: don't hard-code the number 1
										if ((hyphens[i - 1] & 1) == 1)
											n += hyphenChar;
										lastWordPart = nextLastWordPart.substring(i);
										if (lastWordPart.isEmpty()) lastWordPart = null;
										return n; }}}}
						// else move the word to the next line
						return "";
					}
					else
						throw new NoSuchElementException();
				}
				public Character peek() {
					if (next != null)
						return next.charAt(0);
					else if (lastWordPart != null)
						return lastWordPart.charAt(0);
					else
						throw new NoSuchElementException();
				}
				public String remainder() {
					if (next == null && lastWordPart == null)
						throw new NoSuchElementException();
					StringBuilder remainder = new StringBuilder();
					if (next != null)
						remainder.append(next);
					if (lastWordPart != null)
						remainder.append(lastWordPart);
					return remainder.toString();
				}
				public boolean hasPrecedingSpace() {
					if (next == null && !alwaysEmpty)
						throw new UnsupportedOperationException();
					else
						return precedingSpace;
				}
				@Override
				public Object clone() {
					try {
						return super.clone();
					} catch (CloneNotSupportedException e) {
						throw new InternalError("coding error");
					}
				}
				/**
				 * Version of {@link String#indexOf(int)} that searched for pattern instead of character.
				 */
				private static int indexOf(String string, Pattern pattern) {
					Matcher m = pattern.matcher(string);
					if (m.find())
						return m.start();
					else
						return -1;
				}
				/**
				 * Version of {@link String#lastIndexOf(int)} that searched for pattern instead of character.
				 */
				private static int lastIndexOf(String string, Pattern pattern) {
					Matcher m = pattern.matcher(string);
					int lastIndex = -1;
					while (m.find())
						lastIndex = m.start();
					return lastIndex;
				}
			}
			
			public static class LineIterator implements BrailleTranslator.LineIterator, Cloneable {
				
				private final char blankChar;
				private final char hyphenChar;
				private final int wordSpacing;
				
				public LineIterator(String fullyHyphenatedAndTranslatedString, char blankChar, char hyphenChar, int wordSpacing) {
					this(fullyHyphenatedAndTranslatedString, 0, -1, blankChar, hyphenChar, wordSpacing);
				}
				
				public LineIterator(String fullyHyphenatedAndTranslatedString, int from, int to,
				                    char blankChar, char hyphenChar, int wordSpacing) {
					this(new FullyHyphenatedAndTranslatedString(fullyHyphenatedAndTranslatedString, from, to, hyphenChar),
					     blankChar, hyphenChar, wordSpacing);
				}
				
				public LineIterator(BrailleStream inputStream, char blankChar, char hyphenChar, int wordSpacing) {
					this.inputStream = inputStream;
					this.blankChar = blankChar;
					this.hyphenChar = hyphenChar;
					this.wordSpacing = wordSpacing;
					this.lastCharIsSpace = inputStream.hasPrecedingSpace();
				}
				
				private BrailleStream inputStream;
				private PeekingIterator inputBuffer = null;
				private StringBuilder charBuffer = new StringBuilder();
				
				/**
				 * Array with soft wrap opportunity info
				 * - SPACE, LF, CR, TAB and ZWSP create normal soft wrap opportunities
				 * - SHY create soft wrap opportunities that insert a hyphen glyph
				 * - normal soft wrap opportunities override soft wrap opportunities that insert a hyphen glyph
				 *
				 * @see Braille CSS – § 9.4 Line Breaking
				 */
				// byte at index i corresponds with boundary between characters i and i+1 of charBuffer
				// or end of string if i == charBuffer.length()
				private ArrayList wrapInfo = new ArrayList();
				// from lowest to highest precedence:
				private final static byte NO_SOFT_WRAP = (byte)0x0;
				private final static byte SOFT_WRAP_WITH_HYPHEN = (byte)0x1;     //    x
				private final static byte SOFT_WRAP_WITHOUT_HYPHEN = (byte)0x3;  //   xx
				private final static byte SOFT_WRAP_AFTER_SPACE = (byte)0x7;     //  xxx
				private final static byte HARD_WRAP = (byte)0x15;                // xxxx
				
				// for collapsing spaces
				private boolean lastCharIsSpace = false;
				private int forcedBreakCount = 0;
				
				/**
				 * Fill the character (charBuffer) and soft wrap opportunity (wrapInfo) buffers while normalising and collapsing spaces
				 * - until the buffers are at least 'limit' long
				 * - or until the current row is full according to the input feed (BrailleStream)
				 * - and while the remaining input starts with SPACE, LF, CR, TAB, NBSP, BRAILLE PATTERN BLANK, SHY, ZWSP or LS
				 */
				private void fillRow(int limit, boolean force, boolean allowHyphens) {
					int bufSize = charBuffer.length();
					while (true) {
						if (inputBuffer != null && !inputBuffer.hasNext()) {
							// there is a break opportunity after each string returned by the stream
							// we keep soft hyphens at the end of the string (also if we are at the very end of the stream)
							if (bufSize > 0 && wrapInfo.get(bufSize - 1) != SOFT_WRAP_WITH_HYPHEN)
								wrapInfo.set(bufSize - 1, (byte)(wrapInfo.get(bufSize - 1) | SOFT_WRAP_WITHOUT_HYPHEN));
							inputBuffer = null;
						}
						if (inputBuffer == null) {
							if (!inputStream.hasNext()) // end of stream
								return;
							if (bufSize < limit) {
								String next = inputStream.next(limit - bufSize, force && (bufSize == 0), allowHyphens);
								if (next.isEmpty()) // row full according to input feed
									return;
								inputBuffer = peekingIterator(charactersOf(next).iterator()); }
							if (inputBuffer == null) {
								switch (inputStream.peek()) {
								case SHY:    case TAB:
								case ZWSP:   case BLANK:
								case SPACE:  case NBSP:
								case LF:     case LS:
								case CR:
									String next = inputStream.next(1, true, allowHyphens);
									if (next.isEmpty())
										throw new RuntimeException("coding error");
									inputBuffer = peekingIterator(charactersOf(next).iterator());
									break;
								default:
									return; }}}
						char next = inputBuffer.peek();
						switch (next) {
						case SHY:
							if (bufSize > 0)
								wrapInfo.set(bufSize - 1, (byte)(wrapInfo.get(bufSize - 1) | SOFT_WRAP_WITH_HYPHEN));
							lastCharIsSpace = false;
							break;
						case ZWSP:
							if (bufSize > 0)
								wrapInfo.set(bufSize - 1, (byte)(wrapInfo.get(bufSize - 1) | SOFT_WRAP_WITHOUT_HYPHEN));
							break;
						case SPACE:
						case LF:
						case CR:
						case TAB:
						case BLANK:
							if (lastCharIsSpace)
								break;
							if (bufSize > 0)
								wrapInfo.set(bufSize - 1, (byte)(wrapInfo.get(bufSize - 1) | SOFT_WRAP_WITHOUT_HYPHEN));
							for (int i = 0; i < wordSpacing; i++) {
								charBuffer.append(blankChar);
								bufSize ++;
								wrapInfo.add(SOFT_WRAP_AFTER_SPACE);
							}
							lastCharIsSpace = true;
							break;
						case NBSP:
							charBuffer.append(blankChar);
							bufSize ++;
							wrapInfo.add(NO_SOFT_WRAP);
							lastCharIsSpace = false;
							break;
						case LS:
							if (bufSize > 0 && (wrapInfo.get(bufSize - 1) & HARD_WRAP) != HARD_WRAP)
								wrapInfo.set(bufSize - 1, (byte)(wrapInfo.get(bufSize - 1) | HARD_WRAP));
							else {
								// add a blank to attach the HARD_WRAP info to
								// otherwise we can't preserve empty lines
								charBuffer.append(blankChar);
								bufSize ++;
								wrapInfo.add(HARD_WRAP);
							}
							lastCharIsSpace = true;
							break;
						default:
							if (bufSize >= limit) return;
							charBuffer.append(next);
							bufSize ++;
							wrapInfo.add(NO_SOFT_WRAP);
							lastCharIsSpace = false; }
						inputBuffer.next();
					}
				}
				
				/**
				 * Flush the first 'size' elements of the character and soft wrap opportunity buffers
				 * Assumes that 'size <= charBuffer.length()'
				 */
				private void flushBuffers(int size) {
					charBuffer = new StringBuilder(charBuffer.substring(size));
					wrapInfo = new ArrayList(wrapInfo.subList(size, wrapInfo.size()));
				}
				
				/**
				 * @param limit specifies the maximum number of characters allowed in the result
				 * @param force specifies if the translator should force a break at the limit
				 *              if no natural break point is found
				 * @param wholeWordsOnly specifies that the row may not end on a break point inside a word.
				 * @return returns the translated string preceding the row break, including a translated hyphen
				 *                 at the end if needed.
				 */
				public String nextTranslatedRow(int limit, boolean force, boolean wholeWordsOnly) {
					fillRow(limit, force, !wholeWordsOnly);
					int bufSize = charBuffer.length();
					
					// charBuffer may be empty (even if hasNext() was true)
					if (bufSize == 0)
						return "";
					
					// always break at preserved line breaks
					for (int i = 0; i < min(bufSize, limit); i++) {
						if (wrapInfo.get(i) == HARD_WRAP) {
							int cut = i + 1;
							
							// strip trailing SPACE/LF/CR/TAB/BLANK/NBSP (all except NBSP are already collapsed into one)
							while (cut > 0 && charBuffer.charAt(cut - 1) == blankChar) cut--;
							
							// preserve if at beginning of stream
							if (cut == 0)
								cut = i + 1;
							String rv = charBuffer.substring(0, cut);
							
							// strip leading SPACE/LF/CR/TAB/BLANK in remaining text (are already collapsed into one)
							cut = i + 1;
							while (cut < bufSize && wrapInfo.get(cut) == SOFT_WRAP_AFTER_SPACE) cut++;
							flushBuffers(cut);
							return rv; }}
					
					// no need to break if remaining text is shorter than line
					if (bufSize < limit) {
						String rv = charBuffer.substring(0, bufSize);
							
						// replace soft hyphen with real hyphen
						if (wrapInfo.get(rv.length() - 1) == SOFT_WRAP_WITH_HYPHEN)
							rv += hyphenChar;
						charBuffer.setLength(0);
						wrapInfo.clear();
						
						// strip trailing SPACE/LF/CR/TAB/BLANK/NBSP (all except NBSP are already collapsed into one)
						int cut = bufSize;
						while (cut > 0 && rv.charAt(cut - 1) == blankChar) cut--;
						
						// preserve if at beginning of stream or end of stream
						if (cut > 0 && cut < bufSize && hasNext())
							rv = rv.substring(0, cut);
						return rv; }
					
					// return nothing if limit is 0 (limit should not be less than 0, but check anyway)
					if (limit <= 0) {
						
						// strip leading SPACE/LF/CR/TAB/BLANK in remaining text (are already collapsed into one)
						int cut = 0;
						while (cut < bufSize && wrapInfo.get(cut) == SOFT_WRAP_AFTER_SPACE) cut++;
						flushBuffers(cut);
						return ""; }
					
					// break at SPACE or ZWSP
					// FIXME: ignore ZWSP if it comes from hyphenation (how to find out?) and wholeWordsOnly==true
					if ((wrapInfo.get(limit - 1) & SOFT_WRAP_WITHOUT_HYPHEN) == SOFT_WRAP_WITHOUT_HYPHEN) {
						int cut = limit;
						
						// strip trailing SPACE/LF/CR/TAB/BLANK/NBSP (all except NBSP are already collapsed into one)
						while (cut > 0 && charBuffer.charAt(cut - 1) == blankChar) cut--;
						
						// strip leading SPACE/LF/CR/TAB/BLANK in remaining text (are already collapsed into one)
						int cut2 = limit;
						while (cut2 < bufSize && wrapInfo.get(cut2) == SOFT_WRAP_AFTER_SPACE) cut2++;
						String rv = charBuffer.substring(0, cut2);
						flushBuffers(cut2);
						
						// preserve if at beginning of stream or end of stream and not overflowing
						if (cut > 0 && cut < cut2 && hasNext())
							rv = rv.substring(0, cut);
						else if (cut2 > limit)
							rv = rv.substring(0, limit);
						return rv; }
					
					// try to break later if the overflowing characters are blank
					for (int i = limit + 1; i - 1 < bufSize && charBuffer.charAt(i - 1) == blankChar; i++)
						if ((wrapInfo.get(i - 1) & SOFT_WRAP_WITHOUT_HYPHEN) == SOFT_WRAP_WITHOUT_HYPHEN) {
							
							// strip trailing SPACE/LF/CR/TAB/BLANK/NBSP (all except NBSP are already collapsed into one)
							int cut = limit;
							while (cut > 0 && charBuffer.charAt(cut - 1) == blankChar) cut--;
							
							// strip leading SPACE/LF/CR/TAB/BLANK in remaining text (are already collapsed into one)
							int cut2 = i;
							while (cut2 < bufSize && wrapInfo.get(cut2) == SOFT_WRAP_AFTER_SPACE) cut2++;
							String rv = charBuffer.substring(0, cut2);
							flushBuffers(cut2);
							
							// preserve if at end of stream and not overflowing
							if (cut < cut2 && hasNext())
								rv = rv.substring(0, cut);
							else if (cut2 > limit)
								rv = rv.substring(0, limit);
							return rv; }
					
					// try to break sooner
					for (int i = limit - 1; i > 0; i--) {
						
						// break at SPACE, ZWSP or SHY
						// FIXME: ignore ZWSP if it comes from hyphenation (how to find out?) and wholeWordsOnly==true
						if ((wrapInfo.get(i - 1) & SOFT_WRAP_WITHOUT_HYPHEN) == SOFT_WRAP_WITHOUT_HYPHEN
						    || (!wholeWordsOnly && (wrapInfo.get(i - 1) == SOFT_WRAP_WITH_HYPHEN))) {
							int cut = i;
							String rv;
							
							// insert hyphen glyph at SHY
							if (wrapInfo.get(cut - 1) == 0x1) {
								rv = charBuffer.substring(0, cut);
								rv += hyphenChar; }
							else {
								
								// strip trailing SPACE/LF/CR/TAB/BLANK/NBSP (all except NBSP are already collapsed into one)
								while (cut > 0 && charBuffer.charAt(cut - 1) == blankChar) cut--;
								
								// preserve if at beginning of stream
								if (cut == 0)
									cut = i;
								rv = charBuffer.substring(0, cut); }
							
							// strip leading SPACE/LF/CR/TAB/BLANK in remaining text (are already collapsed into one)
							// FIXME: breaks cases where space after SHY is not from letter-spacing
							cut = i;
							while(cut < bufSize && charBuffer.charAt(cut) == blankChar) cut++;
							flushBuffers(cut);
							return rv; }}
					
					// force hard break
					if (force) {
						forcedBreakCount++;
						String rv = charBuffer.substring(0, limit);
						flushBuffers(limit);
						return rv; }
					
					return "";
				}
				
				/**
				 * @return returns true if there are characters not yet extracted with nextTranslatedRow
				 */
				public boolean hasNext() {
					return charBuffer.length() > 0
						|| (inputBuffer != null && inputBuffer.hasNext())
						|| inputStream.hasNext();
				}
				
				/**
				 * @return returns the characters not yet extracted with nextTranslatedRow
				 */
				public String getTranslatedRemainder() {
					
					// Note that I could use clone() here, but the current code is slightly more efficient
					BrailleStream save_inputStream = inputStream;
					inputStream = emptyBrailleStream;
					List save_inputBuffer = inputBuffer != null ? ImmutableList.copyOf(inputBuffer) : null;
					if (save_inputBuffer != null) {
						if (save_inputStream.hasNext()) {
							inputBuffer = peekingIterator(
									concat(save_inputBuffer.iterator(), charactersOf(save_inputStream.remainder()).iterator()));
						} else {
							inputBuffer = peekingIterator(save_inputBuffer.iterator());
						}
					} else if (save_inputStream.hasNext()) {
						inputBuffer = peekingIterator(charactersOf(save_inputStream.remainder()).iterator());
					} else {
						inputBuffer = null;
					}
					StringBuilder save_charBuffer = charBuffer;
					charBuffer = new StringBuilder(charBuffer.toString());
					ArrayList save_wrapInfo = wrapInfo;
					wrapInfo = new ArrayList(wrapInfo);
					boolean save_lastCharIsSpace = lastCharIsSpace;
					int save_forcedBreakCount = forcedBreakCount;
					forcedBreakCount = 0;
					fillRow(Integer.MAX_VALUE, true, false);
					String remainder = charBuffer.toString();
					inputStream = save_inputStream;
					inputBuffer = save_inputBuffer != null ? peekingIterator(save_inputBuffer.iterator()) : null;
					charBuffer = save_charBuffer;
					wrapInfo = save_wrapInfo;
					lastCharIsSpace = save_lastCharIsSpace;
					forcedBreakCount = save_forcedBreakCount;
					return remainder;
				}
				
				/**
				 * @return returns the number of characters in getTranslatedRemainder
				 */
				public int countRemaining() {
					return getTranslatedRemainder().length();
				}
				
				/**
				 * @return returns a copy of this result in the current state
				 */
				public BrailleTranslatorResult copy() {
					return (BrailleTranslatorResult)clone();
				}
				
				@Override
				// FIXME: could this be done more efficiently/lazily?
				public Object clone() {
					LineIterator clone; {
						try {
							clone = (LineIterator)super.clone();
						} catch (CloneNotSupportedException e) {
							throw new InternalError("coding error");
						}
					}
					clone.inputStream = (BrailleStream)inputStream.clone();
					if (inputBuffer != null) {
						List list = ImmutableList.copyOf(inputBuffer);
						inputBuffer = peekingIterator(list.iterator());
						clone.inputBuffer = peekingIterator(list.iterator());
					}
					clone.charBuffer = new StringBuilder(charBuffer.toString());
					clone.wrapInfo = new ArrayList(wrapInfo);
					return clone;
				}
				
				// FIXME: support METRIC_HYPHEN_COUNT
				public boolean supportsMetric(String metric) {
					return METRIC_FORCED_BREAK.equals(metric);
				}
				
				public double getMetric(String metric) {
					if (METRIC_FORCED_BREAK.equals(metric))
						return forcedBreakCount;
					else
						throw new UnsupportedMetricException("Metric not supported: " + metric);
				}
				
				private static final BrailleStream emptyBrailleStream = new BrailleStream() {
					public boolean hasNext() { return false; }
					public String next(int limit, boolean force, boolean allowHyphens) { throw new NoSuchElementException(); }
					public Character peek() { throw new NoSuchElementException(); }
					public String remainder() { throw new NoSuchElementException(); }
					public boolean hasPrecedingSpace() { return false; }
					@Override
					public Object clone() { return this; }
				};
			}
			
			/**
			 * Whether there is a space immediately before the substring starting at before.
			 */
			protected static boolean hasPrecedingSpace(String string, int before) {
				while (before > 0)
					switch (string.charAt(--before)) {
					case SPACE:
					case BLANK:
					case CR:
					case LF:
					case TAB:
					case LS:
						return true;
					case ZWSP:
						continue;
					default:
						return false;
					}
				return false;
			}
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy