All Downloads are FREE. The search and download functionality uses the official Maven repository.

nl.dedicon.pipeline.braille.step.BrailleToText Maven / Gradle / Ivy

There is a newer version: 2.0.1
Show newest version
package nl.dedicon.pipeline.braille.step;

import static nl.dedicon.pipeline.braille.step.CapitalMode.letter;
import static nl.dedicon.pipeline.braille.step.CapitalMode.lowercase;
import static nl.dedicon.pipeline.braille.step.CapitalMode.permanent;
import static nl.dedicon.pipeline.braille.step.CapitalMode.word;
import static nl.dedicon.pipeline.braille.step.NumericMode.decimalSign;
import static nl.dedicon.pipeline.braille.step.NumericMode.moneyZeros;
import static nl.dedicon.pipeline.braille.step.NumericMode.no;
import static nl.dedicon.pipeline.braille.step.NumericMode.start;
import static nl.dedicon.pipeline.braille.step.NumericMode.yes;
import org.w3c.dom.Document;
import org.w3c.dom.Node;

/**
 * Braille to Text
 *
 * <p>Fills the text rows of a document with the text produced from the
 * corresponding braille rows. The capital mode, the numeric mode and the
 * "ends with hyphen" flag are instance fields, so they carry over from one row
 * to the next (and between successive calls on the same instance).
 *
 * @author Paul Rambags
 */
public class BrailleToText {

    private static final char
            BRAILLE_CAPITAL = 0x2828,           // '⠨'
            BRAILLE_CAPITAL_PERMANENT = 0x2818, // '⠘'
            BRAILLE_DIGIT = 0x283C,             // '⠼'
            BRAILLE_HYPHEN = 0x2824,            // '⠤'
            BRAILLE_RESTORE = 0x2820,           // '⠠'
            BRAILLE_SPACE = 0x2800,             // '⠀'
            SPACE = ' ';

    /** Capitalisation state applied to the next converted character. */
    private CapitalMode capitalMode = lowercase;
    /** Numeric state; anything other than {@code no} selects the numeric conversion table. */
    private NumericMode numericMode = no;
    /** Whether the last non-space braille character seen so far was a hyphen. */
    private boolean endsWithHyphen = false;

    /**
     * Walks the document along the path
     * {@code body > div#main > div.volume > div.page} and, for each page, pairs
     * the {@code div.row} children of its {@code div.braille-page} with the
     * {@code div.row} children of its {@code div.text-page}. The text content of
     * each text row is replaced by the conversion of the paired braille row.
     *
     * <p>Rows are paired by position; pairing stops for a page as soon as either
     * side runs out of rows.
     *
     * @param document the document to update in place
     */
    public void convert (Document document) {
        // NOTE(review): this relies on the Utils lookups tolerating a null parent
        // (e.g. a page without a braille-page or text-page div) - confirm in Utils.
        Node body = Utils.getChild(document.getDocumentElement(), "body");
        Node main = Utils.getFirstChildWithAttribute(body, "div", "id", "main");
        while (main != null) {
            Node volume = Utils.getFirstChildWithAttribute(main, "div", "class", "volume");
            while (volume != null) {
                Node page = Utils.getFirstChildWithAttribute(volume, "div", "class", "page");
                while (page != null) {
                    Node braillePage = Utils.getFirstChildWithAttribute(page, "div", "class", "braille-page");
                    Node textPage = Utils.getFirstChildWithAttribute(page, "div", "class", "text-page");
                    Node brailleRow = Utils.getFirstChildWithAttribute(braillePage, "div", "class", "row");
                    Node textRow = Utils.getFirstChildWithAttribute(textPage, "div", "class", "row");
                    while (brailleRow != null && textRow != null) {
                        String braille = brailleRow.getTextContent();
                        String text = convert(braille);
                        textRow.setTextContent(text);

                        brailleRow = Utils.getNextSiblingWithAttribute(brailleRow, "div", "class", "row");
                        textRow = Utils.getNextSiblingWithAttribute(textRow, "div", "class", "row");
                    }
                    page = Utils.getNextSiblingWithAttribute(page, "div", "class", "page");
                }
                volume = Utils.getNextSiblingWithAttribute(volume, "div", "class", "volume");
            }
            main = Utils.getNextSiblingWithAttribute(main, "div", "id", "main");
        }
    }

    /**
     * Converts one row of braille to text, one output character per input
     * character, and updates the conversion state for the following rows.
     *
     * <p>Each character is converted using the state that was in effect
     * <em>before</em> that character; the state is updated afterwards.
     *
     * @param braille the braille characters of a single row
     * @return the converted text, with the same number of characters as the input
     */
    // @todo Take into account that braille symbols can have multiple meanings, e.g. (123456) = é or %
    private String convert (String braille) {
        StringBuilder text = new StringBuilder();

        for (char b : braille.toCharArray()) {

            // text character
            char t;
            if (numericMode != no) {
                t = Utils.convertBrailleNumeric(b);
            } else {
                t = Utils.convertBraille(b);
                if (capitalMode != lowercase) {
                    // Locale.ROOT keeps the mapping independent of the JVM default
                    // locale (e.g. Turkish would turn 'i' into 'İ').
                    t = String.valueOf(t).toUpperCase(java.util.Locale.ROOT).charAt(0);
                }
            }
            text.append(t);

            // numeric mode
            if (b == BRAILLE_DIGIT) {
                numericMode = start;
            } else {
                switch (numericMode) {
                    case start:
                        // stays in 'start' until a minus or a digit is seen
                        if (Utils.isBrailleMinus(b) || Utils.isBrailleDigit(b)) {
                            numericMode = yes;
                        }
                        break;
                    case yes:
                        if (Utils.isBrailleDecimalSeparator(b)) {
                            numericMode = decimalSign;
                        } else if (!Utils.isBrailleDigit(b)) {
                            numericMode = no;
                        }
                        break;
                    case decimalSign:
                        if (Utils.isBrailleDigit(b)) {
                            numericMode = yes;
                        } else if (Utils.isBrailleMoneyZeros(b)) {
                            numericMode = moneyZeros;
                        } else {
                            numericMode = no;
                        }
                        break;
                    case moneyZeros:
                        if (!Utils.isBrailleMoneyZeros(b)) {
                            numericMode = no;
                        }
                        break;
                    case no:
                    default:
                        break;
                }
            }

            // capital mode
            if (b == BRAILLE_CAPITAL) {
                capitalMode = letter;
            } else if (b == BRAILLE_CAPITAL_PERMANENT) {
                // Each case needs its own break: without them every case fell
                // through to the last assignment and 'permanent' was unreachable.
                switch (capitalMode) {
                    case lowercase:
                    case letter:
                        capitalMode = word;
                        break;
                    case word:
                        capitalMode = permanent;
                        break;
                    case permanent:
                        capitalMode = word;
                        break;
                    default:
                        break;
                }
            } else if (b == BRAILLE_RESTORE) {
                numericMode = no;
                capitalMode = lowercase;
            } else if (isSpace(b)) {
                numericMode = no;
                // a space ends 'letter' and 'word' capitals, but not 'permanent'
                if (capitalMode != permanent) {
                    capitalMode = lowercase;
                }
            } else if (capitalMode == letter) {
                // 'letter' applies to a single character only
                capitalMode = lowercase;
            }

            // ends with hyphen (spaces after a hyphen do not clear the flag)
            if (b == BRAILLE_HYPHEN) {
                endsWithHyphen = true;
            } else if (!isSpace(b)) {
                endsWithHyphen = false;
            }
        }

        // end of line: numeric mode never continues on the next row; capital mode
        // continues only when it is 'permanent' or the row ended with a hyphen
        numericMode = no;
        if (!endsWithHyphen) {
            if (capitalMode != permanent) {
                capitalMode = lowercase;
            }
        }

        return text.toString();
    }

    /**
     * Tells whether a character is a space.
     *
     * @param b the character to test
     * @return {@code true} for the braille blank (U+2800) or an ASCII space
     */
    private static boolean isSpace(char b) {
        return b == BRAILLE_SPACE || b == SPACE;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy