All downloads are FREE. Search and download functionality uses the official Maven repository.

nl.dedicon.pipeline.braille.step.SymbolsReplacer Maven / Gradle / Ivy

There is a newer version: 2.0.1
Show newest version
package nl.dedicon.pipeline.braille.step;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmSequenceIterator;
import nl.dedicon.pipeline.braille.model.Context;
import nl.dedicon.pipeline.braille.model.Replace;
import nl.dedicon.pipeline.braille.model.Symbol;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import static nl.dedicon.pipeline.braille.step.NumericMode.decimalSign;
import static nl.dedicon.pipeline.braille.step.NumericMode.moneyZeros;
import static nl.dedicon.pipeline.braille.step.NumericMode.no;
import static nl.dedicon.pipeline.braille.step.NumericMode.yes;


/**
 * Replaces symbols in the text nodes of a DTBook with their braille
 * representation and inserts a symbols list into the DTBook.
 * Based on W3C DOM.
 * <p>
 * The symbol definitions are read once, in the constructor, from a symbols
 * code document. While text is being replaced, every replace that has a
 * non-blank description is collected in an instance-level set; that set is
 * the content of the symbols list written by
 * {@link #insertSymbolsList(Document, String)}. Instances therefore carry
 * mutable state that grows with every replacement performed.
 *
 * @author Paul Rambags
 */
public class SymbolsReplacer  {
    private static final QName _addleadingspace = new QName("addleadingspace");
    private static final QName _addtrailingspace = new QName("addtrailingspace");
    private static final QName _braille = new QName("braille");
    private static final QName _char = new QName("char");
    private static final QName _close = new QName("close");
    private static final QName _context = new QName("context");
    private static final QName _description = new QName("description");
    private static final QName _language = new QName("language");
    private static final QName _open = new QName("open");
    private static final QName _removeleadingspace = new QName("removeleadingspace");
    private static final QName _removetrailingspace = new QName("removetrailingspace");
    private static final QName _replace = new QName("replace");
    private static final QName _symbol = new QName("symbol");
    private static final QName _symbols = new QName("symbols");

    // character in the source text that, when present, ends the span that is
    // enclosed between a replace's braille "open" and "close" strings
    private static final char CLOSE_MARKER = 'û';

    // key (an entirely uppercase or entirely lowercase symbol string) -> symbol
    private final Map<String, Symbol> symbolsMap;
    // the distinct key lengths of symbolsMap, longest first
    private final Integer[] symbolLengthsDescending;
    // replaces encountered so far that must appear in the symbols list
    private final Set<Replace> symbolsListReplaces = new HashSet<>();
    
    /**
     * Constructor
     * 
     * @param symbolsCodeNode root document of the symbols code XML
     */
    public SymbolsReplacer(XdmNode symbolsCodeNode) {
        this.symbolsMap = filterSymbols(symbolsCodeNode);
        this.symbolLengthsDescending = determineSymbolLengths(this.symbolsMap);
    }
    
    /**
     * Generate a Symbols HashMap
     * <p>
     * Walks the {@code symbols/symbol/replace} children of the given node.
     * A symbol is only added when its {@code char} value is not blank and it
     * has at least one valid replace. When two symbols yield the same key,
     * the later one overwrites the earlier one.
     * 
     * @param symbolsCodeNode SymbolsCode root document of the symbols code file
     * @return map from symbol key (see {@link #entirelyUppercaseOrLowercase(String)}) to Symbol
     */
    private static Map<String, Symbol> filterSymbols(XdmNode symbolsCodeNode) {
        Map<String, Symbol> symbolsMap = new HashMap<>();

        // NOTE(review): the Saxon iterators are left raw with explicit casts, exactly as in
        // the original code; parameterise them if the Saxon version in use declares
        // XdmSequenceIterator as a generic type.
        XdmSequenceIterator symbolsIterator = symbolsCodeNode.axisIterator(Axis.CHILD, _symbols);
        while (symbolsIterator.hasNext()) {
            XdmNode symbolsNode = (XdmNode)symbolsIterator.next();
            XdmSequenceIterator symbolIterator = symbolsNode.axisIterator(Axis.CHILD, _symbol);
            while (symbolIterator.hasNext()) {

                XdmNode symbolNode = (XdmNode)symbolIterator.next();
                String character = Utils.getValue(symbolNode, _char);
                String language = Utils.getValue(symbolNode, _language);

                List<Replace> replaces = new ArrayList<>();
                XdmSequenceIterator replaceIterator = symbolNode.axisIterator(Axis.CHILD, _replace);
                while (replaceIterator.hasNext()) {
                    Replace replace = parseReplace((XdmNode)replaceIterator.next());
                    if (replace != null) {
                        replaces.add(replace);
                    }
                }

                if (StringUtils.isNotBlank(character) && !replaces.isEmpty()) {

                    Symbol symbol = new Symbol();
                    symbol.setCharacter(character);
                    symbol.setLanguage(language);
                    symbol.setReplaces(replaces);
                    replaces.forEach(r -> r.setParent(symbol));
                    symbolsMap.put(entirelyUppercaseOrLowercase(character), symbol);
                }
            }
        }
        return symbolsMap;
    }

    /**
     * Build a Replace from a {@code replace} element of the symbols code.
     * <p>
     * The four space-handling flags are true when the corresponding attribute
     * of the {@code braille} child has a non-blank value. The braille string
     * is stored after conversion with {@code DediconBrl.convert}.
     * 
     * @param replaceNode {@code replace} element
     * @return the Replace, or null when the context is unknown or the braille value is blank
     */
    private static Replace parseReplace(XdmNode replaceNode) {
        Context context = Context.get(replaceNode.getAttributeValue(_context));
        String braille = Utils.getValue(replaceNode, _braille);
        if (context == null || StringUtils.isBlank(braille)) {
            return null;
        }

        // A replace without a description is kept: it is still used for the book text
        // and is only left out of the symbols list.
        // NOTE(review): assumes Utils.getChildNode returns null for a missing child - confirm.
        XdmNode descriptionNode = Utils.getChildNode(replaceNode, _description);
        String description = descriptionNode == null ? null : descriptionNode.getStringValue();

        XdmNode brailleNode = Utils.getChildNode(replaceNode, _braille);

        Replace replace = new Replace();
        replace.setContext(context);
        replace.setBraille(DediconBrl.convert(braille));
        replace.setBrailleAddLeadingSpace(StringUtils.isNotBlank(brailleNode.getAttributeValue(_addleadingspace)));
        replace.setBrailleRemoveLeadingSpace(StringUtils.isNotBlank(brailleNode.getAttributeValue(_removeleadingspace)));
        replace.setBrailleAddTrailingSpace(StringUtils.isNotBlank(brailleNode.getAttributeValue(_addtrailingspace)));
        replace.setBrailleRemoveTrailingSpace(StringUtils.isNotBlank(brailleNode.getAttributeValue(_removetrailingspace)));
        replace.setBrailleOpen(brailleNode.getAttributeValue(_open));
        replace.setBrailleClose(brailleNode.getAttributeValue(_close));
        replace.setDescription(description);
        return replace;
    }

    /**
     * Determine the different symbol lengths in descending order
     * 
     * @param symbolsMap symbols map
     * @return the distinct key lengths, in descending order (empty for an empty map)
     */
    private static Integer[] determineSymbolLengths(Map<String, Symbol> symbolsMap) {
        return symbolsMap.keySet()
                .stream()
                .map(String::length)
                .distinct()
                // sort descending
                .sorted((i, j) -> j.compareTo(i))
                .toArray(Integer[]::new);
    }

    /**
     * Get the set of replaces for the symbol list of this DTBook
     * 
     * @return the live set of replaces collected so far (not a copy)
     */
    protected Set<Replace> getSymbolsListReplaces() {
        return symbolsListReplaces;
    }

    /**
     * Replace all symbols in a string with their braille representation.
     * <p>
     * The string is scanned from left to right. At every position outside a
     * number, the longest matching symbol is replaced; scanning then continues
     * after the inserted braille. Every replace that is applied and has a
     * non-blank description is recorded for the symbols list.
     * 
     * @param source text to process, may be null
     * @param context context of the text; when a symbol has no replace for
     *                this context, its replace for the default context is used
     * @return the text with symbols replaced, or null when source is null
     */
    private String replace(final String source, final Context context) {
        if (source == null) {
            return null;
        }
        
        NumericMode numericMode = no;
        
        String target = source;
        int index = 0;
        while (index < target.length()) {
            
            numericMode = nextNumericMode(numericMode, target.charAt(index));
            
            // do not replace symbols in numeric mode
            if (numericMode == no) {
                // longest symbols first, so that the longest match wins
                for (int symbolLength : symbolLengthsDescending) {
                    if (index + symbolLength > target.length()) {
                        continue;
                    }
                    String substring = target.substring(index, index + symbolLength);
                    Replace replaceBook = determineReplaceWithFallback(substring, context, ReplaceTarget.book);
                    if (replaceBook == null) {
                        continue;
                    }

                    //
                    // Replace found
                    //

                    Replace replaceSymbolsList = determineReplaceWithFallback(substring, context, ReplaceTarget.symbolsList);
                    if (replaceSymbolsList != null && StringUtils.isNotBlank(replaceSymbolsList.getDescription())) {
                        symbolsListReplaces.add(replaceSymbolsList);
                    }

                    String before = target.substring(0, index);
                    String braille = replaceBook.getBraille();
                    String after = target.substring(index + symbolLength);

                    if (StringUtils.isNotBlank(replaceBook.getBrailleOpen()) || StringUtils.isNotBlank(replaceBook.getBrailleClose())) {
                        after = encloseFollowingText(replaceBook, after);
                    }
                    // index is kept equal to the length of 'before' (plus an added trailing
                    // space), so that the final adjustment below lands just behind the braille
                    if (replaceBook.getBrailleAddLeadingSpace() && !endsWithBlank(before)) {
                        before = before.concat(" ");
                        index ++;
                    }
                    if (replaceBook.getBrailleRemoveLeadingSpace()) {
                        while (endsWithBlank(before)) {
                            before = StringUtils.chop(before);
                            index --;
                        }
                    }
                    if (replaceBook.getBrailleAddTrailingSpace() && !startsWithBlank(after)) {
                        after = " ".concat(after);
                        index ++;
                    }
                    if (replaceBook.getBrailleRemoveTrailingSpace()) {
                        while (startsWithBlank(after)) {
                            after = after.substring(1);
                        }
                    }

                    target = StringUtils.join(before, braille, after);

                    // together with the index++ below: continue right after the inserted braille
                    index += braille.length() - 1;
                    break;
                }
            }
            index++;
        }

        return target;
    }

    /**
     * Advance the numeric mode state machine by one character.
     * <p>
     * A digit always switches to {@code yes}. Otherwise: from {@code yes} a
     * decimal separator leads to {@code decimalSign}; from {@code decimalSign}
     * and {@code moneyZeros} a money-zeros character leads to (or stays in)
     * {@code moneyZeros}; any other character ends the number ({@code no}).
     * 
     * @param current numeric mode before the character
     * @param c character at the current position
     * @return numeric mode including the character
     */
    private static NumericMode nextNumericMode(NumericMode current, char c) {
        if (Utils.isDigit(c)) {
            return yes;
        }
        switch (current) {
            case yes:
                return Utils.isDecimalSeparator(c) ? decimalSign : no;
            case decimalSign:
            case moneyZeros:
                return Utils.isMoneyZeros(c) ? moneyZeros : no;
            case no:
            default:
                return current;
        }
    }

    /**
     * Enclose the text that follows a symbol between the braille open and
     * close strings of its replace.
     * <p>
     * When the text starts with a blank and contains the close marker beyond
     * its second character, the leading blank and the marker are dropped and
     * the text between them is enclosed. Otherwise all leading blanks are
     * dropped and the text up to the first blank (or the end) is enclosed.
     * 
     * @param replaceBook replace that supplies the open and close strings
     * @param after text following the symbol
     * @return the following text with the open and close strings inserted
     */
    private static String encloseFollowingText(Replace replaceBook, String after) {
        String open = replaceBook.getBrailleOpen();
        String close = replaceBook.getBrailleClose();

        int markerIndex = StringUtils.indexOf(after, CLOSE_MARKER);
        if (startsWithBlank(after) && markerIndex > 1) {
            // replace leading white space and replace the close marker
            return StringUtils.join(
                    open,
                    after.substring(1, markerIndex),
                    close,
                    after.substring(markerIndex + 1)
            );
        }

        // remove leading white space and find first white space after that
        String remainder = after;
        while (startsWithBlank(remainder)) {
            remainder = remainder.substring(1);
        }
        int endIndex = 0;
        while (endIndex < remainder.length() && StringUtils.isNotBlank(remainder.substring(endIndex, endIndex + 1))) {
            endIndex ++;
        }
        return StringUtils.join(
                open,
                remainder.substring(0, endIndex),
                close,
                remainder.substring(endIndex)
        );
    }

    /**
     * Determine the replace for a substring in the given context, falling
     * back to the default context when the given context has no replace.
     * 
     * @param substring candidate symbol text
     * @param context preferred context
     * @param replaceTarget where the replace is going to be used
     * @return the replace, or null when there is none
     */
    private Replace determineReplaceWithFallback(String substring, Context context, ReplaceTarget replaceTarget) {
        Replace replace = determineReplace(substring, context, replaceTarget);
        if (replace == null && context != Context.Default) {
            replace = determineReplace(substring, Context.Default, replaceTarget);
        }
        return replace;
    }

    /**
     * Determine the replace for a substring in exactly the given context.
     * <p>
     * Mixed-case text is looked up by its lowercase form. For uppercase text
     * the book prefers the uppercase symbol and falls back to the lowercase
     * one, whereas the symbols list prefers the lowercase symbol and falls
     * back to the uppercase one.
     * 
     * @param substring candidate symbol text
     * @param context context the replace must have
     * @param replaceTarget where the replace is going to be used
     * @return the first replace of the symbol with that context, or null
     */
    private Replace determineReplace(String substring, Context context, ReplaceTarget replaceTarget) {
        String key = entirelyUppercaseOrLowercase(substring);
        Symbol symbol = null;
        switch (replaceTarget) {
            case book:
                symbol = symbolsMap.get(key);
                if (symbol == null && isUpperCase(key)) {
                    symbol = symbolsMap.get(key.toLowerCase(Locale.ROOT));
                }
                break;
            case symbolsList:
                if (isUpperCase(key)) {
                    symbol = symbolsMap.get(key.toLowerCase(Locale.ROOT));
                }
                if (symbol == null) {
                    symbol = symbolsMap.get(key);
                }
                break;
        }
        if (symbol == null) {
            return null;
        }
        return symbol.getReplaces()
                .stream()
                .filter(r -> r.getContext() == context)
                .findFirst()
                .orElse(null)
                ;
    }
    
    /**
     * Recursively replace all symbols in text nodes with their braille representation
     * 
     * @param node DTBook node
     */
    public void replaceSymbols(Node node) {
        if (node.getNodeType() == Node.TEXT_NODE) {
            String text = node.getTextContent();
            if (text != null && !text.isEmpty()) {
                Context context = determineContext(node);
                String replacement = replace(text, context);
                node.setTextContent(replacement);
            }
        } else {
            for (Node childNode = node.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
                // recursion
                replaceSymbols(childNode);
            }
        }
    }
    
    /**
     * Determine the context of a text node from its parent element.
     * <p>
     * The context is Formula when the parent is an element whose
     * {@code class} attribute equals {@code dummy-formula}; in every other
     * case, including a missing or non-element parent, it is Default.
     * 
     * @param textNode text node
     * @return context of the text node
     */
    // @todo Code (NOTE(review): presumably a Code context still has to be detected here)
    private Context determineContext(Node textNode) {
        Node parentNode = textNode.getParentNode();
        // a text node's parent is not necessarily an element (e.g. a document fragment)
        if (parentNode instanceof Element) {
            String attributeValue = ((Element)parentNode).getAttribute("class");
            if ("dummy-formula".equals(attributeValue)) {
                return Context.Formula;
            }
        }
        return Context.Default;
    }

    /**
     * Inserts the symbols list in a DTBook
     * <p>
     * Symbols in the header are replaced first, so replaces found in the
     * header are part of the list as well. Nothing is inserted when the
     * document element has no {@code book} child or when no replaces have
     * been collected. Otherwise a {@code level1} element with class
     * {@code symbols_list} is added to the {@code frontmatter} (which is
     * created when missing), holding an empty {@code pagenum}, an {@code h1}
     * with the header when that is not blank, and a {@code list} with one
     * {@code li} per collected replace, ordered by the character of the
     * replace's symbol.
     * 
     * @param document DTBook
     * @param header symbols list header
     */
    public void insertSymbolsList(Document document, String header) {
        // the header can contain symbols, too
        String headerWithSymbolsReplaced = replace(header, Context.Default);

        Element dtbook = document.getDocumentElement();
        Node book = Utils.getChild(dtbook, "book");

        if (book == null || getSymbolsListReplaces().isEmpty()) {
            return;
        }
        
        Node frontMatter = Utils.getChild(book, "frontmatter");
        if (frontMatter == null) {
            frontMatter = Utils.addChild(book, "frontmatter");
        }
        Element level1 = Utils.addChildBefore(frontMatter, frontMatter.getFirstChild(), "level1");
        level1.setAttribute("class", "symbols_list");
        
        // empty page number
        Element pagenum = Utils.addChild(level1, "pagenum");
        pagenum.setAttribute("id", "page-symbolslist");

        if (StringUtils.isNotBlank(headerWithSymbolsReplaced)) {
            Element h1 = Utils.addChild(level1, "h1");
            h1.setTextContent(headerWithSymbolsReplaced);
        }
        
        Element list = Utils.addChild(level1, "list");
        list.setAttribute("type", "pl");
        
        getSymbolsListReplaces().stream()
                .sorted((r1, r2) -> r1.getParent().getCharacter().compareTo(r2.getParent().getCharacter()))
                .forEachOrdered(r -> {
                    String text = String.format("\u283F%s\u00A0 %s", r.getBraille(), r.getDescription());
                    Element li = Utils.addChild(list, "li");
                    li.setTextContent(text);
                });
    }
    
    // true when s is not empty and its first character is blank
    private static boolean startsWithBlank(String s) {
        return StringUtils.length(s) > 0 && StringUtils.isBlank(StringUtils.left(s, 1));
    }

    // true when s is not empty and its last character is blank
    private static boolean endsWithBlank(String s) {
        return StringUtils.length(s) > 0 && StringUtils.isBlank(StringUtils.right(s, 1));
    }
    
    // a key in the symbolsMap must be entirely uppercase or lowercase:
    // a string that is already entirely uppercase is kept, anything else is lowercased.
    // Locale.ROOT keeps the keys independent of the default locale of the JVM.
    private static String entirelyUppercaseOrLowercase(String symbol) {
        if (symbol == null) {
            return null;
        }
        return symbol.toUpperCase(Locale.ROOT).equals(symbol) ? symbol : symbol.toLowerCase(Locale.ROOT);
    }
    
    // determine for an entirely uppercase or lowercase string, whether it is in uppercase
    // (a string without cased characters counts as lowercase)
    private static boolean isUpperCase(String entirelyUpperCaseOrLowercaseString) {
        if (entirelyUpperCaseOrLowercaseString == null) {
            return false;
        }
        return !entirelyUpperCaseOrLowercaseString.toLowerCase(Locale.ROOT).equals(entirelyUpperCaseOrLowercaseString);
    }
    
    // where a replace is going to be used: in the book text or in the symbols list
    private enum ReplaceTarget {
        book,
        symbolsList
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy