// Please wait. This can take a few minutes ...
// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
// Project price: only $1
// You can buy this project and download/modify it as often as you want.
// nl.dedicon.pipeline.braille.step.SymbolsReplacer Maven / Gradle / Ivy
package nl.dedicon.pipeline.braille.step;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmSequenceIterator;
import nl.dedicon.pipeline.braille.model.Context;
import nl.dedicon.pipeline.braille.model.Replace;
import nl.dedicon.pipeline.braille.model.Symbol;
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import static nl.dedicon.pipeline.braille.step.NumericMode.decimalSign;
import static nl.dedicon.pipeline.braille.step.NumericMode.moneyZeros;
import static nl.dedicon.pipeline.braille.step.NumericMode.no;
import static nl.dedicon.pipeline.braille.step.NumericMode.yes;
/**
 * Replaces symbols in the text nodes of a DTBook with their braille representation
 * and inserts a symbols list.
 * Based on W3C DOM.
 *
 * <p>Instances are stateful: every replacement that has a non-blank description is
 * remembered, so that {@link #insertSymbolsList(Document, String)} can list it afterwards.
 *
 * @author Paul Rambags
 */
public class SymbolsReplacer {

    // Names used to read the symbols code XML
    private static final QName _addleadingspace = new QName("addleadingspace");
    private static final QName _addtrailingspace = new QName("addtrailingspace");
    private static final QName _braille = new QName("braille");
    private static final QName _char = new QName("char");
    private static final QName _close = new QName("close");
    private static final QName _context = new QName("context");
    private static final QName _description = new QName("description");
    private static final QName _language = new QName("language");
    private static final QName _open = new QName("open");
    private static final QName _removeleadingspace = new QName("removeleadingspace");
    private static final QName _removetrailingspace = new QName("removetrailingspace");
    private static final QName _replace = new QName("replace");
    private static final QName _symbol = new QName("symbol");
    private static final QName _symbols = new QName("symbols");

    /** Symbols keyed by their character sequence, normalized to entirely uppercase or lowercase. */
    private final Map<String, Symbol> symbolsMap;

    /** The distinct key lengths of {@link #symbolsMap}, longest first, so longer symbols are tried first. */
    private final Integer[] symbolLengthsDescending;

    /** Replaces with a non-blank description that were encountered while replacing. */
    private final Set<Replace> symbolsListReplaces = new HashSet<>();

    /**
     * Constructor
     *
     * @param symbolsCodeNode root document of the symbols code XML
     */
    public SymbolsReplacer(XdmNode symbolsCodeNode) {
        this.symbolsMap = filterSymbols(symbolsCodeNode);
        this.symbolLengthsDescending = determineSymbolLengths(this.symbolsMap);
    }

    /**
     * Generate a Symbols HashMap.
     *
     * <p>A symbol is only included when its character is not blank and it has at least
     * one usable replace (see {@link #parseReplace(XdmNode)}).
     *
     * @param symbolsCodeNode root document of the symbols code file
     * @return normalized character sequence -> Symbol map
     */
    private static Map<String, Symbol> filterSymbols(XdmNode symbolsCodeNode) {
        Map<String, Symbol> symbolsMap = new HashMap<>();
        XdmSequenceIterator symbolsIterator = symbolsCodeNode.axisIterator(Axis.CHILD, _symbols);
        while (symbolsIterator.hasNext()) {
            XdmNode symbolsNode = (XdmNode) symbolsIterator.next();
            XdmSequenceIterator symbolIterator = symbolsNode.axisIterator(Axis.CHILD, _symbol);
            while (symbolIterator.hasNext()) {
                XdmNode symbolNode = (XdmNode) symbolIterator.next();
                String character = Utils.getValue(symbolNode, _char);
                String language = Utils.getValue(symbolNode, _language);

                List<Replace> replaces = new ArrayList<>();
                XdmSequenceIterator replaceIterator = symbolNode.axisIterator(Axis.CHILD, _replace);
                while (replaceIterator.hasNext()) {
                    Replace replace = parseReplace((XdmNode) replaceIterator.next());
                    if (replace != null) {
                        replaces.add(replace);
                    }
                }

                if (StringUtils.isNotBlank(character) && !replaces.isEmpty()) {
                    Symbol symbol = new Symbol();
                    symbol.setCharacter(character);
                    symbol.setLanguage(language);
                    symbol.setReplaces(replaces);
                    for (Replace replace : replaces) {
                        replace.setParent(symbol);
                    }
                    symbolsMap.put(entirelyUppercaseOrLowercase(character), symbol);
                }
            }
        }
        return symbolsMap;
    }

    /**
     * Build a Replace from a replace node of the symbols code XML.
     *
     * @param replaceNode replace node
     * @return the Replace, or null when the context is not recognized or the braille is blank
     */
    private static Replace parseReplace(XdmNode replaceNode) {
        Context context = Context.get(replaceNode.getAttributeValue(_context));
        String braille = Utils.getValue(replaceNode, _braille);
        if (context == null || StringUtils.isBlank(braille)) {
            return null;
        }

        // The description is only looked up for usable replaces, and a missing description
        // child is tolerated; replaces without a description are never put in the symbols list.
        XdmNode descriptionNode = Utils.getChildNode(replaceNode, _description);
        String description = descriptionNode == null ? null : descriptionNode.getStringValue();

        XdmNode brailleNode = Utils.getChildNode(replaceNode, _braille);
        Replace replace = new Replace();
        replace.setContext(context);
        replace.setBraille(DediconBrl.convert(braille));
        // the spacing flags are set by any non-blank attribute value
        replace.setBrailleAddLeadingSpace(StringUtils.isNotBlank(brailleNode.getAttributeValue(_addleadingspace)));
        replace.setBrailleRemoveLeadingSpace(StringUtils.isNotBlank(brailleNode.getAttributeValue(_removeleadingspace)));
        replace.setBrailleAddTrailingSpace(StringUtils.isNotBlank(brailleNode.getAttributeValue(_addtrailingspace)));
        replace.setBrailleRemoveTrailingSpace(StringUtils.isNotBlank(brailleNode.getAttributeValue(_removetrailingspace)));
        replace.setBrailleOpen(brailleNode.getAttributeValue(_open));
        replace.setBrailleClose(brailleNode.getAttributeValue(_close));
        replace.setDescription(description);
        return replace;
    }

    /**
     * Determine the different symbol lengths in descending order
     *
     * @param symbolsMap symbols map
     * @return distinct key lengths in descending order
     */
    private static Integer[] determineSymbolLengths(Map<String, Symbol> symbolsMap) {
        return symbolsMap.keySet()
                .stream()
                .map(String::length)
                .distinct()
                .sorted(Comparator.reverseOrder())
                .toArray(Integer[]::new);
    }

    /**
     * Get the set of replaces for the symbol list of this DTBook
     *
     * @return the set of replaces collected so far (the internal set itself, not a copy)
     */
    protected Set<Replace> getSymbolsListReplaces() {
        return symbolsListReplaces;
    }

    /**
     * Replace all symbols in a text with their braille representation.
     *
     * <p>The text is scanned from left to right. Symbols are not replaced while the scan is
     * in numeric mode. At every other position the longest matching symbol wins.
     * Replaces with a non-blank description are added to the symbols list replaces.
     *
     * @param source text, may be null
     * @param context context of the text; the default context is the fallback
     * @return text with the symbols replaced, or null when source is null
     */
    private String replace(final String source, final Context context) {
        if (source == null) {
            return null;
        }
        NumericMode numericMode = no;
        String target = source;
        int index = 0;
        while (index < target.length()) {
            numericMode = nextNumericMode(numericMode, target.charAt(index));

            // do not replace symbols in numeric mode
            if (numericMode == no) {
                for (Integer symbolLength : symbolLengthsDescending) {
                    if (index + symbolLength > target.length()) {
                        continue;
                    }
                    String substring = target.substring(index, index + symbolLength);
                    Replace replaceBook = findReplace(substring, context, ReplaceTarget.book);
                    if (replaceBook == null) {
                        continue;
                    }

                    //
                    // Replace found
                    //
                    Replace replaceSymbolsList = findReplace(substring, context, ReplaceTarget.symbolsList);
                    if (replaceSymbolsList != null && StringUtils.isNotBlank(replaceSymbolsList.getDescription())) {
                        symbolsListReplaces.add(replaceSymbolsList);
                    }

                    String before = target.substring(0, index);
                    String braille = replaceBook.getBraille();
                    String after = target.substring(index + symbolLength);

                    if (StringUtils.isNotBlank(replaceBook.getBrailleOpen()) || StringUtils.isNotBlank(replaceBook.getBrailleClose())) {
                        after = wrapFollowingText(replaceBook, after);
                    }

                    // index is kept equal to the position where the braille will start
                    if (replaceBook.getBrailleAddLeadingSpace() && !endsWithBlank(before)) {
                        before = before.concat(" ");
                        index++;
                    }
                    if (replaceBook.getBrailleRemoveLeadingSpace()) {
                        while (endsWithBlank(before)) {
                            before = StringUtils.chop(before);
                            index--;
                        }
                    }
                    if (replaceBook.getBrailleAddTrailingSpace() && !startsWithBlank(after)) {
                        after = " ".concat(after);
                        // also step over the inserted space
                        index++;
                    }
                    if (replaceBook.getBrailleRemoveTrailingSpace()) {
                        while (startsWithBlank(after)) {
                            after = after.substring(1);
                        }
                    }

                    target = StringUtils.join(before, braille, after);
                    // together with the increment below, the scan continues right after the braille
                    index += braille.length() - 1;
                    break;
                }
            }
            index++;
        }
        return target;
    }

    /**
     * Determine the numeric mode after reading a character.
     *
     * @param current numeric mode before the character
     * @param c character
     * @return numeric mode after the character
     */
    private static NumericMode nextNumericMode(NumericMode current, char c) {
        if (Utils.isDigit(c)) {
            return yes;
        }
        switch (current) {
            case yes:
                return Utils.isDecimalSeparator(c) ? decimalSign : no;
            case decimalSign:
            case moneyZeros:
                return Utils.isMoneyZeros(c) ? moneyZeros : no;
            case no:
            default:
                return current;
        }
    }

    /**
     * Find the replace of a symbol in the given context, falling back to the default context.
     *
     * @param substring possible symbol
     * @param context preferred context
     * @param replaceTarget where the replace will be used
     * @return the replace, or null if there is none
     */
    private Replace findReplace(String substring, Context context, ReplaceTarget replaceTarget) {
        Replace replace = determineReplace(substring, context, replaceTarget);
        if (replace == null && context != Context.Default) {
            replace = determineReplace(substring, Context.Default, replaceTarget);
        }
        return replace;
    }

    /**
     * Put the braille open and close strings around the text that follows a symbol.
     *
     * @param replaceBook replace with a braille open and/or close string
     * @param after text that follows the symbol
     * @return the following text with the open and close strings inserted
     */
    private static String wrapFollowingText(Replace replaceBook, String after) {
        int endIndex = StringUtils.indexOf(after, 'û');
        if (startsWithBlank(after) && endIndex > 1) {
            // replace leading white space and replace û
            return StringUtils.join(
                    replaceBook.getBrailleOpen(),
                    after.substring(1, endIndex),
                    replaceBook.getBrailleClose(),
                    after.substring(endIndex + 1)
            );
        }
        // remove leading white space and find the first white space after that
        String remainder = after;
        while (startsWithBlank(remainder)) {
            remainder = remainder.substring(1);
        }
        int wordEnd = 0;
        while (wordEnd < remainder.length() && StringUtils.isNotBlank(remainder.substring(wordEnd, wordEnd + 1))) {
            wordEnd++;
        }
        return StringUtils.join(
                replaceBook.getBrailleOpen(),
                remainder.substring(0, wordEnd),
                replaceBook.getBrailleClose(),
                remainder.substring(wordEnd)
        );
    }

    /**
     * Determine the replace of a possible symbol in exactly the given context.
     *
     * <p>For an uppercase symbol both the uppercase and the lowercase entry of the symbols map
     * are candidates: the book prefers the uppercase entry, the symbols list the lowercase entry.
     *
     * @param substring possible symbol
     * @param context context
     * @param replaceTarget where the replace will be used
     * @return the first replace of the symbol with the given context, or null if there is none
     */
    private Replace determineReplace(String substring, Context context, ReplaceTarget replaceTarget) {
        String key = entirelyUppercaseOrLowercase(substring);
        Symbol symbol;
        if (isUpperCase(key)) {
            String lowercaseKey = key.toLowerCase(Locale.ROOT);
            boolean forBook = replaceTarget == ReplaceTarget.book;
            symbol = symbolsMap.get(forBook ? key : lowercaseKey);
            if (symbol == null) {
                symbol = symbolsMap.get(forBook ? lowercaseKey : key);
            }
        } else {
            symbol = symbolsMap.get(key);
        }
        if (symbol == null) {
            return null;
        }
        return symbol.getReplaces()
                .stream()
                .filter(r -> r.getContext() == context)
                .findFirst()
                .orElse(null);
    }

    /**
     * Recursively replace all symbols in text nodes with their braille representation
     *
     * @param node DTBook node
     */
    public void replaceSymbols(Node node) {
        if (node.getNodeType() == Node.TEXT_NODE) {
            String text = node.getTextContent();
            if (text != null && !text.isEmpty()) {
                Context context = determineContext(node);
                node.setTextContent(replace(text, context));
            }
            return;
        }
        for (Node childNode = node.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            // recursion
            replaceSymbols(childNode);
        }
    }

    /**
     * Determine the context of a text node from the class attribute of its parent element.
     *
     * @param textNode text node
     * @return the formula context when the parent element has class "dummy-formula",
     *         otherwise the default context
     */
    // @todo Code
    private Context determineContext(Node textNode) {
        Node parentNode = textNode.getParentNode();
        // the parent is not necessarily an element, so check before casting
        if (parentNode instanceof Element) {
            String attributeValue = ((Element) parentNode).getAttribute("class");
            if ("dummy-formula".equals(attributeValue)) {
                return Context.Formula;
            }
        }
        return Context.Default;
    }

    /**
     * Inserts the symbols list in a DTBook.
     *
     * <p>The list is inserted as the first child of the frontmatter, which is created when it
     * does not exist. Nothing is inserted when the DTBook has no book element or when there
     * are no symbols to list.
     *
     * @param document DTBook
     * @param header symbols list header
     */
    public void insertSymbolsList(Document document, String header) {
        // the header can contain symbols, too; this is done first so that
        // those symbols are part of the symbols list as well
        String headerWithSymbolsReplaced = replace(header, Context.Default);

        Element dtbook = document.getDocumentElement();
        Node book = Utils.getChild(dtbook, "book");
        if (book == null || getSymbolsListReplaces().isEmpty()) {
            return;
        }
        Node frontMatter = Utils.getChild(book, "frontmatter");
        if (frontMatter == null) {
            frontMatter = Utils.addChild(book, "frontmatter");
        }
        Element level1 = Utils.addChildBefore(frontMatter, frontMatter.getFirstChild(), "level1");
        level1.setAttribute("class", "symbols_list");

        // empty page number
        Element pagenum = Utils.addChild(level1, "pagenum");
        pagenum.setAttribute("id", "page-symbolslist");

        if (StringUtils.isNotBlank(headerWithSymbolsReplaced)) {
            Element h1 = Utils.addChild(level1, "h1");
            h1.setTextContent(headerWithSymbolsReplaced);
        }

        Element list = Utils.addChild(level1, "list");
        list.setAttribute("type", "pl");
        // one list item per replace, ordered by the character of the symbol
        getSymbolsListReplaces().stream()
                .sorted(Comparator.comparing((Replace r) -> r.getParent().getCharacter()))
                .forEachOrdered(r -> {
                    String text = String.format("\u283F%s\u00A0 %s", r.getBraille(), r.getDescription());
                    Element li = Utils.addChild(list, "li");
                    li.setTextContent(text);
                });
    }

    // whether the first character of a string is white space
    private static boolean startsWithBlank(String s) {
        return StringUtils.length(s) > 0 && StringUtils.isBlank(StringUtils.left(s, 1));
    }

    // whether the last character of a string is white space
    private static boolean endsWithBlank(String s) {
        return StringUtils.length(s) > 0 && StringUtils.isBlank(StringUtils.right(s, 1));
    }

    // a key in the symbolsMap must be entirely uppercase or lowercase;
    // Locale.ROOT keeps the keys independent of the default locale of the JVM
    private static String entirelyUppercaseOrLowercase(String symbol) {
        if (symbol == null) {
            return null;
        }
        return symbol.toUpperCase(Locale.ROOT).equals(symbol) ? symbol : symbol.toLowerCase(Locale.ROOT);
    }

    // determine for an entirely uppercase or lowercase string, whether it is in uppercase
    private static boolean isUpperCase(String entirelyUpperCaseOrLowercaseString) {
        if (entirelyUpperCaseOrLowercaseString == null) {
            return false;
        }
        return !entirelyUpperCaseOrLowercaseString.toLowerCase(Locale.ROOT).equals(entirelyUpperCaseOrLowercaseString);
    }

    /** Where a replace will be used: in the text of the book or in the symbols list. */
    private enum ReplaceTarget {
        book,
        symbolsList
    }
}