
cz.muni.fi.mir.mathmlcanonicalization.modules.MrowNormalizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mathml-canonicalizer Show documentation
Show all versions of mathml-canonicalizer Show documentation
MathMLCanonicalizer is able to canonicalize MathML input. Modular
architecture allows to set up canonicalization features according to
users needs.
/**
* Copyright 2013 MIR@MU Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package cz.muni.fi.mir.mathmlcanonicalization.modules;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Parent;
/**
* Normalize the number of mrow elements in MathML.
*
* Input
* Well-formed MathML, already processed by other modules (especially
* ElementMinimizer, MfencedReplacer and FunctionNormalizer)
* Output
* The original code with changes in mrow elements:
* - added mrow elements to places, where detected fenced formulae (and not
* already encapsulated in mrow)
* - removed redundant mrow elements in unneeded grouping - e.q. parents
* requiring only one child element accept any number of elements so the mrow
* tag is not needed (see example) or grouping with only presentation purpose
*
* Example
* <msqrt>
* <mrow>
* <mo>-</mo>
* <mn>1</mn>
* </mrow>
* </msqrt>
is transformed to
* <msqrt>
* <mo>-</mo>
* <mn>1</mn>
* </msqrt>
*
*
* @author Jakub Adler
*/
public class MrowNormalizer extends AbstractModule implements DOMModule {
/**
* Path to the property file with module settings.
*/
private static final String PROPERTIES_FILENAME = "MrowNormalizer.properties";
private static final Logger LOGGER = Logger.getLogger(MrowNormalizer.class.getName());
// properties
private static final String CHILD_COUNT_PREFIX = "childCount.";
private static final String OPENING = "open";
private static final String CLOSING = "close";
private static final String WRAP_ISIDE = "wrapInside";
private static final String WRAP_OUTSIDE = "wrapOutside";
public MrowNormalizer() {
loadProperties(PROPERTIES_FILENAME);
}
@Override
public void execute(final Document doc) {
if (doc == null) {
throw new NullPointerException("doc");
}
traverseChildrenElements(doc.getRootElement());
}
/**
* Recursively searches element content to possibly remove or add mrow where
* needed.
*
* @param element element to start at
*/
private void traverseChildrenElements(final Element element) {
assert element != null;
final List children = new ArrayList(element.getChildren());
for (Element child : children) {
traverseChildrenElements(child);
}
if (element.getName().equals(ROW)) {
checkRemoval(element);
} else {
checkAddition(element);
}
}
/**
* Removes a mrow element if possible.
*
* @param mrowElement the mrow element
*/
private void checkRemoval(final Element mrowElement) {
assert mrowElement != null && mrowElement.getName().equals(ROW);
final Parent parent = mrowElement.getParent();
if (!(parent instanceof Element)) {
return; // no parent element
}
final Element parentElement = (Element) parent;
final List children = mrowElement.getChildren();
if (children.size() <= 1) {
removeElement(mrowElement, parentElement);
LOGGER.log(Level.FINE, "Element {0} removed", mrowElement);
return;
}
final String childCountPropertyName = CHILD_COUNT_PREFIX + parentElement.getName();
if (!isProperty(childCountPropertyName)) {
return; // unknown parent element
}
final String childCountProperty = getProperty(childCountPropertyName);
final int childCount;
try {
childCount = Integer.parseInt(childCountProperty);
} catch (NumberFormatException e) {
LOGGER.log(Level.WARNING,
"{0} must be an integer, property ignored", childCountProperty);
return;
}
if (childCount == 1 || // parent can accept any number of elements so we can remove mrow
children.size() + parentElement.getChildren().size() - 1 == childCount) {
removeElement(mrowElement, parentElement);
}
}
private static void removeElement(final Element element, final Element parent) {
assert element != null && parent != null;
parent.addContent(parent.indexOf(element), element.cloneContent());
element.detach();
}
/**
* Test if element is an operator representing an opening or closing
* parenthesis according to properties
*
* @param element element to test
* @param propertyName name of property specifiyng opening or closing
* parentheses
* @return true if element is a parentheses according to propertyName
*/
private Boolean isParenthesis(final Element element, final String propertyName) {
assert element != null && propertyName != null && isProperty(propertyName);
if (!element.getName().equals(OPERATOR)) {
return false;
}
return getPropertySet(propertyName).contains(element.getTextNormalize());
}
/**
* Wrap previously detected fenced expressions in mrow to be same as output
* of MfencedReplacer
*
* @param siblings children of parent element
* @param fenced list of elements inside parentheses, children of parent
* element
* @param opening opening parenthesis, child of parent element
* @param closing closing parenthesis, child of parent element
*/
private void wrapFenced(final List siblings, final List fenced,
final Element opening, final Element closing) {
assert siblings != null && fenced != null && opening != null;
final Element parent = opening.getParentElement();
assert closing != null && closing.getParentElement().equals(parent);
for (Element e : fenced) {
e.detach();
}
final int openingIndex = parent.indexOf(opening);
// Element to be placed inside parentheses.
// If null, the original 'fenced' list will be used.
final Element innerElement;
if (fenced.isEmpty() || !isEnabled(WRAP_ISIDE)) {
innerElement = null; // will not wrap inside in mrow
} else if (fenced.size() == 1) {
innerElement = fenced.get(0); // no need to wrap, just one element
} else {
innerElement = new Element(ROW);
innerElement.addContent(fenced);
LOGGER.fine("Inner mrow added");
}
if (((parent.getName().equals(ROW)
&& siblings.get(0) == opening
&& siblings.get(siblings.size() - 1) == closing))
|| !isEnabled(WRAP_OUTSIDE)) {
// will not wrap outside in mrow
if (innerElement == null) {
parent.addContent(openingIndex + 1, fenced);
} else {
parent.addContent(openingIndex + 1, innerElement);
}
return;
}
// wrap outside in mrow
opening.detach();
closing.detach();
final Element outerMrowElement = new Element(ROW);
outerMrowElement.addContent(opening);
if (innerElement != null) {
outerMrowElement.addContent(innerElement);
} else {
outerMrowElement.addContent(fenced);
}
outerMrowElement.addContent(closing);
parent.addContent(openingIndex, outerMrowElement);
LOGGER.fine("Outer mrow added");
}
/**
* Add mrow if necessary
*/
private void checkAddition(final Element element) {
assert element != null;
final Parent parent = element.getParent();
if (!(parent instanceof Element)) {
return;
}
final Element parentElement = (Element) parent;
final List siblings = parentElement.getChildren();
if (isParenthesis(element, OPENING)) {
// Need to find matching closing par and register the elements between
int nesting = 0;
// list of elements inside parentheses
final List fenced = new ArrayList();
for (int i = siblings.indexOf(element) + 1; i < siblings.size(); i++) {
final Element current = siblings.get(i);
if (isParenthesis(current, OPENING)) {
nesting++; // opening parenthase reached
} else if (isParenthesis(current, CLOSING)) { // closing parenthase reached
if (nesting == 0) {
// matching closing parenthase
wrapFenced(siblings, fenced, element, current);
break;
} else {
nesting--;
}
}
fenced.add(current);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy