
cz.muni.fi.mir.mathmlcanonicalization.modules.ScriptNormalizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mathml-canonicalizer Show documentation
Show all versions of mathml-canonicalizer Show documentation
MathMLCanonicalizer canonicalizes MathML input. Its modular
architecture allows canonicalization features to be set up according to
the user's needs.
/**
* Copyright 2013 MIR@MU Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package cz.muni.fi.mir.mathmlcanonicalization.modules;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.filter.ElementFilter;
/**
 * Handle sub/super/under/over/multi script elements in MathML.
 *
 * <p>
 * Normalize the occurrence of {@code <msub>}, {@code <msup>},
 * {@code <msubsup>}, {@code <munder>}, {@code <mover>},
 * {@code <munderover>} and {@code <mmultiscripts>} (with children
 * {@code <mprescripts/>} and {@code <none/>}) elements in MathML.
 * </p>
 * <p><b>Input:</b> Well-formed MathML</p>
 * <p><b>Output:</b> The original code, in which the following are always used:</p>
 * <ul>
 * <li>{@code <msubsup>} (or {@code <msub>}) for sums, integrals, etc.
 * (converted from {@code <munderover>}, {@code <munder>} and
 * {@code <msub>}, {@code <msup>} combinations)</li>
 * <li>{@code <msub>} inside {@code <msup>} in nested formulae</li>
 * <li>nested {@code <msub>} and {@code <msup>} instead of
 * {@code <msubsup>} in identifiers (not for sums, integrals, etc.)</li>
 * <li>Unicode scripts converted to MathML scripts</li>
 * <li>(sub/super)scripts instead of {@code <mmultiscripts>} where
 * possible</li>
 * <li>maybe convert all (under/over)scripts to (sub/super)scripts?</li>
 * </ul>
 *
 * @author Jaroslav Dufek
 * @author David Formanek
 */
public class ScriptNormalizer extends AbstractModule implements DOMModule {
/**
* Path to the property file with module settings.
*/
private static final String PROPERTIES_FILENAME = "ScriptNormalizer.properties";
private static final Logger LOGGER = Logger.getLogger(ScriptNormalizer.class.getName());
// properties key names
private static final String SWAP_SCRIPTS = "swapscripts";
private static final String SPLIT_SCRIPTS_ELEMENTS = "splitscriptselements";
private static final String UNIFY_SCRIPTS = "unifyscripts";
public ScriptNormalizer() {
loadProperties(PROPERTIES_FILENAME);
}
@Override
public void execute(final Document doc) {
if (doc == null) {
throw new NullPointerException("doc");
}
final Element root = doc.getRootElement();
if (isEnabled(UNIFY_SCRIPTS)) {
final Map replaceMap = new HashMap();
replaceMap.put(UNDERSCRIPT, SUBSCRIPT);
replaceMap.put(OVERSCRIPT, SUPERSCRIPT);
replaceMap.put(UNDEROVER, SUBSUP);
replaceDescendants(root, replaceMap);
} else {
// TODO: normalize unconverted munder/mover/munderover
}
// TODO: convert multiscript where possible
if (isEnabled(SWAP_SCRIPTS)) {
normalizeSupInSub(root);
}
Collection chosenElements = getPropertySet(SPLIT_SCRIPTS_ELEMENTS);
if (chosenElements.isEmpty()) {
LOGGER.fine("Msubsup conversion is switched off");
} else {
normalizeMsubsup(root, chosenElements);
}
// TODO: convert sub/sup combination with not chosen elements to subsup
}
private void normalizeSupInSub(final Element element) {
assert element != null;
final List children = element.getChildren();
for (int i = 0; i < children.size(); i++) {
final Element actual = children.get(i);
normalizeSupInSub(actual);
if (!actual.getName().equals(SUBSCRIPT)) {
continue;
}
List subscriptChildren = actual.getChildren();
if (subscriptChildren.size() != 2) {
LOGGER.info("Invalid msub, skipped");
continue;
}
if (!subscriptChildren.get(0).getName().equals(SUPERSCRIPT)) {
continue;
}
final List superscriptChildren = subscriptChildren.get(0).getChildren();
if (superscriptChildren.size() != 2) {
LOGGER.info("Invalid msup, skipped");
continue;
}
final Element newMsub = new Element(SUBSCRIPT);
newMsub.addContent(superscriptChildren.get(0).detach());
newMsub.addContent(subscriptChildren.get(1).detach());
final Element newMsup = new Element(SUPERSCRIPT);
newMsup.addContent(newMsub);
newMsup.addContent(superscriptChildren.get(0).detach());
children.set(i, newMsup);
LOGGER.fine("Sub/sup scripts swapped");
}
}
private void normalizeMsubsup(final Element element, Collection firstChildren) {
assert element != null && firstChildren != null;
final List children = element.getChildren();
for (int i = 0; i < children.size(); i++) {
final Element actual = children.get(i);
if (actual.getName().equals(SUBSUP)) {
final List actualChildren = actual.getChildren();
if (actualChildren.size() != 3) {
LOGGER.info("Invalid msubsup, skipped");
continue;
}
if (!firstChildren.contains(actualChildren.get(0).getName())) {
continue;
}
final Element newMsub = new Element(SUBSCRIPT);
newMsub.addContent(actualChildren.get(0).detach());
newMsub.addContent(actualChildren.get(0).detach());
final Element newMsup = new Element(SUPERSCRIPT);
newMsup.addContent(newMsub);
newMsup.addContent(actualChildren.get(0).detach());
children.set(i, newMsup);
i--; // move back to check the children of the new transformation
LOGGER.fine("Msubsup converted to nested msub and msup");
} else {
normalizeMsubsup(actual, firstChildren);
}
}
}
private void replaceDescendants(final Element ancestor, final Map map) {
assert ancestor != null && map != null;
final List toReplace = new ArrayList();
for (Element element : ancestor.getDescendants(new ElementFilter())) {
if (map.containsKey(element.getName())) {
toReplace.add(element);
}
}
for (Element element : toReplace) {
replaceElement(element, map.get(element.getName()));
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy