All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cz.muni.fi.mir.mathmlcanonicalization.modules.ScriptNormalizer Maven / Gradle / Ivy

Go to download

MathMLCanonicalizer canonicalizes MathML input. Its modular architecture allows canonicalization features to be set up according to the user's needs.

There is a newer version: 1.3.1
Show newest version
/**
 * Copyright 2013 MIR@MU Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package cz.muni.fi.mir.mathmlcanonicalization.modules;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.filter.ElementFilter;

/**
 * Handle sub/super/under/over/multi script elements in MathML.
 *
 * 

 * <p>
 * Normalize the occurrence of {@code <msub>}, {@code <msup>}, {@code <msubsup>},
 * {@code <munder>}, {@code <mover>}, {@code <munderover>} and {@code <mmultiscripts>}
 * (with children {@code <mprescripts/>} and {@code <none/>}) elements in MathML.
 * </p>
 * <h4>Input</h4>
 * Well-formed MathML
 * <h4>Output</h4>
 * The original code, always using:
 * <ul>
 * <li>{@code <msubsup>} (or {@code <msub>}) for sums, integrals, etc. (converted
 * from {@code <munderover>}, {@code <munder>} and {@code <msub>}, {@code <msup>}
 * combinations)</li>
 * <li>{@code <msub>} inside {@code <msup>} in nested formulae</li>
 * <li>nested {@code <msub>} and {@code <msup>} instead of {@code <msubsup>} in
 * identifiers (not for sums, integrals, etc.)</li>
 * <li>Unicode scripts converted to MathML scripts</li>
 * <li>(sub/super)scripts instead of {@code <mmultiscripts>} where possible</li>
 * <li>possibly: conversion of all (under/over)scripts to (sub/super)scripts?</li>
 * </ul>
*
 * @author Jaroslav Dufek
 * @author David Formanek
 */
public class ScriptNormalizer extends AbstractModule implements DOMModule {

    /**
     * Path to the property file with module settings.
     */
    private static final String PROPERTIES_FILENAME = "ScriptNormalizer.properties";
    private static final Logger LOGGER = Logger.getLogger(ScriptNormalizer.class.getName());
    // properties key names
    private static final String SWAP_SCRIPTS = "swapscripts";
    private static final String SPLIT_SCRIPTS_ELEMENTS = "splitscriptselements";
    private static final String UNIFY_SCRIPTS = "unifyscripts";

    /**
     * Creates the module and loads its settings from
     * {@value #PROPERTIES_FILENAME}.
     */
    public ScriptNormalizer() {
        loadProperties(PROPERTIES_FILENAME);
    }

    /**
     * Runs the enabled script normalizations on the given document, in this
     * order:
     * <ol>
     * <li>if {@value #UNIFY_SCRIPTS} is enabled, under/over script elements
     * are replaced by the corresponding sub/super script elements;</li>
     * <li>if {@value #SWAP_SCRIPTS} is enabled, a superscript nested in the
     * base of a subscript is swapped with that subscript;</li>
     * <li>if the {@value #SPLIT_SCRIPTS_ELEMENTS} property set is not empty,
     * sub-superscript elements whose base element name is in that set are
     * split into a subscript nested in a superscript.</li>
     * </ol>
     *
     * @param doc the document to modify in place
     * @throws NullPointerException if {@code doc} is {@code null}
     */
    @Override
    public void execute(final Document doc) {
        if (doc == null) {
            throw new NullPointerException("doc");
        }
        final Element root = doc.getRootElement();
        if (isEnabled(UNIFY_SCRIPTS)) {
            final Map<String, String> replaceMap = new HashMap<String, String>();
            replaceMap.put(UNDERSCRIPT, SUBSCRIPT);
            replaceMap.put(OVERSCRIPT, SUPERSCRIPT);
            replaceMap.put(UNDEROVER, SUBSUP);
            replaceDescendants(root, replaceMap);
        } else {
            // TODO: normalize unconverted munder/mover/munderover
        }
        // TODO: convert multiscript where possible
        if (isEnabled(SWAP_SCRIPTS)) {
            normalizeSupInSub(root);
        }
        final Collection<String> chosenElements = getPropertySet(SPLIT_SCRIPTS_ELEMENTS);
        if (chosenElements.isEmpty()) {
            LOGGER.fine("Msubsup conversion is switched off");
        } else {
            normalizeMsubsup(root, chosenElements);
        }
        // TODO: convert sub/sup combination with not chosen elements to subsup
    }

    /**
     * Recursively rewrites every subscript whose base is a superscript, i.e.
     * {@code sub(sup(base, superscript), subscript)}, into
     * {@code sup(sub(base, subscript), superscript)}.
     *
     * <p>
     * Children are processed before their parent. Subscript or superscript
     * elements that do not have exactly two child elements are logged and
     * left untouched.
     * </p>
     *
     * @param element the element whose descendants are normalized
     */
    private void normalizeSupInSub(final Element element) {
        assert element != null;
        final List<Element> children = element.getChildren();
        for (int i = 0; i < children.size(); i++) {
            final Element actual = children.get(i);
            normalizeSupInSub(actual);
            if (!actual.getName().equals(SUBSCRIPT)) {
                continue;
            }
            final List<Element> subscriptChildren = actual.getChildren();
            if (subscriptChildren.size() != 2) {
                LOGGER.info("Invalid msub, skipped");
                continue;
            }
            final Element oldMsup = subscriptChildren.get(0);
            if (!oldMsup.getName().equals(SUPERSCRIPT)) {
                continue;
            }
            final List<Element> superscriptChildren = oldMsup.getChildren();
            if (superscriptChildren.size() != 2) {
                LOGGER.info("Invalid msup, skipped");
                continue;
            }
            // Take references before detaching: the child lists are live
            // views, so indices shift as soon as an element is detached.
            final Element base = superscriptChildren.get(0);
            final Element superscript = superscriptChildren.get(1);
            final Element subscript = subscriptChildren.get(1);

            // New elements inherit the namespace of the element they stand
            // in for; new Element(name) alone would put them in no namespace.
            final Element newMsub = new Element(SUBSCRIPT, actual.getNamespace());
            newMsub.addContent(base.detach());
            newMsub.addContent(subscript.detach());
            final Element newMsup = new Element(SUPERSCRIPT, oldMsup.getNamespace());
            newMsup.addContent(newMsub);
            newMsup.addContent(superscript.detach());
            children.set(i, newMsup);
            LOGGER.fine("Sub/sup scripts swapped");
        }
    }

    /**
     * Recursively splits sub-superscript elements into a subscript nested in a
     * superscript, i.e. {@code subsup(base, sub, sup)} becomes
     * {@code sup(sub(base, sub), sup)}, when the name of the base element is
     * contained in {@code firstChildren}.
     *
     * <p>
     * Sub-superscript elements that are skipped (not exactly three child
     * elements, or a base that is not in {@code firstChildren}) are left as
     * they are, but their descendants are still visited.
     * </p>
     *
     * @param element the element whose descendants are normalized
     * @param firstChildren names of base elements for which the split is done
     */
    private void normalizeMsubsup(final Element element, Collection<String> firstChildren) {
        assert element != null && firstChildren != null;
        final List<Element> children = element.getChildren();
        for (int i = 0; i < children.size(); i++) {
            final Element actual = children.get(i);
            if (!actual.getName().equals(SUBSUP)) {
                normalizeMsubsup(actual, firstChildren);
                continue;
            }
            final List<Element> actualChildren = actual.getChildren();
            if (actualChildren.size() != 3) {
                LOGGER.info("Invalid msubsup, skipped");
                // the element itself stays, nested content is still normalized
                normalizeMsubsup(actual, firstChildren);
                continue;
            }
            if (!firstChildren.contains(actualChildren.get(0).getName())) {
                // not chosen for splitting, but it may contain chosen ones
                normalizeMsubsup(actual, firstChildren);
                continue;
            }
            // Take references before detaching: the child list is a live
            // view, so indices shift as soon as an element is detached.
            final Element base = actualChildren.get(0);
            final Element subscript = actualChildren.get(1);
            final Element superscript = actualChildren.get(2);

            // New elements inherit the namespace of the replaced element;
            // new Element(name) alone would put them in no namespace.
            final Element newMsub = new Element(SUBSCRIPT, actual.getNamespace());
            newMsub.addContent(base.detach());
            newMsub.addContent(subscript.detach());
            final Element newMsup = new Element(SUPERSCRIPT, actual.getNamespace());
            newMsup.addContent(newMsub);
            newMsup.addContent(superscript.detach());
            children.set(i, newMsup);
            LOGGER.fine("Msubsup converted to nested msub and msup");
            // check the children of the new transformation
            normalizeMsubsup(newMsup, firstChildren);
        }
    }

    /**
     * Replaces every descendant element of {@code ancestor} whose name is a
     * key of {@code map} using {@code replaceElement} with the mapped name.
     *
     * <p>
     * Matching elements are collected first and replaced afterwards, so the
     * tree is not modified while it is being traversed.
     * </p>
     *
     * @param ancestor the element whose descendants are examined
     * @param map maps the name of an element to replace to its new name
     */
    private void replaceDescendants(final Element ancestor, final Map<String, String> map) {
        assert ancestor != null && map != null;
        final List<Element> toReplace = new ArrayList<Element>();
        for (Element element : ancestor.getDescendants(new ElementFilter())) {
            if (map.containsKey(element.getName())) {
                toReplace.add(element);
            }
        }
        for (Element element : toReplace) {
            replaceElement(element, map.get(element.getName()));
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy