All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.mycore.common.xml.MCRXMLHelper Maven / Gradle / Ivy

There is a newer version: 2024.05
Show newest version
/*
 * This file is part of ***  M y C o R e  ***
 * See http://www.mycore.de/ for details.
 *
 * MyCoRe is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * MyCoRe is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with MyCoRe.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.mycore.common.xml;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import javax.xml.XMLConstants;
import javax.xml.transform.TransformerException;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jdom2.Attribute;
import org.jdom2.Comment;
import org.jdom2.Content;
import org.jdom2.DocType;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.Parent;
import org.jdom2.ProcessingInstruction;
import org.jdom2.Text;
import org.jdom2.Verifier;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;
import org.jdom2.transform.JDOMSource;
import org.mycore.common.MCRConstants;
import org.mycore.common.MCRException;
import org.mycore.common.content.MCRByteContent;
import org.mycore.common.content.streams.MCRByteArrayOutputStream;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;

/**
 * This class provides some static utility methods to deal with XML/DOM
 * elements, nodes etc.
 *
 * @author Detlev Degenhardt
 * @author Frank Lützenkirchen
 * @author Thomas Scheffler (yagee)
 */
public class MCRXMLHelper {

    private static final Logger LOGGER = LogManager.getLogger(MCRXMLHelper.class);

    /**
     * Removes characters that are illegal in XML text nodes or attribute
     * values.
     *
     * The text is inspected code point by code point, so characters outside
     * the Basic Multilingual Plane (encoded as surrogate pairs) are kept as
     * long as they are legal XML characters. Unpaired surrogates are removed.
     *
     * @param text
     *            the String that should be used in XML elements or attributes
     * @return the String with all illegal characters removed; the input itself
     *         is returned when it is null, blank or already valid
     */
    public static String removeIllegalChars(String text) {
        if (text == null || text.trim().length() == 0) {
            return text;
        }
        if (Verifier.checkCharacterData(text) == null) {
            return text;
        }

        // It seems we have to filter out invalid XML characters...
        StringBuilder sb = new StringBuilder(text.length());
        int index = 0;
        while (index < text.length()) {
            // Read a whole code point: testing single UTF-16 code units would
            // reject both halves of every valid surrogate pair.
            int codePoint = text.codePointAt(index);
            if (Verifier.isXMLCharacter(codePoint)) {
                sb.appendCodePoint(codePoint);
            }
            index += Character.charCount(codePoint);
        }
        return sb.toString();
    }

    /**
     * Validates a document against the XML Schema found at the given URI.
     *
     * The schema source is resolved through {@link MCRURIResolver}; resources
     * referenced while loading the schema and while validating are resolved
     * through {@link MCREntityResolver}.
     *
     * @param doc document to be validated
     * @param schemaURI URI of XML Schema document
     * @throws SAXException if validation fails
     * @throws IOException if resolving resources fails
     */
    public static void validate(Document doc, String schemaURI) throws SAXException, IOException {
        SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
        schemaFactory.setResourceResolver(MCREntityResolver.instance());

        Schema xmlSchema;
        try {
            xmlSchema = schemaFactory.newSchema(MCRURIResolver.instance().resolve(schemaURI, null));
        } catch (TransformerException resolveFailure) {
            // Surface the underlying SAX or I/O problem where there is one;
            // anything else (including a missing cause) is wrapped.
            Throwable rootCause = resolveFailure.getCause();
            if (rootCause instanceof SAXException) {
                throw (SAXException) rootCause;
            }
            if (rootCause instanceof IOException) {
                throw (IOException) rootCause;
            }
            throw new IOException(resolveFailure);
        }

        Validator schemaValidator = xmlSchema.newValidator();
        schemaValidator.setResourceResolver(MCREntityResolver.instance());
        schemaValidator.validate(new JDOMSource(doc));
    }

    /**
     * Serializes JDOM content to its gson representation by delegating to
     * {@code JDOMtoGSONSerializer.serialize(Content)}.
     *
     * @param content the jdom content to serialize
     * @return a gson element
     */
    public static JsonElement jsonSerialize(Content content) {
        JsonElement serialized = JDOMtoGSONSerializer.serialize(content);
        return serialized;
    }

    /**
     * Serializes a JDOM element to a gson object by delegating to
     * {@code JDOMtoGSONSerializer.serializeElement(Element)}.
     *
     * @param element the jdom element to serialize
     * @return a gson object
     */
    public static JsonObject jsonSerialize(Element element) {
        JsonObject serialized = JDOMtoGSONSerializer.serializeElement(element);
        return serialized;
    }

    /**
     * Checks whether two documents are equal.
     *
     * Both documents are serialized in compact format and parsed again; the
     * resulting root elements are then compared recursively (names,
     * attributes and content). If this fails with an exception, a warning is
     * logged and the documents are reported as not equal.
     *
     * @param d1
     *            first Document to compare
     * @param d2
     *            second Document to compare
     * @return true, if d1 and d2 are deep equal
     * @see Document#equals(java.lang.Object)
     */
    public static boolean deepEqual(Document d1, Document d2) {
        try {
            Element firstRoot = canonicalElement(d1);
            Element secondRoot = canonicalElement(d2);
            return JDOMEquivalent.equivalent(firstRoot, secondRoot);
        } catch (Exception e) {
            LOGGER.warn("Could not compare documents.", e);
            return false;
        }
    }

    /**
     * Checks whether two elements are equal.
     *
     * Both elements are serialized in compact format and parsed again; the
     * resulting elements are then compared recursively (names, attributes and
     * content). If this fails with an exception, a warning is logged and the
     * elements are reported as not equal.
     *
     * @param e1
     *            first Element to compare
     * @param e2
     *            second Element to compare
     * @return true, if e1 and e2 are deep equal
     */
    public static boolean deepEqual(Element e1, Element e2) {
        try {
            Element firstCanonical = canonicalElement(e1);
            Element secondCanonical = canonicalElement(e2);
            return JDOMEquivalent.equivalent(firstCanonical, secondCanonical);
        } catch (Exception e) {
            LOGGER.warn("Could not compare elements.", e);
            return false;
        }
    }

    /**
     * Brings an element or document into a comparable form by writing it out
     * in compact format and parsing the bytes again with the non-validating
     * parser.
     *
     * @param e the element or document to normalize
     * @return the root element of the re-parsed XML
     * @throws IOException if writing or parsing fails
     * @throws SAXParseException if the serialized XML cannot be parsed
     */
    private static Element canonicalElement(Parent e) throws IOException, SAXParseException {
        MCRByteArrayOutputStream serialized = new MCRByteArrayOutputStream();
        XMLOutputter compactOutputter = new XMLOutputter(Format.getCompactFormat());
        if (!(e instanceof Element)) {
            compactOutputter.output((Document) e, serialized);
        } else {
            compactOutputter.output((Element) e, serialized);
        }
        return MCRXMLParserFactory.getNonValidatingParser()
            .parseXML(new MCRByteContent(serialized.getBuffer(), 0, serialized.size()))
            .getRootElement();
    }

    /**
     * Recursive, order-sensitive comparison of JDOM nodes. Every failed
     * comparison of a value is reported on debug level.
     */
    private static class JDOMEquivalent {

        private JDOMEquivalent() {
        }

        /**
         * Compares two elements by name, attributes and (recursively) content.
         *
         * @param e1 first element
         * @param e2 second element
         * @return true if name, attributes and content are equivalent
         */
        public static boolean equivalent(Element e1, Element e2) {
            return equivalentName(e1, e2) && equivalentAttributes(e1, e2)
                && equivalentContent(e1.getContent(), e2.getContent());
        }

        /**
         * Compares the values of two text nodes.
         *
         * @param t1 first text node
         * @param t2 second text node
         * @return true if both values are equal
         */
        public static boolean equivalent(Text t1, Text t2) {
            boolean equals = t1.getValue().equals(t2.getValue());
            if (!equals && LOGGER.isDebugEnabled()) {
                LOGGER.debug("Text differs \"{}\"!=\"{}\"", t1, t2);
            }
            return equals;
        }

        /**
         * Compares public and system ID of two document type declarations.
         *
         * @param d1 first doctype
         * @param d2 second doctype
         * @return true if both IDs are equal
         */
        public static boolean equivalent(DocType d1, DocType d2) {
            // Null-safe: a doctype without public or system ID must not cause a NullPointerException.
            boolean equals = Objects.equals(d1.getPublicID(), d2.getPublicID())
                && Objects.equals(d1.getSystemID(), d2.getSystemID());
            if (!equals && LOGGER.isDebugEnabled()) {
                LOGGER.debug("DocType differs \"{}\"!=\"{}\"", d1, d2);
            }
            return equals;
        }

        /**
         * Compares the values of two comments.
         *
         * @param c1 first comment
         * @param c2 second comment
         * @return true if both values are equal
         */
        public static boolean equivalent(Comment c1, Comment c2) {
            boolean equals = c1.getValue().equals(c2.getValue());
            if (!equals && LOGGER.isDebugEnabled()) {
                LOGGER.debug("Comment differs \"{}\"!=\"{}\"", c1, c2);
            }
            return equals;
        }

        /**
         * Compares target and data of two processing instructions.
         *
         * @param p1 first processing instruction
         * @param p2 second processing instruction
         * @return true if target and data are equal
         */
        public static boolean equivalent(ProcessingInstruction p1, ProcessingInstruction p2) {
            boolean equals = p1.getTarget().equals(p2.getTarget()) && p1.getData().equals(p2.getData());
            if (!equals && LOGGER.isDebugEnabled()) {
                LOGGER.debug("ProcessingInstruction differs \"{}\"!=\"{}\"", p1, p2);
            }
            return equals;
        }

        /**
         * Compares the attributes of two elements, ignoring their order.
         * Attributes are matched by their string representation.
         *
         * @param e1 first element
         * @param e2 second element
         * @return true if both elements carry the same attributes
         */
        public static boolean equivalentAttributes(Element e1, Element e2) {
            List<Attribute> aList1 = e1.getAttributes();
            List<Attribute> aList2 = e2.getAttributes();
            if (aList1.size() != aList2.size()) {
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Number of attributes differ \"{}\"!=\"{}\" for element {}", aList1, aList2,
                        e1.getName());
                }
                return false;
            }
            // Collect the first element's attributes, then strike out every one
            // the second element has too; leftovers have no counterpart.
            HashSet<String> orig = new HashSet<>(aList1.size());
            for (Attribute attr : aList1) {
                orig.add(attr.toString());
            }
            for (Attribute attr : aList2) {
                orig.remove(attr.toString());
            }
            if (!orig.isEmpty() && LOGGER.isDebugEnabled()) {
                LOGGER.debug("Attributes differ \"{}\"!=\"{}\"", aList1, aList2);
            }
            return orig.isEmpty();
        }

        /**
         * Compares two content lists position by position.
         *
         * @param l1 first content list
         * @param l2 second content list
         * @return true if both lists have the same size and every pair of
         *         nodes at the same position is equivalent
         */
        public static boolean equivalentContent(List<Content> l1, List<Content> l2) {
            if (l1.size() != l2.size()) {
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("Number of content list elements differ {}!={}", l1.size(), l2.size());
                }
                return false;
            }
            Iterator<Content> i1 = l1.iterator();
            Iterator<Content> i2 = l2.iterator();
            while (i1.hasNext() && i2.hasNext()) {
                if (!equivalentNode(i1.next(), i2.next())) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Compares two content nodes. Nodes of different kinds, and kinds
         * not handled here, are never equivalent.
         */
        private static boolean equivalentNode(Content o1, Content o2) {
            if (o1 instanceof Element && o2 instanceof Element) {
                return equivalent((Element) o1, (Element) o2);
            }
            if (o1 instanceof Text && o2 instanceof Text) {
                return equivalent((Text) o1, (Text) o2);
            }
            if (o1 instanceof Comment && o2 instanceof Comment) {
                return equivalent((Comment) o1, (Comment) o2);
            }
            if (o1 instanceof ProcessingInstruction && o2 instanceof ProcessingInstruction) {
                return equivalent((ProcessingInstruction) o1, (ProcessingInstruction) o2);
            }
            if (o1 instanceof DocType && o2 instanceof DocType) {
                return equivalent((DocType) o1, (DocType) o2);
            }
            return false;
        }

        /**
         * Compares namespace and local name of two elements.
         *
         * @param e1 first element
         * @param e2 second element
         * @return true if namespace and local name are equal
         */
        public static boolean equivalentName(Element e1, Element e2) {
            Namespace ns1 = e1.getNamespace();
            String localName1 = e1.getName();

            Namespace ns2 = e2.getNamespace();
            String localName2 = e2.getName();

            return ns1.equals(ns2) && localName1.equals(localName2);
        }
    }

    /**
     * Helper class to serialize jdom XML to gson JSON.
     * 

* To support fast javascript dot property access its decided to use the underscore (_) * for attributes and the dollar sign ($) for text nodes. The colon sign (:) is used for * namespaces (you have to use square brackets in javascript for accessing those). *

* *
    *
  • _version -> version attribute
  • *
  • $text -> text node
  • *
  • _xmlns:mods -> mods namespace
  • *
  • _mods:title -> title attribute with mods namespace
  • *
* * Example *
     * {
     *   "_version": "3.0",
     *   "_xmlns:mods": "http://www.loc.gov/mods/v3"
     *   "mods:titleInfo": {
     *     "mods:title": {
     *       "$text": "hello xml serializer"
     *     }
     *   }
     * }
     * 
*
    *
  • get the version -> mods._version -> "3.0"
  • *
  • get the text of the title -> mods["mods:titleInfo"]["mods:title"].$text * -> "hello xml serializer"
  • *
* BE AWARE THAT MIXED CONTENT IS NOT SUPPORTED! * * @author Matthias Eichner */ private static class JDOMtoGSONSerializer { /** * This method is capable of serializing Elements and Text nodes. * Return null otherwise. * * @param content the content to serialize * @return the serialized content, or null if the type is not supported */ public static JsonElement serialize(Content content) { if (content instanceof Element) { return serializeElement((Element) content); } if (content instanceof Text) { return serializeText((Text) content); } return null; } static JsonPrimitive serializeText(Text text) { return new JsonPrimitive(text.getText()); } static JsonObject serializeElement(Element element) { JsonObject json = new JsonObject(); // text String text = element.getText(); if (text != null && text.trim().length() > 0) { json.addProperty("$text", text); } // attributes element.getAttributes().forEach(attr -> json.addProperty(getName(attr), attr.getValue())); // namespaces element.getAdditionalNamespaces().forEach(ns -> json.addProperty(getName(ns), ns.getURI())); // children // - build child map of pair with their respective elements Map, List> childContentMap = new HashMap<>(); for (Element child : element.getChildren()) { Pair key = new Pair<>(child.getName(), child.getNamespace()); List contentList = childContentMap.computeIfAbsent(key, k -> new ArrayList<>()); contentList.add(child); } // - run through the map and serialize for (Map.Entry, List> entry : childContentMap.entrySet()) { Pair key = entry.getKey(); List contentList = entry.getValue(); String name = getName(key.x, key.y); if (entry.getValue().size() == 1) { json.add(name, serializeElement(contentList.get(0))); } else if (contentList.size() >= 2) { JsonArray arr = new JsonArray(); contentList.forEach(child -> arr.add(serialize(child))); json.add(name, arr); } else { throw new MCRException( "Unexcpected error while parsing children of element '" + element.getName() + "'"); } } return json; } private static 
String getName(Namespace ns) { return "_xmlns:" + getCononicalizedPrefix(ns); } private static String getName(Attribute attribute) { return "_" + getName(attribute.getName(), attribute.getNamespace()); } private static String getName(String name, Namespace namespace) { if (namespace != null && !namespace.getURI().equals("")) { return getCononicalizedPrefix(namespace) + ":" + name; } return name; } private static String getCononicalizedPrefix(Namespace namespace) { return MCRConstants .getStandardNamespaces() .parallelStream() .filter(namespace::equals) .findAny() .map(Namespace::getPrefix) .orElse(namespace.getPrefix()); } private static class Pair { public final X x; public final Y y; Pair(X x, Y y) { this.x = x; this.y = y; } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } Pair pair = (Pair) o; return Objects.equals(x, pair.x) && Objects.equals(y, pair.y); } @Override public int hashCode() { return Objects.hash(x, y); } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy