All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.kiwiproject.xml.KiwiXml Maven / Gradle / Ivy

package org.kiwiproject.xml;

import static java.util.Objects.isNull;
import static org.apache.commons.lang3.StringUtils.abbreviate;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.kiwiproject.base.KiwiPreconditions.checkArgument;
import static org.kiwiproject.base.KiwiPreconditions.checkArgumentNotBlank;
import static org.kiwiproject.base.KiwiPreconditions.checkArgumentNotNull;
import static org.kiwiproject.base.KiwiStrings.blankToNull;
import static org.kiwiproject.collect.KiwiMaps.newHashMap;
import static org.kiwiproject.logging.LazyLogParameterSupplier.lazy;

import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.kiwiproject.io.KiwiIO;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.PropertyException;
import javax.xml.bind.annotation.XmlType;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Stream;

/**
 * Static utilities for converting to/from XML using JAXB and a few other XML-related utilities.
 *
 * @implNote the appropriate JAXB dependencies must be available at runtime
 */
@UtilityClass
@Slf4j
public class KiwiXml {

    /**
     * Constant that can be used to disable all namespace validations when converting from XML to objects.
     *
     * @see #toObjectAssertingValid(String, Class, NamespaceValidation, List)
     */
    public static final String DISABLE_NAMESPACE_VALIDATION = "*";

    private static final String NAME_KEY = "name";
    private static final String NAMESPACE_KEY = "namespace";
    private static final String PREFIX_MATCH = "([\\w\\d]+:)?";
    private static final List DEFAULT_IGNORED_NAMESPACES = List.of("##default");

    /**
     * Static map that stores mappings from class to {@link JAXBContext}, used to improve performance
     * by re-using existing context objects.
     */
    private static final ConcurrentMap, JAXBContext> jaxbContextMap = new ConcurrentHashMap<>();

    /**
     * Whether validation should be performed or not.
     */
    public enum NamespaceValidation {
        YES,
        NO
    }

    /**
     * Find the classes for which there is a cached {@link JAXBContext}. Each time an instance of a class is
     * converted to XML, the {@link JAXBContext} is retrieved from an internal cache. If there isn't one, then
     * one is created and stored in the cache.
     *
     * @return the set of classes for which {@link KiwiXml} has a cached {@link JAXBContext}. The returned set
     * is an unmodifiable copy of the actual cached classes
     * @implNote The internal cache is a static {@link ConcurrentMap}.
     */
    public static Set> getCachedJAXBContextClasses() {
        return Set.copyOf(jaxbContextMap.keySet());
    }

    /**
     * Clear the internal cache of class to {@link JAXBContext} mappings.
     */
    public static void clearCachedJAXBContextClasses() {
        jaxbContextMap.clear();
    }

    /**
     * Convert the given object to an XML representation.
     *
     * @param object the object to convert to XML
     * @return the XML representation of the object
     */
    public static String toXml(Object object) {
        checkArgumentNotNull(object);
        return toXml(object, object.getClass());
    }

    /**
     * Convert the given object to an XML representation.
     *
     * @param object the object to convert to XML
     * @param clazz  the type of class being converted
     * @return the XML representation of the object
     */
    public static String toXml(Object object, Class clazz) {
        return toXml(object, clazz, Map.of());
    }

    /**
     * Convert the given object to an XML representation.
     *
     * @param object               the object to convert to XML
     * @param clazz                the type of class being converted
     * @param marshallerProperties the properties to be set on the {@link Marshaller} during the conversion process
     * @return the XML representation of the object
     * @see Marshaller#setProperty(String, Object)
     */
    public static String toXml(Object object, Class clazz, Map marshallerProperties) {
        checkArgumentNotNull(object, "object cannot be null");
        checkArgumentNotNull(clazz, "clazz cannot be null");

        try {
            var writer = new StringWriter();
            var context = getJaxbContext(clazz);
            var introspector = context.createJAXBIntrospector();
            var marshaller = createMarshaller(context, marshallerProperties);

            if (isNull(introspector.getElementName(object))) {
                JAXBElement jaxbElement = createJaxbWrappedObject(object);
                marshaller.marshal(jaxbElement, writer);
            } else {
                marshaller.marshal(object, writer);
            }
            return writer.toString();
        } catch (XmlRuntimeException xre) {
            LOG.error("Error converting object to XML", xre);
            throw xre;
        } catch (Exception e) {
            LOG.error("Unknown error converting object to XML", e);
            throw new XmlRuntimeException("Unable to convert to XML", e);
        }
    }

    private static Marshaller createMarshaller(JAXBContext context, Map marshallerProperties)
            throws JAXBException {

        var marshaller = context.createMarshaller();
        Map properties = isNull(marshallerProperties) ?
                new HashMap<>() : new HashMap<>(marshallerProperties);

        properties.putIfAbsent(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        properties.forEach((k, v) -> setMarshallerProperty(marshaller, k, v));

        return marshaller;
    }

    private static void setMarshallerProperty(Marshaller marshaller, String key, Object value) {
        try {
            marshaller.setProperty(key, value);
        } catch (PropertyException e) {
            LOG.error("Encountered exception setting property: {}, with value: {}", key, value);
            throw new XmlRuntimeException("Unable to configure marshaller properties", e);
        }
    }

    private static JAXBElement createJaxbWrappedObject(Object objectToTranslate) {
        var nameAndNamespace = getNameAndNamespace(objectToTranslate.getClass());
        var qName = new QName(nameAndNamespace.get(NAMESPACE_KEY), nameAndNamespace.get(NAME_KEY));

        return new JAXBElement<>(qName, Object.class, objectToTranslate);
    }

    /**
     * Convert the given XML into an object of the specified type.
     *
     * @param xml   the XML to convert
     * @param clazz type of object to convert into
     * @param    the target type
     * @return a new instance of the specified type
     */
    public static  T toObject(String xml, Class clazz) {
        return toObjectAssertingValid(xml, clazz, NamespaceValidation.YES);
    }

    /**
     * Convert the given XML into an object of the specified type, ignoring any XML namespace information and not
     * performing any namespace validation.
     *
     * @param xml   the XML to convert
     * @param clazz type of object to convert into
     * @param    the target type
     * @return a new instance of the specified type
     */
    public static  T toObjectIgnoringNamespace(String xml, Class clazz) {
        return toObjectAssertingValid(xml, clazz, NamespaceValidation.NO, List.of(DISABLE_NAMESPACE_VALIDATION));
    }

    /**
     * Convert the given XML into an object of the specified type, optionally performing namespace validation.
     *
     * @param xml                 the XML to convert
     * @param clazz               type of object to convert into
     * @param namespaceValidation should namespace validation be performed or not
     * @param                  the target type
     * @return a new instance of the specified type
     */
    public static  T toObjectAssertingValid(String xml,
                                               Class clazz,
                                               NamespaceValidation namespaceValidation) {
        return toObjectAssertingValid(xml, clazz, namespaceValidation, DEFAULT_IGNORED_NAMESPACES);
    }

    /**
     * Convert the given XML into an object of the specified type, performing basic validation and ignoring
     * the specified namespaces.
     *
     * @param xml                 the XML to convert
     * @param clazz               type of object to convert into
     * @param namespaceValidation should namespace validation be performed or not
     * @param ignoredNamespaces   list of namespaces to ignore
     * @param                  the target type
     * @return a new instance of the specified type
     */
    public static  T toObjectAssertingValid(String xml,
                                               Class clazz,
                                               NamespaceValidation namespaceValidation,
                                               List ignoredNamespaces) {

        checkArgumentNotBlank(xml, "xml cannot be blank");
        checkArgumentNotNull(clazz, "clazz cannot be null");
        try {
            if (namespaceValidation == NamespaceValidation.YES) {
                checkArgument(validateXmlMatchesType(xml, clazz),
                        IllegalArgumentException.class, "XML namespace does not match expected type");
            }
            return tryWithFactory(xml, clazz, ignoredNamespaces);
        } catch (JAXBException e) {
            throw newXmlRuntimeException(e.getLinkedException(), xml);
        } catch (XMLStreamException e) {
            throw newXmlRuntimeException(e.getNestedException(), xml);
        } catch (Exception e) {
            throw newXmlRuntimeException(e, xml);
        }
    }

    /**
     * Validate that the given XML has a namespace that matches the given class, which is generally assumed to
     * be annotated with {@link XmlType}.
     *
     * @param xml   the input XML to compare
     * @param clazz the {@link Class} to compare; assumed to be annotated with {@link XmlType}
     * @param    the type of the target class
     * @return true if the XML namespace (e.g. xmlns) matches the namespace of the {@link XmlType} annotation
     * on the given class
     * @throws XmlRuntimeException if something bad and unexpected happens. The thrown exception wraps a
     *                             {@link XMLStreamException} or other cause.
     */
    public static  boolean validateXmlMatchesType(String xml, Class clazz) {
        return validateXmlMatchesType(xml, clazz, DEFAULT_IGNORED_NAMESPACES);
    }

    /**
     * Validate that the given XML has a namespace that matches the given class, which is generally assumed to
     * be annotated with {@link XmlType}, but ignoring the given list of namespaces.
     *
     * @param xml               the input XML to compare
     * @param clazz             the {@link Class} to compare; assumed to be annotated with {@link XmlType}
     * @param ignoredNamespaces the namespaces to ignore
     * @param                the type of the target class
     * @return true if the XML namespace (e.g. xmlns) matches the namespace of the {@link XmlType} annotation
     * on the given class
     * @throws XmlRuntimeException if something bad and unexpected happens. The thrown exception wraps a
     *                             {@link XMLStreamException} or other cause.
     */
    public static  boolean validateXmlMatchesType(String xml, Class clazz, List ignoredNamespaces) {
        checkArgumentNotBlank(xml);
        checkArgumentNotNull(clazz);
        checkArgumentNotNull(ignoredNamespaces);

        String xmlns;
        String classNamespace;
        try {
            var xmlNamespaceURI = blankToNull(getRootQualifiedName(xml).getNamespaceURI());
            xmlns = filterIgnoredNamespaces(ignoredNamespaces, xmlNamespaceURI);

            var clazzNamespace = blankToNull(getNameAndNamespace(clazz).get(NAMESPACE_KEY));
            classNamespace = filterIgnoredNamespaces(ignoredNamespaces, clazzNamespace);

            if (StringUtils.equals(xmlns, classNamespace)) {
                LOG.trace("Return true for xmlns: {} and classNamespace: {}", xmlns, classNamespace);
                return true;
            }

            if (isBlank(xmlns) || isBlank(classNamespace)) {
                LOG.info("Skipping validation of namespace for class: {}, with namespace: '{}', and XML with namespace: '{}'." +
                                " One or both are blank or ignored.",
                        clazz.getName(), classNamespace, xmlns);
                return true;
            }
        } catch (Exception e) {
            throw new XmlRuntimeException(e);
        }

        LOG.warn("XML root element with namespace: '{}' does not match expected namespace: '{}', of class: '{}'",
                xmlns, classNamespace, clazz.getName());
        return false;
    }

    private static QName getRootQualifiedName(String xml) throws XMLStreamException {
        XMLStreamReader xmlStreamReader = null;  // not AutoCloseable; cannot use try-with-resources

        try (var stringReader = new StringReader(xml)) {
            var xmlInputFactory = newSecureXMLInputFactory();
            xmlStreamReader = xmlInputFactory.createXMLStreamReader(stringReader);

            if (xmlStreamReader.hasNext()) {
                var event = xmlStreamReader.next();
                while (event != XMLStreamConstants.START_ELEMENT && xmlStreamReader.hasNext()) {
                    event = xmlStreamReader.next();
                }

                if (xmlStreamReader.isStartElement()) {
                    return xmlStreamReader.getName();
                }
            }

            throw new XmlRuntimeException("Unable to determine root element namespace");
        } finally {
            KiwiIO.closeQuietly(xmlStreamReader);
        }
    }

    private static String filterIgnoredNamespaces(List ignoredNamespaces, String namespace) {
        var shouldIgnore = isNull(namespace) || ignoredNamespaces.contains(namespace);
        if (shouldIgnore) {
            LOG.debug("Ignoring namespace: '{}', treating as null", namespace);
        }
        return shouldIgnore ? null : namespace;
    }

    private static  T tryWithFactory(String xml, Class clazz, List ignoredNamespaces)
            throws JAXBException, XMLStreamException {

        XMLStreamReader xmlStreamReader = null;  // not AutoCloseable; cannot use try-with-resources

        try (var stringReader = new StringReader(xml)) {
            var jaxbContext = getJaxbContext(clazz);
            var unmarshaller = jaxbContext.createUnmarshaller();
            var xmlInputFactory = newSecureXMLInputFactory();
            xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, isNamespaceAware(ignoredNamespaces));
            xmlStreamReader = xmlInputFactory.createXMLStreamReader(stringReader);
            JAXBElement rootElement = unmarshaller.unmarshal(xmlStreamReader, clazz);
            return rootElement.getValue();
        } finally {
            KiwiIO.closeQuietly(xmlStreamReader);
        }
    }

    private static synchronized JAXBContext getJaxbContext(Class clazz) {
        return jaxbContextMap.computeIfAbsent(clazz, KiwiXml::newJaxbContext);
    }

    private static JAXBContext newJaxbContext(Class clazz) {
        try {
            return JAXBContext.newInstance(clazz);
        } catch (JAXBException e) {
            throw new UncheckedJAXBException("Error creating JAXBContext for " + clazz, e);
        }
    }

    /**
     * Per Sonar rule java:S275 (XML parsers should not be vulnerable to XXE attacks), create a new
     * {@link XMLInputFactory} with external entity processing disabled.
     */
    private static XMLInputFactory newSecureXMLInputFactory() {
        var factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        return factory;
    }

    private static boolean isNamespaceAware(List ignoredNamespaces) {
        return !ignoredNamespaces.contains(DISABLE_NAMESPACE_VALIDATION);
    }

    private static XmlRuntimeException newXmlRuntimeException(Throwable cause, String xml) {
        LOG.trace("Encountered error trying to unmarshal XML: {}", lazy(() -> abbreviate(xml, 256)));

        return new XmlRuntimeException("Unable to unmarshal XML", cause);
    }

    /**
     * Extract XML name and namespace from the given class, assuming it is annotated with {@link XmlType}.
     * 

* If the given class is not annotated with {@link XmlType}, we return the namespace as {@code null} * and the name as the "simple name" of the class from {@link Class#getSimpleName()}. This allows some degree * of flexibility when working with non-annotated classes, though this should be the exception not the normal * situation. * * @param objectClass the class to get name and namespace from * @return a map containing entries for name and namespace * @see XmlType#name() * @see XmlType#namespace() */ public static Map getNameAndNamespace(Class objectClass) { checkArgumentNotNull(objectClass); return Optional.ofNullable(objectClass.getAnnotation(XmlType.class)) .map(KiwiXml::getNameAndNamespace) .orElseGet(() -> newHashMap( NAMESPACE_KEY, null, NAME_KEY, objectClass.getSimpleName() )); } private static Map getNameAndNamespace(XmlType xmlTypeAnnotation) { return newHashMap( NAMESPACE_KEY, xmlTypeAnnotation.namespace(), NAME_KEY, xmlTypeAnnotation.name() ); } /** * Removes tags from the given XML but ignoring namespaces. * * @param xml the XML containing tags to be removed * @param tagsToRemove names of the tags to remove * @return XML with the given tags removed */ public static String stripTags(String xml, String... tagsToRemove) { var namespacedTagsToRemove = Stream.of(tagsToRemove) .map(tag -> PREFIX_MATCH + tag) .toArray(String[]::new); return stripTagsConsideringNamespace(xml, namespacedTagsToRemove); } /** * Removes tags from the given XML taking into account the full tag name (i.e. possibly including namespace). * * @param xml the XML containing tags to be removed * @param tagsToRemove names of the tags to remove * @return XML with the given tags removed */ public static String stripTagsConsideringNamespace(String xml, String... tagsToRemove) { return Arrays.stream(tagsToRemove) .reduce(xml, (accumulatedXml, tagToRemove) -> { var tagRegex = "<" + tagToRemove + ">[\\s\\S\\w\\W]*"; return accumulatedXml.replaceAll(tagRegex, ""); }); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy