All downloads are free. Search and download functionality uses the official Maven repository.

org.kiwiproject.xml.KiwiXml Maven / Gradle / Ivy

Go to download

Kiwi is a utility library. We really like Google's Guava, and also use Apache Commons. But if they don't have something we need, and we think it is useful, this is where we put it.

There is a newer version: 4.5.2
Show newest version
package org.kiwiproject.xml;

import static java.util.Objects.isNull;
import static org.apache.commons.lang3.StringUtils.abbreviate;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.kiwiproject.base.KiwiPreconditions.checkArgument;
import static org.kiwiproject.base.KiwiPreconditions.checkArgumentNotBlank;
import static org.kiwiproject.base.KiwiPreconditions.checkArgumentNotNull;
import static org.kiwiproject.base.KiwiStrings.blankToNull;
import static org.kiwiproject.collect.KiwiMaps.newHashMap;
import static org.kiwiproject.logging.LazyLogParameterSupplier.lazy;

import lombok.experimental.UtilityClass;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.kiwiproject.io.KiwiIO;

import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.PropertyException;
import javax.xml.bind.annotation.XmlType;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Stream;

/**
 * Static utilities for converting to/from XML using JAXB and a few other XML-related utilities.
 *
 * @implNote the appropriate JAXB dependencies must be available at runtime
 */
@UtilityClass
@Slf4j
public class KiwiXml {

    /**
     * Constant that can be used to disable all namespace validations when converting from XML to objects.
     *
     * @see #toObjectAssertingValid(String, Class, NamespaceValidation, List)
     */
    public static final String DISABLE_NAMESPACE_VALIDATION = "*";

    // Keys of the map returned by getNameAndNamespace
    private static final String NAME_KEY = "name";
    private static final String NAMESPACE_KEY = "namespace";

    // Regex fragment matching an optional namespace prefix (e.g. "ns2:") in front of a tag name
    private static final String PREFIX_MATCH = "([\\w\\d]+:)?";

    // Namespaces ignored by default during namespace validation.
    // NOTE(review): "##default" appears to be the JAXB default value of XmlType#namespace() - confirm
    private static final List<String> DEFAULT_IGNORED_NAMESPACES = List.of("##default");

    /**
     * Static map that stores mappings from class to {@link JAXBContext}, used to improve performance
     * by re-using existing context objects.
     */
    private static final ConcurrentMap, JAXBContext> jaxbContextMap = new ConcurrentHashMap<>();

    /**
     * Flag indicating whether XML namespace validation should be performed when converting XML to objects.
     */
    public enum NamespaceValidation {

        /**
         * Perform namespace validation.
         */
        YES,

        /**
         * Do not perform namespace validation.
         */
        NO
    }

    /**
     * Find the classes for which there is a cached {@link JAXBContext}. Each time an instance of a class is
     * converted to XML, the {@link JAXBContext} is retrieved from an internal cache. If there isn't one, then
     * one is created and stored in the cache.
     *
     * @return the set of classes for which {@link KiwiXml} has a cached {@link JAXBContext}. The returned set
     * is an unmodifiable copy of the actual cached classes
     * @implNote The internal cache is a static {@link ConcurrentMap}.
     */
    public static Set> getCachedJAXBContextClasses() {
        return Set.copyOf(jaxbContextMap.keySet());
    }

    /**
     * Remove every class to {@link JAXBContext} mapping from the internal cache.
     */
    public static void clearCachedJAXBContextClasses() {
        KiwiXml.jaxbContextMap.clear();
    }

    /**
     * Convert the given object to an XML representation, using the object's runtime class as the
     * type being converted.
     *
     * @param object the object to convert to XML; must not be null
     * @return the XML representation of the object
     */
    public static String toXml(Object object) {
        checkArgumentNotNull(object);

        var objectType = object.getClass();
        return toXml(object, objectType);
    }

    /**
     * Convert the given object to an XML representation, without supplying any additional
     * {@link Marshaller} properties.
     *
     * @param object the object to convert to XML
     * @param clazz  the type of class being converted
     * @return the XML representation of the object
     */
    public static String toXml(Object object, Class<?> clazz) {
        return toXml(object, clazz, Map.of());
    }

    /**
     * Convert the given object to an XML representation.
     * <p>
     * If the JAXB introspector cannot determine an element name for the object, the object is wrapped
     * in a {@link JAXBElement} (named using {@link #getNameAndNamespace(Class)}) before it is marshalled.
     *
     * @param object               the object to convert to XML
     * @param clazz                the type of class being converted
     * @param marshallerProperties the properties to be set on the {@link Marshaller} during the conversion process;
     *                             may be null, in which case only the defaults are applied
     * @return the XML representation of the object
     * @throws XmlRuntimeException if the object cannot be converted to XML
     * @see Marshaller#setProperty(String, Object)
     */
    public static String toXml(Object object, Class<?> clazz, Map<String, Object> marshallerProperties) {
        checkArgumentNotNull(object, "object cannot be null");
        checkArgumentNotNull(clazz, "clazz cannot be null");

        try {
            var writer = new StringWriter();
            var context = getJaxbContext(clazz);
            var introspector = context.createJAXBIntrospector();
            var marshaller = createMarshaller(context, marshallerProperties);

            if (isNull(introspector.getElementName(object))) {
                // No element name is known for this object, so supply one via a JAXBElement wrapper
                JAXBElement<Object> jaxbElement = createJaxbWrappedObject(object);
                marshaller.marshal(jaxbElement, writer);
            } else {
                marshaller.marshal(object, writer);
            }
            return writer.toString();
        } catch (XmlRuntimeException xre) {
            // Already the exception type callers expect; log and rethrow without re-wrapping
            LOG.error("Error converting object to XML", xre);
            throw xre;
        } catch (Exception e) {
            LOG.error("Unknown error converting object to XML", e);
            throw new XmlRuntimeException("Unable to convert to XML", e);
        }
    }

    /**
     * Create a {@link Marshaller} from the given context and apply the given properties to it.
     * Formatted output is enabled unless the caller supplied its own value for
     * {@link Marshaller#JAXB_FORMATTED_OUTPUT}.
     *
     * @param context              the context used to create the marshaller
     * @param marshallerProperties the properties to set on the marshaller; may be null
     * @return the configured marshaller
     * @throws JAXBException if the marshaller cannot be created
     */
    private static Marshaller createMarshaller(JAXBContext context, Map<String, Object> marshallerProperties)
            throws JAXBException {

        var marshaller = context.createMarshaller();

        // Copy into a mutable map so the default can be added without touching the caller's map
        Map<String, Object> properties = isNull(marshallerProperties) ?
                new HashMap<>() : new HashMap<>(marshallerProperties);

        properties.putIfAbsent(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        properties.forEach((k, v) -> setMarshallerProperty(marshaller, k, v));

        return marshaller;
    }

    /**
     * Set a single property on the given marshaller, converting the checked {@link PropertyException}
     * into an {@link XmlRuntimeException}.
     *
     * @param marshaller the marshaller to configure
     * @param key        the property name
     * @param value      the property value
     */
    private static void setMarshallerProperty(Marshaller marshaller, String key, Object value) {
        try {
            marshaller.setProperty(key, value);
        } catch (PropertyException propertyException) {
            LOG.error("Encountered exception setting property: {}, with value: {}", key, value);
            var message = "Unable to configure marshaller properties";
            throw new XmlRuntimeException(message, propertyException);
        }
    }

    /**
     * Wrap the given object in a {@link JAXBElement} whose qualified name is built from the name and
     * namespace returned by {@link #getNameAndNamespace(Class)} for the object's class.
     *
     * @param objectToTranslate the object to wrap
     * @return a new element containing the object
     */
    private static JAXBElement<Object> createJaxbWrappedObject(Object objectToTranslate) {
        var nameAndNamespace = getNameAndNamespace(objectToTranslate.getClass());
        var qName = new QName(nameAndNamespace.get(NAMESPACE_KEY), nameAndNamespace.get(NAME_KEY));

        return new JAXBElement<>(qName, Object.class, objectToTranslate);
    }

    /**
     * Convert the given XML into an object of the specified type, performing namespace validation.
     *
     * @param xml   the XML to convert
     * @param clazz type of object to convert into
     * @param <T>   the target type
     * @return a new instance of the specified type
     */
    public static <T> T toObject(String xml, Class<T> clazz) {
        return toObjectAssertingValid(xml, clazz, NamespaceValidation.YES);
    }

    /**
     * Convert the given XML into an object of the specified type, ignoring any XML namespace information and not
     * performing any namespace validation.
     *
     * @param xml   the XML to convert
     * @param clazz type of object to convert into
     * @param <T>   the target type
     * @return a new instance of the specified type
     */
    public static <T> T toObjectIgnoringNamespace(String xml, Class<T> clazz) {
        return toObjectAssertingValid(xml, clazz, NamespaceValidation.NO, List.of(DISABLE_NAMESPACE_VALIDATION));
    }

    /**
     * Convert the given XML into an object of the specified type, optionally performing namespace validation.
     * The default list of ignored namespaces is used.
     *
     * @param xml                 the XML to convert
     * @param clazz               type of object to convert into
     * @param namespaceValidation should namespace validation be performed or not
     * @param <T>                 the target type
     * @return a new instance of the specified type
     */
    public static <T> T toObjectAssertingValid(String xml,
                                               Class<T> clazz,
                                               NamespaceValidation namespaceValidation) {
        return toObjectAssertingValid(xml, clazz, namespaceValidation, DEFAULT_IGNORED_NAMESPACES);
    }

    /**
     * Convert the given XML into an object of the specified type, performing basic validation and ignoring
     * the specified namespaces.
     * <p>
     * If {@code ignoredNamespaces} contains {@link #DISABLE_NAMESPACE_VALIDATION}, the XML is parsed
     * without namespace awareness.
     *
     * @param xml                 the XML to convert
     * @param clazz               type of object to convert into
     * @param namespaceValidation should namespace validation be performed or not
     * @param ignoredNamespaces   list of namespaces to ignore
     * @param <T>                 the target type
     * @return a new instance of the specified type
     * @throws IllegalArgumentException if xml is blank or clazz is null
     * @throws XmlRuntimeException      if namespace validation fails or the XML cannot be unmarshalled
     */
    public static <T> T toObjectAssertingValid(String xml,
                                               Class<T> clazz,
                                               NamespaceValidation namespaceValidation,
                                               List<String> ignoredNamespaces) {

        checkArgumentNotBlank(xml, "xml cannot be blank");
        checkArgumentNotNull(clazz, "clazz cannot be null");
        try {
            if (namespaceValidation == NamespaceValidation.YES) {
                checkArgument(validateXmlMatchesType(xml, clazz),
                        IllegalArgumentException.class, "XML namespace does not match expected type");
            }
            return tryWithFactory(xml, clazz, ignoredNamespaces);
        } catch (JAXBException e) {
            // Prefer the linked (underlying) exception, but never lose the cause when there is none
            var linked = e.getLinkedException();
            throw newXmlRuntimeException(isNull(linked) ? e : linked, xml);
        } catch (XMLStreamException e) {
            // Same as above: fall back to the stream exception itself when nothing is nested in it
            var nested = e.getNestedException();
            throw newXmlRuntimeException(isNull(nested) ? e : nested, xml);
        } catch (Exception e) {
            throw newXmlRuntimeException(e, xml);
        }
    }

    /**
     * Validate that the given XML has a namespace that matches the given class, which is generally assumed to
     * be annotated with {@link XmlType}. The default list of ignored namespaces is used.
     *
     * @param xml   the input XML to compare
     * @param clazz the {@link Class} to compare; assumed to be annotated with {@link XmlType}
     * @param <T>   the type of the target class
     * @return true if the XML namespace (e.g. xmlns) matches the namespace of the {@link XmlType} annotation
     * on the given class
     * @throws XmlRuntimeException if something bad and unexpected happens. The thrown exception wraps a
     *                             {@link XMLStreamException} or other cause.
     */
    public static <T> boolean validateXmlMatchesType(String xml, Class<T> clazz) {
        return validateXmlMatchesType(xml, clazz, DEFAULT_IGNORED_NAMESPACES);
    }

    /**
     * Validate that the given XML has a namespace that matches the given class, which is generally assumed to
     * be annotated with {@link XmlType}, but ignoring the given list of namespaces.
     * <p>
     * Blank or ignored namespaces are treated as null. Validation passes when both namespaces are equal, and
     * is skipped (also returning true) when either one is blank or ignored.
     *
     * @param xml               the input XML to compare
     * @param clazz             the {@link Class} to compare; assumed to be annotated with {@link XmlType}
     * @param ignoredNamespaces the namespaces to ignore
     * @param <T>               the type of the target class
     * @return true if the XML namespace (e.g. xmlns) matches the namespace of the {@link XmlType} annotation
     * on the given class
     * @throws XmlRuntimeException if something bad and unexpected happens. The thrown exception wraps a
     *                             {@link XMLStreamException} or other cause.
     */
    public static <T> boolean validateXmlMatchesType(String xml, Class<T> clazz, List<String> ignoredNamespaces) {
        checkArgumentNotBlank(xml);
        checkArgumentNotNull(clazz);
        checkArgumentNotNull(ignoredNamespaces);

        String xmlns;
        String classNamespace;
        try {
            // Namespace of the XML document's root element
            var xmlNamespaceURI = blankToNull(getRootQualifiedName(xml).getNamespaceURI());
            xmlns = filterIgnoredNamespaces(ignoredNamespaces, xmlNamespaceURI);

            // Namespace declared on (or defaulted for) the target class
            var clazzNamespace = blankToNull(getNameAndNamespace(clazz).get(NAMESPACE_KEY));
            classNamespace = filterIgnoredNamespaces(ignoredNamespaces, clazzNamespace);

            if (StringUtils.equals(xmlns, classNamespace)) {
                LOG.trace("Return true for xmlns: {} and classNamespace: {}", xmlns, classNamespace);
                return true;
            }

            if (isBlank(xmlns) || isBlank(classNamespace)) {
                LOG.info("Skipping validation of namespace for class: {}, with namespace: '{}', and XML with namespace: '{}'." +
                                " One or both are blank or ignored.",
                        clazz.getName(), classNamespace, xmlns);
                return true;
            }
        } catch (Exception e) {
            throw new XmlRuntimeException(e);
        }

        LOG.warn("XML root element with namespace: '{}' does not match expected namespace: '{}', of class: '{}'",
                xmlns, classNamespace, clazz.getName());
        return false;
    }

    /**
     * Find the qualified name of the first start element (i.e. the root element) in the given XML.
     *
     * @param xml the XML to inspect
     * @return the qualified name of the root element
     * @throws XMLStreamException  if the XML cannot be read
     * @throws XmlRuntimeException if no start element is found
     */
    private static QName getRootQualifiedName(String xml) throws XMLStreamException {
        XMLStreamReader reader = null;  // XMLStreamReader is not AutoCloseable, so it is closed in finally

        try (var xmlSource = new StringReader(xml)) {
            reader = newSecureXMLInputFactory().createXMLStreamReader(xmlSource);

            // Advance through the events until the first START_ELEMENT, which is the root element
            while (reader.hasNext()) {
                if (reader.next() == XMLStreamConstants.START_ELEMENT) {
                    return reader.getName();
                }
            }

            throw new XmlRuntimeException("Unable to determine root element namespace");
        } finally {
            KiwiIO.closeQuietly(reader);
        }
    }

    /**
     * Return the given namespace, or null if it is null or contained in the list of ignored namespaces.
     *
     * @param ignoredNamespaces the namespaces to ignore
     * @param namespace         the namespace to check; may be null
     * @return the namespace, or null if it should be ignored
     */
    private static String filterIgnoredNamespaces(List<String> ignoredNamespaces, String namespace) {
        var shouldIgnore = isNull(namespace) || ignoredNamespaces.contains(namespace);
        if (shouldIgnore) {
            LOG.debug("Ignoring namespace: '{}', treating as null", namespace);
        }
        return shouldIgnore ? null : namespace;
    }

    /**
     * Unmarshal the given XML into an instance of the given class using a StAX reader created from a
     * secure {@link XMLInputFactory}. The reader is namespace-aware unless the ignored namespaces contain
     * {@link #DISABLE_NAMESPACE_VALIDATION}.
     *
     * @param xml               the XML to convert
     * @param clazz             type of object to convert into
     * @param ignoredNamespaces list of namespaces to ignore
     * @param <T>               the target type
     * @return a new instance of the specified type
     * @throws JAXBException      if unmarshalling fails
     * @throws XMLStreamException if the XML stream reader cannot be created
     */
    private static <T> T tryWithFactory(String xml, Class<T> clazz, List<String> ignoredNamespaces)
            throws JAXBException, XMLStreamException {

        XMLStreamReader xmlStreamReader = null;  // not AutoCloseable; cannot use try-with-resources

        try (var stringReader = new StringReader(xml)) {
            var jaxbContext = getJaxbContext(clazz);
            var unmarshaller = jaxbContext.createUnmarshaller();
            var xmlInputFactory = newSecureXMLInputFactory();
            xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, isNamespaceAware(ignoredNamespaces));
            xmlStreamReader = xmlInputFactory.createXMLStreamReader(stringReader);
            JAXBElement<T> rootElement = unmarshaller.unmarshal(xmlStreamReader, clazz);
            return rootElement.getValue();
        } finally {
            KiwiIO.closeQuietly(xmlStreamReader);
        }
    }

    /**
     * Get the cached {@link JAXBContext} for the given class, creating and caching one if none exists yet.
     *
     * @param clazz the class to get a context for
     * @return the context for the class
     */
    private static synchronized JAXBContext getJaxbContext(Class<?> clazz) {
        return jaxbContextMap.computeIfAbsent(clazz, KiwiXml::newJaxbContext);
    }

    /**
     * Create a new {@link JAXBContext} for the given class, converting the checked {@link JAXBException}
     * into an unchecked one so that this method can be used as a method reference.
     *
     * @param clazz the class to create a context for
     * @return a new context
     * @throws UncheckedJAXBException if the context cannot be created
     */
    private static JAXBContext newJaxbContext(Class<?> clazz) {
        try {
            return JAXBContext.newInstance(clazz);
        } catch (JAXBException e) {
            throw new UncheckedJAXBException("Error creating JAXBContext for " + clazz, e);
        }
    }

    /**
     * Create a new {@link XMLInputFactory} with external entity processing disabled, per Sonar rule
     * java:S2755 (XML parsers should not be vulnerable to XXE attacks).
     *
     * @return a new factory that does not support external entities
     */
    private static XMLInputFactory newSecureXMLInputFactory() {
        var secureFactory = XMLInputFactory.newInstance();
        secureFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        return secureFactory;
    }

    /**
     * Determine whether XML parsing should be namespace-aware.
     *
     * @param ignoredNamespaces list of namespaces to ignore
     * @return false if the list contains {@link #DISABLE_NAMESPACE_VALIDATION}, otherwise true
     */
    private static boolean isNamespaceAware(List<String> ignoredNamespaces) {
        return !ignoredNamespaces.contains(DISABLE_NAMESPACE_VALIDATION);
    }

    /**
     * Build the exception thrown when unmarshalling fails, logging the offending XML (abbreviated to
     * 256 characters) at trace level.
     *
     * @param cause the underlying cause of the failure
     * @param xml   the XML that could not be unmarshalled
     * @return a new exception wrapping the cause
     */
    private static XmlRuntimeException newXmlRuntimeException(Throwable cause, String xml) {
        // Evaluated lazily so the XML is only abbreviated when trace logging is actually enabled
        var abbreviatedXml = lazy(() -> abbreviate(xml, 256));
        LOG.trace("Encountered error trying to unmarshal XML: {}", abbreviatedXml);

        return new XmlRuntimeException("Unable to unmarshal XML", cause);
    }

    /**
     * Extract XML name and namespace from the given class, assuming it is annotated with {@link XmlType}.
     * <p>
     * If the given class is not annotated with {@link XmlType}, we return the namespace as {@code null}
     * and the name as the "simple name" of the class from {@link Class#getSimpleName()}. This allows some degree
     * of flexibility when working with non-annotated classes, though this should be the exception not the normal
     * situation.
     *
     * @param objectClass the class to get name and namespace from
     * @return a map containing entries for name and namespace
     * @see XmlType#name()
     * @see XmlType#namespace()
     */
    public static Map<String, String> getNameAndNamespace(Class<?> objectClass) {
        checkArgumentNotNull(objectClass);

        return Optional.ofNullable(objectClass.getAnnotation(XmlType.class))
                .map(KiwiXml::getNameAndNamespace)
                .orElseGet(() -> newHashMap(
                        NAMESPACE_KEY, null,
                        NAME_KEY, objectClass.getSimpleName()
                ));
    }

    /**
     * Build the name/namespace map from the values of the given {@link XmlType} annotation.
     *
     * @param xmlTypeAnnotation the annotation to read the name and namespace from
     * @return a map containing entries for name and namespace
     */
    private static Map<String, String> getNameAndNamespace(XmlType xmlTypeAnnotation) {
        return newHashMap(
                NAMESPACE_KEY, xmlTypeAnnotation.namespace(),
                NAME_KEY, xmlTypeAnnotation.name()
        );
    }

    /**
     * Removes tags from the given XML but ignoring namespaces, i.e. a tag is removed whether or not
     * it carries a namespace prefix.
     *
     * @param xml          the XML containing tags to be removed
     * @param tagsToRemove names of the tags to remove
     * @return XML with the given tags removed
     */
    public static String stripTags(String xml, String... tagsToRemove) {
        // Allow an optional "prefix:" in front of each tag name
        var namespacedTagsToRemove = Stream.of(tagsToRemove)
                .map(tag -> PREFIX_MATCH + tag)
                .toArray(String[]::new);

        return stripTagsConsideringNamespace(xml, namespacedTagsToRemove);
    }

    /**
     * Removes tags from the given XML taking into account the full tag name (i.e. possibly including namespace).
     * <p>
     * Each tag name is used as part of a regular expression that matches from the opening tag through the
     * closing tag. Matching is greedy: if a tag occurs more than once, everything from its first opening tag
     * through its last closing tag is removed. Opening tags that have attributes are not matched.
     *
     * @param xml          the XML containing tags to be removed
     * @param tagsToRemove names of the tags to remove
     * @return XML with the given tags removed
     */
    public static String stripTagsConsideringNamespace(String xml, String... tagsToRemove) {
        return Arrays.stream(tagsToRemove)
                .reduce(xml, (accumulatedXml, tagToRemove) -> {
                    // Match the whole element: opening tag, any content, and the closing tag
                    var tagRegex = "<" + tagToRemove + ">[\\s\\S\\w\\W]*</" + tagToRemove + ">";
                    return accumulatedXml.replaceAll(tagRegex, "");
                });
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy