All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.helger.schematron.SchematronHelper Maven / Gradle / Ivy

There is a newer version: 5.6.5
Show newest version
/**
 * Copyright (C) 2014-2020 Philip Helger (www.helger.com)
 * philip[at]helger[dot]com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.helger.schematron;

import java.io.IOException;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.concurrent.Immutable;
import javax.xml.transform.Source;
import javax.xml.transform.dom.DOMSource;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;

import com.helger.commons.ValueEnforcer;
import com.helger.commons.annotation.Nonempty;
import com.helger.commons.annotation.PresentForCodeCoverage;
import com.helger.commons.annotation.ReturnsMutableCopy;
import com.helger.commons.collection.impl.CommonsArrayList;
import com.helger.commons.collection.impl.ICommonsList;
import com.helger.commons.error.SingleError;
import com.helger.commons.error.list.ErrorList;
import com.helger.commons.error.list.IErrorList;
import com.helger.commons.hierarchy.visit.ChildrenProviderHierarchyVisitor;
import com.helger.commons.hierarchy.visit.DefaultHierarchyVisitorCallback;
import com.helger.commons.hierarchy.visit.EHierarchyVisitorReturn;
import com.helger.commons.io.resource.IReadableResource;
import com.helger.commons.location.SimpleLocation;
import com.helger.commons.state.ESuccess;
import com.helger.commons.string.StringHelper;
import com.helger.commons.wrapper.Wrapper;
import com.helger.schematron.pure.errorhandler.IPSErrorHandler;
import com.helger.schematron.pure.errorhandler.LoggingPSErrorHandler;
import com.helger.schematron.resolve.DefaultSchematronIncludeResolver;
import com.helger.schematron.svrl.SVRLFailedAssert;
import com.helger.schematron.svrl.SVRLHelper;
import com.helger.schematron.svrl.SVRLResourceError;
import com.helger.schematron.svrl.jaxb.SchematronOutputType;
import com.helger.xml.microdom.IMicroDocument;
import com.helger.xml.microdom.IMicroElement;
import com.helger.xml.microdom.IMicroNode;
import com.helger.xml.microdom.serialize.MicroReader;
import com.helger.xml.sax.InputSourceFactory;
import com.helger.xml.serialize.read.ISAXReaderSettings;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

/**
 * This is a helper class that provides a way to easily apply an Schematron
 * resource on an XML resource.
 *
 * @author Philip Helger
 */
@Immutable
public final class SchematronHelper
{
  private static final Logger LOGGER = LoggerFactory.getLogger (SchematronHelper.class);

  // This instance is not referenced anywhere else in this class; presumably it
  // exists only so that the private constructor below is executed once (see
  // the @PresentForCodeCoverage marker).
  @PresentForCodeCoverage
  private static final SchematronHelper s_aInstance = new SchematronHelper ();

  // Private constructor: all functionality of this class is exposed via static
  // methods, so it is not meant to be instantiated from outside.
  private SchematronHelper ()
  {}

  /**
   * Tell whether the passed namespace URI is the deprecated Schematron
   * namespace URI.
   *
   * @param sNamespaceURI
   *        The namespace URI to check. May be {@code null}.
   * @return {@code true} only if the passed value equals
   *         {@link CSchematron#DEPRECATED_NAMESPACE_SCHEMATRON},
   *         {@code false} for everything else (including {@code null}).
   * @since 5.4.1
   */
  public static boolean isDeprecatedSchematronNS (@Nullable final String sNamespaceURI)
  {
    // The constant is the receiver, so a null argument simply yields false
    return CSchematron.DEPRECATED_NAMESPACE_SCHEMATRON.equals (sNamespaceURI);
  }

  /**
   * Tell whether the passed namespace URI is an accepted Schematron namespace
   * URI.
   *
   * @param sNamespaceURI
   *        The namespace URI to check. May be {@code null}.
   * @param bLenient
   *        {@code true} to additionally accept the deprecated namespace URI
   *        (see {@link #isDeprecatedSchematronNS(String)}), {@code false} to
   *        accept only {@link CSchematron#NAMESPACE_SCHEMATRON}.
   * @return {@code true} if the passed namespace URI is accepted,
   *         {@code false} otherwise (including {@code null}).
   * @since 5.4.1
   */
  public static boolean isValidSchematronNS (@Nullable final String sNamespaceURI, final boolean bLenient)
  {
    final boolean bIsCurrentNS = CSchematron.NAMESPACE_SCHEMATRON.equals (sNamespaceURI);
    // The deprecated namespace is only consulted in lenient mode
    return bIsCurrentNS || (bLenient && isDeprecatedSchematronNS (sNamespaceURI));
  }

  /**
   * Get a list of all supported namespace URIs.
   *
   * @param bLenient
   *        {@code true} to also include the deprecated namespace URI,
   *        {@code false} to return only the current one.
   * @return A new, mutable list that always contains
   *         {@link CSchematron#NAMESPACE_SCHEMATRON} as first entry and, in
   *         lenient mode, {@link CSchematron#DEPRECATED_NAMESPACE_SCHEMATRON}
   *         as second entry. Never {@code null} nor empty.
   * @since 5.4.1
   */
  @Nonnull
  @Nonempty
  @ReturnsMutableCopy
  public static ICommonsList <String> getAllValidSchematronNS (final boolean bLenient)
  {
    // Explicit element type instead of a raw list; at most 2 entries are added
    final ICommonsList <String> ret = new CommonsArrayList <> (2);
    ret.add (CSchematron.NAMESPACE_SCHEMATRON);
    if (bLenient)
      ret.add (CSchematron.DEPRECATED_NAMESPACE_SCHEMATRON);
    return ret;
  }

  /**
   * Apply the passed Schematron on the passed XML resource.
   *
   * @param aSchematron
   *        The Schematron resource. May not be {@code null}.
   * @param aXML
   *        The XML resource. May not be {@code null}.
   * @return The result of
   *         {@link ISchematronResource#applySchematronValidationToSVRL(IReadableResource)}.
   *         May be {@code null} - according to the contract of this method
   *         that is the case if either the Schematron or the XML could not be
   *         read.
   * @throws IllegalArgumentException
   *         if applying the Schematron throws any exception. The original
   *         exception is attached as the cause.
   */
  @Nullable
  public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron, @Nonnull final IReadableResource aXML)
  {
    ValueEnforcer.notNull (aSchematron, "SchematronResource");
    ValueEnforcer.notNull (aXML, "XMLSource");

    final SchematronOutputType aSVRL;
    try
    {
      // Run the validation and keep the SVRL result
      aSVRL = aSchematron.applySchematronValidationToSVRL (aXML);
    }
    catch (final Exception aCause)
    {
      // Wrap whatever went wrong, naming both participating resources
      final String sMsg = "Failed to apply Schematron " + aSchematron.getID () + " onto XML resource " + aXML.getResourceID ();
      throw new IllegalArgumentException (sMsg, aCause);
    }
    return aSVRL;
  }

  /**
   * Apply the passed Schematron on the passed XML source.
   *
   * @param aSchematron
   *        The Schematron resource. May not be {@code null}.
   * @param aXML
   *        The XML source. May not be {@code null}.
   * @return The result of
   *         {@link ISchematronResource#applySchematronValidationToSVRL(Source)}.
   *         May be {@code null} - according to the contract of this method
   *         that is the case if either the Schematron or the XML could not be
   *         read.
   * @throws IllegalArgumentException
   *         if applying the Schematron throws any exception. The original
   *         exception is attached as the cause.
   */
  @Nullable
  public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron, @Nonnull final Source aXML)
  {
    ValueEnforcer.notNull (aSchematron, "SchematronResource");
    ValueEnforcer.notNull (aXML, "XMLSource");

    final SchematronOutputType aSVRL;
    try
    {
      // Run the validation and keep the SVRL result
      aSVRL = aSchematron.applySchematronValidationToSVRL (aXML);
    }
    catch (final Exception aCause)
    {
      // Wrap whatever went wrong, naming the Schematron and the XML source
      final String sMsg = "Failed to apply Schematron " + aSchematron.getID () + " onto XML source " + aXML;
      throw new IllegalArgumentException (sMsg, aCause);
    }
    return aSVRL;
  }

  /**
   * Apply the passed Schematron on the passed XML node. The node is wrapped
   * in a {@link DOMSource} and handed to
   * {@link #applySchematron(ISchematronResource, Source)}.
   *
   * @param aSchematron
   *        The Schematron resource. May not be {@code null}.
   * @param aNode
   *        The XML node. May not be {@code null}.
   * @return The SVRL result of the delegated call. May be {@code null} -
   *         according to the contract of this method that is the case if
   *         either the Schematron or the XML could not be read.
   * @throws IllegalArgumentException
   *         if applying the Schematron throws any exception (raised by the
   *         delegated call).
   */
  @Nullable
  public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron, @Nonnull final Node aNode)
  {
    ValueEnforcer.notNull (aSchematron, "SchematronResource");
    ValueEnforcer.notNull (aNode, "Node");

    // Declared as Source so that the Source based overload is selected
    final Source aDOMSource = new DOMSource (aNode);
    return applySchematron (aSchematron, aDOMSource);
  }

  /**
   * Convert a {@link SchematronOutputType} to an {@link IErrorList}. Every
   * failed assertion returned by
   * {@link SVRLHelper#getAllFailedAssertions(SchematronOutputType)} is turned
   * into one resource error.
   *
   * @param aSchematronOutput
   *        The result of a Schematron validation. May not be {@code null}.
   * @param sResourceName
   *        The name of the resource that was validated (may be a file path
   *        etc.). May be {@code null}.
   * @return A non-{@code null} error list of {@link SVRLResourceError}
   *         objects. It is empty if no failed assertion is contained.
   */
  @Nonnull
  public static IErrorList convertToErrorList (@Nonnull final SchematronOutputType aSchematronOutput, @Nullable final String sResourceName)
  {
    ValueEnforcer.notNull (aSchematronOutput, "SchematronOutput");

    final ErrorList aErrors = new ErrorList ();
    for (final SVRLFailedAssert aAssert : SVRLHelper.getAllFailedAssertions (aSchematronOutput))
    {
      // One error entry per failed assertion, bound to the resource name
      aErrors.add (aAssert.getAsResourceError (sResourceName));
    }
    return aErrors;
  }

  /**
   * Search the passed element and all of its descendants for an element whose
   * "id" attribute equals the passed value.
   *
   * @param aStartElement
   *        The element to start searching at. It is part of the search. May
   *        not be {@code null}.
   * @param sID
   *        The ID value to search. May not be {@code null}.
   * @return {@code null} if no element carries the ID. As the traversal is
   *         never stopped early, the last visited element with a matching ID
   *         is returned if more than one matches.
   */
  @Nullable
  private static IMicroElement _findElementWithID (@Nonnull final IMicroElement aStartElement, @Nonnull final String sID)
  {
    final Wrapper <IMicroElement> aMatch = new Wrapper <> ();
    // The trailing "true" also includes the start element in the search
    ChildrenProviderHierarchyVisitor.visitFrom (aStartElement, new DefaultHierarchyVisitorCallback <IMicroNode> ()
    {
      @Override
      public EHierarchyVisitorReturn onItemBeforeChildren (final IMicroNode aItem)
      {
        if (aItem.isElement ())
        {
          final IMicroElement aCurElement = (IMicroElement) aItem;
          if (sID.equals (aCurElement.getAttributeValue ("id")))
            aMatch.set (aCurElement);
        }
        return EHierarchyVisitorReturn.CONTINUE;
      }
    }, true);
    return aMatch.get ();
  }

  /**
   * Replace every Schematron "include" element below the passed root element
   * with the content it references. The included content is itself processed
   * recursively before it is inserted.
   *
   * @param eRoot
   *        The element whose descendants are scanned for include elements. If
   *        it is {@code null} nothing is done.
   * @param aResource
   *        The resource the root element was read from. It is the base for
   *        resolving the "href" values and the location of reported errors.
   * @param aSettings
   *        The SAX reader settings used to read included documents. May be
   *        {@code null}.
   * @param aErrorHandler
   *        The error handler that receives all errors and warnings. May not
   *        be {@code null}.
   * @param bLenient
   *        {@code true} to also accept the deprecated Schematron namespace
   *        URI.
   * @return {@link ESuccess#FAILURE} if an include has no "href", cannot be
   *         resolved, read or parsed, or if a nested include fails;
   *         {@link ESuccess#SUCCESS} otherwise.
   */
  @SuppressFBWarnings ("RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
  @Nonnull
  private static ESuccess _recursiveResolveAllSchematronIncludes (@Nonnull final IMicroElement eRoot,
                                                                  @Nonnull final IReadableResource aResource,
                                                                  @Nullable final ISAXReaderSettings aSettings,
                                                                  @Nonnull final IPSErrorHandler aErrorHandler,
                                                                  final boolean bLenient)
  {
    // Defensive check - nothing to resolve without a root element
    if (eRoot == null)
      return ESuccess.SUCCESS;

    final DefaultSchematronIncludeResolver aIncludeResolver = new DefaultSchematronIncludeResolver (aResource);

    for (final IMicroElement aElement : eRoot.getAllChildElementsRecursive ())
    {
      // Only Schematron "include" elements are of interest
      if (!isValidSchematronNS (aElement.getNamespaceURI (), bLenient) ||
          !CSchematronXML.ELEMENT_INCLUDE.equals (aElement.getLocalName ()))
        continue;

      String sHref = aElement.getAttributeValue (CSchematronXML.ATTR_HREF);
      if (sHref == null)
      {
        // Without this check the missing attribute ends in a
        // NullPointerException instead of a reported error
        aErrorHandler.handleError (SingleError.builderError ()
                                              .setErrorLocation (new SimpleLocation (aResource.getPath ()))
                                              .setErrorText ("The include element is missing the required '" +
                                                             CSchematronXML.ATTR_HREF +
                                                             "' attribute")
                                              .build ());
        return ESuccess.FAILURE;
      }

      try
      {
        // Split "file#anchor" into the file part and the optional anchor
        final int nHashIndex = sHref.indexOf ('#');
        String sAnchor = null;
        if (nHashIndex >= 0)
        {
          sAnchor = sHref.substring (nHashIndex + 1);
          sHref = sHref.substring (0, nHashIndex);
        }

        final IReadableResource aIncludeRes = aIncludeResolver.getResolvedSchematronResource (sHref);
        if (aIncludeRes == null)
        {
          aErrorHandler.handleError (SingleError.builderError ()
                                                .setErrorLocation (new SimpleLocation (aResource.getPath ()))
                                                .setErrorText ("Failed to resolve include '" + sHref + "'")
                                                .build ());
          return ESuccess.FAILURE;
        }

        if (LOGGER.isDebugEnabled ())
          LOGGER.debug ("Resolved '" +
                        sHref +
                        "' relative to '" +
                        aIncludeResolver.getBaseHref () +
                        "' as '" +
                        aIncludeRes.getPath () +
                        "'");

        // Read XML to be included
        final IMicroDocument aIncludedDoc = MicroReader.readMicroXML (aIncludeRes, aSettings);
        if (aIncludedDoc == null)
        {
          aErrorHandler.handleError (SingleError.builderError ()
                                                .setErrorLocation (new SimpleLocation (aResource.getPath ()))
                                                .setErrorText ("Failed to parse include " + aIncludeRes)
                                                .build ());
          return ESuccess.FAILURE;
        }

        IMicroElement aIncludedContent;
        if (sAnchor == null)
        {
          // No anchor present - include the whole document
          aIncludedContent = aIncludedDoc.getDocumentElement ();
        }
        else
        {
          // Anchor present - include only the element with the matching ID
          aIncludedContent = _findElementWithID (aIncludedDoc.getDocumentElement (), sAnchor);
          if (aIncludedContent == null)
          {
            // Only a warning: fall back to the whole document
            aErrorHandler.handleError (SingleError.builderWarn ()
                                                  .setErrorLocation (new SimpleLocation (aResource.getPath ()))
                                                  .setErrorText ("Failed to resolve an element with the ID '" +
                                                                 sAnchor +
                                                                 "' in " +
                                                                 aIncludeRes +
                                                                 "! Therefore including the whole document!")
                                                  .build ());
            aIncludedContent = aIncludedDoc.getDocumentElement ();
          }
        }

        // Important to detach from parent!
        aIncludedContent.detachFromParent ();

        // Note: the namespace URI of the included content is deliberately not
        // enforced - it is okay to include something that is not Schematron

        // Check that not a whole Schema but only a part is included
        if (isValidSchematronNS (aIncludedContent.getNamespaceURI (), bLenient) &&
            CSchematronXML.ELEMENT_SCHEMA.equals (aIncludedContent.getLocalName ()))
        {
          aErrorHandler.handleError (SingleError.builderWarn ()
                                                .setErrorLocation (new SimpleLocation (aResource.getPath ()))
                                                .setErrorText ("The included resource " +
                                                               aIncludeRes +
                                                               " seems to be a complete schema. To includes parts of a schema the respective element must be the root element of the included resource.")
                                                .build ());
        }

        // Recursive resolve includes, relative to the included resource
        if (_recursiveResolveAllSchematronIncludes (aIncludedContent, aIncludeRes, aSettings, aErrorHandler, bLenient).isFailure ())
          return ESuccess.FAILURE;

        // Now replace "include" element with content in MicroDOM
        aElement.getParent ().replaceChild (aElement, aIncludedContent);
      }
      catch (final IOException ex)
      {
        aErrorHandler.handleError (SingleError.builderError ()
                                              .setErrorLocation (new SimpleLocation (aResource.getPath ()))
                                              .setErrorText ("Failed to read include '" + sHref + "'")
                                              .setLinkedException (ex)
                                              .build ());
        return ESuccess.FAILURE;
      }
    }
    return ESuccess.SUCCESS;
  }

  /**
   * Resolve all Schematron includes of the passed resource using no special
   * SAX reader settings, a new {@link LoggingPSErrorHandler} and
   * {@link CSchematron#DEFAULT_ALLOW_DEPRECATED_NAMESPACES} as lenient flag.
   *
   * @param aResource
   *        The Schematron resource to read. May not be {@code null}.
   * @return {@code null} if the passed resource could not be read as XML
   *         document or if resolving the includes failed.
   */
  @Nullable
  public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource)
  {
    // null means: no custom SAX reader settings
    final ISAXReaderSettings aNoSettings = null;
    final IPSErrorHandler aLoggingHandler = new LoggingPSErrorHandler ();
    return getWithResolvedSchematronIncludes (aResource,
                                              aNoSettings,
                                              aLoggingHandler,
                                              CSchematron.DEFAULT_ALLOW_DEPRECATED_NAMESPACES);
  }

  /**
   * Resolve all Schematron includes of the passed resource using no special
   * SAX reader settings and a new {@link LoggingPSErrorHandler}.
   *
   * @param aResource
   *        The Schematron resource to read. May not be {@code null}.
   * @param bLenient
   *        {@code true} if the 'old' Schematron namespace URI is tolerated.
   * @return {@code null} if the passed resource could not be read as XML
   *         document or if resolving the includes failed.
   * @since 5.4.1
   */
  @Nullable
  public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource, final boolean bLenient)
  {
    // null means: no custom SAX reader settings
    final ISAXReaderSettings aNoSettings = null;
    final IPSErrorHandler aLoggingHandler = new LoggingPSErrorHandler ();
    return getWithResolvedSchematronIncludes (aResource, aNoSettings, aLoggingHandler, bLenient);
  }

  /**
   * Resolve all Schematron includes of the passed resource using
   * {@link CSchematron#DEFAULT_ALLOW_DEPRECATED_NAMESPACES} as lenient flag.
   *
   * @param aResource
   *        The Schematron resource to read. May not be {@code null}.
   * @param aSettings
   *        The SAX reader settings to be used. May be {@code null} to use
   *        the default settings.
   * @param aErrorHandler
   *        The error handler to be used. May not be {@code null}.
   * @return {@code null} if the passed resource could not be read as XML
   *         document or if resolving the includes failed.
   */
  @Nullable
  public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource,
                                                                  @Nullable final ISAXReaderSettings aSettings,
                                                                  @Nonnull final IPSErrorHandler aErrorHandler)
  {
    final boolean bLenient = CSchematron.DEFAULT_ALLOW_DEPRECATED_NAMESPACES;
    return getWithResolvedSchematronIncludes (aResource, aSettings, aErrorHandler, bLenient);
  }

  /**
   * Read the passed resource as XML and resolve all Schematron includes
   * contained in it.
   *
   * @param aResource
   *        The Schematron resource to read. May not be {@code null}.
   * @param aSettings
   *        The SAX reader settings to be used. May be {@code null} to use
   *        the default settings.
   * @param aErrorHandler
   *        The error handler to be used. May not be {@code null}.
   * @param bLenient
   *        {@code true} if the 'old' Schematron namespace URI is tolerated.
   * @return {@code null} if the passed resource could not be read as XML
   *         document or if resolving the includes failed. Otherwise the read
   *         document with all includes replaced by their content.
   * @since 5.4.1
   */
  @Nullable
  public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource,
                                                                  @Nullable final ISAXReaderSettings aSettings,
                                                                  @Nonnull final IPSErrorHandler aErrorHandler,
                                                                  final boolean bLenient)
  {
    final InputSource aInputSource = InputSourceFactory.create (aResource);
    final IMicroDocument aDocument = MicroReader.readMicroXML (aInputSource, aSettings);
    if (aDocument == null)
    {
      // Not readable as XML
      return null;
    }

    // Resolve all Schematron includes, starting at the document element
    final ESuccess eResolved = _recursiveResolveAllSchematronIncludes (aDocument.getDocumentElement (),
                                                                       aResource,
                                                                       aSettings,
                                                                       aErrorHandler,
                                                                       bLenient);
    if (eResolved.isFailure ())
    {
      // Error resolving includes
      return null;
    }
    return aDocument;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy