// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// org.magicwerk.brownies.jdom.XmlProcessor Maven / Gradle / Ivy

/*
 * Copyright 2013 by Thomas Mauch
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * $Id$
 */
package org.magicwerk.brownies.jdom;

import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.function.UnaryOperator;

import javax.xml.XMLConstants;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.ErrorListener;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.ValidatorHandler;

import org.apache.xerces.util.XMLCatalogResolver;
import org.apache.xml.resolver.CatalogManager;
import org.magicwerk.brownies.collections.GapList;
import org.magicwerk.brownies.collections.IList;
import org.magicwerk.brownies.core.StreamTools;
import org.magicwerk.brownies.core.cache.ThreadCacheMap;
import org.magicwerk.brownies.core.conditional.ConditionalOperator;
import org.magicwerk.brownies.core.exceptions.MultiException;
import org.magicwerk.brownies.core.exceptions.WrapperException;
import org.magicwerk.brownies.core.files.FilePath;
import org.magicwerk.brownies.core.files.PathTools;
import org.magicwerk.brownies.core.net.NetTools;
import org.magicwerk.brownies.core.reflect.ReflectTools;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;

import ch.qos.logback.classic.Logger;

/**
 * Class {@link XmlProcessor} is the integration point for all XML related functionality.
 * It can handle catalogs, resolvers, etc. for creating readers, doing validation or transformations.
 */
public class XmlProcessor {
	// --- Types ---

	/**
	 * Type of validation to perform upon XML document.
	 * One of NONE, DTD, XSD, INTERNAL, AUTO.
	 */
	public enum Validation {
		/** No validation, however external DTDs are read */
		NONE,
		/** Validate against DTD (the SAX parser is created as validating parser) */
		DTD,
		/** Validate against XSD (the SAX parser is created as validating parser using the W3C XML Schema language) */
		XSD,
		/** No validation is done, also any external DTD is not read (loading of external DTDs is switched off on the reader) */
		INTERNAL,
		/** First try to validate against XSD; only if this fails, validate against DTD */
		AUTO
	}

	/**
	 * Defines what happens with an external entity which could not be resolved by the
	 * resolver operator, the classpath lookup, or the catalogs.
	 */
	public enum DefaultResolverMode {
		/** Unresolved entities are left to the default resolving of the parser, for local files as well as for remote URIs */
		ALLOW_DEFAULT_RESOLVER_FOR_FILES_AND_URIS,
		/**
		 * Unresolved entities are left to the default resolving of the parser, except if the system id
		 * is a URI with a scheme other than "file": then resolving fails with an exception
		 */
		ALLOW_DEFAULT_RESOLVER_FOR_FILES,
		/**
		 * Resolving an unresolved entity always fails with an exception.
		 * Note the lower-case 'l' in the constant name; renaming it would break existing callers.
		 */
		DISALLOW_DEFAULT_RESOlVER
	}

	// --- Fields ---

	/** Logger of this class */
	static final Logger LOG = (Logger) LoggerFactory.getLogger(XmlProcessor.class);

	/** Error listener for Transformer and TransformerFactory (stateless, therefore shared) */
	private static final ErrorListener xmlErrorListener = new XmlErrorListener();

	/** Prefix of URLs which reference a resource on the classpath */
	static final String CLASSPATH_URL = "classpath://";

	/** JAXP property selecting the schema language used for validation */
	static final String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
	/** Value of {@link #JAXP_SCHEMA_LANGUAGE} selecting W3C XML Schema */
	static final String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema";
	/** JAXP property specifying the schema source used for validation */
	static final String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";

	/**
	 * Resolver to use: maps a system id to the URL to load instead.
	 * A null result means "not handled", an empty string results in an empty source.
	 */
	private UnaryOperator<String> resolver = new ConditionalOperator().setDefaultResult(null);
	/** Catalogs to use */
	private IList<String> catalogs = GapList.create();

	/** Defines whether unresolved entities may be handed to the default resolving of the parser */
	private DefaultResolverMode defaultResolverMode = DefaultResolverMode.ALLOW_DEFAULT_RESOLVER_FOR_FILES_AND_URIS;

	/** Log verbose information about using the catalog */
	private boolean catalogVerbose = false;

	//-- State
	/** Catalog resolver (must be rebuilt if catalogs change) */
	private XMLCatalogResolver catalogResolver;

	// --- Methods ---

	/**
	 * Build an InputSource which reads a resource from the classpath.
	 * The resource is looked up with the class loader of {@link XmlSource}.
	 *
	 * @param name	name of resource in classpath
	 * @return		InputSource wrapping the stream of the resource
	 */
	public static InputSource getInputSourceForClasspath(String name) {
		ClassLoader loader = XmlSource.class.getClassLoader();
		return new InputSource(loader.getResourceAsStream(name));
	}

	/**
	 * Build an InputSource for the given location.
	 * Locations starting with "classpath://" are read from the classpath, everything else
	 * is passed to the InputSource as system id ("file:", "http:", or plain file path).
	 *
	 * @param url	URL of type file path, "file:", "classpath:", and "http:"
	 * @return		created InputSource
	 */
	public static InputSource getInputSource(String url) {
		if (url.startsWith(CLASSPATH_URL)) {
			String resourceName = url.substring(CLASSPATH_URL.length());
			return getInputSourceForClasspath(resourceName);
		}

		// InputSource should support plain file paths (without "file:" or "http:"), but it fails if they
		// contain special characters like umlauts. So anything which is not a valid URL is converted to a file URI.
		String systemId;
		try {
			NetTools.getUrl(url);
			systemId = url;
		} catch (Exception e) {
			systemId = NetTools.getFileUriString(url);
		}
		return new InputSource(systemId);
	}

	/**
	 * Build an InputSource which reads the given string.
	 *
	 * @param content	XML content to read
	 * @return			InputSource reading from the string
	 */
	public static InputSource getInputSourceForString(String content) {
		return new InputSource(StreamTools.getReader(content));
	}

	/**
	 * Build a StreamSource which reads a resource from the classpath.
	 * The resource is looked up with the class loader of {@link XmlSource}.
	 *
	 * @param name	name of resource in classpath
	 * @return		StreamSource wrapping the stream of the resource
	 */
	public static StreamSource getStreamSourceForClasspath(String name) {
		ClassLoader loader = XmlSource.class.getClassLoader();
		return new StreamSource(loader.getResourceAsStream(name));
	}

	/**
	 * Build a StreamSource for the given location.
	 * Locations starting with "classpath://" are read from the classpath, everything else
	 * is passed to the StreamSource as system id.
	 *
	 * @param url	URL of type file path, "file:", "classpath:", and "http:"
	 * @return		created StreamSource
	 */
	public static StreamSource getStreamSource(String url) {
		if (!url.startsWith(CLASSPATH_URL)) {
			// StreamSource supports file paths, "file:", and "http:"
			return new StreamSource(url);
		}
		String resourceName = url.substring(CLASSPATH_URL.length());
		return getStreamSourceForClasspath(resourceName);
	}

	/**
	 * Add catalog to use.
	 * The cached catalog resolver is discarded, so it is rebuilt with the
	 * extended catalog list the next time an entity resolver is set on a reader.
	 *
	 * @param catalog  catalog to use
	 */
	public void addCatalog(String catalog) {
		catalogs.add(catalog);
		// Force rebuild of the catalog resolver as the list of catalogs has changed
		catalogResolver = null;
	}

	/**
	 * Get the resolver operator as {@link ConditionalOperator}, e.g. to add further resolving rules.
	 * This only works as long as the resolver is a ConditionalOperator, which is the case for the
	 * operator installed by default.
	 *
	 * @return	resolver operator
	 * @throws 	ClassCastException if a resolver of another type has been set by {@link #setResolverOperator}
	 */
	public ConditionalOperator getResolverConditionalOperator() {
		ConditionalOperator conditional = (ConditionalOperator) resolver;
		return conditional;
	}

	/**
	 * Get the operator used to resolve system ids of external entities.
	 *
	 * @return	resolver operator
	 */
	public UnaryOperator getResolverOperator() {
		return resolver;
	}

	/**
	 * Set the operator used to resolve system ids of external entities.
	 * The operator receives the system id and returns the URL to load instead,
	 * an empty string for an empty source, or null if it does not handle the system id.
	 *
	 * @param resolver	resolver operator
	 * @return			this (fluent interface)
	 */
	public XmlProcessor setResolverOperator(UnaryOperator resolver) {
		this.resolver = resolver;
		return this;
	}

	/**
	 * Set whether entities which could not be resolved may be handed to the default resolving of the parser.
	 *
	 * @param defaultResolverMode	mode to use
	 * @return						this (fluent interface)
	 */
	public XmlProcessor setDefaultResolverMode(DefaultResolverMode defaultResolverMode) {
		this.defaultResolverMode = defaultResolverMode;
		return this;
	}

	/**
	 * Validate the XML document as defined by the validation type of the source.
	 * For {@link Validation#AUTO}, validation against XSD is tried first and, if this fails, against DTD.
	 *
	 * @param source	XML source to validate
	 * @throws 			RuntimeException if the validation fails
	 * @throws 			MultiException if the validation type is AUTO and both XSD and DTD validation fail
	 */
	public void validate(XmlSource source) {
		if (source.getValidation() != Validation.AUTO) {
			doValidate(source);
			return;
		}

		// Auto-validation: try XSD first, then DTD
		// This is probably not the best solution, but it does not need to load the document to make the decision.
		// Otherwise we would have at least to read the head of the document and look for <!DOCTYPE> to know it uses DTD.
		Exception xsdFailure;
		try {
			doValidate(new XmlSource(source).setValidation(Validation.XSD));
			return;
		} catch (Exception e) {
			xsdFailure = e;
		}

		Exception dtdFailure;
		try {
			doValidate(new XmlSource(source).setValidation(Validation.DTD));
			return;
		} catch (Exception e) {
			dtdFailure = e;
		}

		// Both attempts failed: report both causes
		throw new MultiException(xsdFailure, dtdFailure);
	}

	/**
	 * Validate the XML document by parsing it with a reader configured for the validation type of the source.
	 *
	 * @param source	XML source to validate
	 * @throws 			RuntimeException wrapping any error occurring while creating the reader or parsing
	 */
	void doValidate(XmlSource source) {
		try {
			getXmlReader(source).parse(source.getInputSource());
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Apply a single XSLT transformation to a XML document.
	 * This is a shortcut for calling the list based variant with a one-element list.
	 *
	 * @param xmlSource	source of XML transformation
	 * @param xslt		XSLT stylesheet to apply
	 * @param xmlResult	result of XML transformation
	 * @throws 			RuntimeException if the transformation fails
	 */
	public void transform(XmlSource xmlSource, XmlStylesheet xslt, XmlResult xmlResult) {
		List<XmlStylesheet> single = new ArrayList<>();
		single.add(xslt);
		transform(xmlSource, single, xmlResult);
	}

	/**
	 * Apply a series of XSLT transformations to a XML document.
	 * The stylesheets are chained, so the output of stylesheet i is the input of stylesheet i+1.
	 * The output of the last stylesheet is written to the result, optionally validated against an XSD before.
	 *
	 * @param xmlSource	source of XML transformation
	 * @param xslts		list of XSLT stylesheets to apply (at least one)
	 * @param xmlResult	result of XML transformation, only validation types NONE and XSD are supported
	 * @throws 			IllegalArgumentException if no stylesheet is passed
	 * @throws 			RuntimeException if the transformation fails
	 */
	public void transform(XmlSource xmlSource, List<XmlStylesheet> xslts, XmlResult xmlResult) {
		if (xslts == null || xslts.isEmpty()) {
			// Fail early with a clear message instead of an index error while wiring the handlers
			throw new IllegalArgumentException("At least one XSLT stylesheet is required");
		}
		try {
			TransformerFactory tfactory = TransformerFactory.newInstance();
			tfactory.setErrorListener(xmlErrorListener);

			// We could check before the cast with
			// tfactory.getFeature(SAXSource.FEATURE)
			SAXTransformerFactory stfactory = (SAXTransformerFactory) tfactory;

			// Create a TransformerHandler for each XSL file and set the parameters on its Transformer
			List<TransformerHandler> handlers = new ArrayList<>(xslts.size());
			for (XmlStylesheet xslt : xslts) {
				TransformerHandler handler = stfactory.newTransformerHandler(xslt.getStreamSource());
				if (xslt.getNumParams() > 0) {
					Transformer transformer = handler.getTransformer();
					for (int j = 0; j < xslt.getNumParams(); j++) {
						XmlParam param = xslt.getParam(j);
						transformer.setParameter(param.getName(), param.getValue());
					}
				}
				handlers.add(handler);
			}

			// Set result to link transformers together, so transformer 0 will pipe to transformer 1 etc.
			int last = handlers.size() - 1;
			for (int i = 0; i < last; i++) {
				handlers.get(i).setResult(new SAXResult(handlers.get(i + 1)));
			}

			// Set result for last transformer
			TransformerHandler lastHandler = handlers.get(last);

			// Note this strange error handling:
			// Error if validateResult is false and the target directory does not exist:
			//Exception in thread "main" javax.xml.transform.TransformerException:
			//java.io.FileNotFoundException: output/users4.xml (The system cannot find the path specified)
			// Error if validateResult is true and the target directory does not exist:
			//Exception in thread "main" javax.xml.transform.TransformerException:
			//org.xml.sax.SAXException: setResult() must be called before startDocument().

			if (xmlResult.getValidation() == Validation.NONE) {
				// We do not have to validate the result, so pipe directly to output
				lastHandler.setResult(xmlResult.getStreamResult());
			} else if (xmlResult.getValidation() == Validation.XSD) {
				lastHandler.setResult(createXsdValidatingResult(xmlResult));
			} else {
				throw new IllegalArgumentException("Validation mode not supported");
			}

			// Parsing the source drives the whole chain of handlers
			XMLReader xmlReader = doGetXmlReader(xmlSource);
			xmlReader.setContentHandler(handlers.get(0));
			xmlReader.parse(xmlSource.getInputSource());
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Create a result which validates the received SAX events against an XSD
	 * and then serializes them to the stream result of the passed XML result.
	 *
	 * @param xmlResult	result of XML transformation
	 * @return			result to set on the last transformer of the chain
	 */
	private Result createXsdValidatingResult(XmlResult xmlResult) throws TransformerException, SAXException {
		// TransformerHandler extends ContentHandler
		// ValidatorHandler implements ContentHandler
		// So the chain is: validator -> identity transformer -> output stream

		SAXTransformerFactory tf = (SAXTransformerFactory) TransformerFactory.newInstance();
		TransformerHandler serializer = tf.newTransformerHandler();
		serializer.setResult(xmlResult.getStreamResult());

		SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
		Schema schema;
		if (xmlResult.getSchemaUrl() == null) {
			// No schema given, so the schema is located using the hints in the document
			schema = schemaFactory.newSchema();
		} else {
			schema = schemaFactory.newSchema(getStreamSource(xmlResult.getSchemaUrl()));
		}

		ValidatorHandler validator = schema.newValidatorHandler();
		validator.setErrorHandler(new XmlErrorHandler());
		validator.setContentHandler(serializer);
		return new SAXResult(validator);
	}

	/**
	 * Create a XML reader configured for the passed source (validation, error handler, entity resolver).
	 *
	 * @param source	XML source the reader is created for
	 * @return			created XML reader
	 * @throws 			WrapperException if the reader cannot be created
	 */
	public XMLReader getXmlReader(XmlSource source) {
		XMLReader reader;
		try {
			reader = doGetXmlReader(source);
		} catch (Exception e) {
			throw new WrapperException(e);
		}
		return reader;
	}

	/**
	 * Create a XML reader configured for the passed source.
	 * The reader gets an error handler and the entity resolver of this processor.
	 * For validation type INTERNAL, loading of external DTDs is switched off.
	 *
	 * @param source	XML source the reader is created for
	 * @return			created XML reader
	 * @throws 			Exception if the reader cannot be created or configured
	 */
	XMLReader doGetXmlReader(XmlSource source) throws Exception {
		Validation validation = source.getValidation();
		XMLReader reader = getSAXParser(validation, source.getSchemaFile()).getXMLReader();
		reader.setErrorHandler(new XmlErrorHandler());
		setEntityResolver(reader);

		boolean skipExternalDtd = (validation == Validation.INTERNAL);
		if (skipExternalDtd) {
			reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
		}
		return reader;
	}

	/**
	 * Get the XML reader of a SAX parser and equip it with
	 * the error handler and the entity resolver of this processor.
	 *
	 * @param saxParser		SAX parser
	 * @return				configured XML reader
	 * @throws 				SAXException if creation fails
	 */
	public XMLReader getXMLReader(SAXParser saxParser) throws SAXException {
		XMLReader reader = saxParser.getXMLReader();
		ErrorHandler errorHandler = new XmlErrorHandler();
		reader.setErrorHandler(errorHandler);
		setEntityResolver(reader);
		return reader;
	}

	/**
	 * Install the entity resolver of this processor on the reader.
	 * If catalogs have been added, the (lazily created) catalog resolver is used as parent resolver.
	 *
	 * @param xmlReader	reader to configure
	 */
	void setEntityResolver(XMLReader xmlReader) {
		EntityResolver catalogParent = null;
		if (!catalogs.isEmpty()) {
			// Build the catalog resolver on first use (it is reset whenever a catalog is added)
			if (catalogResolver == null) {
				catalogResolver = createXmlCatalogResolver();
			}
			catalogParent = catalogResolver;
		}

		// The resolver is set through the SAX API and not through the Xerces property
		// "http://apache.org/xml/properties/internal/entity-resolver"
		XmlEntityResolver entityResolver = new XmlEntityResolver(catalogParent);
		xmlReader.setEntityResolver(entityResolver);
	}

	/**
	 * Create a catalog resolver for the catalogs added to this processor.
	 *
	 * @return	new catalog resolver preferring public identifiers
	 */
	XMLCatalogResolver createXmlCatalogResolver() {
		XMLCatalogResolver created = new XMLCatalogResolver();

		if (catalogVerbose) {
			// Verbose catalog output is wanted.
			// The XMLCatalogResolver creates a new hidden instance of CatalogManager, so it cannot be configured by
			// - System.setProperty("xml.catalog.verbosity", "9");
			// - CatalogManager.getStaticManager().setVerbosity(9);
			// Therefore the hidden instance is fetched by reflection.
			CatalogManager hiddenManager = (CatalogManager) ReflectTools.getAnyBeanValue(created, "fResolverCatalogManager");
			hiddenManager.setVerbosity(9);
		}

		created.setPreferPublic(true);
		created.setCatalogList(catalogs.toArray(new String[catalogs.size()]));
		return created;
	}

	/**
	 * Get a SAX parser for the requested validation type.
	 * The parser is taken from the parser cache; for XSD validation with an explicit
	 * schema file, the schema source is set on the parser.
	 *
	 * @param validation   type of validation needed
	 * @param schemaFile   schema file for validation (may be null)
	 * @return             created SAX parser (never null)
	 * @throws             WrapperException if the parser cannot be obtained or configured
	 */
	public static SAXParser getSAXParser(Validation validation, String schemaFile) {
		try {
			SAXParser parser = saxParserCache.get(validation);
			boolean explicitSchema = (schemaFile != null);
			if (validation == Validation.XSD && explicitSchema) {
				parser.setProperty(JAXP_SCHEMA_SOURCE, schemaFile);
			}
			return parser;
		} catch (Exception e) {
			throw new WrapperException(e);
		}
	}

	/**
	 * Cache providing a SAX parser per validation type.
	 * The first function creates a namespace aware parser, which is validating for DTD, XSD and AUTO.
	 * The second function resets a parser and selects the W3C XML Schema language for XSD validation.
	 * NOTE(review): when exactly ThreadCacheMap invokes the two functions is not visible here,
	 * presumably creation once per thread and key and the reset before each use - confirm.
	 */
	static final ThreadCacheMap<Validation, SAXParser> saxParserCache = new ThreadCacheMap<>(
			validation -> {
				try {
					SAXParserFactory spf = SAXParserFactory.newInstance();
					spf.setNamespaceAware(true);
					if (validation == Validation.DTD || validation == Validation.XSD || validation == Validation.AUTO) {
						spf.setValidating(true);
					}
					return spf.newSAXParser();
				} catch (Exception e) {
					throw new WrapperException(e);
				}
			},
			(validation, sp) -> {
				try {
					// reset() also clears the properties, so the schema language must be set again
					sp.reset();
					if (validation == Validation.XSD) {
						sp.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
					}
				} catch (Exception e) {
					throw new WrapperException(e);
				}
			});

	/**
	 * An entity resolver is used to load DTD and XSD files.
	 * This entity resolver logs the external entities to resolve.
	 * <p>
	 * An entity is resolved by trying in this order:
	 * the resolver operator of the enclosing processor, the classpath (for "classpath://" system ids),
	 * and the parent resolver (first with the full system id, then with its name only).
	 * If the entity cannot be resolved, the default resolver mode of the enclosing processor
	 * decides whether resolving fails or whether the parser may use its default resolving.
	 */
	class XmlEntityResolver implements EntityResolver {
		/** Logger */
		private final Logger LOG = (Logger) LoggerFactory.getLogger(XmlEntityResolver.class);

		/** Default parent resolver (may be null) */
		private final EntityResolver parent;

		/**
		 * Constructor.
		 * The parent resolver will be used if no custom defined resolver could resolve the entity.
		 *
		 * @param parent  parent resolver (may be null)
		 */
		public XmlEntityResolver(EntityResolver parent) {
			this.parent = parent;
		}

		/**
		 * Resolve an external entity.
		 *
		 * @param publicId	public identifier of the entity (may be null)
		 * @param systemId	system identifier of the entity
		 * @return			source to read the entity from, null to let the parser use its default resolving
		 * @throws 			RuntimeException if the entity cannot be resolved and the default resolver mode
		 * 					does not allow default resolving for this system id
		 */
		@Override
		public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
			// A DTD may have both publicId and systemId, a XSD has only systemId.
			// If null is returned, the default behavior is used to get the entity, so
			// a HTTP connection may be opened. To prevent this, you can return an empty input source.

			LOG.debug("Resolving systemId={} with defined resolvers", systemId);
			InputSource source = null;
			// The resolver maps system id strings to strings; the explicit cast keeps this statement
			// valid even where the operator is only known without type arguments.
			String resolved = (String) resolver.apply(systemId);
			if (resolved != null) {
				if (resolved.isEmpty()) {
					LOG.debug("Resolving systemId={} with empty source", systemId);
					source = getInputSourceForString("");
				} else {
					LOG.debug("Resolving systemId={} with {}", systemId, resolved);
					source = getInputSource(resolved);
				}
			}

			if (source == null && systemId.startsWith(CLASSPATH_URL)) {
				// It can be that a classpath entity comes until here if it is constructed internally,
				// e.g. by a TransformerFactory if reading into DOM
				LOG.debug("Resolving systemId={} from classpath", systemId);
				source = getInputSourceForClasspath(systemId.substring(CLASSPATH_URL.length()));
			}

			if (source == null && parent != null) {
				if (parent instanceof XMLCatalogResolver) {
					LOG.debug("Resolving publicId={}, systemId={} from catalog", publicId, systemId);
				}
				source = parent.resolveEntity(publicId, systemId);
				if (source == null) {
					// Second attempt using only the name of the system id, so catalog entries
					// declared without path information can match
					String localSystemId = PathTools.getName(systemId);
					LOG.debug("Resolving publicId={}, systemId={} from catalog", publicId, localSystemId);
					source = parent.resolveEntity(publicId, localSystemId);
				}
			}

			boolean fail = false;
			String status;
			if (source != null) {
				status = "found";
			} else if (defaultResolverMode == DefaultResolverMode.DISALLOW_DEFAULT_RESOlVER) {
				// Must be an exclusive branch: otherwise the status set here is overwritten
				// by the check for remote references below and the failure reports a wrong reason
				status = "not found -> fail because default resolver is disabled";
				fail = true;
			} else {
				FilePath path = FilePath.of(systemId);
				boolean needsRemoteFile = (path.getType().equals(FilePath.Type.URI) && !path.getScheme().equals("file"));
				if (needsRemoteFile && defaultResolverMode == DefaultResolverMode.ALLOW_DEFAULT_RESOLVER_FOR_FILES) {
					status = "not found -> fail because default resolver is disabled for remote references";
					fail = true;
				} else {
					status = "not found -> try default resolver";
				}
			}
			LOG.debug("Resolving publicId={}, systemId={}: {}", new Object[] { publicId, systemId, status });
			if (fail) {
				throw new RuntimeException(status);
			}
			// Can return null to use default approach for resolving entities
			return source;
		}
	}

	/**
	 * Class which implements an error listener for Transformer and TransformerFactory
	 */
	public static class XmlErrorListener implements ErrorListener {

		@Override
		public void warning(TransformerException exception) {
			// It could be that we would like to treat all warnings as errors,
			// because warnings typically arise from erroneous stylesheets.
			// Note that the warning() function is not ready to throw exceptions:
			// The function addAttribute() in the class org.apache.xml.serializer.ToXMLStream calls warning(), but
			// swallows all exception thrown. So we have to log the warning at least.
			LOG.warn("Warning: " + exception.getMessage());
		}

		@Override
		public void error(TransformerException exception) throws TransformerException {
			// XSLT is not as draconian as XML. There are numerous errors
			// which the processor may but does not have to recover from;
			// e.g. multiple templates that match a node with the same priority.			
			// This should just be logged, as in case of a severe error fatalError() will be called later
			// (and only the exception passed to fatalError will contain the real cause)
			LOG.warn("Error: " + exception.getMessage());

			// Note that I cannot simply rethrow the received TransformerException as this error is swallowed by
			// com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl.passErrorsToListener
			// without logging, so we use a simple RuntimException which works fine.
			//throw new RuntimeException(exception);
		}

		@Override
		public void fatalError(TransformerException exception) throws TransformerException {
			// This is an error which the processor cannot recover from,
			// e.g. a malformed stylesheet or input document so I must throw this exception here.
			// Note that I cannot simply rethrow the received TransformerException as this error is swallowed by
			// com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl.passErrorsToListener
			// without logging, so we use a simple RuntimException which works fine.
			LOG.error("Fatal error: " + exception.getMessage());
			throw new RuntimeException(exception);
		}
	}

	/**
	 * SAX error handler to report errors and warnings.
	 * Warnings are only reported if an output stream has been passed on construction,
	 * errors and fatal errors always abort parsing with a SAXException.
	 */
	public static class XmlErrorHandler implements ErrorHandler {
		/** Error handler output goes here (null if warnings should not be reported) */
		private final PrintStream out;

		/**
		 * Create an error handler which does not report warnings.
		 */
		XmlErrorHandler() {
			this(null);
		}

		/**
		 * Create an error handler which reports warnings.
		 *
		 * @param out	stream warnings are written to (may be null to not report warnings)
		 */
		XmlErrorHandler(PrintStream out) {
			this.out = out;
		}

		/**
		 * Returns a string describing parse exception details
		 * (system id, line, column, and message; a missing system id is shown as "null").
		 */
		private String getParseExceptionInfo(SAXParseException spe) {
			// String concatenation already renders a null system id as "null"
			return "URI=" + spe.getSystemId() + " Line=" + spe.getLineNumber() + " Column=" + spe.getColumnNumber() + ": " + spe.getMessage();
		}

		// The following methods are standard SAX ErrorHandler methods.
		// See SAX documentation for more info.

		/**
		 * Report a warning to the log and to the output stream, if an output stream has been set.
		 */
		@Override
		public void warning(SAXParseException spe) throws SAXException {
			if (out != null) {
				String message = "Warning: " + getParseExceptionInfo(spe);
				LOG.warn(message);
				out.println(message);
			}
		}

		/**
		 * Abort parsing on a recoverable error, the original exception is kept as cause.
		 */
		@Override
		public void error(SAXParseException spe) throws SAXException {
			String message = "Error: " + getParseExceptionInfo(spe);
			throw new SAXException(message, spe);
		}

		/**
		 * Abort parsing on a fatal error, the original exception is kept as cause.
		 */
		@Override
		public void fatalError(SAXParseException spe) throws SAXException {
			String message = "Fatal Error: " + getParseExceptionInfo(spe);
			throw new SAXException(message, spe);
		}
	}

}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy