// org.pageseeder.diffx.Extension (Maven / Gradle / Ivy)
/*
* Copyright 2010-2015 Allette Systems (Australia)
* http://www.allette.com.au
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.pageseeder.diffx;
import org.pageseeder.diffx.config.DiffConfig;
import org.pageseeder.diffx.config.TextGranularity;
import org.pageseeder.diffx.config.WhiteSpaceProcessing;
import org.pageseeder.diffx.core.OptimisticXMLProcessor;
import org.pageseeder.diffx.format.DefaultXMLDiffOutput;
import org.pageseeder.diffx.load.DOMLoader;
import org.pageseeder.diffx.xml.NamespaceSet;
import org.pageseeder.diffx.xml.Sequence;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Hashtable;
import java.util.Map;
/**
 * To use Diff-X as an XSLT extension.
 *
 * <p>In Saxon, declare the namespace as:
 *
 * <pre>{@code
 * xmlns:diffx="java:org.pageseeder.diffx.Extension"
 * }</pre>
 *
 * <p>Diff-X can be called within XSLT with:
 *
 * <pre>{@code
 * diffx:diff($xml1, $xml2, $whitespace, $granularity)
 * }</pre>
 *
 * <p>Note: the method signature requires DOM arguments; include the Saxon-DOM jar
 * on your classpath to use this extension function with Saxon.
 *
 * @author Christophe Lauret
 * @version 0.9.0
 */
public final class Extension {

  /**
   * Maps the package name of a DOM implementation to the class name of the DOM builder
   * factory to use for nodes coming from that package.
   *
   * <p>This is because some XSLT processors will only accept certain types of DOM objects.
   */
  private static final Map<String, String> BUILDERS = new Hashtable<>();
  static {
    BUILDERS.put("net.sf.saxon.dom", "net.sf.saxon.dom.DocumentBuilderFactoryImpl");
  }

  /**
   * Compares the two specified {@code Node}s and returns the diff as a node.
   *
   * <p>Only the first node in the node list is sequenced.
   *
   * @param xml1        The first XML node to compare.
   * @param xml2        The second XML node to compare.
   * @param whitespace  The white space processing (a valid {@link WhiteSpaceProcessing} value).
   * @param granularity The text granularity (a valid {@link TextGranularity} value).
   *
   * @return the document element of the parsed diff output, or <code>null</code> when both
   *         loaded sequences are empty.
   *
   * @throws DiffException Should a Diff exception occur, or if the diff output could not be
   *                       turned back into a node.
   * @throws IOException   Should an I/O exception occur.
   */
  public static Node diff(Node xml1, Node xml2, String whitespace, String granularity)
      throws DiffException, IOException {
    // Get the config
    DiffConfig config = toConfig(whitespace, granularity);

    // Get Sequences
    DOMLoader loader = new DOMLoader();
    loader.setConfig(config);
    Sequence seq1 = loader.load(xml1);
    Sequence seq2 = loader.load(xml2);

    // Nothing to compare on either side: there is no diff to report
    if (seq1.size() == 0 && seq2.size() == 0) {
      return null;
    }

    // Start comparing
    StringWriter out = new StringWriter();
    diff(seq1, seq2, out);

    // Return a node, built with a factory matching the DOM implementation of the arguments
    try {
      String factory = getFactoryClass(xml1, xml2);
      return toNode(out.toString(), config, factory);
    } catch (Exception ex) {
      throw new DiffException("Could not generate Node from Diff result", ex);
    }
  }

  // private helpers ------------------------------------------------------------------------------

  /**
   * Compares the two specified sequences and prints the diff onto the given writer.
   *
   * <p>The namespaces of both sequences are merged and handed to the output before diffing.
   *
   * @param seq1 The first sequence to compare.
   * @param seq2 The second sequence to compare.
   * @param out  Where the output goes.
   */
  private static void diff(Sequence seq1, Sequence seq2, Writer out) {
    DefaultXMLDiffOutput output = new DefaultXMLDiffOutput(out);
    NamespaceSet namespaces = NamespaceSet.merge(seq1.getNamespaces(), seq2.getNamespaces());
    output.setNamespaces(namespaces);
    OptimisticXMLProcessor processor = new OptimisticXMLProcessor();
    processor.diff(seq1.tokens(), seq2.tokens(), output);
  }

  /**
   * Returns the Diff-X config for the specified arguments given as strings.
   *
   * <p>Each argument is resolved with the <code>valueOf</code> method of the corresponding type.
   *
   * @param whitespace  A valid white space processing value.
   * @param granularity A valid text granularity value.
   *
   * @return the Diff-X config for the specified arguments.
   */
  private static DiffConfig toConfig(String whitespace, String granularity) {
    WhiteSpaceProcessing ws = WhiteSpaceProcessing.valueOf(whitespace);
    TextGranularity tg = TextGranularity.valueOf(granularity);
    return new DiffConfig(ws, tg);
  }

  /**
   * Parses the specified XML string and returns the document element of the result.
   *
   * @param xml     The XML to parse.
   * @param config  The DiffX configuration to use (determines namespace awareness).
   * @param factory The class name of the DOM builder factory, or <code>null</code> to use the
   *                default factory.
   *
   * @return the document element of the parsed document.
   *
   * @throws IOException                  Should an I/O error occur while parsing.
   * @throws ParserConfigurationException If a document builder could not be created.
   * @throws SAXException                 If the XML could not be parsed.
   */
  private static Node toNode(String xml, DiffConfig config, String factory)
      throws IOException, ParserConfigurationException, SAXException {
    // Use the named factory (loaded with this class's class loader) when one is specified
    DocumentBuilderFactory dbFactory = factory == null
        ? DocumentBuilderFactory.newInstance()
        : DocumentBuilderFactory.newInstance(factory, Extension.class.getClassLoader());
    dbFactory.setNamespaceAware(config.isNamespaceAware());
    dbFactory.setExpandEntityReferences(true);
    dbFactory.setValidating(false);
    DocumentBuilder builder = dbFactory.newDocumentBuilder();
    Document document = builder.parse(new InputSource(new StringReader(xml)));
    return document.getDocumentElement();
  }

  /**
   * Returns the factory class to use based on the implementation package of the given
   * {@code Node}s.
   *
   * <p>The package of the first node's class is used if that node is not <code>null</code>,
   * otherwise the package of the second node's class.
   *
   * @param xml1 the first node.
   * @param xml2 the second node.
   *
   * @return the class name of the DOM builder factory registered for that package, or
   *         <code>null</code> if no package could be determined or none is registered for it.
   */
  private static String getFactoryClass(Node xml1, Node xml2) {
    Package pkg = xml1 != null ? xml1.getClass().getPackage()
        : xml2 != null ? xml2.getClass().getPackage()
        : null;
    return pkg == null ? null : BUILDERS.get(pkg.getName());
  }
}