All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.functions.SaxonDeepEqual Maven / Gradle / Ivy

There is a newer version: 9.9.1-2
Show newest version
package net.sf.saxon.functions;

import net.sf.saxon.Configuration;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.expr.sort.AtomicComparer;
import net.sf.saxon.expr.sort.GenericAtomicComparer;
import net.sf.saxon.lib.ErrorReporter;
import net.sf.saxon.lib.Logger;
import net.sf.saxon.lib.StringCollator;
import net.sf.saxon.om.*;
import net.sf.saxon.pattern.SameNameTest;
import net.sf.saxon.query.QueryResult;
import net.sf.saxon.str.StringTool;
import net.sf.saxon.str.UnicodeBuilder;
import net.sf.saxon.trans.UncheckedXPathException;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.trans.XmlProcessingIncident;
import net.sf.saxon.transpile.CSharp;
import net.sf.saxon.tree.iter.AtomicIterator;
import net.sf.saxon.tree.iter.AxisIterator;
import net.sf.saxon.tree.iter.ListIterator;
import net.sf.saxon.tree.tiny.WhitespaceTextImpl;
import net.sf.saxon.tree.util.Navigator;
import net.sf.saxon.tree.util.Orphan;
import net.sf.saxon.type.ComplexType;
import net.sf.saxon.type.ComplexVariety;
import net.sf.saxon.type.SchemaType;
import net.sf.saxon.type.Type;
import net.sf.saxon.value.*;

import javax.xml.transform.OutputKeys;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;

/**
 * Implements the saxon:deep-equal() function, a variant of fn:deep-equal that provides
 * additional control over how the comparison is performed.
 * 

The specification is as follows:

*

arg1 The first sequence to be compared

*

arg2 The second sequence to be compared

*

collation The collation to be used (null if the default collation is to be used)

*

flags A string whose characters select options that cause the comparison to vary from the * standard fn:deep-equals() function. The flags are:

*
    *
  • N - take namespace nodes into account
  • *
  • J - join adjacent text nodes (e.g, nodes either side of a comment) *
  • A - compare type annotations
  • *
  • C - take comments into account
  • *
  • F - take namespace prefixes into account
  • *
  • I - take the is-id() and is-idref() properties into account
  • *
  • P - take processing instructions into account
  • *
  • S - compare string values, not typed values
  • *
  • w - don't take whitespace-only text nodes into account
  • *
*

returns true if the sequences are deep equal, otherwise false

*/ public class SaxonDeepEqual extends SystemFunction { /** * Flag indicating that two elements should only be considered equal if they have the same * in-scope namespaces */ public static final int INCLUDE_NAMESPACES = 1; /** * Flag indicating that two element or attribute nodes are considered equal only if their * names use the same namespace prefix */ public static final int INCLUDE_PREFIXES = 1 << 1; /** * Flag indicating that comment children are taken into account when comparing element or document nodes */ public static final int INCLUDE_COMMENTS = 1 << 2; /** * Flag indicating that processing instruction nodes are taken into account when comparing element or document nodes */ public static final int INCLUDE_PROCESSING_INSTRUCTIONS = 1 << 3; /** * Flag indicating that whitespace text nodes are ignored when comparing element nodes */ public static final int EXCLUDE_WHITESPACE_TEXT_NODES = 1 << 4; /** * Flag indicating that elements and attributes should always be compared according to their string * value, not their typed value */ public static final int COMPARE_STRING_VALUES = 1 << 5; /** * Flag indicating that elements and attributes must have the same type annotation to be considered * deep-equal */ public static final int COMPARE_ANNOTATIONS = 1 << 6; /** * Flag indicating that a warning message explaining the reason why the sequences were deemed non-equal * should be sent to the ErrorListener */ public static final int WARNING_IF_FALSE = 1 << 7; /** * Flag indicating that adjacent text nodes in the top-level sequence are to be merged */ public static final int JOIN_ADJACENT_TEXT_NODES = 1 << 8; /** * Flag indicating that the is-id and is-idref flags are to be compared */ public static final int COMPARE_ID_FLAGS = 1 << 9; /** * Flag indicating that the variety of the node type is to be ignored (for example, a mixed content * node can compare equal to an element-only content node */ public static final int EXCLUDE_VARIETY = 1 << 10; /** * Evaluate this function call at run-time * * @param context The XPath dynamic evaluation context * @param arguments The values of the arguments to the function call. Each argument value (which is in general * a sequence) is supplied in the form of a sequence. * @return the results of the function. * @throws net.sf.saxon.trans.XPathException if a dynamic error occurs during evaluation of the function. The Saxon run-time * code will add information about the error location. */ @Override public BooleanValue call(XPathContext context, Sequence[] arguments) throws XPathException { String flags = arguments[3].head().getStringValue(); if (flags.indexOf('!') >= 0) { // undocumented diagnostic option Logger err = context.getConfiguration().getLogger(); Properties indent = new Properties(); indent.setProperty(OutputKeys.METHOD, "xml"); indent.setProperty(OutputKeys.INDENT, "yes"); err.info("DeepEqual: first argument:"); QueryResult.serialize(QueryResult.wrap(arguments[0].iterate(), context.getConfiguration()), err.asStreamResult(), indent); err.info("DeepEqual: second argument:"); QueryResult.serialize(QueryResult.wrap(arguments[1].iterate(), context.getConfiguration()), err.asStreamResult(), indent); } Item collationValue = arguments[2].head(); Configuration config = context.getConfiguration(); String collation = collationValue == null ? config.getDefaultCollationName() : collationValue.getStringValue(); StringCollator collator = config.getCollation(collation); if (collator == null) { throw new XPathException("Unknown collation " + collation, "FOCH0002"); } GenericAtomicComparer comparer = new GenericAtomicComparer(collator, context); int flag = 0; if (flags.contains("N")) { flag |= INCLUDE_NAMESPACES; } if (flags.contains("J")) { flag |= JOIN_ADJACENT_TEXT_NODES; } if (flags.contains("C")) { flag |= INCLUDE_COMMENTS; } if (flags.contains("P")) { flag |= INCLUDE_PROCESSING_INSTRUCTIONS; } if (flags.contains("F")) { flag |= INCLUDE_PREFIXES; } if (flags.contains("S")) { flag |= COMPARE_STRING_VALUES; } if (flags.contains("A")) { flag |= COMPARE_ANNOTATIONS; } if (flags.contains("I")) { flag |= COMPARE_ID_FLAGS; } if (flags.contains("v")) { flag |= EXCLUDE_VARIETY; } if (flags.contains("w")) { flag |= EXCLUDE_WHITESPACE_TEXT_NODES; } if (flags.contains("?")) { flag |= WARNING_IF_FALSE; } boolean result = deepEqual( arguments[0].iterate(), arguments[1].iterate(), comparer, context, flag); return BooleanValue.get(result); } /** * Determine when two sequences are deep-equal * * @param op1 the first sequence * @param op2 the second sequence * @param comparer the comparer to be used * @param context the XPathContext item * @param flags bit-significant integer giving comparison options. Always zero for standard * F+O deep-equals comparison. * @return true if the sequences are deep-equal * @throws XPathException if either sequence contains a function item */ public static boolean deepEqual(SequenceIterator op1, SequenceIterator op2, AtomicComparer comparer, XPathContext context, int flags) throws XPathException { boolean result = true; String reason = null; ErrorReporter reporter = context.getErrorReporter(); try { if ((flags & JOIN_ADJACENT_TEXT_NODES) != 0) { op1 = mergeAdjacentTextNodes(op1); op2 = mergeAdjacentTextNodes(op2); } int pos1 = 0; int pos2 = 0; while (true) { Item item1 = op1.next(); Item item2 = op2.next(); if (item1 == null && item2 == null) { break; } pos1++; pos2++; if (item1 == null || item2 == null) { result = false; if (item1 == null) { reason = "Second sequence is longer (first sequence length = " + pos2 + ")"; } else { reason = "First sequence is longer (second sequence length = " + pos1 + ")"; } if (item1 instanceof WhitespaceTextImpl || item2 instanceof WhitespaceTextImpl) { reason += " (the first extra node is whitespace text)"; } break; } if (item1 instanceof FunctionItem || item2 instanceof FunctionItem) { if (!(item1 instanceof FunctionItem && item2 instanceof FunctionItem)) { reason = "if one item is a function then both must be functions (position " + pos1 + ")"; return false; } // two maps or arrays can be deep-equal boolean fe = ((FunctionItem) item1).deepEquals((FunctionItem) item2, context, comparer, flags); if (!fe) { result = false; reason = "functions at position " + pos1 + " differ"; break; } continue; } if (item1 instanceof ObjectValue || item2 instanceof ObjectValue) { if (!item1.equals(item2)) { return false; } continue; } if (item1 instanceof NodeInfo) { if (item2 instanceof NodeInfo) { String message = deepEquals((NodeInfo) item1, (NodeInfo) item2, comparer, context, flags); if (message != null) { result = false; reason = "nodes at position " + pos1 + " differ: " + message; break; } } else { result = false; reason = "comparing a node to an atomic value at position " + pos1; break; } } else { if (item2 instanceof NodeInfo) { result = false; reason = "comparing an atomic value to a node at position " + pos1; break; } else { AtomicValue av1 = (AtomicValue) item1; AtomicValue av2 = (AtomicValue) item2; if (av1.isNaN() && av2.isNaN()) { // treat as equal, no action } else if (!comparer.comparesEqual(av1, av2)) { result = false; reason = "atomic values at position " + pos1 + " differ"; break; } } } } // end while } catch (UncheckedXPathException uxe) { throw uxe.getXPathException(); } catch (ClassCastException err) { // this will happen if the sequences contain non-comparable values // comparison errors are masked //err.printStackTrace(); result = false; reason = "sequences contain non-comparable values"; } if (!result) { explain(reporter, reason, flags, null, null); // config.getErrorReporter().warning( // new XPathException("deep-equal(): " + reason) // ); } return result; } /* * Determine whether two nodes are deep-equal * @return null if they are deep equal, or an explanation of the reason if not */ public static String deepEquals(NodeInfo n1, NodeInfo n2, AtomicComparer comparer, XPathContext context, int flags) throws XPathException { // shortcut: a node is always deep-equal to itself if (n1.equals(n2)) { return null; } ErrorReporter reporter = context.getErrorReporter(); if (n1.getNodeKind() != n2.getNodeKind()) { String reason = "node kinds differ: comparing " + showKind(n1) + " to " + showKind(n2); explain(reporter, reason, flags, n1, n2); return reason; } switch (n1.getNodeKind()) { case Type.ELEMENT: if (!Navigator.haveSameName(n1, n2)) { final String reason = "element names differ: " + NameOfNode.makeName(n1).getStructuredQName().getEQName() + " != " + NameOfNode.makeName(n2).getStructuredQName().getEQName(); explain(reporter, reason, flags, n1, n2); return reason; } if (((flags & INCLUDE_PREFIXES) != 0) && !n1.getPrefix().equals(n2.getPrefix())) { final String reason = "element prefixes differ: " + n1.getPrefix() + " != " + n2.getPrefix(); explain(reporter, reason, flags, n1, n2); return reason; } AxisIterator a1 = n1.iterateAxis(AxisInfo.ATTRIBUTE); AxisIterator a2 = n2.iterateAxis(AxisInfo.ATTRIBUTE); if (!SequenceTool.sameLength(a1, a2)) { final String reason = "elements have different number of attributes"; explain(reporter, reason, flags, n1, n2); return reason; } NodeInfo att1; a1 = n1.iterateAxis(AxisInfo.ATTRIBUTE); while ((att1 = a1.next()) != null) { AxisIterator a2iter = n2.iterateAxis(AxisInfo.ATTRIBUTE, new SameNameTest(att1)); NodeInfo att2 = a2iter.next(); if (att2 == null) { final String reason = "one element has an attribute " + NameOfNode.makeName(att1).getStructuredQName().getEQName() + ", the other does not"; explain(reporter, reason, flags, n1, n2); return reason; } String attReason = deepEquals(att1, att2, comparer, context, flags); if (attReason != null) { final String reason = "elements have different values for the attribute " + NameOfNode.makeName(att1).getStructuredQName().getEQName() + " - " + attReason; explain(reporter, reason, flags, n1, n2); return reason; } } if ((flags & INCLUDE_NAMESPACES) != 0) { NamespaceMap nm1 = n1.getAllNamespaces(); NamespaceMap nm2 = n2.getAllNamespaces(); if (!nm1.equals(nm2)) { final String reason = "elements have different in-scope namespaces: " + nm1 + " versus " + nm2; explain(reporter, reason, flags, n1, n2); return reason; } } if ((flags & COMPARE_ANNOTATIONS) != 0) { if (!n1.getSchemaType().equals(n2.getSchemaType())) { final String reason = "elements have different type annotation"; explain(reporter, reason, flags, n1, n2); return reason; } } if ((flags & EXCLUDE_VARIETY) == 0) { if (n1.getSchemaType().isComplexType() != n2.getSchemaType().isComplexType()) { final String reason = "one element has complex type, the other simple"; explain(reporter, reason, flags, n1, n2); return reason; } if (n1.getSchemaType().isComplexType()) { ComplexVariety variety1 = ((ComplexType) n1.getSchemaType()).getVariety(); ComplexVariety variety2 = ((ComplexType) n2.getSchemaType()).getVariety(); if (variety1 != variety2) { final String reason = "both elements have complex type, but a different variety"; explain(reporter, reason, flags, n1, n2); return reason; } } } if ((flags & COMPARE_STRING_VALUES) == 0) { final SchemaType type1 = n1.getSchemaType(); final SchemaType type2 = n2.getSchemaType(); final boolean isSimple1 = type1.isSimpleType() || ((ComplexType) type1).isSimpleContent(); final boolean isSimple2 = type2.isSimpleType() || ((ComplexType) type2).isSimpleContent(); if (isSimple1 != isSimple2) { final String reason = "one element has a simple type, the other does not"; explain(reporter, reason, flags, n1, n2); return reason; } if (isSimple1) { assert isSimple2; final AtomicIterator v1 = n1.atomize().iterate(); final AtomicIterator v2 = n2.atomize().iterate(); boolean typedValueComparison = deepEqual(v1, v2, comparer, context, flags); return typedValueComparison ? null : "typed values of elements differ"; } } if ((flags & COMPARE_ID_FLAGS) != 0) { if (n1.isId() != n2.isId()) { final String reason = "one element is an ID, the other is not"; explain(reporter, reason, flags, n1, n2); return reason; } if (n1.isIdref() != n2.isIdref()) { final String reason = "one element is an IDREF, the other is not"; explain(reporter, reason, flags, n1, n2); return reason; } } CSharp.emitCode("goto case Saxon.Hej.type.Type.DOCUMENT;"); // fall through case Type.DOCUMENT: AxisIterator c1 = n1.iterateAxis(AxisInfo.CHILD); AxisIterator c2 = n2.iterateAxis(AxisInfo.CHILD); while (true) { NodeInfo d1 = c1.next(); while (d1 != null && isIgnorable(d1, flags)) { d1 = c1.next(); } NodeInfo d2 = c2.next(); while (d2 != null && isIgnorable(d2, flags)) { d2 = c2.next(); } if (d1 == null || d2 == null) { boolean r = d1 == d2; if (!r) { String message = "the first operand contains a node with " + (d1 == null ? "fewer" : "more") + " children than the second"; if (d1 instanceof WhitespaceTextImpl || d2 instanceof WhitespaceTextImpl) { message += " (the first extra child is whitespace text)"; } explain(reporter, message, flags, n1, n2); return message; } return null; } String recursiveResult = deepEquals(d1, d2, comparer, context, flags); if (recursiveResult != null) { return recursiveResult; } } case Type.ATTRIBUTE: if (!Navigator.haveSameName(n1, n2)) { final String reason = "attribute names differ: " + NameOfNode.makeName(n1).getStructuredQName().getEQName() + " != " + NameOfNode.makeName(n1).getStructuredQName().getEQName(); explain(reporter, reason, flags, n1, n2); return reason; } if (((flags & INCLUDE_PREFIXES) != 0) && !n1.getPrefix().equals(n2.getPrefix())) { final String reason = "attribute prefixes differ: " + n1.getPrefix() + " != " + n2.getPrefix(); explain(reporter, reason, flags, n1, n2); return reason; } if ((flags & COMPARE_ANNOTATIONS) != 0) { if (!n1.getSchemaType().equals(n2.getSchemaType())) { final String reason = "attributes have different type annotations"; explain(reporter, reason, flags, n1, n2); return reason; } } boolean ar; if ((flags & COMPARE_STRING_VALUES) == 0) { ar = deepEqual(n1.atomize().iterate(), n2.atomize().iterate(), comparer, context, 0); } else { ar = comparer.comparesEqual(new StringValue(n1.getUnicodeStringValue()), new StringValue(n2.getUnicodeStringValue())); } if (!ar) { final String reason = "attribute values differ"; explain(reporter, reason, flags, n1, n2); return reason; } if ((flags & COMPARE_ID_FLAGS) != 0) { if (n1.isId() != n2.isId()) { final String reason = "one attribute is an ID, the other is not"; explain(reporter, reason, flags, n1, n2); return reason; } if (n1.isIdref() != n2.isIdref()) { final String reason = "one attribute is an IDREF, the other is not"; explain(reporter, reason, flags, n1, n2); return reason; } } return null; case Type.PROCESSING_INSTRUCTION: case Type.NAMESPACE: if (!n1.getLocalPart().equals(n2.getLocalPart())) { final String reason = Type.displayTypeName(n1) + " names differ"; explain(reporter, reason, flags, n1, n2); return reason; } CSharp.emitCode("goto case Saxon.Hej.type.Type.TEXT;"); // drop through case Type.TEXT: case Type.COMMENT: boolean vr = comparer.comparesEqual((AtomicValue) n1.atomize(), (AtomicValue) n2.atomize()); if (!vr) { if ((flags & WARNING_IF_FALSE) != 0) { String v1 = n1.getStringValue(); String v2 = n2.getStringValue(); String message = ""; if (v1.length() != v2.length()) { message = "lengths (" + v1.length() + "," + v2.length() + ")"; } if (v1.length() < 10 && v2.length() < 10) { message = " (\"" + v1 + "\" vs \"" + v2 + "\")"; } else { int min = Math.min(v1.length(), v2.length()); if (v1.substring(0, min).equals(v2.substring(0, min))) { message += " different at char " + min + "(\"" + StringTool.diagnosticDisplay((v1.length() > v2.length() ? v1 : v2).substring(min)) + "\")"; } else if (v1.charAt(0) != v2.charAt(0)) { message += " different at start " + "(\"" + v1.substring(0, Math.min(v1.length(), 10)) + "\", \"" + v2.substring(0, Math.min(v2.length(), 10)) + "\")"; } else { for (int i = 1; i < min; i++) { if (!v1.substring(0, i).equals(v2.substring(0, i))) { message += " different at char " + (i - 1) + "(\"" + v1.substring(i - 1, Math.min(v1.length(), i + 10)) + "\", \"" + v2.substring(i - 1, Math.min(v2.length(), i + 10)) + "\")"; break; } } } } explain(reporter, Type.displayTypeName(n1) + " values differ (" + Navigator.getPath(n1) + ", " + Navigator.getPath(n2) + "): " + message, flags, n1, n2); return message; } else { return "atomized values differ"; } } return null; default: throw new IllegalArgumentException("Unknown node type"); } } private static boolean isIgnorable(NodeInfo node, int flags) { final int kind = node.getNodeKind(); if (kind == Type.COMMENT) { return (flags & INCLUDE_COMMENTS) == 0; } else if (kind == Type.PROCESSING_INSTRUCTION) { return (flags & INCLUDE_PROCESSING_INSTRUCTIONS) == 0; } else if (kind == Type.TEXT) { return ((flags & EXCLUDE_WHITESPACE_TEXT_NODES) != 0) && Whitespace.isAllWhite(node.getUnicodeStringValue()); } return false; } private static void explain(ErrorReporter reporter, String message, int flags, NodeInfo n1, NodeInfo n2) { if ((flags & WARNING_IF_FALSE) != 0) { reporter.report(new XmlProcessingIncident("deep-equal() " + (n1 != null && n2 != null ? "comparing " + Navigator.getPath(n1) + " to " + Navigator.getPath(n2) + ": " : ": ") + message).asWarning()); } } private static String showKind(Item item) { if (item instanceof NodeInfo && ((NodeInfo) item).getNodeKind() == Type.TEXT && Whitespace.isAllWhite(item.getUnicodeStringValue())) { return "whitespace text() node"; } else { return Type.displayTypeName(item); } } private static String showNamespaces(HashSet bindings) { StringBuilder sb = new StringBuilder(256); for (NamespaceBinding binding : bindings) { sb.append(binding.getPrefix()); sb.append("="); sb.append(binding.getNamespaceUri()); sb.append(" "); } sb.setLength(sb.length() - 1); return sb.toString(); } private static SequenceIterator mergeAdjacentTextNodes(SequenceIterator in) throws XPathException { List items = new ArrayList<>(20); boolean prevIsText = false; UnicodeBuilder textBuffer = new UnicodeBuilder(); while (true) { Item next = in.next(); if (next == null) { break; } if (next instanceof NodeInfo && ((NodeInfo) next).getNodeKind() == Type.TEXT) { textBuffer.accept(next.getUnicodeStringValue()); prevIsText = true; } else { if (prevIsText) { Orphan textNode = new Orphan(null); textNode.setNodeKind(Type.TEXT); textNode.setStringValue(textBuffer.toUnicodeString()); items.add(textNode); textBuffer.clear(); } prevIsText = false; items.add(next); } } if (prevIsText) { Orphan textNode = new Orphan(null); textNode.setNodeKind(Type.TEXT); textNode.setStringValue(textBuffer.toUnicodeString()); items.add(textNode); } return new ListIterator.Of<>(items); } // /** // * Execute a dynamic call to the function // * // * @param context the dynamic evaluation context // * @param arguments the values of the arguments, supplied as Sequences. // * @return the result of the evaluation, in the form of a Sequence. It is the responsibility // * of the callee to ensure that the type of result conforms to the expected result type. // * @throws XPathException (should not happen) // */ // // @Override // public BooleanValue call(XPathContext context, Sequence[] arguments) throws XPathException { // GenericAtomicComparer comparer = new GenericAtomicComparer(getStringCollator(), context); // boolean b = deepEqual(arguments[0].iterate(), arguments[1].iterate(), comparer, context, 0); // return BooleanValue.get(b); // } @Override public String getStreamerName() { return "DeepEqual"; } } // Copyright (c) 2009-2023 Saxonica Limited




© 2015 - 2025 Weber Informatics LLC | Privacy Policy