
// jcmp.DocCompare (listing header: Maven / Gradle / Ivy)
package jcmp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;

import jiff.AbstractFieldFilter;
import jiff.JsonDiff;
/**
* Compares two JSON documents, and builds a list of all changes.
*
* Containers (objects and arrays) are compared recursively. The comparison
* algorithm works like this:
*
* Objects: A field-by-field comparison is done. If a field exists in the first
* document but not in the second, that field is removed. If a field exists in
* the second document but not the first, that field is added. If a field exists
* in both documents with different values, that field is modified.
*
* Arrays: There are two possible algorithms to compare arrays. If array
* elements contain a unique identifier (which is defined by the caller), then
* array elements of the first and the second document are matched using the
* unique identifiers of array elements. Then each matching array element is
* compared to generate the detailed difference. If array elements don't have
* unique identifiers, then each element of the first array is compared to each
* element of the second array, and the elements with minimal number of changes
* are associated. Elements that are too different from each other are not
* associated.
*
* Differences:
*
* An Addition denotes a new field or array element. Addition.field1 is null,
* meaning the field does not exist in document1, and Addition.field2 denotes
* the new field, or array element.
*
* A Removal denotes a removed field or array element. Removal.field1 denotes
* the element in document1, and Removal.field2 is null.
*
* A Modification denotes a content modification of a field, or array element.
* Both field1 and field2 are non-null, and set to the name of the modified
* field.
*
* A Move denotes an array element move. field1 denotes the old index of the
* array element, and field2 denotes the new index.
*
* If new elements are added to an array, or existing elements are removed, the
* addition and removal appear as diff, and any node that shifted during the
* operation appears within a Move.
*/
public abstract class DocCompare {
/**
 * Thrown if there is an array whose elements contain identities, but for at
 * least one element the identity cannot be retrieved.
 */
public static final class InvalidArrayIdentity extends Exception {
    /**
     * Builds the exception message from a path.
     *
     * @param p the path segments; they are rendered into the exception
     *          message with {@code JsonDiff.toString}
     */
    public InvalidArrayIdentity(List<String> p) {
        this(JsonDiff.toString(p));
    }

    /**
     * @param p the exception message
     */
    public InvalidArrayIdentity(String p) {
        super(p);
    }
}
/**
 * Thrown if there is an array whose elements contain identities, but they
 * are not unique.
 */
public static final class DuplicateArrayIdentity extends Exception {
    /**
     * Builds the exception message from a path.
     *
     * @param p the path segments; they are rendered into the exception
     *          message with {@code JsonDiff.toString}
     */
    public DuplicateArrayIdentity(List<String> p) {
        this(JsonDiff.toString(p));
    }

    /**
     * @param p the exception message
     */
    public DuplicateArrayIdentity(String p) {
        super(p);
    }
}
/**
 * The array element identity extractor interface.
 *
 * @param <T> the type of the array elements identities are extracted from
 */
public interface IdentityExtractor<T> {
    /**
     * It should return an identity object from the given array element. The
     * returned object should implement equals and hashCode methods.
     *
     * @param element the array element
     * @return the identity of the element
     */
    Object getIdentity(T element);
}
/**
* Contains the edit script, and number of changed and unchanged fields. The
* number of changed fields does not include array element moves, it only
* includes additions, removals, and modifications.
*/
public static class Difference {
private final List> delta;
private int numUnchangedFields;
private int numChangedFields;
/**
* Default ctor, sets numUnchangedFields to zero, initializes an empty
* list
*/
public Difference() {
delta = new ArrayList<>();
numUnchangedFields = 0;
}
public Difference(List> delta) {
this.delta = delta;
for (Delta d : this.delta) {
if (d instanceof Addition
|| d instanceof Removal
|| d instanceof Modification) {
numChangedFields++;
}
}
}
/**
* Constructs a difference with one modification, no unchanged fields
*/
public Difference(Delta d) {
this(new ArrayList>(1));
add(d);
}
/**
* Constructs a Difference denoting no difference
*/
public Difference(int numFields) {
delta = new ArrayList<>();
numUnchangedFields = numFields;
}
/**
* Returns the number of unmodified fields
*/
public int getNumUnchangedFields() {
return numUnchangedFields;
}
/**
* Returns the number of modified fields, excluding array element moves
*/
public int getNumChangedFields() {
return numChangedFields;
}
/**
* Returns the list of changes
*/
public List> getDelta() {
return delta;
}
public void add(Difference diff) {
delta.addAll(diff.delta);
numUnchangedFields += diff.numUnchangedFields;
numChangedFields += diff.numChangedFields;
}
public void add(Delta d) {
delta.add(d);
if (d instanceof Addition
|| d instanceof Removal
|| d instanceof Modification) {
numChangedFields++;
}
}
/**
* A numeric value between 0 and 1 denoting how much an object is
* changed. 0 means no change, 1 means everything is changed.
*/
public double getChangeAmount() {
double d = numChangedFields + numUnchangedFields;
return d == 0 ? 0 : numChangedFields / d;
}
/**
* Returns true if there are not changes
*/
public boolean same() {
return delta.isEmpty();
}
@Override
public String toString() {
StringBuilder bld = new StringBuilder();
for (Delta x : delta) {
bld.append(x.toString()).append('\n');
}
return bld.toString();
}
}
/**
* Base class for a delta
*/
public static abstract class Delta {
protected final String field1;
protected final String field2;
public Delta(String field1, String field2) {
this.field1 = field1;
this.field2 = field2;
}
public Delta(List field1, List field2) {
this(field1 == null ? null : JsonDiff.toString(field1), field2 == null ? null : JsonDiff.toString(field2));
}
public String getField1() {
return field1;
}
public String getField2() {
return field2;
}
/**
* Return the non-null field, or field1 if both are non-null
*/
public String getField() {
return field1 == null ? field2 : field1;
}
}
/**
* Denotes an addition of a field or array element that isn't present in
* doc1
*/
public static class Addition extends Delta {
private final T addedNode;
public Addition(String field2, T addedNode) {
super(null, field2);
this.addedNode = addedNode;
}
public Addition(List field2, T addedNode) {
super(null, field2);
this.addedNode = addedNode;
}
public T getAddedNode() {
return addedNode;
}
@Override
public String toString() {
return "+ " + field2 + ":" + addedNode;
}
}
/**
* Denotes a removal of a field or array element that is present in doc1 but
* not in doc2
*/
public static class Removal extends Delta {
private final T removedNode;
public Removal(String field1, T removedNode) {
super(field1, null);
this.removedNode = removedNode;
}
public Removal(List field1, T removedNode) {
super(field1, null);
this.removedNode = removedNode;
}
public T getRemovedNode() {
return removedNode;
}
@Override
public String toString() {
return "- " + field1 + ":" + removedNode;
}
}
/**
* Denotes an array element move from one element index to another
*/
public static class Move extends Delta {
private final T movedNode;
public Move(String field1, String field2, T movedNode) {
super(field1, field2);
this.movedNode = movedNode;
}
public Move(List field1, List field2, T movedNode) {
super(field1, field2);
this.movedNode = movedNode;
}
public T getMovedNode() {
return movedNode;
}
@Override
public String toString() {
return "* " + field1 + "->" + field2 + ":" + movedNode;
}
}
/**
* Denotes a field modification
*/
public static class Modification extends Delta {
private final T node1;
private final T node2;
public Modification(String field1, T node1, String field2, T node2) {
super(field1, field2);
this.node1 = node1;
this.node2 = node2;
}
public Modification(List field1, T node1, List field2, T node2) {
super(field1, field2);
this.node1 = node1;
this.node2 = node2;
}
public T getUnmodifiedNode() {
return node1;
}
public T getModifiedNode() {
return node2;
}
@Override
public String toString() {
return "* " + field1 + "->" + field2 + ":" + node1 + " -> " + node2;
}
}
/**
 * Contains a list of fields contained in array elements that uniquely
 * identify array elements. Instances are immutable: the field names are
 * copied on the way in and on the way out.
 */
public static class ArrayIdentityFields {
    private final String[] fields;

    /**
     * @param fields the names of the identifying fields; a null array is
     *               treated as no fields
     */
    public ArrayIdentityFields(String... fields) {
        // Copy so later changes to the caller's array cannot alter the identity
        this.fields = fields == null ? new String[0] : fields.clone();
    }

    /**
     * Returns a copy of the identifying field names
     */
    public String[] getFields() {
        return fields.clone();
    }
}
/**
* Default array identity object. Contains the identity values, computes
* hashcode from them
*/
public static class DefaultIdentity {
private final T[] nodes;
private Integer hcode;
public DefaultIdentity(T[] nodes) {
this.nodes = nodes;
}
@Override
public int hashCode() {
if (hcode == null) {
int code = 0;
for (int i = 0; i < nodes.length; i++) {
if (nodes[i] != null) {
code += nodes[i].hashCode();
}
}
hcode = code;
}
return hcode;
}
@Override
public boolean equals(Object x) {
try {
DefaultIdentity d = (DefaultIdentity) x;
for (int i = 0; i < nodes.length; i++) {
if (!d.nodes[i].equals(nodes[i])) {
return false;
}
}
} catch (Exception e) {
return false;
}
return true;
}
}
private final Map arrayIdentities = new HashMap<>();
protected abstract boolean isValue(BaseType value);
protected abstract boolean isArray(BaseType value);
protected abstract boolean isObject(BaseType value);
protected abstract boolean isNull(BaseType value);
protected abstract ValueType asValue(BaseType value);
protected abstract ArrayType asArray(BaseType value);
protected abstract ObjectType asObject(BaseType value);
protected abstract boolean equals(ValueType v1, ValueType v2);
protected abstract Iterator> getFields(ObjectType o);
protected abstract boolean hasField(ObjectType value, String field);
protected abstract BaseType getField(ObjectType value, String field);
protected abstract IdentityExtractor getArrayIdentityExtractorImpl(ArrayIdentityFields fields);
protected abstract BaseType getElement(ArrayType value, int index);
protected abstract int size(ArrayType value);
/**
 * Registers a group of fields that can uniquely identify array elements
 * for an object array. Registering the same array again replaces the
 * earlier registration.
 *
 * In the following document:
 * <pre>
 * {
 *   ...
 *   "aField": [
 *     { "_id":1,"field":...},
 *     { "_id":2,"field":...}
 *   ]
 * }
 * </pre>
 *
 * the call looks like
 * <pre>
 * jsonCompare.addArrayIdentity("aField", "_id");
 * </pre>
 *
 * If more than one field is needed to uniquely identify an element, list
 * all of them in the argument list.
 *
 * @param array The name of the array field
 * @param identities The fields of the array element that can identify an
 * element
 */
public void addArrayIdentity(String array, String... identities) {
    ArrayIdentityFields identityFields = new ArrayIdentityFields(identities);
    arrayIdentities.put(array, identityFields);
}
/**
 * Compares two documents and returns the difference. Both documents are
 * compared starting from an empty field path.
 *
 * @param node1 the first document
 * @param node2 the second document
 * @return the edit script and the changed/unchanged field counts
 * @throws InvalidArrayIdentity declared by the underlying array comparison
 * @throws DuplicateArrayIdentity declared by the underlying array comparison
 */
public Difference compareNodes(BaseType node1, BaseType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    // The path lists must be mutable: field names are pushed and popped
    // on them while the comparison descends into objects
    return compareNodes(new ArrayList<String>(), node1, new ArrayList<String>(), node2);
}
/**
 * Compares two nodes located at the given field paths and returns the
 * difference.
 *
 * Two values are compared with equals(); two arrays and two objects are
 * compared recursively. Equal values, and two null nodes, yield a
 * Difference with one unchanged field. Any other combination yields a
 * single Modification.
 *
 * @param field1 path of node1 in the first document
 * @param node1 node of the first document
 * @param field2 path of node2 in the second document
 * @param node2 node of the second document
 */
public Difference compareNodes(List field1,
                               BaseType node1,
                               List field2,
                               BaseType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    if (isValue(node1) && isValue(node2)) {
        if (equals(asValue(node1), asValue(node2))) {
            return new Difference<>(1);
        }
        return new Difference<>(new Modification(field1, node1, field2, node2));
    }
    if (isArray(node1) && isArray(node2)) {
        return compareArrays(field1, asArray(node1), field2, asArray(node2));
    }
    if (isObject(node1) && isObject(node2)) {
        return compareObjects(field1, asObject(node1), field2, asObject(node2));
    }
    if (isNull(node1) && isNull(node2)) {
        return new Difference<>(1);
    }
    // Nodes of different kinds
    return new Difference<>(new Modification(field1, node1, field2, node2));
}
/**
 * Compares two object nodes recursively and returns the differences.
 *
 * Fields present in both objects are compared recursively, fields present
 * only in node1 are reported as removals, and fields present only in node2
 * are reported as additions. While a field is being processed its name is
 * appended to the corresponding path list, and removed again afterwards.
 *
 * @param field1 path of node1 in the first document
 * @param node1 object of the first document
 * @param field2 path of node2 in the second document
 * @param node2 object of the second document
 */
public Difference compareObjects(List field1,
                                 ObjectType node1,
                                 List field2,
                                 ObjectType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    Difference ret = new Difference<>();
    // Field by field comparison of obj1 to obj2.
    for (Iterator<Map.Entry<String, BaseType>> fields = getFields(node1); fields.hasNext();) {
        Map.Entry<String, BaseType> field = fields.next();
        String fieldName = field.getKey();
        field1.add(fieldName);
        BaseType value1 = field.getValue();
        if (hasField(node2, fieldName)) {
            // If both obj1 and obj2 have the same field, compare recursively
            field2.add(fieldName);
            BaseType value2 = getField(node2, fieldName);
            ret.add(compareNodes(field1, value1, field2, value2));
            pop(field2);
        } else {
            // obj1.field1 exists, obj2.field1 does not, so it is removed
            ret.add(new Removal(field1, value1));
        }
        pop(field1);
    }
    // Now compare any new nodes added to obj2
    for (Iterator<Map.Entry<String, BaseType>> fields = getFields(node2); fields.hasNext();) {
        Map.Entry<String, BaseType> field = fields.next();
        String fieldName = field.getKey();
        if (!hasField(node1, fieldName)) {
            field2.add(fieldName);
            ret.add(new Addition(field2, field.getValue()));
            pop(field2);
        }
    }
    return ret;
}
/**
* Returns the identity extractor for the given array field. The field is
* parsed with AbstractFieldFilter.parse, and the lookup is delegated to the
* overload that accepts the parsed form.
*
* @param arrayField the array field, as a string
*/
public IdentityExtractor getArrayIdentityExtractor(String arrayField) {
return getArrayIdentityExtractor(AbstractFieldFilter.parse(arrayField));
}
/**
 * Returns the identity extractor registered for the given array field.
 *
 * Every path segment that is "*" or parses as an integer is replaced with
 * "*" before the lookup, so all elements of an array share the same
 * registration. The lookup key is the normalized path converted to a
 * String using JsonDiff.toString.
 *
 * @param arrayField the path segments of the array field
 * @return the extractor, or null if no identity is registered for the
 * array
 */
public IdentityExtractor getArrayIdentityExtractor(List<String> arrayField) {
    List<String> p = new ArrayList<>(arrayField.size());
    for (String s : arrayField) {
        p.add("*".equals(s) || isIndex(s) ? "*" : s);
    }
    ArrayIdentityFields fields = arrayIdentities.get(JsonDiff.toString(p));
    return fields == null ? null : getArrayIdentityExtractorImpl(fields);
}
/**
 * Returns true if the string parses as an int, which is how array indexes
 * are recognized in a path. A null string is not an index.
 */
private boolean isIndex(String s) {
    try {
        Integer.parseInt(s);
        return true;
    } catch (NumberFormatException e) {
        // Not a number (parseInt reports null the same way)
        return false;
    }
}
/**
 * Compares two array nodes and returns the differences.
 *
 * If an identity extractor is registered for the array at field1, the
 * elements are compared with compareArraysWithId; otherwise they are
 * compared with compareArraysNoId. Only field1 is used to look up the
 * extractor.
 *
 * @param field1 path of node1 in the first document
 * @param node1 array of the first document
 * @param field2 path of node2 in the second document
 * @param node2 array of the second document
 */
public Difference compareArrays(List field1,
                                ArrayType node1,
                                List field2,
                                ArrayType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    IdentityExtractor identityExtractor = getArrayIdentityExtractor(field1);
    if (identityExtractor != null) {
        return compareArraysWithId(field1, node1, field2, node2, identityExtractor);
    }
    return compareArraysNoId(field1, node1, field2, node2);
}
/**
* Computes difference between arrays whose elements can be identitied by a
* unique identifier
*/
public Difference compareArraysWithId(List field1,
ArrayType node1,
List field2,
ArrayType node2,
IdentityExtractor idex)
throws InvalidArrayIdentity, DuplicateArrayIdentity {
Difference ret = new Difference<>();
// Build a map of identity -> index for both arrays
final Map
© 2015 - 2025 Weber Informatics LLC | Privacy Policy