
com.redhat.lightblue.util.DocComparator Maven / Gradle / Ivy
/*
Copyright 2013 Red Hat, Inc. and/or its affiliates.
This file is part of lightblue.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.redhat.lightblue.util;
import java.util.List;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.util.ListIterator;
import java.util.LinkedList;
import java.util.Iterator;
import java.util.HashSet;
/**
* Compares two documents represented as an object tree, and builds a list of
* all changes.
*
* Containers (objects and arrays) are compared recursively. The comparison
* algorithm works like this:
*
* Objects: A field-by-field comparison is done. If a field exists in the first
* document but not in the second, that field is removed. If a field exists in
* the second document but not the first, that field is added. If a field exists
* in both documents with different values, that field is modified.
*
* Arrays: There are two possible algorithms to compare arrays. If array
* elements contain a unique identifier (which is defined by the caller), then
* array elements of the first and the second document are matched using the
* unique identifiers of array elements. Then each matching array element is
* compared to generate the detailed difference. If array elements don't have
* unique identifiers, then each element of the first array is compared to each
* element of the second array, and the elements with minimal number of changes
* are associated. Elements that are too different from each other are not
* associated.
*
* Differences:
*
* An Addition denotes a new field or array element. Addition.field1 is null,
* meaning the field does not exist in document1, and Addition.field2 denotes
* the new field, or array element.
*
* A Removal denotes a removed field or array element. Removal.field1 denotes
* the element in document1, and Removal.field2 is null.
*
* A Modification denotes a content modification of a field, or array element.
* Both field1 and field2 are non-null, and set to the name of the modified
* field.
*
* A Move denotes an array element move. field1 denotes the old index of the
* array element, and field2 denotes the new index.
*
* If new elements are added to an array, or existing elements are removed, the
* addition and removal appear as diff, and any node that shifted during the
* operation appears within a Move.
*/
public abstract class DocComparator {
/**
 * Signals that an array is configured with element identities, but the
 * identity of at least one of its elements could not be retrieved.
 */
public static final class InvalidArrayIdentity extends Exception {

    /**
     * Creates the exception, using the string form of the given path as the
     * exception message.
     *
     * @param p the path reported in the message; must not be null
     */
    public InvalidArrayIdentity(Path p) {
        super(p.toString());
    }
}
/**
 * Signals that an array is configured with element identities, but two or
 * more of its elements share the same identity.
 */
public static final class DuplicateArrayIdentity extends Exception {

    /**
     * Creates the exception, using the string form of the given path as the
     * exception message.
     *
     * @param p the path reported in the message; must not be null
     */
    public DuplicateArrayIdentity(Path p) {
        super(p.toString());
    }
}
/**
 * The array element identity extractor interface.
 *
 * @param <T> the type of the array elements an identity is extracted from
 */
public interface IdentityExtractor<T> {
    /**
     * Returns an identity object for the given array element. The returned
     * object should implement equals and hashCode, because identities are
     * compared with each other to match array elements.
     *
     * @param element the array element to extract the identity from
     * @return the identity of the element
     */
    Object getIdentity(T element);
}
/**
* Contains the edit script, and number of changed and unchanged fields. The
* number of changed fields does not include array element moves, it only
* includes additions, removals, and modifications.
*
* NOTE(review): several declarations in this class (for example the type of
* the delta list and the ArrayList created in Difference(Delta)) look as if
* their generic type arguments were lost when this listing was produced;
* confirm against the upstream source before compiling.
*/
public static class Difference {
// The edit script, in the order the deltas were added
private final List> delta;
// Number of fields found identical; set by Difference(int) or merged in by add(Difference)
private int numUnchangedFields;
// Number of Addition, Removal and Modification deltas; other deltas are not counted
private int numChangedFields;
/**
* Default ctor, sets numUnchangedFields to zero, initializes an empty
* list
*/
public Difference() {
delta = new ArrayList<>();
numUnchangedFields = 0;
}
/**
* Wraps the given list of deltas. The list is stored as-is, not copied, so
* later add() calls append to the caller's list. numChangedFields is
* initialized to the number of Addition, Removal and Modification entries
* in the list.
*/
public Difference(List> delta) {
this.delta = delta;
for (Delta d : this.delta) {
if (d instanceof Addition
|| d instanceof Removal
|| d instanceof Modification) {
numChangedFields++;
}
}
}
/**
* Constructs a difference containing the single given delta, no unchanged
* fields. The delta is counted as a changed field only if it is an
* Addition, Removal or Modification.
*/
public Difference(Delta d) {
this(new ArrayList>(1));
add(d);
}
/**
* Constructs a Difference denoting no difference: an empty edit script
* with numFields unchanged fields
*/
public Difference(int numFields) {
delta = new ArrayList<>();
numUnchangedFields = numFields;
}
/**
* Returns the number of unmodified fields
*/
public int getNumUnchangedFields() {
return numUnchangedFields;
}
/**
* Returns the number of modified fields, excluding array element moves
*/
public int getNumChangedFields() {
return numChangedFields;
}
/**
* Returns the list of changes. This is the internal list, not a copy.
*/
public List> getDelta() {
return delta;
}
/**
* Merges another difference into this one: appends all of its deltas and
* adds its changed and unchanged field counts to this instance's counts
*/
public void add(Difference diff) {
delta.addAll(diff.delta);
numUnchangedFields += diff.numUnchangedFields;
numChangedFields += diff.numChangedFields;
}
/**
* Appends a single delta. numChangedFields is incremented only for
* Addition, Removal and Modification.
*/
public void add(Delta d) {
delta.add(d);
if (d instanceof Addition
|| d instanceof Removal
|| d instanceof Modification) {
numChangedFields++;
}
}
/**
* A numeric value between 0 and 1 denoting how much an object is
* changed. 0 means no change, 1 means everything is changed. Returns 0
* when no changed or unchanged fields have been counted at all.
*/
public double getChangeAmount() {
// Held as a double so that the division below is not an integer division
double d = numChangedFields + numUnchangedFields;
return d == 0 ? 0 : numChangedFields / d;
}
/**
* Returns true if there are no changes, that is, the edit script is empty.
* Any delta in the script, counted or not, makes this return false.
*/
public boolean same() {
return delta.isEmpty();
}
/**
* Returns the string form of every delta, each followed by a newline
*/
@Override
public String toString() {
StringBuilder bld = new StringBuilder();
for (Delta x : delta) {
bld.append(x.toString()).append('\n');
}
return bld.toString();
}
}
/**
 * Common parent of all edit script entries. A delta refers to a location in
 * the first document (field1), a location in the second document (field2),
 * or both.
 */
public static abstract class Delta {
    protected final Path field1;
    protected final Path field2;

    /**
     * Keeps the result of immutableCopy() for each non-null path, so the
     * caller may keep modifying the paths it passed in. A null path stays
     * null.
     *
     * @param field1 location in the first document, or null
     * @param field2 location in the second document, or null
     */
    public Delta(Path field1, Path field2) {
        if (field1 != null) {
            this.field1 = field1.immutableCopy();
        } else {
            this.field1 = null;
        }
        if (field2 != null) {
            this.field2 = field2.immutableCopy();
        } else {
            this.field2 = null;
        }
    }

    /**
     * Returns the location in the first document, possibly null
     */
    public Path getField1() {
        return field1;
    }

    /**
     * Returns the location in the second document, possibly null
     */
    public Path getField2() {
        return field2;
    }

    /**
     * Returns field1 when it is set, otherwise field2
     */
    public Path getField() {
        if (field1 != null) {
            return field1;
        }
        return field2;
    }
}
/**
 * Denotes an addition of a field or array element that isn't present in
 * doc1. field1 is always null for an addition.
 *
 * @param <T> the type of the added document node
 */
public static class Addition<T> extends Delta {
    private final T addedNode;

    /**
     * @param field2 location of the new field or element in the second document
     * @param addedNode the node that was added
     */
    public Addition(Path field2, T addedNode) {
        super(null, field2);
        this.addedNode = addedNode;
    }

    /**
     * Returns the node that was added
     */
    public T getAddedNode() {
        return addedNode;
    }

    @Override
    public String toString() {
        return "+ " + field2 + ":" + addedNode;
    }
}
/**
 * Denotes a removal of a field or array element that is present in doc1 but
 * not in doc2. field2 is always null for a removal.
 *
 * @param <T> the type of the removed document node
 */
public static class Removal<T> extends Delta {
    private final T removedNode;

    /**
     * @param field1 location of the removed field or element in the first document
     * @param removedNode the node that was removed
     */
    public Removal(Path field1, T removedNode) {
        super(field1, null);
        this.removedNode = removedNode;
    }

    /**
     * Returns the node that was removed
     */
    public T getRemovedNode() {
        return removedNode;
    }

    @Override
    public String toString() {
        return "- " + field1 + ":" + removedNode;
    }
}
/**
 * Denotes an array element move from one element index to another
 *
 * @param <T> the type of the moved document node
 */
public static class Move<T> extends Delta {
    private final T movedNode;

    /**
     * @param field1 location of the element before the move
     * @param field2 location of the element after the move
     * @param movedNode the element that moved
     */
    public Move(Path field1, Path field2, T movedNode) {
        super(field1, field2);
        this.movedNode = movedNode;
    }

    /**
     * Returns the element that moved
     */
    public T getMovedNode() {
        return movedNode;
    }

    @Override
    public String toString() {
        return "* " + field1 + "->" + field2 + ":" + movedNode;
    }
}
/**
 * Denotes a field modification
 *
 * @param <T> the type of the document nodes before and after the change
 */
public static class Modification<T> extends Delta {
    private final T node1;
    private final T node2;

    /**
     * @param field1 location of the field in the first document
     * @param node1 the node as it is in the first document
     * @param field2 location of the field in the second document
     * @param node2 the node as it is in the second document
     */
    public Modification(Path field1, T node1, Path field2, T node2) {
        super(field1, field2);
        this.node1 = node1;
        this.node2 = node2;
    }

    /**
     * Returns the node from the first document (the state before the change)
     */
    public T getUnmodifiedNode() {
        return node1;
    }

    /**
     * Returns the node from the second document (the state after the change)
     */
    public T getModifiedNode() {
        return node2;
    }

    @Override
    public String toString() {
        return "* " + field1 + "->" + field2 + ":" + node1 + " -> " + node2;
    }
}
/**
 * Contains a list of fields contained in array elements that uniquely
 * identify array elements
 */
public static class ArrayIdentityFields {
    // Assigned once in the constructor and never reassigned
    private final Path[] fields;

    /**
     * @param fields the identity fields; the array is stored as-is, without
     * a defensive copy
     */
    public ArrayIdentityFields(Path... fields) {
        this.fields = fields;
    }

    /**
     * Returns the identity fields. This is the array given to the
     * constructor, not a copy.
     */
    public Path[] getFields() {
        return fields;
    }

    @Override
    public String toString() {
        return Arrays.toString(fields);
    }
}
/**
* Default array identity object. Contains the identity values, computes
* hashcode from them
*/
public class DefaultIdentity {
private final BaseType[] nodes;
private Integer hcode;
public DefaultIdentity(BaseType[] nodes) {
this.nodes = nodes;
}
@Override
public int hashCode() {
if (hcode == null) {
int code = 0;
for (int i = 0; i < nodes.length; i++) {
if (nodes[i] != null) {
code += nodes[i].hashCode();
}
}
hcode = code;
}
return hcode;
}
@Override
public boolean equals(Object x) {
try {
DefaultIdentity d = (DefaultIdentity) x;
for (int i = 0; i < nodes.length; i++) {
if (!DocComparator.this.equals(asValue(d.nodes[i]), asValue(nodes[i]))) {
return false;
}
}
} catch (Exception e) {
return false;
}
return true;
}
}
// Identity fields registered through addArrayIdentity, keyed by the array
// field path; looked up in getArrayIdentityExtractor.
// NOTE(review): the Map is declared without type arguments here; they may
// have been lost when this listing was produced. Confirm against upstream.
private final Map arrayIdentities = new HashMap<>();
/**
* Node classification hooks. compareNodes uses them, in the order value,
* array, object, null, to decide how two nodes are compared.
*/
protected abstract boolean isValue(BaseType value);
protected abstract boolean isArray(BaseType value);
protected abstract boolean isObject(BaseType value);
protected abstract boolean isNull(BaseType value);
/**
* Node conversion hooks. compareNodes calls each of them only after the
* corresponding isValue/isArray/isObject check returned true for the node.
*/
protected abstract ValueType asValue(BaseType value);
protected abstract ArrayType asArray(BaseType value);
protected abstract ObjectType asObject(BaseType value);
/**
* Value equality. compareNodes reports a Modification when this returns
* false for two value nodes.
*/
protected abstract boolean equals(ValueType v1, ValueType v2);
/**
* Iterates the fields of an object. compareObjects reads a String field
* name with getKey() and the field node with getValue() from each entry.
* NOTE(review): the return type looks truncated (type arguments missing);
* confirm against upstream.
*/
protected abstract Iterator> getFields(ObjectType o);
/**
* Field presence test and field lookup by name. compareObjects calls
* getField only after hasField returned true for the same name.
*/
protected abstract boolean hasField(ObjectType value, String field);
protected abstract BaseType getField(ObjectType value, String field);
/**
* Creates the identity extractor for the given identity fields. Called by
* getArrayIdentityExtractor when identity fields are registered for an
* array.
*/
protected abstract IdentityExtractor getArrayIdentityExtractorImpl(ArrayIdentityFields fields);
/**
* Array access hooks: presumably the element at the given index and the
* number of elements. Their callers are outside the visible part of this
* file - TODO confirm.
*/
protected abstract BaseType getElement(ArrayType value, int index);
protected abstract int size(ArrayType value);
/**
 * Registers the group of fields that uniquely identify the elements of an
 * object array.
 *
 * In the following document:
 * <pre>
 * {
 *   ...
 *   "aField": [
 *     { "_id":1,"field":...},
 *     { "_id":2,"field":...}
 *   ]
 * }
 * </pre>
 *
 * the call looks like
 *
 * <pre>
 * jsonCompare.addArrayIdentity(new Path("aField"),new Path("_id"));
 * </pre>
 *
 * If more than one field is needed to uniquely identify an element, list
 * all of them in the argument list.
 *
 * @param array The name of the array field
 * @param identities The fields of the array element that can identify an
 * element
 */
public void addArrayIdentity(Path array, Path... identities) {
    final ArrayIdentityFields identityFields = new ArrayIdentityFields(identities);
    arrayIdentities.put(array, identityFields);
}
/**
 * Returns the array identities registered so far, keyed by array field
 * path. The returned map is the internal map itself, not a copy.
 */
public Map getArrayIdentities() {
    return this.arrayIdentities;
}
/**
 * Compares two documents and returns the difference. Both documents are
 * compared starting from a fresh, empty path.
 *
 * @param node1 root of the first document
 * @param node2 root of the second document
 * @return the differences between the two documents
 */
public Difference compareNodes(BaseType node1, BaseType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    final MutablePath path1 = new MutablePath();
    final MutablePath path2 = new MutablePath();
    return compareNodes(path1, node1, path2, node2);
}
/**
 * Compares two nodes located at the given paths and returns the difference.
 *
 * Two values are compared with equals(ValueType, ValueType); two arrays and
 * two objects are compared recursively. Any other combination is reported as
 * a single Modification, unless both nodes are null. Nodes found identical
 * yield a Difference with one unchanged field and no deltas.
 *
 * @param field1 path of node1 in the first document
 * @param node1 node of the first document
 * @param field2 path of node2 in the second document
 * @param node2 node of the second document
 */
public Difference compareNodes(MutablePath field1,
                               BaseType node1,
                               MutablePath field2,
                               BaseType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    if (isValue(node1) && isValue(node2)) {
        if (equals(asValue(node1), asValue(node2))) {
            return new Difference<>(1);
        }
        return new Difference(new Modification(field1, node1, field2, node2));
    }
    if (isArray(node1) && isArray(node2)) {
        return compareArrays(field1, asArray(node1), field2, asArray(node2));
    }
    if (isObject(node1) && isObject(node2)) {
        return compareObjects(field1, asObject(node1), field2, asObject(node2));
    }
    if (isNull(node1) && isNull(node2)) {
        // Both sides are null: nothing changed
        return new Difference<>(1);
    }
    // Node kinds differ (or only one side is null): report the whole node as modified
    return new Difference<>(new Modification(field1, node1, field2, node2));
}
/**
* Compares two object nodes recursively and returns the differences.
*
* Fields present in both objects are compared with compareNodes, fields
* present only in node1 are reported as Removal, and fields present only in
* node2 are reported as Addition.
*
* field1 and field2 are used as scratch paths: every push is matched by a pop,
* so both paths are back in their incoming state when the method returns
* normally. If a nested comparison throws, the segments pushed so far are not
* popped.
*
* NOTE(review): the Iterator declarations in both loops look truncated (type
* arguments missing); confirm against the upstream source.
*/
public Difference compareObjects(MutablePath field1,
ObjectType node1,
MutablePath field2,
ObjectType node2)
throws InvalidArrayIdentity, DuplicateArrayIdentity {
Difference ret = new Difference<>();
// First pass: walk the fields of node1 and look each one up in node2.
for (Iterator> fields = getFields(node1); fields.hasNext();) {
Map.Entry field = fields.next();
String fieldName = field.getKey();
field1.push(fieldName);
BaseType value1 = field.getValue();
if (hasField(node2, fieldName)) {
// The field exists on both sides: compare the two values recursively
field2.push(fieldName);
BaseType value2 = getField(node2, fieldName);
ret.add(compareNodes(field1, value1, field2, value2));
field2.pop();
} else {
// The field exists in node1 but not in node2, so it was removed
ret.add(new Removal(field1, value1));
}
field1.pop();
}
// Second pass: fields of node2 that node1 does not have were added.
// Fields present on both sides were already handled by the first pass.
for (Iterator> fields = getFields(node2); fields.hasNext();) {
Map.Entry field = fields.next();
String fieldName = field.getKey();
if (!hasField(node1, fieldName)) {
field2.push(fieldName);
ret.add(new Addition(field2, field.getValue()));
field2.pop();
}
}
return ret;
}
/**
 * Returns the identity extractor for the given array field, or null if no
 * identity fields are registered for it.
 *
 * The lookup key is built from arrayField by replacing every index segment
 * with Path.ANY and keeping all other segments as they are.
 *
 * @param arrayField path of the array, possibly containing array indexes
 */
public IdentityExtractor getArrayIdentityExtractor(Path arrayField) {
    final MutablePath lookupKey = new MutablePath();
    for (int i = 0, n = arrayField.numSegments(); i < n; i++) {
        if (!arrayField.isIndex(i)) {
            lookupKey.push(arrayField.head(i));
        } else {
            lookupKey.push(Path.ANY);
        }
    }
    final ArrayIdentityFields fields = arrayIdentities.get(lookupKey);
    return fields == null ? null : getArrayIdentityExtractorImpl(fields);
}
/**
 * Compares two array nodes and returns the differences. If an identity
 * extractor is available for the array at field1, elements are matched by
 * identity with compareArraysWithId; otherwise compareArraysNoId is used.
 *
 * @param field1 path of node1 in the first document
 * @param node1 array of the first document
 * @param field2 path of node2 in the second document
 * @param node2 array of the second document
 */
public Difference compareArrays(MutablePath field1,
                                ArrayType node1,
                                MutablePath field2,
                                ArrayType node2)
        throws InvalidArrayIdentity, DuplicateArrayIdentity {
    final IdentityExtractor extractor = getArrayIdentityExtractor(field1);
    return extractor == null
            ? compareArraysNoId(field1, node1, field2, node2)
            : compareArraysWithId(field1, node1, field2, node2, extractor);
}
/**
* Computes difference between arrays whose elements can be identitied by a
* unique identifier
*/
public Difference compareArraysWithId(MutablePath field1,
ArrayType node1,
MutablePath field2,
ArrayType node2,
IdentityExtractor idex)
throws InvalidArrayIdentity, DuplicateArrayIdentity {
Difference ret = new Difference<>();
// Build a map of identity -> index for both arrays
final Map
© 2015 - 2025 Weber Informatics LLC | Privacy Policy