
com.redhat.lightblue.util.DocComparator Maven / Gradle / Ivy
/*
Copyright 2013 Red Hat, Inc. and/or its affiliates.
This file is part of lightblue.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.redhat.lightblue.util;
import java.util.List;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.util.ListIterator;
import java.util.LinkedList;
import java.util.Iterator;
import java.util.HashSet;
/**
* Compares two documents represented as an object tree, and builds a
* list of all changes.
*
* Containers (objects and arrays) are compared recursively. The
* comparison algorithm works like this:
*
* Objects: A field-by-field comparison is done. If a field exists in
* the first document but not in the second, that field is removed. If
* a field exists in the second document but not the first, that field
* is added. If a field exists in both documents with different
* values, that field is modified.
*
* Arrays: There are two possible algorithms to compare arrays. If
* array elements contain a unique identifier (which is defined by the
* caller), then array elements of the first and the second document
* are matched using the unique identifiers of array elements. Then
* each matching array element is compared to generate the detailed
* difference. If array elements don't have unique identifiers, then
* each element of the first array is compared to each element of the
* second array, and the elements with minimal number of changes are
* associated. Elements that are too different from each other are not
* associated.
*
* Differences:
*
* An Addition denotes a new field or array element. Addition.field1
* is null, meaning the field does not exist in document1, and
* Addition.field2 denotes the new field, or array element.
*
* A Removal denotes a removed field or array element. Removal.field1
* denotes the element in document1, and Removal.field2 is null.
*
* A Modification denotes a content modification of a field, or array
* element. Both field1 and field2 are non-null, and set to the name
* of the modified field.
*
* A Move denotes an array element move. field1 denotes the old index
* of the array element, and field2 denotes the new index.
*
* If new elements are added to an array, or existing elements are
* removed, the addition and removal appear as diff, and any node that
* shifted during the operation appears within a Move.
*/
public abstract class DocComparator {
/**
* Thrown if there is an array whose elements contain identities,
* but for at least one element identity cannot be retrieved
*/
public static final class InvalidArrayIdentity extends Exception {
    /**
     * Creates the exception, using the string form of the given path as
     * the exception message.
     *
     * @param p path whose {@code toString()} value becomes the message
     */
    public InvalidArrayIdentity(Path p) {
        super(messageFor(p));
    }

    // Renders the path into the message text handed to Exception.
    private static String messageFor(Path p) {
        return p.toString();
    }
}
/**
* Thrown if there is an array whose elements contain identities,
* but they are not unique
*/
public static final class DuplicateArrayIdentity extends Exception {
    /**
     * Creates the exception; the message is the string form of the
     * given path.
     *
     * @param p path whose {@code toString()} value becomes the message
     */
    public DuplicateArrayIdentity(Path p) {
        super(describe(p));
    }

    // Produces the exception message from the path.
    private static String describe(Path p) {
        return p.toString();
    }
}
/**
* The array element identity extractor interface.
*/
public interface IdentityExtractor<T> {
    // T is the type of the array elements identities are extracted from.

    /**
     * Returns an identity object for the given array element. The
     * returned object should implement {@code equals} and
     * {@code hashCode}.
     *
     * @param element the array element to identify
     * @return the identity object of the element
     */
    Object getIdentity(T element);
}
/**
* Contains the edit script, and number of changed and unchanged
* fields. The number of changed fields does not include array
* element moves, it only includes additions, removals, and
* modifications.
*/
public static class Difference {
private final List> delta;
private int numUnchangedFields;
private int numChangedFields;
/**
* Default ctor, sets numUnchangedFields to zero, initializes an empty list
*/
public Difference() {
delta=new ArrayList<>();
numUnchangedFields=0;
}
public Difference(List> delta) {
this.delta=delta;
for(Delta d:this.delta) {
if(d instanceof Addition||
d instanceof Removal ||
d instanceof Modification)
numChangedFields++;
}
}
/**
* Constructs a difference with one modification, no unchanged fields
*/
public Difference(Delta d) {
this(new ArrayList>(1));
add(d);
}
/**
* Constructs a Difference denoting no difference
*/
public Difference(int numFields) {
delta=new ArrayList<>();
numUnchangedFields=numFields;
}
/**
* Returns the number of unmodified fields
*/
public int getNumUnchangedFields() {
return numUnchangedFields;
}
/**
* Returns the number of modified fields, excluding array element moves
*/
public int getNumChangedFields() {
return numChangedFields;
}
/**
* Returns the list of changes
*/
public List> getDelta() {
return delta;
}
public void add(Difference diff) {
delta.addAll(diff.delta);
numUnchangedFields+=diff.numUnchangedFields;
numChangedFields+=diff.numChangedFields;
}
public void add(Delta d) {
delta.add(d);
if(d instanceof Addition||
d instanceof Removal ||
d instanceof Modification)
numChangedFields++;
}
/**
* A numeric value between 0 and 1 denoting how much an object
* is changed. 0 means no change, 1 means everything is
* changed.
*/
public double getChangeAmount() {
double d=numChangedFields+numUnchangedFields;
return d==0?0:numChangedFields/d;
}
/**
* Returns true if there are not changes
*/
public boolean same() {
return delta.isEmpty();
}
@Override
public String toString() {
StringBuilder bld=new StringBuilder();
for(Delta x:delta) {
bld.append(x.toString()).append('\n');
}
return bld.toString();
}
}
/**
* Base class for a delta
*/
public abstract static class Delta<T> {
    // T is the node type the concrete delta subclasses carry.

    /** Path of the field in the first document, or null. */
    protected final Path field1;
    /** Path of the field in the second document, or null. */
    protected final Path field2;

    /**
     * Stores immutable copies of the given paths, so later changes to
     * the caller's path objects do not affect this delta.
     *
     * @param field1 path in the first document, may be null
     * @param field2 path in the second document, may be null
     */
    public Delta(Path field1,Path field2) {
        this.field1=field1==null?null:field1.immutableCopy();
        this.field2=field2==null?null:field2.immutableCopy();
    }

    /**
     * Returns the path in the first document, or null
     */
    public Path getField1() {
        return field1;
    }

    /**
     * Returns the path in the second document, or null
     */
    public Path getField2() {
        return field2;
    }

    /**
     * Return the non-null field, or field1 if both are non-null.
     * Returns null if both are null.
     */
    public Path getField() {
        return field1==null?field2:field1;
    }
}
/**
* Denotes an addition of a field or array element that isn't present in doc1
*/
public static class Addition<T> extends Delta<T> {
    // T is the type of the added node.
    private final T addedNode;

    /**
     * @param field2 path of the added field or element in the second document
     * @param addedNode the node that was added
     */
    public Addition(Path field2,T addedNode) {
        // An addition has no counterpart in the first document, so field1 is null.
        super(null,field2);
        this.addedNode=addedNode;
    }

    /**
     * Returns the added node
     */
    public T getAddedNode() {
        return addedNode;
    }

    @Override
    public String toString() {
        return "+ "+field2+":"+addedNode;
    }
}
/**
* Denotes a removal of a field or array element that is present in doc1 but not in doc2
*/
public static class Removal<T> extends Delta<T> {
    // T is the type of the removed node.
    private final T removedNode;

    /**
     * @param field1 path of the removed field or element in the first document
     * @param removedNode the node that was removed
     */
    public Removal(Path field1,T removedNode) {
        // A removal has no counterpart in the second document, so field2 is null.
        super(field1,null);
        this.removedNode=removedNode;
    }

    /**
     * Returns the removed node
     */
    public T getRemovedNode() {
        return removedNode;
    }

    @Override
    public String toString() {
        return "- "+field1+":"+removedNode;
    }
}
/**
* Denotes an array element move from one element index to another
*/
public static class Move<T> extends Delta<T> {
    // T is the type of the moved node.
    private final T movedNode;

    /**
     * @param field1 path of the element before the move
     * @param field2 path of the element after the move
     * @param movedNode the node that moved
     */
    public Move(Path field1,Path field2,T movedNode) {
        super(field1,field2);
        this.movedNode=movedNode;
    }

    /**
     * Returns the moved node
     */
    public T getMovedNode() {
        return movedNode;
    }

    @Override
    public String toString() {
        return "* "+field1+"->"+field2+":"+movedNode;
    }
}
/**
* Denotes a field modification
*/
public static class Modification<T> extends Delta<T> {
    // T is the type of the nodes before and after the modification.
    private final T node1;
    private final T node2;

    /**
     * @param field1 path of the field in the first document
     * @param node1 the node as it is in the first document
     * @param field2 path of the field in the second document
     * @param node2 the node as it is in the second document
     */
    public Modification(Path field1,T node1,Path field2,T node2) {
        super(field1,field2);
        this.node1=node1;
        this.node2=node2;
    }

    /**
     * Returns the node of the first document (the value before the change)
     */
    public T getUnmodifiedNode() {
        return node1;
    }

    /**
     * Returns the node of the second document (the value after the change)
     */
    public T getModifiedNode() {
        return node2;
    }

    @Override
    public String toString() {
        return "* "+field1+"->"+field2+":"+node1+" -> "+node2;
    }
}
/**
* Contains a list of fields contained in array elements that
* uniquely identify array elements
*/
public static class ArrayIdentityFields {
    // Held exactly as passed in; no defensive copy is made.
    private Path[] fields;

    /**
     * @param fields the identity field paths; the array is stored as is
     */
    public ArrayIdentityFields(Path...fields) {
        this.fields=fields;
    }

    /**
     * Returns the identity field paths (the stored array itself, not a copy)
     */
    public Path[] getFields() {
        return this.fields;
    }

    /**
     * Returns the array of identity field paths formatted by Arrays.toString
     */
    @Override
    public String toString() {
        final String rendered=Arrays.toString(this.fields);
        return rendered;
    }
}
/**
* Default array identity object. Contains the identity values,
* computes hashcode from them
*/
public class DefaultIdentity {
private final BaseType[] nodes;
private Integer hcode;
public DefaultIdentity(BaseType[] nodes) {
this.nodes=nodes;
}
@Override
public int hashCode() {
if(hcode==null) {
int code=0;
for(int i=0;i arrayIdentities=new HashMap<>();
protected abstract boolean isValue(BaseType value);
protected abstract boolean isArray(BaseType value);
protected abstract boolean isObject(BaseType value);
protected abstract boolean isNull(BaseType value);
protected abstract ValueType asValue(BaseType value);
protected abstract ArrayType asArray(BaseType value);
protected abstract ObjectType asObject(BaseType value);
protected abstract boolean equals(ValueType v1,ValueType v2);
protected abstract Iterator> getFields(ObjectType o);
protected abstract boolean hasField(ObjectType value,String field);
protected abstract BaseType getField(ObjectType value,String field);
protected abstract IdentityExtractor getArrayIdentityExtractorImpl(ArrayIdentityFields fields);
protected abstract BaseType getElement(ArrayType value,int index);
protected abstract int size(ArrayType value);
/**
* Adds a group of fields that can uniquely identify array elements for object arrays
*
* @param array The name of the array field
* @param identities The fields of the array element that can identify an element
*
* In the following document:
*
* {
* ...
* "aField": [
* { "_id":1,"field":...},
* { "_id":2,"field":...}
* ]
* }
*
* the call looks like
*
* jsonCompare.addArrayIdentity(new Path("aField"),new Path("_id"));
*
* If more than one field is needed to uniquely identify an element, list all of
* them in the argument list.
*/
public void addArrayIdentity(Path array,Path...identities) {
    // Wrap the identity field paths, then register them under the array's path.
    final ArrayIdentityFields identityFields=new ArrayIdentityFields(identities);
    arrayIdentities.put(array,identityFields);
}
/**
 * Returns the registered array identities, keyed by the array field
 * path they were registered under. The internal map itself is
 * returned, not a copy.
 */
public Map<Path,ArrayIdentityFields> getArrayIdentities() {
    return arrayIdentities;
}
/**
* Compares two documents and returns the difference
*/
public Difference<BaseType> compareNodes(BaseType node1,BaseType node2)
    throws InvalidArrayIdentity, DuplicateArrayIdentity {
    // Each document gets its own fresh path, which the recursive
    // comparison pushes to and pops from as it descends.
    return compareNodes(new MutablePath(),node1,new MutablePath(),node2);
}
/**
 * Compares two nodes located at the given paths and returns the
 * difference.
 *
 * Two values are compared with equals(ValueType,ValueType), two
 * arrays with compareArrays, and two objects with compareObjects.
 * Any other combination is a modification unless both nodes are
 * null. If no difference is found, the result is a Difference with
 * one unchanged field.
 *
 * @param field1 path of node1 in the first document
 * @param node1 node of the first document
 * @param field2 path of node2 in the second document
 * @param node2 node of the second document
 */
public Difference<BaseType> compareNodes(MutablePath field1,
                                         BaseType node1,
                                         MutablePath field2,
                                         BaseType node2)
    throws InvalidArrayIdentity, DuplicateArrayIdentity {
    final boolean modified;
    if(isValue(node1) && isValue(node2)) {
        modified=!equals(asValue(node1),asValue(node2));
    } else if(isArray(node1) && isArray(node2)) {
        return compareArrays(field1,asArray(node1),field2,asArray(node2));
    } else if(isObject(node1) && isObject(node2)) {
        return compareObjects(field1,asObject(node1),field2,asObject(node2));
    } else {
        // Mismatched node kinds: only two nulls are considered the same.
        modified=!(isNull(node1) && isNull(node2));
    }
    if(modified) {
        return new Difference<BaseType>(new Modification<BaseType>(field1,node1,field2,node2));
    }
    // No difference: report a single unchanged field.
    return new Difference<BaseType>(1);
}
/**
* Compares two object nodes recursively and returns the differences
*/
public Difference<BaseType> compareObjects(MutablePath field1,
                                           ObjectType node1,
                                           MutablePath field2,
                                           ObjectType node2)
    throws InvalidArrayIdentity, DuplicateArrayIdentity {
    Difference<BaseType> ret=new Difference<>();
    // Pass 1: walk the fields of obj1. Fields also present in obj2 are
    // compared recursively; fields missing from obj2 are removals.
    for(Iterator<Map.Entry<String,BaseType>> fields=getFields(node1);fields.hasNext();) {
        Map.Entry<String,BaseType> field=fields.next();
        String fieldName=field.getKey();
        // Paths are extended for the duration of this field and restored below.
        field1.push(fieldName);
        BaseType value1=field.getValue();
        if(hasField(node2,fieldName)) {
            field2.push(fieldName);
            BaseType value2=getField(node2,fieldName);
            ret.add(compareNodes(field1,value1,field2,value2));
            field2.pop();
        } else {
            // obj1 has the field, obj2 does not, so it is removed
            ret.add(new Removal<BaseType>(field1,value1));
        }
        field1.pop();
    }
    // Pass 2: walk the fields of obj2. Anything obj1 lacks is an addition;
    // fields present in both were already handled in pass 1.
    for(Iterator<Map.Entry<String,BaseType>> fields=getFields(node2);fields.hasNext();) {
        Map.Entry<String,BaseType> field=fields.next();
        String fieldName=field.getKey();
        if(!hasField(node1,fieldName)) {
            field2.push(fieldName);
            ret.add(new Addition<BaseType>(field2,field.getValue()));
            field2.pop();
        }
    }
    return ret;
}
public IdentityExtractor getArrayIdentityExtractor(Path arrayField) {
MutablePath p=new MutablePath();
int n=arrayField.numSegments();
for(int i=0;i compareArrays(MutablePath field1,
ArrayType node1,
MutablePath field2,
ArrayType node2)
throws InvalidArrayIdentity, DuplicateArrayIdentity {
IdentityExtractor ext=getArrayIdentityExtractor(field1);
if(ext==null) {
return compareArraysNoId(field1,node1,field2,node2);
} else {
return compareArraysWithId(field1,node1,field2,node2,ext);
}
}
/**
* Computes difference between arrays whose elements can be identified by a unique identifier
*/
public Difference compareArraysWithId(MutablePath field1,
ArrayType node1,
MutablePath field2,
ArrayType node2,
IdentityExtractor idex)
throws InvalidArrayIdentity, DuplicateArrayIdentity {
Difference ret=new Difference<>();
// Build a map of identity -> index for both arrays
final Map
© 2015 - 2025 Weber Informatics LLC | Privacy Policy