All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.redhat.lightblue.util.DocComparator Maven / Gradle / Ivy

There is a newer version: 2.18.0
Show newest version
/*
 Copyright 2013 Red Hat, Inc. and/or its affiliates.

 This file is part of lightblue.

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package com.redhat.lightblue.util;

import java.util.List;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.Map;
import java.util.HashMap;
import java.util.ListIterator;
import java.util.LinkedList;
import java.util.Iterator;
import java.util.HashSet;

/**
 * Compares two documents represented as an object tree, and builds a
 * list of all changes
 *
 * Containers (objects and arrays) are compared recursively. The
 * comparison algorithm works like this:
 *
 * Objects: A field-by-field comparison is done. If a field exists in
 * the first document but not in the second, that field is removed. If
 * a field exists in the second document but not the first, that field
 * is added. If a field exists in both documents with different
 * values, that field is modified.
 *
 * Arrays: There are two possible algorithms to compare arrays. If
 * array elements contain a unique identifier (which is defined by the
 * caller), then array elements of the first and the second document
 * are matched using the unique identifiers of array elements. Then
 * each matching array element is compared to generate the detailed
 * difference. If array elements don't have unique identifiers, then
 * each element of the first array is compared to each element of the
 * second array, and the elements with minimal number of changes are
 * associated. Elements that are too different from each other are not
 * associated.
 *
 * Differences: 
 *
 * An Addition denotes a new field or array element. Addition.field1
 * is null, meaning the field does not exist in document1, and
 * Addition.field2 denotes the new field, or array element.
 *
 * A Removal denotes a removed field or array element. Removal.field1
 * denotes the element in document1, and Removal.field2 is null.
 *
 * A Modification denotes a content modification of a field, or array
 * element. Both field1 and field2 are non-null, and set to the name
 * of the modified field.
 *
 * A Move denotes an array element move. field1 denotes the old index
 * of the array element, and field2 denotes the new index.
 *
 * If new elements are added to an array, or existing elements are
 * removed, the addition and removal appear as diff, and any node that
 * shifted during the operation appears within a Move.
 */
public abstract class DocComparator {

    /**
     * Signals that an array is configured with element identities,
     * yet the identity of at least one of its elements could not be
     * retrieved.
     */
    public static final class InvalidArrayIdentity extends Exception {

        /**
         * @param p path whose string form becomes the exception
         * message; must not be null
         */
        public InvalidArrayIdentity(final Path p) {
            super(p.toString());
        }
    }

    /**
     * Signals that an array is configured with element identities,
     * yet those identities are not unique within the array.
     */
    public static final class DuplicateArrayIdentity extends Exception {

        /**
         * @param p path whose string form becomes the exception
         * message; must not be null
         */
        public DuplicateArrayIdentity(final Path p) {
            super(p.toString());
        }
    }

    /**
     * The array element identity extractor interface.
     *
     * @param <T> type of the array elements the identity is extracted from
     */
    public interface IdentityExtractor<T> {

        /**
         * It should return an identity object from the given array
         * element. The returned object should implement equals and
         * hashCode methods, because identities are used as map keys
         * when the elements of two arrays are matched.
         *
         * @param element the array element
         * @return the identity of the element
         */
        Object getIdentity(T element);
    }


    /**
     * Contains the edit script, and number of changed and unchanged
     * fields. The number of changed fields does not include array
     * element moves, it only includes additions, removals, and
     * modifications.
     */
    public static class Difference {
        private final List> delta;
        private int numUnchangedFields;
        private int numChangedFields;

        /**
         * Default ctor, sets numUnchangedFields to zero, initializes an empty list
         */
        public Difference() {
            delta=new ArrayList<>();
            numUnchangedFields=0;
        }
        
        public Difference(List> delta) {
            this.delta=delta;
            for(Delta d:this.delta) {
                if(d instanceof Addition||
                   d instanceof Removal ||
                   d instanceof Modification)
                    numChangedFields++;
            }
        }

        /**
         * Constructs a difference with one modification, no unchanged fields
         */
        public Difference(Delta d) {
            this(new ArrayList>(1));
            add(d);
            
        }

        /**
         * Constructs a Difference denoting no difference
         */
        public Difference(int numFields) {
            delta=new ArrayList<>();
            numUnchangedFields=numFields;
        }

        /**
         * Returns the number of unmodified fields
         */
        public int getNumUnchangedFields() {
            return numUnchangedFields;
        }

        /**
         * Returns the number of modified fields, excluding array element moves
         */
        public int getNumChangedFields() {
            return numChangedFields;
        }

        /**
         * Returns the list of changes
         */
        public List> getDelta() {
            return delta;
        }

        public void add(Difference diff) {
            delta.addAll(diff.delta);
            numUnchangedFields+=diff.numUnchangedFields;
            numChangedFields+=diff.numChangedFields;
        }

        public void add(Delta d) {
            delta.add(d);
            if(d instanceof Addition||
               d instanceof Removal ||
               d instanceof Modification)
                numChangedFields++;
        }

        /**
         * A numeric value between 0 and 1 denoting how much an object
         * is changed. 0 means no change, 1 means everything is
         * changed.
         */
        public double getChangeAmount() {
            double d=numChangedFields+numUnchangedFields;
            return d==0?0:numChangedFields/d;
        }

        /**
         * Returns true if there are not changes
         */
        public boolean same() {
            return delta.isEmpty();
        }

        @Override
        public String toString() {
            StringBuilder bld=new StringBuilder();
            for(Delta x:delta) {
                bld.append(x.toString()).append('\n');
            }
            return bld.toString();
        }
    }
    
    /**
     * Base class for a delta. Keeps the field of the first document
     * and the field of the second document the delta refers to;
     * either of them may be null.
     *
     * @param <T> type of the document nodes subclasses carry
     */
    public static abstract class Delta<T> {
        protected final Path field1;
        protected final Path field2;

        /**
         * Stores immutable copies of the given paths, so the caller
         * can keep modifying the paths it passed in.
         *
         * @param field1 the field in the first document, or null
         * @param field2 the field in the second document, or null
         */
        public Delta(Path field1,Path field2) {
            this.field1=field1==null?null:field1.immutableCopy();
            this.field2=field2==null?null:field2.immutableCopy();
        }

        /**
         * Returns the field in the first document, may be null
         */
        public Path getField1() {
            return field1;
        }

        /**
         * Returns the field in the second document, may be null
         */
        public Path getField2() {
            return field2;
        }

        /**
         * Return the non-null field, or field1 if both are non-null
         */
        public Path getField() {
            return field1==null?field2:field1;
        }
    }

    /**
     * Denotes an addition of a field or array element that isn't present in doc1.
     * field1 is always null for an addition.
     *
     * @param <T> type of the added node
     */
    public static class Addition<T> extends Delta<T> {
        private final T addedNode;

        /**
         * @param field2 the field in the second document that was added
         * @param addedNode the added node
         */
        public Addition(Path field2,T addedNode) {
            super(null,field2);
            this.addedNode=addedNode;
        }

        /**
         * Returns the node that was added
         */
        public T getAddedNode() {
            return addedNode;
        }

        @Override
        public String toString() {
            return "+ "+field2+":"+addedNode;
        }
    }

    /**
     * Denotes a removal of a field or array element that is present in doc1 but not in doc2.
     * field2 is always null for a removal.
     *
     * @param <T> type of the removed node
     */
    public static class Removal<T> extends Delta<T> {
        private final T removedNode;

        /**
         * @param field1 the field in the first document that was removed
         * @param removedNode the removed node
         */
        public Removal(Path field1,T removedNode) {
            super(field1,null);
            this.removedNode=removedNode;
        }

        /**
         * Returns the node that was removed
         */
        public T getRemovedNode() {
            return removedNode;
        }

        @Override
        public String toString() {
            return "- "+field1+":"+removedNode;
        }
    }

    /**
     * Denotes an array element move from one element index to another
     *
     * @param <T> type of the moved node
     */
    public static class Move<T> extends Delta<T> {
        private final T movedNode;

        /**
         * @param field1 the element in the first document
         * @param field2 the element in the second document
         * @param movedNode the node that moved
         */
        public Move(Path field1,Path field2,T movedNode) {
            super(field1,field2);
            this.movedNode=movedNode;
        }

        /**
         * Returns the node that moved
         */
        public T getMovedNode() {
            return movedNode;
        }

        @Override
        public String toString() {
            return "* "+field1+"->"+field2+":"+movedNode;
        }
    }

    /**
     * Denotes a field modification
     *
     * @param <T> type of the modified nodes
     */
    public static class Modification<T> extends Delta<T> {
        private final T node1;
        private final T node2;

        /**
         * @param field1 the field in the first document
         * @param node1 the node in the first document
         * @param field2 the field in the second document
         * @param node2 the node in the second document
         */
        public Modification(Path field1,T node1,Path field2,T node2) {
            super(field1,field2);
            this.node1=node1;
            this.node2=node2;
        }

        /**
         * Returns the node of the first document
         */
        public T getUnmodifiedNode() {
            return node1;
        }

        /**
         * Returns the node of the second document
         */
        public T getModifiedNode() {
            return node2;
        }

        @Override
        public String toString() {
            return "* "+field1+"->"+field2+":"+node1+" -> "+node2;
        }
    }


    /**
     * Holds the fields of an array element that, taken together,
     * uniquely identify the element within its array
     */
    public static class ArrayIdentityFields {
        private final Path[] fields;

        /**
         * @param fields the identifying fields; the array is kept as
         * is, it is not copied
         */
        public ArrayIdentityFields(Path...fields) {
            this.fields=fields;
        }

        /**
         * Returns the identifying fields. This is the array given to
         * the constructor, not a copy.
         */
        public Path[] getFields() {
            return this.fields;
        }

        /**
         * Returns the identifying fields formatted by Arrays.toString
         */
        @Override
        public String toString() {
            return Arrays.toString(this.fields);
        }
    }

    /**
     * Default array identity object. Contains the identity values,
     * computes hashcode from them
     */
    public class DefaultIdentity {
        private final BaseType[] nodes;
        private Integer hcode;

        public DefaultIdentity(BaseType[] nodes) {
            this.nodes=nodes;
        }

        @Override
        public int hashCode() {
            if(hcode==null) {
                int code=0;
                for(int i=0;i arrayIdentities=new HashMap<>();

    protected abstract boolean isValue(BaseType value);
    protected abstract boolean isArray(BaseType value);
    protected abstract boolean isObject(BaseType value);
    protected abstract boolean isNull(BaseType value);
    protected abstract ValueType asValue(BaseType value);
    protected abstract ArrayType asArray(BaseType value);
    protected abstract ObjectType asObject(BaseType value);        
    protected abstract boolean equals(ValueType v1,ValueType v2);    
    protected abstract Iterator> getFields(ObjectType o);
    protected abstract boolean hasField(ObjectType value,String field);
    protected abstract BaseType getField(ObjectType value,String field);
    protected abstract IdentityExtractor getArrayIdentityExtractorImpl(ArrayIdentityFields fields);
    protected abstract BaseType getElement(ArrayType value,int index);
    protected abstract int size(ArrayType value);

    /**
     * Adds a group of fields that can uniquely identify array elements for object arrays
     *
     * @param array The name of the array field
     * @param identities The fields of the array element that can identify an element
     *
     * In the following document:
     * 
     * {
     * ...
     *   "aField": [
     *      { "_id":1,"field":...},
     *      { "_id":2,"field":...}
     *   ]
     * }
     * 
     * the call looks like
     * 
     *    jsonCompare.addArrayIdentity(new Path("aField"),new Path("_id"));
     * 
* If there are more than one fields that uniquely identify an eleent, list those in * the argument list. */ public void addArrayIdentity(Path array,Path...identities) { arrayIdentities.put(array,new ArrayIdentityFields(identities)); } public Map getArrayIdentities() { return arrayIdentities; } /** * Compares two documents and returns the difference */ public Difference compareNodes(BaseType node1,BaseType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { return compareNodes(new MutablePath(),node1,new MutablePath(),node2); } public Difference compareNodes(MutablePath field1, BaseType node1, MutablePath field2, BaseType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { if(isValue(node1) && isValue(node2)) { if(!equals(asValue(node1),asValue(node2))) { return new Difference(new Modification(field1,node1,field2,node2)); } } else if(isArray(node1) && isArray(node2)) { return compareArrays(field1,asArray(node1),field2,asArray(node2)); } else if(isObject(node1) && isObject(node2)) { return compareObjects(field1,asObject(node1),field2,asObject(node2)); } else { if(!(isNull(node1) && isNull(node2)) ) { return new Difference<>(new Modification(field1, node1, field2, node2)); } } return new Difference<>(1); } /** * Compares two object nodes recursively and returns the differences */ public Difference compareObjects(MutablePath field1, ObjectType node1, MutablePath field2, ObjectType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { Difference ret=new Difference<>(); // Field by field comparison of obj1 to obj2. 
for(Iterator> fields=getFields(node1);fields.hasNext();) { Map.Entry field=fields.next(); String fieldName=field.getKey(); field1.push(fieldName); BaseType value1=field.getValue(); if(hasField(node2,fieldName)) { // If both obj1 and obj2 have the same field, compare recursively field2.push(fieldName); BaseType value2=getField(node2,fieldName); ret.add(compareNodes(field1,value1,field2,value2)); field2.pop(); } else { // obj1.field1 exists, obj2.field1 does not, so it is removed ret.add(new Removal(field1,value1)); } field1.pop(); } // Now compare any new nodes added to obj2 for(Iterator> fields=getFields(node2);fields.hasNext();) { Map.Entry field=fields.next(); String fieldName=field.getKey(); if(!hasField(node1,fieldName)) { field2.push(fieldName); ret.add(new Addition(field2,field.getValue())); field2.pop(); } } return ret; } public IdentityExtractor getArrayIdentityExtractor(Path arrayField) { MutablePath p=new MutablePath(); int n=arrayField.numSegments(); for(int i=0;i compareArrays(MutablePath field1, ArrayType node1, MutablePath field2, ArrayType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { IdentityExtractor ext=getArrayIdentityExtractor(field1); if(ext==null) { return compareArraysNoId(field1,node1,field2,node2); } else { return compareArraysWithId(field1,node1,field2,node2,ext); } } /** * Computes difference between arrays whose elements can be identitied by a unique identifier */ public Difference compareArraysWithId(MutablePath field1, ArrayType node1, MutablePath field2, ArrayType node2, IdentityExtractor idex) throws InvalidArrayIdentity, DuplicateArrayIdentity { Difference ret=new Difference<>(); // Build a map of identity -> index for both arrays final Map identities1=getIdentityMap(field1,node1,idex); final Map identities2=getIdentityMap(field2,node2,idex); // Iterate all elements of array 1 for(Map.Entry entry1:identities1.entrySet()) { // Append index to the field name field1.push(entry1.getValue()); // If array2 doesn't have an 
element with the same ID, this is a deletion Integer index2=identities2.get(entry1.getKey()); if(index2==null) { ret.add(new Removal(field1,getElement(node1,entry1.getValue()))); } else { field2.push(index2); // array2 has the same element // If it is at a different index, this is a move if(index2!=entry1.getValue()) { ret.add(new Move(field1,field2,getElement(node1,entry1.getValue()))); } // Recursively compare contents to get detailed diff ret.add(compareNodes(field1,getElement(node1,entry1.getValue()), field2,getElement(node2,index2))); field2.pop(); } field1.pop(); } // Now check elements of array 2 that are not in array 1 for(Map.Entry entry2:identities2.entrySet()) { if(!identities1.containsKey(entry2.getKey())) { // entry2 is not in array 1: addition field2.push(entry2.getValue()); ret.add(new Addition(field2,getElement(node2,entry2.getValue()))); field2.pop(); } } return ret; } private static class Pair { private final int i1,i2; public int hashCode() { return i1*1001+i2; } public boolean equals(Object o) { try { return ((Pair)o).i1==i1&& ((Pair)o).i2==i2; } catch (Exception e) { return false; } } public Pair(int i1,int i2) { this.i1=i1; this.i2=i2; } } /** * Computes difference between arrays by comparing every element * recursively and trying to find the closest match */ public Difference compareArraysNoId(MutablePath field1, ArrayType node1, MutablePath field2, ArrayType node2) throws InvalidArrayIdentity, DuplicateArrayIdentity { Difference ret=new Difference<>(); IndexAssoc assoc=new IndexAssoc(size(node1),size(node2)); HashSet comparedPairs=new HashSet<>(); // First associate exact matches // We loop through the unassociated elements of node1, and node2 // If the nodes are equal, we associate them // if they are not, we note the distance between the two, so later // we don't need to re-compare them for(assoc.start1();assoc.hasNext1();) { int index1=assoc.next1(); BaseType element1=getElement(node1,index1); field1.push(index1); 
for(assoc.start2();assoc.hasNext2();) { int index2=assoc.next2(); BaseType element2=getElement(node2,index2); field2.push(index2); comparedPairs.add(new Pair(index1,index2)); Difference diff=compareNodes(field1,element1,field2,element2); if(diff.same()) { assoc.associate(index1,index2); field2.pop(); break; } else { assoc.recordDistance(index1,index2,diff); } field2.pop(); } field1.pop(); } // Here, we associated all exact matching nodes // All remaining nodes need to be compared to each other // First compare all node1 elements to node2 elements for(assoc.start1();assoc.hasNext1();) { int index1=assoc.next1(); BaseType element1=getElement(node1,index1); field1.push(index1); for(assoc.start2();assoc.hasNext2();) { int index2=assoc.next2(); BaseType element2=getElement(node2,index2); field2.push(index2); Pair p=new Pair(index1,index2); // Do we have a distance recorded for these nodes? if(comparedPairs.contains(p)) { // No distance: compare the nodes Difference diff=compareNodes(field1,element1,field2,element2); assoc.recordDistance(index1,index2,diff); comparedPairs.add(p); } field2.pop(); } IxDiff ixdiff=assoc.getMin(index1); // If an object has changed more that 0.5 (more than half // of its fields are changed), then it is not a match if(ixdiff==null||ixdiff.change>0.5) { // No matching node for node1 ret.add(new Removal(field1,element1)); assoc.remove1(index1); } else { // Matching node assoc.associate(index1,ixdiff.index2); ret.add(ixdiff.diff); } field1.pop(); } // Anything remaining on node2 are nodes that are added for(assoc.start2();assoc.hasNext2();) { int index=assoc.next2(); BaseType element2=getElement(node2,index); field2.push(index); ret.add(new Addition(field2,element2)); field2.pop(); } // Look at associations for moved nodes for(Map.Entry entry:assoc.assoc.entrySet()) { if(entry.getKey()!=entry.getValue()) { field1.push(entry.getKey()); field2.push(entry.getValue()); BaseType node=getElement(node1,entry.getKey()); ret.add(new 
Move(field1,field2,node)); field2.pop(); field1.pop(); } } return ret; } /** * Keeps the distance between two array indexes. */ private static class IxDiff { private Difference diff; private double change; private int index1,index2; public IxDiff(Difference diff, double change, int index1, int index2) { this.diff=diff; this.change=change; this.index1=index1; this.index2=index2; } } /** * Keeps associations between array indexes */ private static class IndexAssoc { private final ArrayList ix1=new ArrayList<>(); private final ArrayList ix2=new ArrayList<>(); private int itr1; private int itr2; private int last1,last2; private final Map assoc=new HashMap<>(); /** * Keeps the IxDiff with the minimum change between index1 and index2, keyed on index1 */ private final Map minimums1=new HashMap<>(); /** * Construct with two arrays of size1 and size2 */ public IndexAssoc(int size1,int size2) { for(int i=0;i=0) { ix1.remove(l1); if(itr1>=l1) itr1--; } } /** * Start iterating the unassociated indexes of the second array */ public void start2() { itr2=-1; } /** * Returns true if the second array has more unassociated indexes */ public boolean hasNext2() { return (itr2+1)=0) { ix2.remove(l2); if(itr2>=l2) itr2--; } } public void associate(int index1,int index2) { remove1(index1); remove2(index2); assoc.put(index1,index2); } /** * Records the amount of difference between the nodes last * returned by next(). 
Also stores the minumum amount of * difference for node1 */ public void recordDistance(int index1,int index2,Difference diff) { double change=diff.getChangeAmount(); IxDiff m=minimums1.get(index1); if(m==null||m.change>change) { minimums1.put(index1,m=new IxDiff(diff,change,index1,index2)); } } public IxDiff getMin(int index1) { return minimums1.get(index1); } } private Map getIdentityMap(Path field,ArrayType array,IdentityExtractor idex) throws InvalidArrayIdentity, DuplicateArrayIdentity { final int size=size(array); final Map identities=new HashMap<>(size); // Fill up identities into identity maps for(int i=0;i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy