com.gs.obevocomparer.compare.simple.SimpleDataSourceComparator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of obevo-internal-comparer Show documentation
Show all versions of obevo-internal-comparer Show documentation
POM module containing the dependencyManagement section for the modules of Obevo.
All Obevo modules except obevo-bom should inherit from this. We separate obevo-bom from this so
that clients can depend on the BOM without pulling in third-party dependencies.
/**
* Copyright 2017 Goldman Sachs.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.gs.obevocomparer.compare.simple;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.gs.obevocomparer.compare.CatoComparison;
import com.gs.obevocomparer.compare.CatoDataComparator;
import com.gs.obevocomparer.compare.CatoDataSide;
import com.gs.obevocomparer.compare.CatoDataSourceComparator;
import com.gs.obevocomparer.compare.CatoProperties;
import com.gs.obevocomparer.compare.breaks.Break;
import com.gs.obevocomparer.compare.breaks.DataObjectBreak;
import com.gs.obevocomparer.compare.breaks.FieldBreak;
import com.gs.obevocomparer.compare.breaks.GroupBreak;
import com.gs.obevocomparer.data.CatoDataObject;
import com.gs.obevocomparer.input.CatoDataSource;
import com.gs.obevocomparer.sort.Sort;
import com.gs.obevocomparer.sort.SortedGroupIterator;
import com.gs.obevocomparer.util.Factory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class SimpleDataSourceComparator implements CatoDataSourceComparator {
private final CatoProperties properties;
private final Comparator dataObjectComparator;
private final CatoDataComparator dataComparator;
private final Sort sort;
private final Factory> breakCollectionFactory;
private final Factory> dataCollectionFactory;
private int groupId = 1;
private static final Logger LOG = LoggerFactory.getLogger(SimpleDataSourceComparator.class);
public SimpleDataSourceComparator(CatoProperties properties, Comparator dataObjectComparator,
CatoDataComparator dataComparator, Sort sort,
Factory> breakCollectionFactory, Factory> dataCollectionFactory) {
this.properties = properties;
this.dataObjectComparator = dataObjectComparator;
this.dataComparator = dataComparator;
this.sort = sort;
this.breakCollectionFactory = breakCollectionFactory;
this.dataCollectionFactory = dataCollectionFactory;
}
public CatoComparison compare(String comparisonName, CatoDataSource leftDataSource, CatoDataSource rightDataSource) {
LOG.info("Comparing left data set '{}' to right data set '{}'",
leftDataSource.getName(), rightDataSource.getName());
Collection leftData = this.dataCollectionFactory.create();
Collection rightData = this.dataCollectionFactory.create();
leftDataSource.open();
rightDataSource.open();
Iterator sortedLeftData = leftDataSource.isSorted() ?
leftDataSource : this.sort.sort(leftDataSource);
Iterator sortedRightData = rightDataSource.isSorted() ?
rightDataSource : this.sort.sort(rightDataSource);
Collection breaks = this.breakCollectionFactory.create();
SortedGroupIterator leftGroupIter =
new SortedGroupIterator(sortedLeftData, this.dataObjectComparator);
SortedGroupIterator rightGroupIter =
new SortedGroupIterator(sortedRightData, this.dataObjectComparator);
List leftGroup = leftGroupIter.next();
List rightGroup = rightGroupIter.next();
while (leftGroup.size() > 0 || rightGroup.size() > 0) {
int keyCompResult = this.dataObjectComparator.compare(
leftGroup.size() > 0 ? leftGroup.get(0) : null,
rightGroup.size() > 0 ? rightGroup.get(0) : null);
if (keyCompResult < 0) {
this.processLeftOnlyGroup(leftGroup, breaks, leftData, rightData);
leftGroup = leftGroupIter.next();
} else if (keyCompResult > 0) {
this.processRightOnlyGroup(rightGroup, breaks, leftData, rightData);
rightGroup = rightGroupIter.next();
} else {
this.processBothGroups(leftGroup, rightGroup, breaks, leftData, rightData);
leftGroup = leftGroupIter.next();
rightGroup = rightGroupIter.next();
}
}
leftDataSource.close();
rightDataSource.close();
this.processComparisonData(breaks, leftData, rightData);
LOG.info("Completed comparison with {} breaks", breaks.size());
return new CatoComparison(comparisonName, this.properties, breaks,
leftDataSource, leftData,
rightDataSource, rightData);
}
private void processLeftOnlyGroup(List leftGroup, Collection breaks,
Collection leftData, Collection rightData) {
leftData.addAll(leftGroup);
for (CatoDataObject obj : leftGroup) {
breaks.add(new DataObjectBreak(obj, CatoDataSide.LEFT));
}
}
private void processRightOnlyGroup(List rightGroup, Collection breaks,
Collection leftData, Collection rightData) {
rightData.addAll(rightGroup);
for (CatoDataObject obj : rightGroup) {
breaks.add(new DataObjectBreak(obj, CatoDataSide.RIGHT));
}
}
private void processBothGroups(List leftGroup, List rightGroup,
Collection breaks, Collection leftData, Collection rightData) {
leftData.addAll(leftGroup);
rightData.addAll(rightGroup);
FieldBreak fieldBreak;
if (leftGroup.size() == 1 && rightGroup.size() == 1) {
fieldBreak = this.compareDataObjects(leftGroup.get(0), rightGroup.get(0));
if (fieldBreak != null) {
breaks.add(fieldBreak);
}
return;
}
if (leftGroup.size() > 100) {
LOG.warn("Large group of size {} being compared", leftGroup.size());
}
List leftCompareGroup = new ArrayList(leftGroup);
List rightCompareGroup = new ArrayList(rightGroup);
CatoDataObject leftObj;
CatoDataObject rightObj;
for (Iterator leftIter = leftCompareGroup.iterator(); leftIter.hasNext(); ) {
leftObj = leftIter.next();
for (Iterator rightIter = rightCompareGroup.iterator(); rightIter.hasNext(); ) {
rightObj = rightIter.next();
if (this.compareDataObjects(leftObj, rightObj) == null) {
leftIter.remove();
rightIter.remove();
break;
}
}
}
Set breakFields = new LinkedHashSet();
for (CatoDataObject leftObject : leftCompareGroup) {
for (CatoDataObject rightObject : rightCompareGroup) {
fieldBreak = this.compareDataObjects(leftObject, rightObject);
breakFields.addAll(fieldBreak.getFields());
}
}
for (CatoDataObject obj : leftCompareGroup) {
breaks.add(new GroupBreak(obj, CatoDataSide.LEFT, breakFields, this.groupId));
}
for (CatoDataObject obj : rightCompareGroup) {
breaks.add(new GroupBreak(obj, CatoDataSide.RIGHT, breakFields, this.groupId));
}
this.groupId++;
}
FieldBreak compareDataObjects(CatoDataObject leftObj, CatoDataObject rightObj) {
if (leftObj == null || rightObj == null) {
LOG.error("Cannot compare null DataObjects");
throw new IllegalArgumentException("Cannot compare null DataObjects");
}
Object leftVal;
Object rightVal;
Set comparedRightFields = new HashSet();
Map fieldBreaks = new HashMap();
for (String field : leftObj.getFields()) {
if (this.properties.getExcludeFields().contains(field)) {
continue;
}
leftVal = leftObj.getValue(field);
rightVal = rightObj.getValue(this.getRightField(field));
if (this.properties.getKeyFields().contains(field)) {
if (this.dataComparator.compareKeyValues(leftVal, rightVal) != 0) {
LOG.error("Cannot compare data objects with different keys\n{}\n{}", leftObj, rightObj);
throw new IllegalArgumentException("Cannot compare data objects with different keys");
}
continue;
}
if (!this.dataComparator.compareValues(leftVal, rightVal)) {
fieldBreaks.put(field, rightVal);
}
comparedRightFields.add(this.getRightField(field));
}
for (String rightField : rightObj.getFields()) {
if (this.properties.getKeyFields().contains(rightField)
|| this.properties.getExcludeFields().contains(rightField)
|| comparedRightFields.contains(rightField)
|| this.properties.getMappedFields().containsKey(rightField)) {
continue;
}
fieldBreaks.put(rightField, rightObj.getValue(rightField));
}
if (fieldBreaks.size() > 0) {
return new FieldBreak(leftObj, fieldBreaks);
} else {
return null;
}
}
private String getRightField(String leftField) {
if (this.properties.getMappedFields().containsKey(leftField)) {
return this.properties.getMappedFields().get(leftField);
} else {
return leftField;
}
}
private void processComparisonData(Collection breaks, Collection leftData, Collection rightData) {
// This method is intended to help subclass implementations
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy