All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tbrugz.sqldiff.datadiff.ResultSetDiff Maven / Gradle / Ivy

There is a newer version: 0.9.17
Show newest version
package tbrugz.sqldiff.datadiff;

import java.io.IOException;
import java.io.Writer;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import tbrugz.sqldump.datadump.DataDumpUtils;
import tbrugz.sqldump.util.CategorizedOut;
import tbrugz.sqldump.util.SQLUtils;

/*
 * TODOne: String: option to use compareToCaseInsensitive
 */
public class ResultSetDiff {

	static final Log log = LogFactory.getLog(ResultSetDiff.class);
	
	//XXX: option to set 'logEachXLoops' 
	int logEachXLoops = 1000;
	
	long limit = 0;
	boolean useCaseInsensitive = true; //databases, by default, uses case insensitive order
	
	int identicalRowsCount, updateCount, dumpCount, deleteCount, sourceRowCount, targetRowCount;
	
	//XXX: add property for dumpInserts, dumpUpdates & dumpDeletes
	boolean dumpInserts = true,
		dumpUpdates = true,
		dumpDeletes = true;
	
	public void diff(ResultSet source, ResultSet target, String schemaName, String tableName, List keyCols,
			List dss, String coutPattern) throws SQLException, IOException {
		diff(source, target, schemaName, tableName, keyCols, dss, coutPattern, null);
	}

	/*@Deprecated
	private void diff(ResultSet source, ResultSet target, String tableName, List keyCols,
			DiffSyntax ds, Writer singleWriter) throws SQLException, IOException {
		List dss = new ArrayList(); dss.add(ds);
		diff(source, target, null, tableName, keyCols, dss, null, singleWriter);
	}*/
	
	public void diff(ResultSet source, ResultSet target, String schemaName, String tableName, List keyCols,
			DiffSyntax ds, Writer singleWriter) throws SQLException, IOException {
		List dss = new ArrayList(); dss.add(ds);
		diff(source, target, schemaName, tableName, keyCols, dss, null, singleWriter);
	}
	
	//XXX: add schemaName ?
	@SuppressWarnings("rawtypes")
	void diff(ResultSet source, ResultSet target, String schemaName, String tableName, List keyCols,
			List dss, String coutPattern, Writer singleWriter) throws SQLException, IOException {
		boolean readSource = true;
		boolean readTarget = true;
		boolean hasNextSource = true;
		boolean hasNextTarget = true;
		List sourceVals = null;
		List targetVals = null;
		final String fullTableName = (schemaName!=null?schemaName+".":"")+tableName;
		
		ResultSetMetaData md = source.getMetaData();
		int numCol = md.getColumnCount();
		List lsColNames = new ArrayList();
		List> lsColTypes = new ArrayList>();
		for(int i=0;i dscbs = new HashMap();
		Map dscouts = new HashMap();
		
		for(DiffSyntax ds: dss) {
			DataDumpUtils.SyntaxCOutCallback cb = new DataDumpUtils.SyntaxCOutCallback(ds); //XXX: callback will call DiffSyntax.dumpHeader()
			//dscbs.put(ds, cb);
			if(singleWriter!=null) {
				dscouts.put(ds, new CategorizedOut(singleWriter, cb));
			}
			else {
				dscouts.put(ds, new CategorizedOut(coutPattern, cb));
			}
			ds.initDump(schemaName, tableName, keyCols, md);
		}
		//Writer w = new PrintWriter(System.out); //XXXxx: change to COut ? - [schemaname](?), [tablename], [changetype]
		identicalRowsCount = updateCount = dumpCount = deleteCount = sourceRowCount = targetRowCount = 0;
		long count = 0;
		boolean loggedLastLine = false;

		//header fos writer-independent syntaxes
		//XXX: what about stateful syntaxes?
		for(DiffSyntax ds: dss) {
			if(ds.isWriterIndependent()) {
				ds.dumpHeader();
			}
		}
		while(true) {
			if(readSource) {
				hasNextSource = source.next();
				if(hasNextSource) {
					sourceRowCount++;
					sourceVals = cast(SQLUtils.getRowObjectListFromRS(source, lsColTypes, numCol));
				}
			}
			if(readTarget) {
				hasNextTarget = target.next();
				if(hasNextTarget) {
					targetRowCount++;
					targetVals = cast(SQLUtils.getRowObjectListFromRS(target, lsColTypes, numCol));
				}
			}
			
			if(!hasNextSource && !hasNextTarget) { break; }
			
			int compare = 0;
			try {
				if(sourceVals==null && targetVals==null) {
					log.info("both source & target have no rows? source["+sourceRowCount+"]="+sourceVals+" ; target["+targetRowCount+"]="+targetVals);
					break;
				}
				else if(sourceVals==null) { compare = -1; }
				else if(targetVals==null) { compare = 1; }
				else {
					compare = compareVals(sourceVals, targetVals, keyIndexes);
				}
			}
			catch(NullPointerException e) {
				log.error("error comparing rows: source="+sourceVals+" ; target="+targetVals+" ; [ex="+e+"]");
				//e.printStackTrace();
				return;
			}
			catch(ClassCastException e) {
				log.error("error comparing rows: source="+sourceVals+" ; target="+targetVals+" ; [ex="+e+"]");
				//e.printStackTrace();
				return;
			}
			
			if(compare==0) {
				//same key
				readSource = readTarget = true;
				boolean updated = false;
				if(dumpUpdates) {
				for(DiffSyntax ds: dss) {
					//last 'updated' that counts...
					//TODO: compare (equals) should not be dumpSyntax responsability... or should it? no! so that 'getCategorizedWriter' may not be called
					CategorizedOut cout = dscouts.get(ds);
					updated = ds.dumpUpdateRowIfNotEquals(source, target, count, cout.getCategorizedWriter("", tableName, "update", ds.getDefaultFileExtension()));
				}
				log.debug("update? "+sourceVals+" / "+targetVals+(updated?" [updated]":"")+" // "+hasNextSource+"/"+hasNextTarget);
				}
				if(updated) { updateCount++; }
				else { identicalRowsCount++; }
			}
			else if(compare<0) {
				readSource = true; readTarget = false;
				if(hasNextSource) {
					if(dumpDeletes) {
					log.debug("delete: ->"+sourceVals+" / "+targetVals+" // "+hasNextSource+"/"+hasNextTarget);
					for(DiffSyntax ds: dss) {
						CategorizedOut cout = dscouts.get(ds);
						ds.dumpDeleteRow(source, count, cout.getCategorizedWriter("", tableName, "delete", ds.getDefaultFileExtension()));
					}
					}
					deleteCount++;
				}
				else {
					readSource = false; readTarget = true;
					if(dumpInserts) {
					log.debug("insert: "+sourceVals+" / ->"+targetVals+" // "+hasNextSource+"/"+hasNextTarget);
					for(DiffSyntax ds: dss) {
						CategorizedOut cout = dscouts.get(ds);
						ds.dumpRow(target, count, cout.getCategorizedWriter("", tableName, "insert", ds.getDefaultFileExtension()));
					}
					}
					dumpCount++;
				}
			}
			else {
				readSource = false; readTarget = true;
				if(hasNextTarget) {
					if(dumpInserts) {
					log.debug("insert: "+sourceVals+" / ->"+targetVals+" // "+hasNextSource+"/"+hasNextTarget);
					for(DiffSyntax ds: dss) {
						CategorizedOut cout = dscouts.get(ds);
						ds.dumpRow(target, count, cout.getCategorizedWriter("", tableName, "insert", ds.getDefaultFileExtension()));
					}
					}
					dumpCount++;
				}
				else {
					readSource = true; readTarget = false;
					if(dumpDeletes) {
					log.debug("delete: ->"+sourceVals+" / "+targetVals+" // "+hasNextSource+"/"+hasNextTarget);
					for(DiffSyntax ds: dss) {
						CategorizedOut cout = dscouts.get(ds);
						ds.dumpDeleteRow(source, count, cout.getCategorizedWriter("", tableName, "delete", ds.getDefaultFileExtension()));
					}
					}
					deleteCount++;
				}
				
				if(!hasNextTarget) { readSource = true; readTarget = false; }
			}
			
			count++;
			
			if( (logEachXLoops>0) && (count>0) && (count%logEachXLoops==0) ) { 
				log.info("[table="+fullTableName+"] "+count+" rows compared ; stats: "+getCompactStats());
				loggedLastLine = true;
			}
			else {
				loggedLastLine = false;
			}
			
			if(limit>0 && count>=limit) {
				log.info("limit reached: "+limit+" [table="+fullTableName+"]");
				break;
			}
		}
		if(!loggedLastLine) {
			log.info("[table="+fullTableName+"] "+count+" rows compared ; stats: "+getCompactStats());
		}
		
		//footer for writer-independent syntaxes
		//XXX: what about stateful syntaxes?
		for(DiffSyntax ds: dss) {
			if(ds.isWriterIndependent()) {
				ds.dumpFooter(sourceRowCount); //XXX: sourceRowCount is the best for ds.dumpFooter(, writer)?
			}
			else {
				CategorizedOut cout = dscouts.get(ds);
				Iterator it = cout.getAllOpenedWritersIterator();
				while(it.hasNext()) {
					Writer w = it.next();
					ds.dumpStats(dumpCount, updateCount, deleteCount, identicalRowsCount, sourceRowCount, targetRowCount, w);
					ds.dumpFooter(sourceRowCount, w);
					w.close();
				}
			}
		}
		
		//XXX ds.dumpFooter(w); ??? cout.getAllWriters() ?
		//w.flush();
	}
	
	public void setLimit(long limit) {
		this.limit = limit;
	}

	public int getIdenticalRowsCount() {
		return identicalRowsCount;
	}

	public int getUpdateCount() {
		return updateCount;
	}
	
	public int getDumpCount() {
		return dumpCount;
	}

	public int getDeleteCount() {
		return deleteCount;
	}

	public int getSourceRowCount() {
		return sourceRowCount;
	}

	public int getTargetRowCount() {
		return targetRowCount;
	}
	
	public String getStats() {
		return "dumps="+dumpCount+"; updates="+updateCount+"; deletes="+deleteCount
				+"; identicalRows="+identicalRowsCount
				+" [sourceCount="+sourceRowCount+"; targetCount="+targetRowCount+"]";
	}

	String getCompactStats() {
		return "IUDE/ST="+dumpCount+","+updateCount+","+deleteCount+","+identicalRowsCount
				+" / "+sourceRowCount+","+targetRowCount;
	}
	
	/*static String getKeyValue(ResultSet rs, List keyCols) {
		List keyvalue = new ArrayList();
		
		for(String col: keyCols) {
			keyvalue.add(rs.getS)
		}
	}*/
	
	@SuppressWarnings({ "rawtypes", "unchecked" })
	int compareVals(List vals1, List vals2, int[] keyIndexes) {
		int comp = 0;
		for(int i=0;i cast(List list) {
		List ret = new ArrayList();
		for(Object o: list) {
			ret.add((Comparable)o);
		}
		return ret;
	}
}