All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sleepycat.je.rep.util.ldiff.LDiff Maven / Gradle / Ivy

The newest version!
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.rep.util.ldiff;

import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.logging.Level;

import com.sleepycat.je.Cursor;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseNotFoundException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.rep.impl.node.NameIdPair;
import com.sleepycat.je.rep.net.DataChannel;
import com.sleepycat.je.rep.net.DataChannelFactory;
import com.sleepycat.je.rep.net.DataChannelFactory.ConnectOptions;
import com.sleepycat.je.rep.utilint.BinaryProtocol.ProtocolException;
import com.sleepycat.je.rep.utilint.ServiceDispatcher;
import com.sleepycat.je.rep.utilint.ServiceDispatcher.ServiceConnectFailedException;
import com.sleepycat.je.rep.utilint.net.SimpleChannelFactory;
import com.sleepycat.je.utilint.CmdUtil;
import com.sleepycat.je.utilint.LoggerUtils;

/**
 * LDiff provides a mechanism for efficiently comparing two quiescent
 * databases, typically residing on different machines connected by a
 * network. The comparison is done at the logical level rather than the
 * physical level, so that we can compare the contents of replicated databases
 * where the logical contents may be identical, but the physical logs may be
 * very different. If the databases are found to be different, it provides
 * information that would help identify the specific nature of the differences.
 *
 * This class provides the external API used to initiate a comparison.
 *
 * For details, please review the LDiff design document. (The hyperlink to
 * that document was lost from this copy of the source.)
 */
public class LDiff {

    /* Comparison settings; set by parseArgs() or by the public constructor. */
    private LDiffConfig cfg;

    /* Command line state: environment homes and database names. */
    private File home1, home2;
    private String file1, file2;

    /* Records the mismatched regions found by the most recent diff. */
    private DiffTracker tracker;

    private static final String usageString = "usage: " +
        CmdUtil.getJavaCommand(LDiff.class) +
        "\n" +
        "  -h <dbEnvHome1>[,<dbEnvHome2>]  # environment home directory\n" +
        "  [-a]                # analyze diff\n" +
        "  [-b <blockSize>]    # number of records to put in each block\n" +
        "  [-m <maxErrors>]    # abort diff after a number of errors\n" +
        "  [-s <database1>,<database2>] # database(s) to compare\n" +
        "  [-q]                # be quiet, do not print to stdout";

    private static final int SOCKET_TIMEOUT_MS = 10000;

    /**
     * The main used by the LDiff utility.
     *
     * @param args The arguments accepted by the LDiff utility.
     *
     * <pre>
     * usage: java com.sleepycat.je.rep.util.ldiff.LDiff
     *             [-s database1,database2] -h dbEnvHome1[,dbEnvHome2]
     *             [-a] [-b blockSize] [-m maxErrors] [-q]
     * </pre>
     *
     * <ul>
     * <li>-a - generate an analysis of the differences</li>
     * <li>-b blockSize - the number of records to compare at one time</li>
     * <li>-h dbEnvHome - the directory or directories containing
     * environment(s) in which to perform the ldiff</li>
     * <li>-m maxErrors - the maximum number of errors to detect before
     * declaring the databases different and ending the operation.</li>
     * <li>-s database1,database2 - the databases to ldiff.</li>
     * <li>-q - be quiet, do not write to stdout</li>
     * </ul>
     *
     * <p>
     * If ldiff-ing a specific database, two database names must be specified.
     * If no database names are given, two environments must be specified. If
     * two database names and two environments are specified, the first
     * database is opened in the first environment and the second database is
     * opened in the second environment.
     * </p>
     *
     * <p>
     * The process exits with status 0 when no differences are found and with
     * status 1 when differences are found or the comparison fails.
     * </p>
     */
    public static void main(String[] args) {
        LDiff differ = new LDiff();
        differ.parseArgs(args);
        int exitCode;
        try {
            exitCode = differ.diff() ? 0 : 1;
        } catch (Exception e) {
            e.printStackTrace();
            exitCode = 1;
        }
        System.exit(exitCode);
    }

    /*
     * Parse the command line into cfg, home1/home2 and file1/file2. Any
     * invalid argument prints the usage message and exits the process (see
     * printUsage), so no code after a printUsage() call is ever reached.
     */
    private void parseArgs(String[] argv) {
        cfg = new LDiffConfig();
        cfg.setVerbose(true);
        int argc = 0;
        int nArgs = argv.length;

        while (argc < nArgs) {
            String thisArg = argv[argc++];
            if (thisArg.equals("-a")) {
                cfg.setDiffAnalysis(true);
            } else if (thisArg.equals("-b")) {
                if (argc < nArgs) {
                    try {
                        cfg.setBlockSize(Integer.parseInt(argv[argc++]));
                    } catch (NumberFormatException nfe) {
                        printUsage("-b requires an integer argument");
                    }
                } else {
                    printUsage("-b requires an argument");
                }
            } else if (thisArg.equals("-h")) {
                if (argc < nArgs) {
                    String[] envDirs = argv[argc++].split(",");
                    if (envDirs.length > 2) {
                        printUsage("Only 2 environments supported");
                    }
                    home1 = new File(envDirs[0]);
                    if (envDirs.length == 2) {
                        home2 = new File(envDirs[1]);
                    }
                } else {
                    printUsage("-h requires an argument");
                }
            } else if (thisArg.equals("-m")) {
                if (argc < nArgs) {
                    try {
                        cfg.setMaxErrors(Integer.parseInt(argv[argc++]));
                    } catch (NumberFormatException nfe) {
                        printUsage("-m requires an integer argument");
                    }
                } else {
                    printUsage("-m requires an argument");
                }
            } else if (thisArg.equals("-s")) {
                if (argc < nArgs) {
                    String[] dbNames = argv[argc++].split(",");
                    if (dbNames.length != 2) {
                        printUsage("-s requires two database names");
                    }
                    file1 = dbNames[0];
                    file2 = dbNames[1];
                } else {
                    printUsage("-s requires an argument");
                }
            } else if (thisArg.equals("-q")) {
                cfg.setVerbose(false);
            } else {
                printUsage(thisArg + " is not a valid option.");
            }
        }

        if (home1 == null) {
            printUsage("-h is a required argument");
        }

        if (home2 == null && file1 == null) {
            printUsage("2 databases must be specified with 1 environment");
        }
    }

    /* Print msg and the usage text to stderr, then exit with status -1. */
    private void printUsage(String msg) {
        System.err.println(msg);
        System.err.println(usageString);
        System.exit(-1);
    }

    /* Used by main() only; cfg is filled in by parseArgs(). */
    private LDiff() {
    }

    /**
     * Configure a new object with which to compare two databases.
     *
     * @param cfg the configuration parameters for the new object.
     */
    public LDiff(LDiffConfig cfg) {
        super();
        this.cfg = cfg;
    }

    /*
     * Build the configuration used for every database this class opens:
     * read-only, and marked via DbInternal.setUseExistingConfig.
     */
    private static DatabaseConfig newReadOnlyDbConfig() {
        DatabaseConfig dbConfig = new DatabaseConfig();
        dbConfig.setReadOnly(true);
        DbInternal.setUseExistingConfig(dbConfig, true);
        return dbConfig;
    }

    /*
     * Run an LDiff called from the command line. What actually gets diffed
     * depends upon the args passed in, either 2 environments, databases in two
     * separate environments or databases in the same environment.
     *
     * Every handle opened here is closed before returning, including on
     * failure and including the second environment when two environments and
     * two database names were given.
     */
    private boolean diff()
        throws Exception {

        EnvironmentConfig envConfiguration = new EnvironmentConfig();
        envConfiguration.setReadOnly(true);
        envConfiguration.setCachePercent(40);

        DatabaseConfig dbConfig = newReadOnlyDbConfig();

        Environment env1 = null;
        Environment env2 = null;
        Database db1 = null;
        Database db2 = null;
        try {
            env1 = new Environment(home1, envConfiguration);
            if (home2 != null) {
                env2 = new Environment(home2, envConfiguration);
                if (file1 == null) {
                    /* No dbs given, ldiff the environments. */
                    return diff(env1, env2);
                }
            }

            /* The second database lives in env2 when one was given. */
            Environment db2Env = (env2 != null) ? env2 : env1;
            db2 = db2Env.openDatabase(null, file2, dbConfig);
            db1 = env1.openDatabase(null, file1, dbConfig);
            return diff(db1, db2);
        } finally {
            /* Databases must be closed before their environments. */
            if (db1 != null) {
                db1.close();
            }
            if (db2 != null) {
                db2.close();
            }
            if (env1 != null) {
                env1.close();
            }
            if (env2 != null) {
                env2.close();
            }
        }
    }

    /**
     * A mechanism for efficiently comparing all databases in two quiescent
     * environments.
     *
     * @param env1 a valid, open Environment handle
     * @param env2 a valid, open Environment handle
     * @return true if all databases in env1 and env2 are identical
     * @throws Exception
     */
    public boolean diff(Environment env1, Environment env2)
        throws Exception {

        List<String> env1names = env1.getDatabaseNames();
        List<String> env2names = env2.getDatabaseNames();

        /*
         * Only databases named in env1 are compared below, so a database that
         * exists only in env2 is detected through the count mismatch.
         */
        boolean ret = (env1names.size() == env2names.size());
        if (!ret) {
            output("Environments have different number of databases.");
        }

        for (String dbName : env1names) {
            DatabaseConfig dbConfig = newReadOnlyDbConfig();
            Database db1;
            try {
                db1 = env1.openDatabase(null, dbName, dbConfig);
            } catch (DatabaseNotFoundException e) {
                /* Should never happen, ExclusiveCreate is false. */
                throw EnvironmentFailureException.unexpectedException(e);
            }

            try {
                Database db2;
                try {
                    db2 = env2.openDatabase(null, dbName, dbConfig);
                } catch (DatabaseNotFoundException e) {
                    /* There's a database in env1 that's not in env2. */
                    output(dbName + " does not exist in " +
                           env2.getHome().getName());
                    ret = false;
                    continue;
                }

                try {
                    if (!diff(db1, db2)) {
                        ret = false;
                    }
                } finally {
                    db2.close();
                }
            } finally {
                db1.close();
            }
        }

        if (ret) {
            output("No differences exist between the two environments.");
        } else {
            output("Differences exist between the two environments.");
        }

        return ret;
    }

    /**
     * A mechanism for efficiently comparing two quiescent databases.
     *
     * @param db1 a valid, open Database handle
     * @param db2 a valid, open Database handle
     * @return true if the db1 and db2 are identical
     * @throws Exception
     */
    public boolean diff(Database db1, Database db2)
        throws Exception {

        /* Summarize db2 as blocks, then slide a window over db1. */
        BlockBag bag = createBlockBag(db2);
        final boolean ret = diff(db1, bag);

        if (cfg.getVerbose()) {
            final String db1Name = db1.getDatabaseName();
            final String db2Name = db2.getDatabaseName();
            final boolean namesMatch = db1Name.equals(db2Name);
            if (ret) {
                if (namesMatch) {
                    output("No differences in " + db1Name);
                } else {
                    output(db1Name + " matches " + db2Name);
                }
            } else {
                if (namesMatch) {
                    output("Differences in " + db1Name);
                } else {
                    output(db1Name + " does not match " + db2Name);
                }
            }
        }

        /* Do the analysis for these two databases. */
        if (cfg.getDiffAnalysis() && tracker.getDiffRegions().size() != 0) {
            DiffRecordAnalyzer.doAnalysis(db1, db2, tracker, cfg.getVerbose());
        }

        return ret;
    }

    /**
     * A mechanism for efficiently comparing two quiescent environments, one
     * local and one on a remote machine. This method assumes that only basic,
     * unauthenticated communication is in use.
     *
     * @param env a valid, open Environment handle
     * @param addr the address of the remote machine
     * @return true if all the databases in both environments are the same
     * @throws IOException if a network error occurs
     * @throws ProtocolException if an unexpected message is received
     * @throws ServiceConnectFailedException if the remote service was busy
     * @throws Exception
     */
    public boolean diff(Environment env, InetSocketAddress addr)
        throws IOException,
               ProtocolException,
               ServiceConnectFailedException,
               Exception {

        return diff(env, addr, new SimpleChannelFactory());
    }

    /**
     * A mechanism for efficiently comparing two quiescent environments, one
     * local and one on a remote machine.
     *
     * @param env a valid, open Environment handle
     * @param addr the address of the remote machine
     * @param dcFactory the channel factory for connection creation
     * @return true if all the databases in both environments are the same
     * @throws IOException if a network error occurs
     * @throws ProtocolException if an unexpected message is received
     * @throws ServiceConnectFailedException if the remote service was busy
     * @throws Exception
     */
    public boolean diff(Environment env,
                        InetSocketAddress addr,
                        DataChannelFactory dcFactory)
        throws IOException,
               ProtocolException,
               ServiceConnectFailedException,
               Exception {

        List<String> envNames = env.getDatabaseNames();
        boolean ret;

        DataChannel channel = connect(addr, dcFactory);
        try {
            final Protocol protocol = new Protocol(
                new NameIdPair("Ldiff", -1),
                DbInternal.getNonNullEnvImpl(env));
            protocol.write(protocol.new EnvDiff(), channel);

            /*
             * Check that the number of local databases matches the number of
             * remote databases. This is how we detect a remote db that
             * doesn't exist locally.
             */
            Protocol.EnvInfo msg =
                protocol.read(channel, Protocol.EnvInfo.class);
            ret = (envNames.size() == msg.getNumberOfDBs());
            if (!ret) {
                output("Number of databases in local and remote environments " +
                       "does not match.");
            }
        } finally {
            /* Close the channel even if the exchange above failed. */
            channel.close();
        }

        /*
         * Run LDiff for every database in the local environment. If they all
         * succeed, the environments match.
         */
        for (String dbName : envNames) {
            Database db;
            try {
                db = env.openDatabase(null, dbName, newReadOnlyDbConfig());
            } catch (DatabaseNotFoundException e) {
                /* Should never happen, ExclusiveCreate is false. */
                throw EnvironmentFailureException.unexpectedException(e);
            }

            try {
                /*
                 * Connect only once the local database is open, so a failed
                 * open cannot leave a channel behind.
                 */
                channel = connect(addr, dcFactory);
                try {
                    if (!diff(db, channel)) {
                        ret = false;
                    }
                } catch (ProtocolException pe) {
                    output(dbName + " does not exist in remote environment.");
                    ret = false;
                } finally {
                    if (channel.isOpen()) {
                        channel.close();
                    }
                }
            } finally {
                db.close();
            }
        }

        if (ret) {
            output("Local environment matches remote.");
        } else {
            output("Local environment does not match remote.");
        }

        return ret;
    }

    /**
     * A mechanism for efficiently comparing two quiescent databases, one of
     * which resides on a remote machine.
     *
     * @param db a valid, open Database handle
     * @param addr the address of the remote host
     * @param dcFactory the channel factory for connection creation
     * @return true if the local Database and the remote Database are identical
     * @throws IOException if a network error occurs
     * @throws ProtocolException if the remote database does not exist
     * @throws ServiceConnectFailedException if the remote service is busy
     * @throws Exception
     */
    public boolean diff(Database db,
                        InetSocketAddress addr,
                        DataChannelFactory dcFactory)
        throws IOException,
               ProtocolException,
               ServiceConnectFailedException,
               Exception {

        final DataChannel channel = connect(addr, dcFactory);
        try {
            return diff(db, channel);
        } finally {
            channel.close();
        }
    }

    /*
     * Compare db with the same-named database served over channel: request
     * the remote block list, collect it into a BlockBag, diff db against it,
     * optionally run the record analysis, then send Done. The channel is
     * left open; closing it is the caller's job.
     */
    private boolean diff(Database db, DataChannel channel)
        throws IOException, ProtocolException, Exception {

        final Protocol protocol = new Protocol(
            new NameIdPair("Ldiff", -1),
            DbInternal.getNonNullEnvImpl(db.getEnvironment()));
        protocol.write(protocol.new DbBlocks
                       (db.getDatabaseName(), cfg.getBlockSize()), channel);

        /*
         * A protocol exception will be thrown here if the remote env does not
         * have a database of the same name.
         */
        protocol.read(channel, Protocol.BlockListStart.class);

        BlockBag bag = new BlockBag();
        Protocol.BlockInfo blockMsg;
        while (true) {
            try {
                blockMsg = protocol.read(channel, Protocol.BlockInfo.class);
                bag.add(blockMsg.getBlock());
            } catch (ProtocolException pe) {
                /* BLOCK_LIST_END is the normal end of the block stream. */
                if (pe.getUnexpectedMessage().getOp() !=
                    Protocol.BLOCK_LIST_END) {
                    throw pe;
                }
                break;
            }
        }

        boolean match = diff(db, bag);
        if (match) {
            output(db.getDatabaseName() + " matches remote database.");
        } else {
            output(db.getDatabaseName() + " does not match remote database.");
        }

        if (cfg.getDiffAnalysis() && tracker.getDiffRegions().size() != 0) {
            DiffRecordAnalyzer.doAnalysis
                (db, protocol, channel, tracker, cfg.getVerbose());
        }

        protocol.write(protocol.new Done(), channel);

        return match;
    }

    /**
     * A mechanism for efficiently comparing two quiescent databases, typically
     * residing on different machines connected by a network.
     *
     * @param db a valid, open Database handle
     * @param blkBag a bag of blocks to diff against db.
     *
     * @return true if the two comparing databases are identical.
     * @throws Exception
     */
    public boolean diff(Database db, BlockBag blkBag)
        throws Exception {

        /* Suppose the two comparing databases are identical, by default. */
        boolean identical = true;
        long pos = 1;
        int numKeys = cfg.getBlockSize();
        int errors = 0;
        int maxerrors = cfg.getMaxErrors();

        /*
         * window represents a BlockSize window into db. Initialize it to
         * represent the block starting at the first key in db.
         */
        Window window;
        Cursor cursor = db.openCursor(null, null);
        try {
            window = new Window(cursor, numKeys);
            tracker = new DiffTracker(numKeys);

            while (window.getChecksum() != 0 && blkBag.size() > 0) {

                /*
                 * Find the block in the bag whose checksum and md5 match the
                 * current window. This block, if it exists, is match.
                 */
                Block match = findMatch(db.getEnvironment(), blkBag, window);
                if (match != null) {
                    tracker.setBlockDiffBegin
                        (blkBag.getBlock(), blkBag.getBlockIndex());

                    /* Remove match and any earlier blocks from the bag. */
                    List<Block> removed = blkBag.remove(match);
                    if (removed != null) {
                        /* Blocks skipped over to reach match are unmatched. */
                        identical = false;
                        errors += removed.size();
                        tracker.calBlockDiffSize(blkBag.getBlockIndex());
                        if (maxerrors > 0 && errors >= maxerrors) {
                            break;
                        }
                    }
                    tracker.addDiffRegion(window);

                    /* Advance the window beyond the just matched block. */
                    window.nextWindow();
                    pos += window.size();
                    continue;
                }

                identical = false;
                LoggerUtils.envLogMsg
                    (Level.FINE,
                     DbInternal.getNonNullEnvImpl(db.getEnvironment()),
                     "Unmatched block at position " + pos);
                errors++;
                if (maxerrors > 0 && errors >= maxerrors) {
                    break;
                }

                /* Roll the window forward by one key. */
                window.rollWindow();
                if (window.getChecksum() != 0) {
                    pos++;
                }
            }
        } finally {
            /* Release the cursor even when the comparison fails. */
            cursor.close();
        }

        if (window.getChecksum() != 0) {

            /*
             * We ran out of blocks in blkBag before we got to the end of db.
             * Update the unmatched key range.
             */
            LoggerUtils.envLogMsg
                (Level.FINE,
                 DbInternal.getNonNullEnvImpl(db.getEnvironment()),
                 "Local Db has addtional records starting at " + pos + ".");
            identical = false;
            tracker.addWindowAdditionalDiffs(window);
        }

        if (blkBag.size() > 0) {
            /* All remaining blocks in the bag are unmatched. */
            for (Block b : blkBag) {
                LoggerUtils.envLogMsg
                    (Level.FINE,
                     DbInternal.getNonNullEnvImpl(db.getEnvironment()),
                     "Unmatched remote block: " + b);
            }
            identical = false;
            tracker.addBlockBagAdditionalDiffs(window, blkBag);
        }

        return identical;
    }

    /*
     * For unit test only. Returns null if no diff has been run yet.
     *
     * NOTE(review): the element type is not visible in this file, so the
     * return type is left raw; confirm against DiffTracker.getDiffRegions().
     */
    @SuppressWarnings("rawtypes")
    public List getDiffRegions() {
        if (tracker == null) {
            return null;
        }
        return tracker.getDiffRegions();
    }

    /**
     * Find the block in the bag whose rolling checksum and md5 match the given
     * window. The md5 for the window is computed lazily, since it's more
     * expensive to compute. There is a slight chance that multiple blocks in
     * the bag will match the checksum and the md5 hash, return the first such
     * block added to the bag in that case.
     *
     * @param env the environment whose logger receives the FINE messages
     * @param blkBag a bag of blocks to search for a match
     * @param window the block sized window of the db we're diffing
     * @return A block which matches the window's checksum and the window's md5
     * hash, or null if no block matches.
     */
    private Block findMatch(Environment env, BlockBag blkBag, Window window) {
        List<Block> matches = blkBag.get(window.getChecksum());

        if (matches == null) {
            return null;
        }

        /* Delay the computation of the hash until we know we need it. */
        byte[] md5 = window.getMd5Hash();

        for (Block b : matches) {
            if (Arrays.equals(b.getMd5Hash(), md5)) {
                return b;
            }
            LoggerUtils.envLogMsg
                (Level.FINE, DbInternal.getNonNullEnvImpl(env),
                 "Found a remote block whose rolling checksum " +
                 "matches LB but md5 hash doesn't:" + b);
        }

        /* No matches. */
        return null;
    }

    /**
     * Create a bag of blocks from the records in a given database, using the
     * configuration parameters specified when the LDiff object was created.
     *
     * @param db the database from which to create the bag of blocks
     * @return a bag of blocks built from the records in db
     */
    public BlockBag createBlockBag(Database db) {
        BlockBag bag = new BlockBag();

        /* Retrieve the key/data pairs and fill into blocks. */
        long start = System.currentTimeMillis();
        Iterator<Block> iter = iterator(db);
        while (iter.hasNext()) {
            bag.add(iter.next());
        }
        long end = System.currentTimeMillis();
        LoggerUtils.envLogMsg
            (Level.FINE, DbInternal.getNonNullEnvImpl(db.getEnvironment()),
             "Block bag created in : " + (end - start) + " ms.");

        return bag;
    }

    /**
     * Return an iterator over the non-empty blocks of the given database,
     * each holding up to the configured block size of records.
     *
     * @param db the database to read blocks from
     * @return an iterator over the blocks of db
     */
    public Iterator<Block> iterator(Database db) {
        return new LDiffIterator(db);
    }

    /**
     * Connect to addr and perform a service handshake. Retry as specified by
     * the config object.
     *
     * @param addr the remote address to connect to
     * @param dcFactory the channel factory for connection creation
     * @return an open DataChannel
     * @throws IOException if an exception occurs with the DataChannel
     * @throws ServiceConnectFailedException if the remote service is busy
     */
    private DataChannel connect(InetSocketAddress addr,
                                DataChannelFactory dcFactory)
        throws IOException, ServiceConnectFailedException {

        int triesLeft = cfg.getMaxConnectionAttempts();
        DataChannel ret;

        while (true) {
            /* Never carry a channel from a failed attempt into the next. */
            ret = null;
            try {
                ret = dcFactory.connect(addr, null,
                                        new ConnectOptions().
                                        setBlocking(true).
                                        setTcpNoDelay(true).
                                        setOpenTimeout(SOCKET_TIMEOUT_MS).
                                        setReadTimeout(SOCKET_TIMEOUT_MS));
                ServiceDispatcher.doServiceHandshake(ret, LDiffService.NAME);
                break;
            } catch (ServiceConnectFailedException scfe) {
                closeIfOpen(ret);

                /*
                 * Unable to connect because the remote service is busy. If
                 * the user requested it, keep re-trying. triesLeft == -1
                 * means never abort.
                 */
                if (triesLeft > 0) {
                    triesLeft--;
                }
                if (!cfg.getWaitIfBusy() || triesLeft == 0) {
                    throw scfe;
                }
            } catch (IOException ioe) {
                /* Do not leak a channel whose handshake failed. */
                closeIfOpen(ret);
                throw ioe;
            }
        }

        return ret;
    }

    /* Close the channel if it was created and is still open. */
    private static void closeIfOpen(DataChannel channel)
        throws IOException {

        if ((channel != null) && channel.isOpen()) {
            channel.close();
        }
    }

    /* Print msg to stdout unless the configuration asks for quiet mode. */
    private void output(String msg) {
        if (cfg.getVerbose()) {
            System.out.println(msg);
        }
    }

    /**
     * The exception that is thrown when a database diff detects differences.
     *
     * TODO: we start simple, by just using it as a boolean indicator and
     * perhaps a block id for unit test purposes? As as the local processing
     * gains in sophistication will provide block and key (insert, update,
     * delete) granularity identification of differences.
     */
    @SuppressWarnings("serial")
    class MismatchException extends Exception {
        public MismatchException(String message) {
            super(message);
        }
    }

    /*
     * Iterates over the non-empty blocks of a database. Each call to next()
     * opens a cursor, repositions it at the last record read so far, reads
     * one block ahead, and closes the cursor again, so no cursor is held
     * between calls.
     */
    private class LDiffIterator implements Iterator<Block> {
        /* The block that the next call to next() will return. */
        private Block cached;
        private final Database db;
        /* Cursor position after the most recent non-empty block was read. */
        private DatabaseEntry lastKey, lastData;
        private boolean more;
        /* Index passed to LDiffUtil.readBlock for the next block read. */
        private int i;
        private final int numKeys;

        public LDiffIterator(Database db) {
            i = 0;
            numKeys = LDiff.this.cfg.getBlockSize();
            cached = null;
            more = true;
            this.db = db;

            /* Prime the pump, get the first block in cached and set more. */
            next();
        }

        @Override
        public boolean hasNext() {
            return more;
        }

        /* Removal is not supported; this call is deliberately a no-op. */
        @Override
        public void remove() {
        }

        @Override
        public Block next() {
            if (!more) {
                throw new NoSuchElementException();
            }

            /*
             * We don't want to return a block with 0 keys, but we can't know
             * ahead of time whether the block will have any keys and the user
             * likely called hasNext() already. So when asked for block i, we
             * cache block i+1, check whether it's empty and return the
             * previously cached block i. If block i+1 is empty, the next call
             * to hasNext() will return false and the empty block won't be
             * returned.
             */
            final Cursor cursor = db.openCursor(null, null);
            try {
                if (lastKey == null) {
                    lastKey = new DatabaseEntry();
                    lastData = new DatabaseEntry();
                } else {
                    /* Resume from where the previous block ended. */
                    cursor.getSearchBoth(lastKey, lastData, null);
                }

                final Block ret = cached;
                cached = LDiffUtil.readBlock(i++, cursor, numKeys);

                if (cached.numRecords == 0) {
                    more = false;
                } else {
                    /* Remember the position for the following call. */
                    cursor.getCurrent(lastKey, lastData, null);
                }

                return ret;
            } finally {
                /* Always release the cursor, even if a read fails. */
                cursor.close();
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy