All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sleepycat.je.rep.util.ldiff.LDiff Maven / Gradle / Ivy

Go to download

Berkeley DB Java Edition is a open source, transactional storage solution for Java applications. The Direct Persistence Layer (DPL) API is faster and easier to develop, deploy, and manage than serialized object files or ORM-based Java persistence solutions. The Collections API enhances the standard java.util.collections classes allowing them to be persisted to a local file system and accessed concurrently while protected by ACID transactions. Data is stored by serializing objects and managing class and instance data separately so as not to waste space. Berkeley DB Java Edition is the reliable drop-in solution for complex, fast, and scalable storage. Source for this release is in 'je-4.0.92-sources.jar', the Javadoc is located at 'http://download.oracle.com/berkeley-db/docs/je/4.0.92/'.

There is a newer version: 5.0.73
Show newest version
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.rep.util.ldiff;

import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.logging.Level;

import com.sleepycat.je.Cursor;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseNotFoundException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentFailureException;
import com.sleepycat.je.rep.impl.node.NameIdPair;
import com.sleepycat.je.rep.net.DataChannel;
import com.sleepycat.je.rep.net.DataChannelFactory;
import com.sleepycat.je.rep.net.DataChannelFactory.ConnectOptions;
import com.sleepycat.je.rep.utilint.BinaryProtocol.ProtocolException;
import com.sleepycat.je.rep.utilint.ServiceDispatcher;
import com.sleepycat.je.rep.utilint.ServiceDispatcher.ServiceConnectFailedException;
import com.sleepycat.je.rep.utilint.net.SimpleChannelFactory;
import com.sleepycat.je.utilint.CmdUtil;
import com.sleepycat.je.utilint.LoggerUtils;

/**
 * LDiff provides a mechanism for efficiently comparing two quiescent
 * databases, typically residing on different machines connected by a
 * network. The comparison is done at the logical level rather than the
 * physical level, so that we can compare the contents of replicated databases
 * where the logical contents may be identical, but the physical logs may be
 * very different. If the databases are found to be different, it provides
 * information that would help identify the specific nature of the differences.
 *
 * This class provides the external API used to initiate a comparison.
 *
 * For details, please review the document at:
 *
 * @see 
 *      LDiff 
 */
public class LDiff {

    private LDiffConfig cfg;
    private File home1, home2;
    private String file1, file2;
    private DiffTracker tracker;

    private static final String usageString = "usage: " +
        CmdUtil.getJavaCommand(LDiff.class) +
        "\n" +
        "  -h [,]   # environment home directory\n" +
        "  [-a]                # analyze diff\n" +
        "  [-b ]    # number of records to put in each block\n" +
        "  [-m ]    # abort diff after a number of errors\n" +
        "  [-s ,] # database(s) to compare\n" +
        "  [-q]                # be quiet, do not print to stdout";

    private static final int SOCKET_TIMEOUT_MS = 10000;

    /**
     * The main used by the LDiff utility.
     *
     * @param args The arguments accepted by the LDiff utility.
     *
     * 
     * usage: java com.sleepycat.je.rep.util.ldiff.LDiff
     *             [-s database1,database2] -h dbEnvHome1[,dbEnvHome2]
     *             [-a] [-b blockSize] [-m maxErrors] [-q]
     * 
* *

* -a - generate an analysis of the differences
* -b blockSize - the number of records to compare at one time
* -h dbEnvHome - the directory or directories containing environment(s) in * which to perform the ldiff
* -m maxErrors - the maximum number of errors to detect before declaring * the databases different and ending the operation.
* -s database1,database2 - the databases to ldiff.
* -q - be quiet, do not write to stdout *

*

* If ldiff-ing a specific database, two database names must be specified. * If no database names are given, two environments must be specified. If * two database names and two environments are specified, the first * database is opened in the first environment and the second database is * opened in the second environment. *

*/ public static void main(String[] args) { LDiff differ = new LDiff(); differ.parseArgs(args); try { if (differ.diff()) { System.exit(0); } else { System.exit(1); } } catch (Exception e) { e.printStackTrace(); System.exit(1); } } private void parseArgs(String[] argv) { cfg = new LDiffConfig(); cfg.setVerbose(true); int argc = 0; int nArgs = argv.length; while (argc < nArgs) { String thisArg = argv[argc++]; if (thisArg.equals("-a")) { cfg.setDiffAnalysis(true); } else if (thisArg.equals("-b")) { if (argc < nArgs) { try { cfg.setBlockSize(Integer.parseInt(argv[argc++])); } catch (NumberFormatException nfe) { printUsage("-b requires an integer argument"); } } else { printUsage("-b requires an argument"); } } else if (thisArg.equals("-h")) { if (argc < nArgs) { String[] envDirs = argv[argc++].split(","); if (envDirs.length > 2) { printUsage("Only 2 environments supported"); } home1 = new File(envDirs[0]); if (envDirs.length == 2) { home2 = new File(envDirs[1]); } } else { printUsage("-h requires an argument"); } } else if (thisArg.equals("-m")) { if (argc < nArgs) { try { cfg.setMaxErrors(Integer.parseInt(argv[argc++])); } catch (NumberFormatException nfe) { printUsage("-m requires an integer argument"); } } else { printUsage("-m requires an argument"); } } else if (thisArg.equals("-s")) { if (argc < nArgs) { String[] dbNames = argv[argc++].split(","); if (dbNames.length != 2) { printUsage("-s requires two database names"); } file1 = dbNames[0]; file2 = dbNames[1]; } else { printUsage("-s requires an argument"); } } else if (thisArg.equals("-q")) { cfg.setVerbose(false); } else { printUsage(thisArg + " is not a valid option."); } } if (home1 == null) { printUsage("-h is a required argument"); } if (home2 == null && file1 == null) { printUsage("2 databases must be specified with 1 environment"); } } private void printUsage(String msg) { System.err.println(msg); System.err.println(usageString); System.exit(-1); } private LDiff() { } /** * Configure a new object with which to compare two databases. * * @param cfg the configuration parameters for the new object. */ public LDiff(LDiffConfig cfg) { super(); this.cfg = cfg; } /* * Run an LDiff called from the command line. What actually gets diffed * depends upon the args passed in, either 2 environments, databases in two * separate environments or databases in the same environment. */ private boolean diff() throws Exception { EnvironmentConfig envConfiguration = new EnvironmentConfig(); envConfiguration.setReadOnly(true); envConfiguration.setCachePercent(40); Environment env1 = new Environment(home1, envConfiguration); DatabaseConfig dbConfig = new DatabaseConfig(); dbConfig.setReadOnly(true); DbInternal.setUseExistingConfig(dbConfig, true); Database db2; if (home2 != null) { Environment env2 = new Environment(home2, envConfiguration); if (file1 == null) { /* No dbs given, ldiff the environments. */ boolean ret = diff(env1, env2); env1.close(); env2.close(); return ret; } db2 = env2.openDatabase(null, file2, dbConfig); } else { db2 = env1.openDatabase(null, file2, dbConfig); } Database db1 = env1.openDatabase(null, file1, dbConfig); boolean ret = diff(db1, db2); db1.close(); db2.close(); env1.close(); return ret; } /** * A mechanism for efficiently comparing all databases in two quiescent * environments. * * @param env1 a valid, open Environment handle * @param env2 a valid, open Environment handle * @return true if all databases in env1 and env2 are identical * @throws Exception */ public boolean diff(Environment env1, Environment env2) throws Exception { List env1names = env1.getDatabaseNames(); List env2names = env2.getDatabaseNames(); boolean ret = (env1names.size() == env2names.size()); if (!ret) { output("Environments have different number of databases."); } for (String dbName : env1names) { DatabaseConfig dbConfig = new DatabaseConfig(); dbConfig.setReadOnly(true); DbInternal.setUseExistingConfig(dbConfig, true); Database db1, db2; try { db1 = env1.openDatabase(null, dbName, dbConfig); } catch (DatabaseNotFoundException e) { /* Should never happen, ExclusiveCreate is false. */ throw EnvironmentFailureException.unexpectedException(e); } try { db2 = env2.openDatabase(null, dbName, dbConfig); } catch (DatabaseNotFoundException e) { /* There's a database in env1 that's not in env2. */ db1.close(); output(dbName + " does not exist in " + env2.getHome().getName()); ret = false; continue; } if (!diff(db1, db2)) { ret = false; } db1.close(); db2.close(); } if (ret) { output("No differences exist between the two environments."); } else { output("Differences exist between the two environments."); } return ret; } /** * A mechanism for efficiently comparing two quiescent databases. * * @param db1 a valid, open Database handle * @param db2 a valid, open Database handle * @return true if the db1 and db2 are identical * @throws Exception */ public boolean diff(Database db1, Database db2) throws Exception { BlockBag bag = createBlockBag(db2); final boolean ret = diff(db1, bag); if (cfg.getVerbose()) { final String db1Name = db1.getDatabaseName(); final String db2Name = db2.getDatabaseName(); final boolean namesMatch = db1Name.equals(db2Name); if (ret) { if (namesMatch) { output("No differences in " + db1Name); } else { output(db1Name + " matches " + db2Name); } } else { if (namesMatch) { output("Differences in " + db1Name); } else { output(db1Name + " does not match " + db2Name); } } } /* Do the analysis for these two databases. */ if (cfg.getDiffAnalysis() && tracker.getDiffRegions().size() != 0) { DiffRecordAnalyzer.doAnalysis(db1, db2, tracker, cfg.getVerbose()); } return ret; } /** * A mechanism for efficiently comparing two quiescent environments, one * local and one on a remote machine. This method assumes that only basic, * unauthenticated communication is in use. * * @param env a valid, open Environment handle * @param addr the address of the remote machine * @return true if all the databases in both environments are the same * @throws IOException if a network error occurs * @throws ProtocolException if an unexpected message is received * @throws ServiceConnectFailedException if the remote service was busy * @throws Exception */ public boolean diff(Environment env, InetSocketAddress addr) throws IOException, ProtocolException, ServiceConnectFailedException, Exception { return diff(env, addr, new SimpleChannelFactory()); } /** * A mechanism for efficiently comparing two quiescent environments, one * local and one on a remote machine. * * @param env a valid, open Environment handle * @param addr the address of the remote machine * @param dcFactory the channel factory for connection creation * @return true if all the databases in both environments are the same * @throws IOException if a network error occurs * @throws ProtocolException if an unexpected message is received * @throws ServiceConnectFailedException if the remote service was busy * @throws Exception */ public boolean diff(Environment env, InetSocketAddress addr, DataChannelFactory dcFactory) throws IOException, ProtocolException, ServiceConnectFailedException, Exception { List envNames = env.getDatabaseNames(); boolean ret = true; DataChannel channel = connect(addr, dcFactory); final Protocol protocol = new Protocol( new NameIdPair("Ldiff", -1), DbInternal.getNonNullEnvImpl(env)); protocol.write(protocol.new EnvDiff(), channel); /* * Check that the number of local databases matches the number of * remote databases. This is how we detect a remote db that doesn't * exist locally. */ Protocol.EnvInfo msg = protocol.read(channel, Protocol.EnvInfo.class); ret = (envNames.size() == msg.getNumberOfDBs()); if (!ret) { output("Number of databases in local and remote environments " + "does not match."); } channel.close(); /* * Run LDiff for every database in the local environment. If they all * succeed, the environments match. */ for (String dbName : envNames) { channel = connect(addr, dcFactory); DatabaseConfig dbConfig = new DatabaseConfig(); dbConfig.setReadOnly(true); DbInternal.setUseExistingConfig(dbConfig, true); Database db; try { db = env.openDatabase(null, dbName, dbConfig); } catch (DatabaseNotFoundException e) { /* Should never happen, ExclusiveCreate is false. */ throw EnvironmentFailureException.unexpectedException(e); } try { if (!diff(db, channel)) { ret = false; } } catch (ProtocolException pe) { output(dbName + " does not exist in remote environment."); ret = false; } finally { db.close(); if (channel.isOpen()) { channel.close(); } } } if (ret) { output("Local environment matches remote."); } else { output("Local environment does not match remote."); } return ret; } /** * A mechanism for efficiently comparing two quiescent databases, one of * which resides on a remote machine. * * @param db a valid, open Database handle * @param addr the address of the remote host * @param dcFactory the channel factory for connection creation * @return true if the local Database and the remote Database are identical * @throws IOException if a network error occurs * @throws ProtocolException if the remote database does not exist * @throws ServiceConnectFailedException if the remote service is busy * @throws Exception */ public boolean diff(Database db, InetSocketAddress addr, DataChannelFactory dcFactory) throws IOException, ProtocolException, ServiceConnectFailedException, Exception { final DataChannel channel = connect(addr, dcFactory); boolean ret; try { ret = diff(db, channel); } finally { channel.close(); } return ret; } private boolean diff(Database db, DataChannel channel) throws IOException, ProtocolException, Exception { final Protocol protocol = new Protocol( new NameIdPair("Ldiff", -1), DbInternal.getNonNullEnvImpl(db.getEnvironment())); protocol.write(protocol.new DbBlocks (db.getDatabaseName(), cfg.getBlockSize()), channel); /* * A protocol exception will be thrown here if the remote env does not * have a database of the same name. */ protocol.read(channel, Protocol.BlockListStart.class); BlockBag bag = new BlockBag(); Protocol.BlockInfo blockMsg; while (true) { try { blockMsg = protocol.read(channel, Protocol.BlockInfo.class); bag.add(blockMsg.getBlock()); } catch (ProtocolException pe) { if (pe.getUnexpectedMessage().getOp() != Protocol.BLOCK_LIST_END) { throw pe; } break; } } boolean match = diff(db, bag); if (match) { output(db.getDatabaseName() + " matches remote database."); } else { output(db.getDatabaseName() + "does not match remote database."); } if (cfg.getDiffAnalysis() && tracker.getDiffRegions().size() != 0) { DiffRecordAnalyzer.doAnalysis (db, protocol, channel, tracker, cfg.getVerbose()); } protocol.write(protocol.new Done(), channel); return match; } /** * A mechanism for efficiently comparing two quiescent databases, typically * residing on different machines connected by a network. * * @param db a valid, open Database handle * @param blkBag a bag of blocks to diff against db. * * @return true if the two comparing databases are identical. * @throws Exception */ public boolean diff(Database db, BlockBag blkBag) throws Exception { /* Suppose the two comparing databases are identical, by default. */ boolean identical = true; /* * window represents a BlockSize window into db. Initialize it to * represent the block starting at the first key in db. */ Cursor cursor = db.openCursor(null, null); long pos = 1; int numKeys = cfg.getBlockSize(); Window window = new Window(cursor, numKeys); int errors = 0; int maxerrors = cfg.getMaxErrors(); tracker = new DiffTracker(numKeys); while (window.getChecksum() != 0 && blkBag.size() > 0) { /* * Find the block in the bag whose checksum and md5 match the * current window. This block, if it exists, is match. */ Block match = findMatch(db.getEnvironment(), blkBag, window); if (match != null) { tracker.setBlockDiffBegin (blkBag.getBlock(), blkBag.getBlockIndex()); /* Remove match and any earlier blocks from the bag. */ List removed = blkBag.remove(match); if (removed != null) { identical = false; errors += removed.size(); tracker.calBlockDiffSize(blkBag.getBlockIndex()); if (maxerrors > 0 && errors >= maxerrors) { break; } } tracker.addDiffRegion(window); /* Advance the window beyond the just matched block. */ window.nextWindow(); pos += window.size(); continue; } identical = false; LoggerUtils.envLogMsg (Level.FINE, DbInternal.getNonNullEnvImpl(db.getEnvironment()), "Unmatched block at position " + pos); errors++; if (maxerrors > 0 && errors >= maxerrors) { break; } /* Roll the window forward by one key. */ window.rollWindow(); if (window.getChecksum() != 0) { pos++; } } cursor.close(); if (window.getChecksum() != 0) { /* * We ran out of blocks in blkBag before we got to the end of db. * Update the unmatched key range. */ LoggerUtils.envLogMsg (Level.FINE, DbInternal.getNonNullEnvImpl(db.getEnvironment()), "Local Db has addtional records starting at " + pos + "."); identical = false; tracker.addWindowAdditionalDiffs(window); } if (blkBag.size() > 0) { /* All remaining blocks in the bag are unmatched. */ for (Block b : blkBag) { LoggerUtils.envLogMsg (Level.FINE, DbInternal.getNonNullEnvImpl(db.getEnvironment()), "Unmatched remote block: " + b); } identical = false; tracker.addBlockBagAdditionalDiffs(window, blkBag); } return identical; } /* For unit test only. */ public List getDiffRegions() { if (tracker == null) { return null; } return tracker.getDiffRegions(); } /** * Find the block in the bag whose rolling checksum and md5 match the given * window. The md5 for the window is computed lazily, since it's more * expensive to compute. There is a slight chance that multiple blocks in * the bag will match the checksum and the md5 hash, return the first such * block added to the bag in that case. * * @param blkBag a bag of blocks to search for a match * @param window the block sized window of the db we're diffing * @return A block which matches the window's checksum and the window's md5 * hash, or null if no block matches. */ private Block findMatch(Environment env, BlockBag blkBag, Window window) { List matches = blkBag.get(window.getChecksum()); if (matches == null) { return null; } /* Delay the computation of the hash until we know we need it. */ byte[] md5 = window.getMd5Hash(); for (Block b : matches) { if (Arrays.equals(b.getMd5Hash(), md5)) { return b; } LoggerUtils.envLogMsg (Level.FINE, DbInternal.getNonNullEnvImpl(env), "Found a remote block whose rolling checksum " + "matches LB but md5 hash doesn't:" + b); } /* No matches. */ return null; } /** * Create a bag of blocks from the records in a given database, using the * configuration parameters specified when the LDiff object was created. * * @param db the database from which to create the bag of blocks * @return a bag of blocks built from the records in db */ public BlockBag createBlockBag(Database db) { BlockBag bag = new BlockBag(); /* Retrieve the key/data pairs and fill into blocks. */ long start = System.currentTimeMillis(); Iterator iter = iterator(db); while (iter.hasNext()) { bag.add(iter.next()); } long end = System.currentTimeMillis(); LoggerUtils.envLogMsg (Level.FINE, DbInternal.getNonNullEnvImpl(db.getEnvironment()), "Block bag created in : " + (end - start) + " ms."); return bag; } public Iterator iterator(Database db) { return new LDiffIterator(db); } /** * Connect to addr and perform a service handshake. Retry as specified by * the config object. * * @param addr the remote address to connect to * @param dcFactory the channel factory for connection creation * @return an open DataChannel * @throws IOException if an exception occurs with the DataChannel * @throws ServiceConnectFailedException if the remote service is busy */ private DataChannel connect(InetSocketAddress addr, DataChannelFactory dcFactory) throws IOException, ServiceConnectFailedException { int triesLeft = cfg.getMaxConnectionAttempts(); DataChannel ret = null; while (true) { try { ret = dcFactory.connect(addr, null, new ConnectOptions(). setBlocking(true). setTcpNoDelay(true). setOpenTimeout(SOCKET_TIMEOUT_MS). setReadTimeout(SOCKET_TIMEOUT_MS)); ServiceDispatcher.doServiceHandshake(ret, LDiffService.NAME); break; } catch (ServiceConnectFailedException scfe) { if ((ret != null) && ret.isOpen()) { ret.close(); } /* * Unable to connect because the remote service is busy. If * the user requested it, keep re-trying. triesLeft == -1 * means never abort. */ if (triesLeft > 0) { triesLeft--; } if (!cfg.getWaitIfBusy() || triesLeft == 0) { throw scfe; } } } return ret; } private void output(String msg) { if (cfg.getVerbose()) { System.out.println(msg); } } /** * The exception that is thrown when a database diff detects differences. * * TODO: we start simple, by just using it as a boolean indicator and * perhaps a block id for unit test purposes? As as the local processing * gains in sophistication will provide block and key (insert, update, * delete) granularity identification of differences. */ @SuppressWarnings("serial") class MismatchException extends Exception { public MismatchException(String message) { super(message); } } private class LDiffIterator implements Iterator { private Block cached; private Cursor cursor; private final Database db; private DatabaseEntry lastKey, lastData; private boolean more; private int i; private final int numKeys; public LDiffIterator(Database db) { i = 0; numKeys = LDiff.this.cfg.getBlockSize(); cached = null; more = true; this.db = db; /* Prime the pump, get the first block in cached and set more. */ next(); } @Override public boolean hasNext() { return more; } @Override public void remove() { } @Override public Block next() { if (!more) { throw new NoSuchElementException(); } /* * We don't want to return a block with 0 keys, but we can't know * ahead of time whether the block will have any keys and the user * likely called hasNext() already. So when asked for block i, we * cache block i+1, check whether it's empty and return the * previously cached block i. If block i+1 is empty, the next call * to hasNext() will return false and the empty block won't be * returned. */ cursor = db.openCursor(null, null); if (lastKey == null) { lastKey = new DatabaseEntry(); lastData = new DatabaseEntry(); } else { cursor.getSearchBoth(lastKey, lastData, null); } Block ret = cached; cached = LDiffUtil.readBlock(i++, cursor, numKeys); if (cached.numRecords == 0) { more = false; } else { cursor.getCurrent(lastKey, lastData, null); } cursor.close(); return ret; } @Override protected void finalize() throws Throwable { try { cursor.close(); } finally { super.finalize(); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy