All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.wordnik.system.mongodb.ReplayUtil Maven / Gradle / Ivy

The newest version!
// Copyright (C) 2012  Wordnik, Inc.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or (at your 
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser 
// General Public License for more details.  You should have received a copy 
// of the GNU Lesser General Public License along with this program.  If not,
// see <http://www.gnu.org/licenses/>.

package com.wordnik.system.mongodb;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.zip.GZIPInputStream;

import org.bson.BSONDecoder;
import org.bson.BSONObject;
import org.bson.BasicBSONObject;
import org.bson.types.BSONTimestamp;

import com.mongodb.BasicDBObject;
import com.mongodb.DefaultDBDecoder;
import com.wordnik.util.PrintFormat;

/**
 * Command-line utility that reads BSON files from an input directory, decodes
 * each record and hands it to an {@link OplogReplayWriter}, optionally
 * filtering records by collection name and by timestamp.
 *
 * <p>All configuration is held in static fields that are populated by
 * {@link #parseArgs(String...)}; see {@link #usage()} for the supported flags.
 */
public class ReplayUtil extends MongoUtil {
	/** Directory scanned for input files (flag {@code -i}). */
	protected static String INPUT_DIR;
	/** Comma-separated collection names; a leading {@code !} excludes a collection (flag {@code -c}). */
	protected static String COLLECTION_STRING;
	/** Comma-separated {@code SOURCE=TARGET} collection re-mappings (flag {@code -r}). */
	protected static String COLLECTION_MAPPING_STRING;
	/** Comma-separated {@code SOURCE=TARGET} database re-mappings (flag {@code -R}). */
	protected static String DATABASE_MAPPING_STRING;
	//	NOTE(review): the three maps below are not referenced in this class and their
	//	type arguments cannot be determined from here, so they are left raw to stay
	//	source-compatible with any subclass that uses them.
	protected static Map COLLECTION_MAPPING = new HashMap();
	protected static Map DATABASE_MAPPING = new HashMap();
	/** Collections named with a leading {@code !} in {@link #COLLECTION_STRING}. */
	protected static Set<String> COLLECTIONS_TO_SKIP = new HashSet<String>();
	/** Collections named without a leading {@code !} in {@link #COLLECTION_STRING}. */
	protected static Set<String> COLLECTIONS_TO_ADD = new HashSet<String>();
	/** Records with a timestamp strictly before this are skipped (flag {@code -a}); {@code null} disables the check. */
	protected static BSONTimestamp AFTER_TIMESTAMP = null;
	/** Records with a timestamp at or after this are skipped (flag {@code -b}); {@code null} disables the check. */
	protected static BSONTimestamp BEFORE_TIMESTAMP = null;
	/** Stays {@code true} until at least one collection is explicitly included. */
	protected static boolean ONLY_COLLECTION_EXCLUSIONS = true;
	protected static Map NAMESPACE_COLLECTION_MAP = new HashMap();
	
	//	not referenced in this class
	protected static String DEST_DATABASE_NAME = "test";
	protected static String DEST_DATABASE_USER_NAME = null;
	protected static String DEST_DATABASE_PASSWORD = null;
	protected static String DEST_DATABASE_HOST = "localhost";

	/** Minimum number of milliseconds between two progress reports. */
	protected static long REPORT_INTERVAL = 10000;

	/**
	 * Entry point: parses the arguments and runs the replay, or prints the
	 * usage text when the arguments are invalid or no input directory is given.
	 *
	 * @param args command-line arguments, see {@link #usage()}
	 */
	public static void main(String ... args){
		if(!parseArgs(args) || INPUT_DIR == null){
			usage();
			return;
		}
		new ReplayUtil().run();
	}
	
	/**
	 * Splits {@link #COLLECTION_STRING} into the include and exclude sets.
	 * Names prefixed with {@code !} go to {@link #COLLECTIONS_TO_SKIP}; all
	 * others go to {@link #COLLECTIONS_TO_ADD} and switch off
	 * {@link #ONLY_COLLECTION_EXCLUSIONS}. Surrounding whitespace is trimmed
	 * and empty entries are ignored, so a stray comma or an empty string does
	 * not silently turn on include-only filtering.
	 */
	protected static void selectCollections(){
		if(COLLECTION_STRING == null){
			return;
		}
		for(String rawName : COLLECTION_STRING.split(",")){
			String collectionName = rawName.trim();
			if(collectionName.length() == 0){
				continue;
			}
			if(collectionName.startsWith("!")){
				//	skip it
				COLLECTIONS_TO_SKIP.add(collectionName.substring(1));
			}
			else{
				ONLY_COLLECTION_EXCLUSIONS = false;
				COLLECTIONS_TO_ADD.add(collectionName);
			}
		}
	}

	/**
	 * Parses the database and collection re-mapping strings into the supplied maps.
	 *
	 * @param databaseMappingString comma-separated {@code SOURCE=TARGET} pairs, may be {@code null}
	 * @param collectionMappingString comma-separated {@code SOURCE=TARGET} pairs, may be {@code null}
	 * @param databaseMappings receives the parsed database mappings
	 * @param collectionMappings receives the parsed collection mappings
	 * @throws IllegalArgumentException if an entry has no {@code =TARGET} part
	 */
	protected static void createMappings(String databaseMappingString, String collectionMappingString, Map<String, String> databaseMappings, Map<String, String> collectionMappings){
		parseMappings(databaseMappingString, databaseMappings);
		parseMappings(collectionMappingString, collectionMappings);
	}

	/** Adds every {@code SOURCE=TARGET} pair of a comma-separated string to {@code mappings}; a {@code null} string is a no-op. */
	private static void parseMappings(String mappingString, Map<String, String> mappings){
		if(mappingString == null){
			return;
		}
		StringTokenizer tk = new StringTokenizer(mappingString, ",");
		while(tk.hasMoreTokens()){
			String token = tk.nextToken();
			String[] split = token.split("=");
			if(split.length < 2){
				//	fail with a readable message instead of an ArrayIndexOutOfBoundsException
				throw new IllegalArgumentException("invalid mapping '" + token + "', expected {SOURCE}={TARGET}");
			}
			mappings.put(split[0], split[1]);
		}
	}

	/**
	 * Replays every file in {@link #INPUT_DIR} whose name contains
	 * {@code .bson} (files ending in {@code .gz} are read through gzip).
	 * Each decoded record is passed to the writer when
	 * {@link #shouldProcessRecord(String, BSONTimestamp)} accepts it and
	 * counted as skipped otherwise. Progress is printed at most once per
	 * {@link #REPORT_INTERVAL} and once more after the last file.
	 *
	 * <p>An exception while reading a file is printed and ends processing of
	 * that file only; the remaining files are still processed.
	 */
	protected void run(){
		long startTime = System.currentTimeMillis();
		//	decide what collections to process
		selectCollections();

		OplogReplayWriter util = new OplogReplayWriter();

		//	create any re-mappings
		Map<String, String> collectionMappings = new HashMap<String, String>();
		Map<String, String> databaseMappings = new HashMap<String, String>();
		createMappings(DATABASE_MAPPING_STRING, COLLECTION_MAPPING_STRING, databaseMappings, collectionMappings);

		//	configure the writer
		util.setCollectionMappings(collectionMappings);
		util.setDatabaseMappings(databaseMappings);
		util.setDestinationDatabaseUsername(DEST_DATABASE_USER_NAME);
		util.setDestinationDatabasePassword(DEST_DATABASE_PASSWORD);
		util.setDestinationDatabaseHost(DEST_DATABASE_HOST);

		try{
			File[] files = new File(INPUT_DIR).listFiles();
			if(files == null){
				//	listFiles() returns null when the path is not a directory or cannot be read
				System.err.println("unable to list input directory " + INPUT_DIR);
				return;
			}

			//	NOTE(review): listFiles() guarantees no ordering; if replay order across
			//	files matters, the list probably needs sorting -- confirm naming scheme first.
			List<File> filesToProcess = new ArrayList<File>();
			for(File file : files){
				if(file.getName().indexOf(".bson") > 0){
					filesToProcess.add(file);
				}
			}

			long operationsRead = 0;
			long operationsSkipped = 0;
			long lastOutput = System.currentTimeMillis();
			for(File file : filesToProcess){
				System.out.println("replaying file " + file.getName());
				BufferedInputStream inputStream = null;
				try{
					inputStream = openInputStream(file);
					BSONDecoder decoder = new DefaultDBDecoder();
					//	available() == 0 is used as the end-of-input signal
					while(inputStream.available() != 0){
						BSONObject obj = decoder.readObject(inputStream);
						if(obj == null){
							break;
						}
						BasicDBObject dbo = new BasicDBObject((BasicBSONObject)obj);

						BSONTimestamp operationTimestamp = (BSONTimestamp)dbo.get("ts");
						String namespace = dbo.getString("ns");
						String collection = util.getUnmappedCollectionFromNamespace(namespace);

						if(collection != null && shouldProcessRecord(collection, operationTimestamp)){
							util.processRecord(dbo);
							operationsRead++;
						}
						else{
							operationsSkipped++;
						}

						long now = System.currentTimeMillis();
						if(now - lastOutput > REPORT_INTERVAL){
							report(util.getInsertCount(), util.getUpdateCount(), util.getDeleteCount(), operationsRead, operationsSkipped, now - startTime);
							lastOutput = System.currentTimeMillis();
						}
					}
				}
				catch(Exception ex){
					ex.printStackTrace();
				}
				finally{
					//	release the file handle even when decoding or writing failed
					closeQuietly(inputStream);
				}
			}

			//	final totals, so runs shorter than REPORT_INTERVAL still produce a summary
			report(util.getInsertCount(), util.getUpdateCount(), util.getDeleteCount(), operationsRead, operationsSkipped, System.currentTimeMillis() - startTime);
		}
		catch(Exception e){
			e.printStackTrace();
		}
	}

	/** Opens {@code file} for buffered reading, decompressing it when its name ends in {@code .gz}. */
	private static BufferedInputStream openInputStream(File file) throws IOException {
		FileInputStream rawStream = new FileInputStream(file);
		try{
			InputStream source = rawStream;
			if(file.getName().endsWith(".gz")){
				source = new GZIPInputStream(rawStream);
			}
			return new BufferedInputStream(source);
		}
		catch(IOException e){
			//	the gzip header could not be read; do not leak the underlying file handle
			closeQuietly(rawStream);
			throw e;
		}
	}

	/** Closes {@code stream} if it is non-null, ignoring any failure to close. */
	private static void closeQuietly(InputStream stream){
		if(stream == null){
			return;
		}
		try{
			stream.close();
		}
		catch(IOException ignored){
			//	nothing useful can be done here; the stream was only read from
		}
	}

	/**
	 * Decides whether a record should be replayed.
	 *
	 * <p>A record is rejected when its collection is in
	 * {@link #COLLECTIONS_TO_SKIP}, or when explicit inclusions were given and
	 * the collection is not in {@link #COLLECTIONS_TO_ADD}. When a timestamp
	 * bound is configured the record must additionally satisfy
	 * {@code AFTER_TIMESTAMP <= timestamp < BEFORE_TIMESTAMP} (each bound only
	 * if set); a record without a timestamp cannot satisfy a bound and is
	 * rejected rather than causing a {@code NullPointerException}.
	 *
	 * @param collection collection name of the record
	 * @param timestamp timestamp of the record, may be {@code null}
	 * @return {@code true} if the record should be processed
	 */
	protected boolean shouldProcessRecord(String collection, BSONTimestamp timestamp) {
		if(COLLECTIONS_TO_SKIP.contains(collection)){
			return false;
		}
		if(!ONLY_COLLECTION_EXCLUSIONS && !COLLECTIONS_TO_ADD.contains(collection)){
			return false;
		}
		if(AFTER_TIMESTAMP == null && BEFORE_TIMESTAMP == null){
			return true;
		}
		if(timestamp == null){
			return false;
		}
		if(AFTER_TIMESTAMP != null && timestamp.getTime() < AFTER_TIMESTAMP.getTime()){
			return false;
		}
		if(BEFORE_TIMESTAMP != null && timestamp.getTime() >= BEFORE_TIMESTAMP.getTime()){
			return false;
		}
		return true;
	}

	/**
	 * Parses command-line arguments into the static configuration fields.
	 * Every option is a two-character flag whose second character selects the
	 * option, followed by its value in the next argument.
	 *
	 * @param args the command-line arguments
	 * @return {@code false} if a flag is unknown, too short, or has no value
	 *         following it; {@code true} otherwise
	 * @throws RuntimeException if the value of {@code -a} or {@code -b} is not
	 *         a date in {@code yyyy-MM-dd HH:mm:ss} format
	 */
	public static boolean parseArgs(String...args){
		for (int i = 0; i < args.length; i++) {
			String flag = args[i];
			//	reject malformed flags and flags with no value instead of throwing an index exception
			if(flag == null || flag.length() < 2 || i + 1 >= args.length){
				return false;
			}
			String value = args[++i];
			switch (flag.charAt(1)) {
			case 'i':
				INPUT_DIR = value;
				break;
			case 'c':
				COLLECTION_STRING = value;
				break;
			case 'R':
				DATABASE_MAPPING_STRING = value;
				break;
			case 'r':
				COLLECTION_MAPPING_STRING = value;
				break;
			case 'a':
				AFTER_TIMESTAMP = parseTimestamp(value);
				break;
			case 'b':
				BEFORE_TIMESTAMP = parseTimestamp(value);
				break;
			case 'u':
				DEST_DATABASE_USER_NAME = value;
				break;
			case 'p':
				DEST_DATABASE_PASSWORD = value;
				break;
			case 'h':
				DEST_DATABASE_HOST = value;
				break;
			default:
				return false;
			}
		}
		return true;
	}

	/** Converts a {@code yyyy-MM-dd HH:mm:ss} date string to a timestamp with second resolution and increment 0. */
	private static BSONTimestamp parseTimestamp(String value){
		try{
			//	SimpleDateFormat is not thread-safe, so a fresh instance is used per call
			Date date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(value);
			return new BSONTimestamp((int)(date.getTime()/1000), 0);
		}
		catch(ParseException e){
			throw new RuntimeException("invalid date supplied: " + value, e);
		}
	}

	/**
	 * Prints one progress line with the operation counters and the overall rate.
	 *
	 * @param inserts number of inserts reported by the writer
	 * @param updates number of updates reported by the writer
	 * @param deletes number of deletes reported by the writer
	 * @param totalCount number of records processed, used for the rate
	 * @param skips number of records skipped
	 * @param duration elapsed time in milliseconds
	 */
	void report(long inserts, long updates, long deletes, long totalCount, long skips, long duration){
		double seconds = duration / 1000.0;
		//	avoid Infinity/NaN when no measurable time has elapsed
		double brate = seconds > 0 ? (double)totalCount / seconds : 0;
		System.out.println("inserts: " + PrintFormat.LONG_FORMAT.format(inserts) + ", updates: " + PrintFormat.LONG_FORMAT.format(updates) + ", deletes: " + PrintFormat.LONG_FORMAT.format(deletes) + ", skips: " + PrintFormat.LONG_FORMAT.format(skips) + " (" + PrintFormat.LONG_FORMAT.format(brate) + " req/sec)");
	}

	/** Prints the supported command-line options to standard output. */
	public static void usage(){
		System.out.println("usage: ReplayUtil");
		System.out.println(" -i : input directory");
		System.out.println(" -c : CSV collection string (prefix with ! to exclude)");
		System.out.println(" -r : collection re-targeting (format: {SOURCE}={TARGET})");
		System.out.println(" -R : database re-targeting (format: {SOURCE}={TARGET})");
		System.out.println(" -a : only process entries after this timestamp (format: yyyy-MM-dd HH:mm:ss)");
		System.out.println(" -b : only process entries before this timestamp (format: yyyy-MM-dd HH:mm:ss)");
		System.out.println(" -h : destination hostname");
		System.out.println(" [-u : username]");
		System.out.println(" [-p : password]");
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy