All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.wordnik.system.mongodb.Analyzer Maven / Gradle / Ivy

The newest version!
// Copyright (C) 2012  Wordnik, Inc.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or (at your 
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty 
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser 
// General Public License for more details.  You should have received a copy 
// of the GNU Lesser General Public License along with this program.  If not,
// see <http://www.gnu.org/licenses/>.

package com.wordnik.system.mongodb;

import java.io.BufferedInputStream;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.zip.GZIPInputStream;

import org.bson.BSONDecoder;
import com.mongodb.DefaultDBDecoder;
import org.bson.BSONObject;
import org.bson.BasicBSONObject;
import org.bson.types.BSONTimestamp;

import com.mongodb.BasicDBObject;
import com.wordnik.util.PrintFormat;

/**
 * Command-line tool that scans a directory for BSON dump files (optionally gzipped), decodes
 * every document in them and counts the documents per "op" / "ns" pair. The counts are written
 * to <code>stats.txt</code> in the working directory, periodically while scanning and once more
 * when the scan is finished.
 */
public class Analyzer extends MongoUtil {
  /** Directory to scan for input files; set by the <code>-i</code> option. */
  protected static String INPUT_DIR;
  /** Comma-separated collection names; a leading <code>!</code> marks an exclusion. */
  protected static String COLLECTION_STRING;
  /** Comma-separated <code>source=target</code> pairs for collections. */
  protected static String COLLECTION_MAPPING_STRING;
  /** Comma-separated <code>source=target</code> pairs for databases. */
  protected static String DATABASE_MAPPING_STRING;
  protected static Map<String, String> COLLECTION_MAPPING = new HashMap<String, String>();
  protected static Map<String, String> DATABASE_MAPPING = new HashMap<String, String>();
  protected static Set<String> COLLECTIONS_TO_SKIP = new HashSet<String>();
  protected static Set<String> COLLECTIONS_TO_ADD = new HashSet<String>();
  /** Lower bound (inclusive) applied by {@link #shouldProcessRecord}; null means unbounded. */
  protected static BSONTimestamp AFTER_TIMESTAMP = null;
  /** Upper bound (exclusive) applied by {@link #shouldProcessRecord}; null means unbounded. */
  protected static BSONTimestamp BEFORE_TIMESTAMP = null;
  /** Stays true until {@link #selectCollections()} sees at least one non-excluded name. */
  protected static boolean ONLY_COLLECTION_EXCLUSIONS = true;
  // NOTE(review): not referenced in this class; the String/String typing is assumed - confirm.
  protected static Map<String, String> NAMESPACE_COLLECTION_MAP = new HashMap<String, String>();

  // The destination settings are populated by parseArgs but not otherwise used in this class.
  protected static String DEST_DATABASE_NAME = "test";
  protected static String DEST_DATABASE_USER_NAME = null;
  protected static String DEST_DATABASE_PASSWORD = null;
  protected static String DEST_DATABASE_HOST = "localhost";

  /** Minimum number of milliseconds between two progress reports while scanning. */
  protected static long REPORT_INTERVAL = 10000;

  /** Number of documents seen so far, keyed by "op - ns". */
  Map<String, Long> counters = new HashMap<String, Long>();

  /**
   * Entry point: parses the arguments and runs the analysis. Prints the usage text and returns
   * when the arguments are invalid or no input directory was supplied.
   *
   * @param args command-line arguments, see {@link #usage()}
   */
  public static void main(String... args) {
    if (!parseArgs(args) || INPUT_DIR == null) {
      usage();
      return;
    }
    new Analyzer().run();
  }

  /**
   * Splits {@link #COLLECTION_STRING} on commas and files each name into
   * {@link #COLLECTIONS_TO_SKIP} (names prefixed with <code>!</code>, prefix removed) or
   * {@link #COLLECTIONS_TO_ADD}. Does nothing when no collection string was supplied.
   */
  protected static void selectCollections() {
    if (COLLECTION_STRING == null) {
      return;
    }
    for (String collectionName : COLLECTION_STRING.split(",")) {
      if (collectionName.startsWith("!")) {
        COLLECTIONS_TO_SKIP.add(collectionName.substring(1));
      }
      else {
        // at least one explicit inclusion: no longer an exclusions-only selection
        ONLY_COLLECTION_EXCLUSIONS = false;
        COLLECTIONS_TO_ADD.add(collectionName);
      }
    }
  }

  /**
   * Parses the two comma-separated <code>source=target</code> lists into the supplied maps.
   *
   * @param databaseMappingString database mapping list, or null to leave the map untouched
   * @param collectionMappingString collection mapping list, or null to leave the map untouched
   * @param databaseMappings receives the database mappings
   * @param collectionMappings receives the collection mappings
   * @throws IllegalArgumentException if an entry has no <code>=</code> or no target
   */
  protected static void createMappings(String databaseMappingString, String collectionMappingString, Map<String, String> databaseMappings, Map<String, String> collectionMappings) {
    parseMappings(databaseMappingString, databaseMappings);
    parseMappings(collectionMappingString, collectionMappings);
  }

  /** Adds every <code>source=target</code> entry of a comma-separated list to the target map. */
  private static void parseMappings(String mappingString, Map<String, String> target) {
    if (mappingString == null) {
      return;
    }
    StringTokenizer tk = new StringTokenizer(mappingString, ",");
    while (tk.hasMoreTokens()) {
      String token = tk.nextToken();
      String[] split = token.split("=");
      if (split.length < 2) {
        // previously surfaced as a bare ArrayIndexOutOfBoundsException
        throw new IllegalArgumentException("invalid mapping '" + token + "', expected {SOURCE}={TARGET}");
      }
      target.put(split[0], split[1]);
    }
  }

  /**
   * Scans {@link #INPUT_DIR} for files whose name contains <code>.bson</code> (not at the very
   * start of the name), decodes every document in each file and feeds it to
   * {@link #processRecord}. A failure while reading one file is printed and the scan continues
   * with the next file. Statistics are written through {@link #report} at most once per
   * {@link #REPORT_INTERVAL} while scanning and once more at the end.
   */
  protected void run() {
    long startTime = System.currentTimeMillis();
    //  decide what collections to process
    selectCollections();

    //  create any re-mappings (the result is not used here, but malformed input still fails fast)
    Map<String, String> collectionMappings = new HashMap<String, String>();
    Map<String, String> databaseMappings = new HashMap<String, String>();
    createMappings(DATABASE_MAPPING_STRING, COLLECTION_MAPPING_STRING, databaseMappings, collectionMappings);

    long operationsRead = 0;
    try {
      File[] files = new File(INPUT_DIR).listFiles();
      if (files == null) {
        // listFiles returns null when the path is not a directory or cannot be read
        System.err.println("input directory not found or not readable: " + INPUT_DIR);
      }
      else {
        List<File> filesToProcess = new ArrayList<File>();
        for (File file : files) {
          if (file.getName().indexOf(".bson") > 0) {
            filesToProcess.add(file);
          }
        }
        long lastOutput = System.currentTimeMillis();
        for (File file : filesToProcess) {
          System.out.println("analyzing file " + file.getName());
          BufferedInputStream inputStream = null;
          try {
            inputStream = openStream(file);
            BSONDecoder decoder = new DefaultDBDecoder();
            while (hasMoreData(inputStream)) {
              BSONObject obj = decoder.readObject(inputStream);
              if (obj == null) {
                break;
              }
              processRecord(new BasicDBObject((BasicBSONObject)obj));
              operationsRead++;

              long now = System.currentTimeMillis();
              if (now - lastOutput > REPORT_INTERVAL) {
                report(operationsRead, now - startTime);
                lastOutput = System.currentTimeMillis();
              }
            }
          }
          catch (Exception ex) {
            ex.printStackTrace();
          }
          finally {
            closeQuietly(inputStream);
          }
        }
      }
    }
    catch (Exception e) {
      e.printStackTrace();
    }
    report(operationsRead, System.currentTimeMillis() - startTime);
  }

  /**
   * Opens a file for buffered reading, transparently gunzipping names that end in
   * <code>.gz</code>. The underlying file stream is closed if wrapping it fails.
   */
  private static BufferedInputStream openStream(File file) throws IOException {
    InputStream raw = new FileInputStream(file);
    try {
      if (file.getName().endsWith(".gz")) {
        return new BufferedInputStream(new GZIPInputStream(raw));
      }
      return new BufferedInputStream(raw);
    }
    catch (IOException e) {
      closeQuietly(raw);
      throw e;
    }
  }

  /**
   * Tells whether at least one more byte can be read, without consuming it. This replaces an
   * <code>available() == 0</code> test, which is only an estimate and can stay non-zero at the
   * end of a gzip stream.
   */
  private static boolean hasMoreData(BufferedInputStream in) throws IOException {
    in.mark(1);
    if (in.read() < 0) {
      return false;
    }
    in.reset();
    return true;
  }

  /** Closes a resource, ignoring a null argument and any failure to close. */
  private static void closeQuietly(Closeable resource) {
    if (resource == null) {
      return;
    }
    try {
      resource.close();
    }
    catch (IOException ignored) {
      // best effort: nothing useful can be done when close itself fails
    }
  }

  /**
   * Increments the counter for the document's "op" / "ns" combination.
   *
   * @param dbo decoded document; missing fields end up as the text "null" in the counter key
   */
  protected void processRecord(BasicDBObject dbo) {
    String operationType = dbo.getString("op");
    String namespace = dbo.getString("ns");

    String key = operationType + " - " + namespace;
    Long previous = counters.get(key);
    counters.put(key, Long.valueOf(previous == null ? 1L : previous.longValue() + 1L));
  }

  /**
   * Decides whether a record passes the collection and timestamp filters. A collection passes
   * when it was explicitly added, or when only exclusions were configured and it is not one of
   * them; an excluded collection never passes. The timestamp must then be at or after
   * {@link #AFTER_TIMESTAMP} and strictly before {@link #BEFORE_TIMESTAMP} where those are set.
   *
   * @param collection collection name to check
   * @param timestamp record timestamp; must not be null when a timestamp bound is configured
   * @return true if the record should be processed
   */
  protected boolean shouldProcessRecord(String collection, BSONTimestamp timestamp) {
    boolean shouldProcess = false;

    if (COLLECTIONS_TO_ADD.contains(collection)) {
      shouldProcess = true;
    }
    if (COLLECTIONS_TO_SKIP.contains(collection)) {
      shouldProcess = false;
    }
    else {
      if (ONLY_COLLECTION_EXCLUSIONS) {
        shouldProcess = true;
      }
    }
    if (AFTER_TIMESTAMP != null) {
      if (timestamp.getTime() < AFTER_TIMESTAMP.getTime()) {
        shouldProcess = false;
      }
    }
    if (BEFORE_TIMESTAMP != null) {
      if (timestamp.getTime() >= BEFORE_TIMESTAMP.getTime()) {
        shouldProcess = false;
      }
    }
    return shouldProcess;
  }

  /**
   * Parses the command line into the static configuration fields. Every option is a flag whose
   * second character selects the setting, followed by a value.
   *
   * @param args command-line arguments
   * @return false if a flag is unknown, shorter than two characters or missing its value
   * @throws RuntimeException if the value of <code>-a</code> or <code>-b</code> is not a date
   *         in <code>yyyy-MM-dd HH:mm:ss</code> format
   */
  public static boolean parseArgs(String...args) {
    for (int i = 0; i < args.length; i++) {
      String flag = args[i];
      // reject malformed input instead of failing with an index-out-of-bounds exception
      if (flag == null || flag.length() < 2 || i + 1 >= args.length) {
        return false;
      }
      String value = args[++i];
      switch (flag.charAt(1)) {
      case 'i':
        INPUT_DIR = value;
        break;
      case 'c':
        COLLECTION_STRING = value;
        break;
      case 'R':
        DATABASE_MAPPING_STRING = value;
        break;
      case 'r':
        COLLECTION_MAPPING_STRING = value;
        break;
      case 'a':
        AFTER_TIMESTAMP = parseTimestamp(value);
        break;
      case 'b':
        BEFORE_TIMESTAMP = parseTimestamp(value);
        break;
      case 'u':
        DEST_DATABASE_USER_NAME = value;
        break;
      case 'p':
        DEST_DATABASE_PASSWORD = value;
        break;
      case 'h':
        DEST_DATABASE_HOST = value;
        break;
      default:
        return false;
      }
    }
    return true;
  }

  /** Converts a <code>yyyy-MM-dd HH:mm:ss</code> date into a second-resolution timestamp. */
  private static BSONTimestamp parseTimestamp(String text) {
    try {
      Date date = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(text);
      return new BSONTimestamp((int)(date.getTime()/1000), 0);
    }
    catch (java.text.ParseException e) {
      // keep the original message but preserve the cause for diagnosis
      throw new RuntimeException("invalid date supplied", e);
    }
  }

  /**
   * Overwrites <code>stats.txt</code> in the working directory with one
   * <code>key|count</code> line per counter. Failures are printed, not propagated.
   *
   * @param totalCount number of documents read so far (currently not part of the output)
   * @param duration elapsed time in milliseconds (currently not part of the output)
   */
  void report(long totalCount, long duration) {
    OutputStream out = null;
    Writer writer = null;
    try {
      out = new FileOutputStream(new File("stats.txt"));
      writer = new OutputStreamWriter(out, "UTF-8");
      for (Map.Entry<String, Long> entry : counters.entrySet()) {
        writer.write(entry.getKey() + "|" + entry.getValue().toString() + "\n");
      }
      // flush here so that a write failure is reported rather than lost in the quiet close
      writer.flush();
    }
    catch (Exception e) {
      e.printStackTrace();
    }
    finally {
      if (writer != null) {
        closeQuietly(writer);
      }
      else {
        closeQuietly(out);
      }
    }
  }

  /** Prints the command-line help text to standard output. */
  public static void usage() {
    System.out.println("usage: ReplayUtil");
    System.out.println(" -i : input directory");
    System.out.println(" -c : CSV collection string (prefix with ! to exclude)");
    System.out.println(" -r : collection re-targeting (format: {SOURCE}={TARGET}");
    System.out.println(" -R : database re-targeting (format: {SOURCE}={TARGET}");
    System.out.println(" -a : only process entries after this timestamp");
    System.out.println(" -b : only process entries before this timestamp");
    System.out.println(" -h : destination hostname");
    System.out.println(" [-u : username]");
    System.out.println(" [-p : password]");
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy