All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.mongodb.hadoop.output.MongoRecordWriter Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2011 10gen Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.mongodb.hadoop.output;

import com.mongodb.*;
import com.mongodb.hadoop.*;
import com.mongodb.hadoop.io.BSONWritable;
import com.mongodb.hadoop.io.MongoUpdateWritable;
import org.apache.hadoop.mapreduce.*;
import org.bson.*;

import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.logging.*;


public class MongoRecordWriter extends RecordWriter {

    private static final Log log = LogFactory.getLog( MongoRecordWriter.class );
    private final String[] updateKeys;
    private final boolean multiUpdate;
    private int roundRobinCounter = 0;

    public MongoRecordWriter( DBCollection c, TaskAttemptContext ctx ){
        this(c, ctx, null);
    }

    public MongoRecordWriter( DBCollection c, TaskAttemptContext ctx, String[] updateKeys) {
        this(c, ctx, updateKeys, false);
    }

    public MongoRecordWriter( DBCollection c, TaskAttemptContext ctx, String[] updateKeys, boolean multi) {
        this(Arrays.asList(c), ctx, updateKeys, multi);
    }

    public MongoRecordWriter( List c, TaskAttemptContext ctx ){
        this(c, ctx, null);
    }

    public MongoRecordWriter( List c, TaskAttemptContext ctx, String[] updateKeys) {
        this(c, ctx, updateKeys, false);
    }

    public MongoRecordWriter( List c, TaskAttemptContext ctx, String[] updateKeys, boolean multi) {
        _collections = new ArrayList(c);
        _context = ctx;
        this.updateKeys = updateKeys;
        this.multiUpdate = false;
        this.numberOfHosts = c.size();

        //authenticate if necessary - but don't auth twice on same DB
        MongoConfig config = new MongoConfig(ctx.getConfiguration());
        if(config.getAuthURI() != null){
            MongoURI authURI = config.getAuthURI();
            if(authURI.getUsername() != null &&
                authURI.getPassword() != null ){
                // need to verify that it is not already one of the collections we are writing to.
                DBCollection _collection = _collections.get(0);
                DB targetAuthDB = _collection.getDB().getSisterDB(authURI.getDatabase());
                if(!(targetAuthDB.getName().equals(_collection.getDB().getName()))){
                    targetAuthDB.authenticate(authURI.getUsername(), authURI.getPassword());
                }
            }
        }
    }

    public void close( TaskAttemptContext context ){
        for (DBCollection collection : _collections) {
            collection.getDB().getLastError();
        }
    }

    public void write( K key, V value ) throws IOException{
        final DBObject o = new BasicDBObject();

        if( value instanceof MongoUpdateWritable ){
            //ignore the key - just use the update directly.
            MongoUpdateWritable muw = (MongoUpdateWritable)value;
            try{
                DBCollection dbCollection = getDbCollectionByRoundRobin();
                dbCollection.update(new BasicDBObject(muw.getQuery()),
                                    new BasicDBObject(muw.getModifiers()),
                                    muw.isUpsert(),
                                    muw.isMultiUpdate());
                return;
            } catch ( final MongoException e ) {
                e.printStackTrace();
                throw new IOException( "can't write to mongo", e );
            }
        }

        if ( key instanceof BSONWritable ){
            o.put("_id", ((BSONWritable)key).getDoc());
        } else if ( key instanceof BSONObject ){
            o.put( "_id", key );
        } else {
            o.put( "_id", BSONWritable.toBSON(key) );
        }

        if (value instanceof BSONWritable ){
            o.putAll( ((BSONWritable)value).getDoc() );
        } else if ( value instanceof MongoOutput ){
            ( (MongoOutput) value ).appendAsValue( o );
        } else if ( value instanceof BSONObject ){
            o.putAll( (BSONObject) value );
        } else{
            o.put( "value", BSONWritable.toBSON( value ) );
        }

        try {
            DBCollection dbCollection = getDbCollectionByRoundRobin();

            if (updateKeys == null) {
                dbCollection.save(o);
            } else {
                // Form the query fields
                DBObject query = new BasicDBObject(updateKeys.length);
                for (String updateKey : updateKeys) {
                    query.put(updateKey, o.get(updateKey));
                    o.removeField(updateKey);
                }
                // If _id is null remove it, we don't want to override with null _id
                if (o.get("_id") == null) {
                    o.removeField("_id");
                }
                DBObject set = new BasicDBObject().append("$set", o);
                dbCollection.update(query, set, true, multiUpdate);
            }
        }
        catch ( final MongoException e ) {
            e.printStackTrace();
            throw new IOException( "can't write to mongo", e );
        }
    }

    private synchronized DBCollection getDbCollectionByRoundRobin() {
        int hostIndex = (roundRobinCounter++ & 0x7FFFFFFF) % numberOfHosts;
        return _collections.get(hostIndex);
    }

    public void ensureIndex(DBObject index, DBObject options) {
        // just do it on one mongodS
        DBCollection dbCollection = _collections.get(0);
        dbCollection.ensureIndex(index, options);
    }

    public TaskAttemptContext getContext(){
        return _context;
    }

    final List _collections;
    final int numberOfHosts;
    final TaskAttemptContext _context;
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy