All downloads are free. The search and download functionality uses the official Maven repository.

com.microsoft.azure.documentdb.hadoop.DocumentDBOutputFormat Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation.  All rights reserved.
//------------------------------------------------------------
package com.microsoft.azure.documentdb.hadoop;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 * An output format that is used to write data to documentdb.
 *
 * <p>All connection settings (endpoint, key, database name, output collection names, range index
 * and upsert options) are read from the job {@link Configuration} through
 * {@code ConfigurationUtil}.
 *
 * <p>NOTE(review): {@code OutputFormat} and {@code RecordWriter} are used as raw types in this
 * file; confirm the intended key/value type arguments against the record writer before adding them.
 */
public class DocumentDBOutputFormat extends OutputFormat {

    /**
     * Validates the required properties needed to write to documentdb.
     *
     * <p>The endpoint, key and database name must each be present and contain at least one
     * non-whitespace character, and at least one output collection name must be configured.
     * The checks run in that order and the first failing one is reported.
     *
     * @param context job context whose configuration is validated
     * @throws IOException if a required property is missing or blank
     */
    @Override
    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        final String endpoint = ConfigurationUtil.getDBEndpoint(conf);
        final String key = ConfigurationUtil.getDBKey(conf);
        final String dbName = ConfigurationUtil.getDBName(conf);
        final String[] collectionNames = ConfigurationUtil.getOutputCollectionNames(conf);

        // A property that is set but empty is as unusable as one that is absent, so both are
        // rejected here instead of surfacing later as a connection failure in the writer.
        requireValue(endpoint, "DB_HOST must be set for the jobconf");
        requireValue(key, "DB_KEY must be set for the jobconf");
        requireValue(dbName, "DB_NAME must be set for the jobconf");
        if (collectionNames == null || collectionNames.length == 0) {
            throw new IOException("OUTPUT_COLLECTION_NAMES must be set for the jobconf as comma separated names");
        }
    }

    /**
     * Returns a new {@code DocumentDBOutputCommitter}; the supplied context is not consulted.
     *
     * @param context task attempt context (unused)
     * @return a freshly created committer
     */
    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
        return new DocumentDBOutputCommitter();
    }

    /**
     * Creates an instance of DocumentDBRecordWriter.
     *
     * <p>The writer receives the task configuration together with the endpoint, key, database
     * name, output collection names, range index setting and upsert setting read from it.
     *
     * @param context task attempt context that supplies the configuration
     * @return a new record writer built from the configured settings
     */
    @Override
    public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException,
            InterruptedException {
        Configuration conf = context.getConfiguration();
        return new DocumentDBRecordWriter(conf, ConfigurationUtil.getDBEndpoint(conf),
                ConfigurationUtil.getDBKey(conf), ConfigurationUtil.getDBName(conf),
                ConfigurationUtil.getOutputCollectionNames(conf), ConfigurationUtil.getRangeIndex(conf),
                ConfigurationUtil.getUpsert(conf));
    }

    /**
     * Throws an {@link IOException} carrying {@code message} when {@code value} is null, empty
     * or consists only of whitespace.
     */
    private static void requireValue(String value, String message) throws IOException {
        if (value == null || value.trim().isEmpty()) {
            throw new IOException(message);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy