All downloads are free. Search and download functionality uses the official Maven repository.

com.datastax.data.prepare.spark.dataset.database.MongodbHandler Maven / Gradle / Ivy

The newest version!
package com.datastax.data.prepare.spark.dataset.database;

import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.Operator;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;

import java.util.HashMap;
import java.util.Map;

/**
 * Operator exposing two static components, {@code loadMongodb} and {@code saveMongodb},
 * that read from and write to a MongoDB collection through the
 * {@code com.mongodb.spark.sql} Spark data source.
 */
public class MongodbHandler implements Operator {

    /** Spark data source name used for both reading and writing. */
    private static final String MONGO_FORMAT = "com.mongodb.spark.sql";

    /**
     * Loads a MongoDB collection as a Spark dataset.
     *
     * <p>The read is configured with the connection URI plus two partitioner options
     * ({@code partitionKey = _id}, {@code partitionSizeMB = 32}).
     *
     * @param host     host part of the connection URI (inserted verbatim after {@code mongodb://})
     * @param database database name
     * @param table    collection name
     * @return the dataset produced by the reader's {@code load()} call
     */
    @InsightComponent( name = "loadMongodb", description = "loadMongodb")
    public static Dataset<Row> load(
            @InsightComponentArg(name = "host", description = "host", request = true) String host,
            @InsightComponentArg(name = "database", description = "database", request = true) String database,
            @InsightComponentArg(name = "table", description = "table", request = true) String table) {
        // A plain typed map replaces the anonymous HashMap subclass (double-brace initialization).
        Map<String, String> options = new HashMap<>();
        options.put("spark.mongodb.input.uri", buildUri(host, database, table));
        options.put("spark.mongodb.input.partitionerOptions.partitionKey", "_id");
        options.put("spark.mongodb.input.partitionerOptions.partitionSizeMB", "32");

        // NOTE(review): assumes getSession() returns a SparkSession, so load() yields Dataset<Row>.
        return SparkContextBuilder.getSession().read()
                .format(MONGO_FORMAT)
                .options(options)
                .load();
    }

    /**
     * Writes a dataset to a MongoDB collection.
     *
     * @param dataset  dataset to write
     * @param host     host part of the connection URI (inserted verbatim after {@code mongodb://})
     * @param database database name
     * @param table    collection name
     * @param saveMode one of {@code append}, {@code overwrite}, {@code errorIfExists},
     *                 {@code ignore}; {@code null} or any other value is treated as {@code ignore}
     */
    @InsightComponent( name = "saveMongodb", description = "saveMongodb")
    public static void save(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "数据集",request = true,defaultValue = "${output}") Dataset<?> dataset,
            @InsightComponentArg(name = "host", description = "host", request = true) String host,
            @InsightComponentArg(name = "database", description = "database", request = true) String database,
            @InsightComponentArg(name = "table", description = "table", request = true) String table,
            @InsightComponentArg(name = "saveMode", description = "saveMode", request = true, defaultValue = "ignore", items = "append;overwrite;errorIfExists;ignore") String saveMode) {

        Map<String, String> options = new HashMap<>();
        options.put("spark.mongodb.output.uri", buildUri(host, database, table));

        dataset.write()
                .format(MONGO_FORMAT)
                .options(options)
                .mode(getSaveMode(saveMode))
                .save();
    }

    /** Builds the {@code mongodb://host/database.table} URI shared by load and save. */
    private static String buildUri(String host, String database, String table) {
        return "mongodb://" + host + "/" + database + "." + table;
    }

    /**
     * Maps a save-mode name to the corresponding {@link SaveMode}.
     *
     * @param saveMode mode name; matching is case-sensitive
     * @return the matching mode, or {@link SaveMode#Ignore} when the name is
     *         {@code null} or not recognized
     */
    private static SaveMode getSaveMode(String saveMode) {
        // Switching on a null String throws NullPointerException; fall back to the
        // declared default ("ignore") instead.
        if (saveMode == null) {
            return SaveMode.Ignore;
        }
        switch (saveMode) {
            case "append":
                return SaveMode.Append;
            case "overwrite":
                return SaveMode.Overwrite;
            case "errorIfExists":
                return SaveMode.ErrorIfExists;
            case "ignore":
            default:
                return SaveMode.Ignore;
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy