com.datastax.data.prepare.spark.dataset.database.MongodbHandler Maven / Gradle / Ivy
The newest version!
package com.datastax.data.prepare.spark.dataset.database;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.insight.core.driver.SparkContextBuilder;
import com.datastax.insight.spec.Operator;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import java.util.HashMap;
import java.util.Map;
/**
 * Operators that read a MongoDB collection into a Spark {@link Dataset} and write a
 * {@link Dataset} to a MongoDB collection, both through the {@code com.mongodb.spark.sql}
 * data source.
 */
public class MongodbHandler implements Operator {

    /** Data source name handed to {@code format(...)} for both reads and writes. */
    private static final String MONGO_FORMAT = "com.mongodb.spark.sql";

    /**
     * Loads a MongoDB collection as a dataset.
     *
     * <p>The read is configured with the connection URI built from the arguments, a partition
     * key of {@code _id} and a partition size of {@code 32} (MB).
     *
     * @param host     host part of the connection URI
     * @param database database name
     * @param table    collection name
     * @return the dataset returned by the Spark reader
     */
    @InsightComponent( name = "loadMongodb", description = "loadMongodb")
    public static Dataset load(
            @InsightComponentArg(name = "host", description = "host", request = true) String host,
            @InsightComponentArg(name = "database", description = "database", request = true) String database,
            @InsightComponentArg(name = "table", description = "table", request = true) String table) {
        // A plain typed map replaces the raw double-brace HashMap, which created an extra
        // anonymous class and passed an unchecked map to options(...).
        Map<String, String> options = new HashMap<>();
        options.put("spark.mongodb.input.uri", buildUri(host, database, table));
        options.put("spark.mongodb.input.partitionerOptions.partitionKey", "_id");
        options.put("spark.mongodb.input.partitionerOptions.partitionSizeMB", "32");

        return SparkContextBuilder.getSession().read()
                .format(MONGO_FORMAT)
                .options(options)
                .load();
    }

    /**
     * Writes a dataset to a MongoDB collection.
     *
     * @param dataset  dataset to write
     * @param host     host part of the connection URI
     * @param database database name
     * @param table    collection name
     * @param saveMode one of {@code append}, {@code overwrite}, {@code errorIfExists},
     *                 {@code ignore}; {@code null} or any other value is treated as {@code ignore}
     */
    @InsightComponent( name = "saveMongodb", description = "saveMongodb")
    public static void save(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "数据集",request = true,defaultValue = "${output}") Dataset dataset,
            @InsightComponentArg(name = "host", description = "host", request = true) String host,
            @InsightComponentArg(name = "database", description = "database", request = true) String database,
            @InsightComponentArg(name = "table", description = "table", request = true) String table,
            @InsightComponentArg(name = "saveMode", description = "saveMode", request = true, defaultValue = "ignore", items = "append;overwrite;errorIfExists;ignore") String saveMode) {
        Map<String, String> options = new HashMap<>();
        options.put("spark.mongodb.output.uri", buildUri(host, database, table));

        dataset.write()
                .format(MONGO_FORMAT)
                .options(options)
                .mode(getSaveMode(saveMode))
                .save();
    }

    /**
     * Builds the connection URI {@code mongodb://<host>/<database>.<table>} shared by
     * {@link #load} and {@link #save}.
     */
    private static String buildUri(String host, String database, String table) {
        return "mongodb://" + host + "/" + database + "." + table;
    }

    /**
     * Translates the textual save mode into a Spark {@link SaveMode}.
     *
     * @param saveMode textual mode; matching is exact and case-sensitive
     * @return the matching mode, or {@link SaveMode#Ignore} for {@code "ignore"}, {@code null}
     *         and any unrecognised value
     */
    private static SaveMode getSaveMode(String saveMode) {
        // switch on a null String throws NullPointerException; fall back to the same
        // default that unrecognised values already receive.
        if (saveMode == null) {
            return SaveMode.Ignore;
        }
        switch (saveMode) {
            case "append":
                return SaveMode.Append;
            case "overwrite":
                return SaveMode.Overwrite;
            case "errorIfExists":
                return SaveMode.ErrorIfExists;
            default:
                // Covers "ignore" as well as anything unrecognised.
                return SaveMode.Ignore;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy