com.mongodb.hadoop.util.MapredMongoConfigUtil (mongo-hadoop-core)
The MongoDB Connector for Hadoop is a plugin for Hadoop that provides the ability to use MongoDB as an input source and/or an output destination.
/*
* Copyright 2010-2013 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.hadoop.util;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.MongoURI;
import com.mongodb.hadoop.splitter.MongoSplitter;
import com.mongodb.util.JSON;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
* Configuration helper tool for MongoDB related Map/Reduce jobs
*/
public final class MapredMongoConfigUtil {
private static final Log LOG = LogFactory.getLog(MapredMongoConfigUtil.class);
/**
* The JOB_* values are entirely optional and are disregarded unless you use the MongoTool base toolset. If you don't, feel free to
* ignore these.
*/
public static final String JOB_VERBOSE = "mongo.job.verbose";
public static final String JOB_BACKGROUND = "mongo.job.background";
public static final String JOB_MAPPER = "mongo.job.mapper";
public static final String JOB_COMBINER = "mongo.job.combiner";
public static final String JOB_PARTITIONER = "mongo.job.partitioner";
public static final String JOB_REDUCER = "mongo.job.reducer";
public static final String JOB_SORT_COMPARATOR = "mongo.job.sort_comparator";
public static final String JOB_MAPPER_OUTPUT_KEY = "mongo.job.mapper.output.key";
public static final String JOB_MAPPER_OUTPUT_VALUE = "mongo.job.mapper.output.value";
public static final String JOB_INPUT_FORMAT = "mongo.job.input.format";
public static final String JOB_OUTPUT_FORMAT = "mongo.job.output.format";
public static final String JOB_OUTPUT_KEY = "mongo.job.output.key";
public static final String JOB_OUTPUT_VALUE = "mongo.job.output.value";
public static final String INPUT_URI = "mongo.input.uri";
public static final String INPUT_MONGOS_HOSTS = "mongo.input.mongos_hosts";
public static final String OUTPUT_URI = "mongo.output.uri";
public static final String MONGO_SPLITTER_CLASS = "mongo.splitter.class";
/**
* The MongoDB field to read from for the Mapper Input.
*
* This will be fed to your mapper as the "Key" for the input.
*
* Defaults to {@code _id}
*/
public static final String INPUT_KEY = "mongo.input.key";
public static final String INPUT_NOTIMEOUT = "mongo.input.notimeout";
public static final String INPUT_QUERY = "mongo.input.query";
public static final String INPUT_FIELDS = "mongo.input.fields";
public static final String INPUT_SORT = "mongo.input.sort";
public static final String INPUT_LIMIT = "mongo.input.limit";
public static final String INPUT_SKIP = "mongo.input.skip";
public static final String INPUT_LAZY_BSON = "mongo.input.lazy_bson";
//Settings specific to bson reading/writing.
public static final String BSON_READ_SPLITS = "bson.split.read_splits";
public static final String BSON_WRITE_SPLITS = "bson.split.write_splits";
public static final String BSON_OUTPUT_BUILDSPLITS = "bson.output.build_splits";
public static final String BSON_PATHFILTER = "bson.pathfilter.class";
/**
* A username and password to use.
*
* This is necessary when running jobs against a sharded cluster, as access to the config database is needed to read the chunk
* information used when calculating splits.
*/
public static final String AUTH_URI = "mongo.auth.uri";
/**
* When *not* using 'read_from_shards' or 'read_shard_chunks', the number of megabytes per split to create for the input data.
*
* Currently defaults to 8 MB; tweak it as necessary for your code.
*
* This default will likely change as we research better options.
*/
public static final String INPUT_SPLIT_SIZE = "mongo.input.split_size";
public static final int DEFAULT_SPLIT_SIZE = 8; // 8 mb per manual (non-sharding) split
/**
* If CREATE_INPUT_SPLITS is true but SPLITS_USE_CHUNKS is false, Mongo-Hadoop will attempt to create custom input splits for you. By
* default it will split on {@code _id}, which is a reasonable/sane default.
*
* If you want to customize that split point for efficiency reasons (such as a different distribution) you may set this to any valid
* field name. The restrictions on this key name are the *exact same rules* as when sharding an existing MongoDB collection. You must
* have an index on the field, and follow the other rules outlined in the docs.
*
* This must be a JSON document, and not just a field name!
*
* @see <a href="http://www.mongodb.org/display/DOCS/Sharding+Introduction#ShardingIntroduction-ShardKeys">Sharding Introduction: Shard Keys</a>
*/
public static final String INPUT_SPLIT_KEY_PATTERN = "mongo.input.split.split_key_pattern";
/**
* If {@code true}, the driver will attempt to split the MongoDB Input data (if reading from Mongo) into multiple InputSplits to allow
* parallelism/concurrency in processing within Hadoop. That is to say, Hadoop will assign one InputSplit per mapper.
*
* This is {@code true} by default now, but if {@code false}, only one InputSplit (your whole collection) will be assigned to Hadoop,
* severely reducing parallel mapping.
*/
public static final String CREATE_INPUT_SPLITS = "mongo.input.split.create_input_splits";
/**
* If {@code true}, in a sharded setup splits will be made by connecting to individual backend {@code mongod}s. This can be unsafe: if
* {@code mongos} is moving chunks around, you might see duplicate data or miss some data entirely. Defaults to {@code false}.
*/
public static final String SPLITS_USE_SHARDS = "mongo.input.split.read_from_shards";
/**
* If {@code true}, one split = one shard chunk. If {@link #SPLITS_USE_SHARDS} is not true, splits will still use chunks, but will
* connect through {@code mongos} instead of the individual backend {@code mongod}s (the safe thing to do). If {@link
* #SPLITS_USE_SHARDS} is {@code true} but this is {@code false}, one split will be made for each backend shard. THIS IS UNSAFE and may
* result in data being processed multiple times. Defaults to {@code true}.
*/
public static final String SPLITS_USE_CHUNKS = "mongo.input.split.read_shard_chunks";
/**
* If {@code true} and the shards are backed by replica sets, queries for splits are allowed to run on slave (secondary) members. If
* set, this will override any option passed on the URI.
*
* Defaults to {@code false}
*/
public static final String SPLITS_SLAVE_OK = "mongo.input.split.allow_read_from_secondaries";
/**
* If true then queries for splits will be constructed using $lt/$gt instead of $min and $max.
*
* Defaults to {@code false}
*/
public static final String SPLITS_USE_RANGEQUERY = "mongo.input.split.use_range_queries";
private MapredMongoConfigUtil() {
}
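/*
* Usage sketch (illustrative, not from the original source; the URIs, database, and collection names below are assumptions):
*
*   Configuration conf = new Configuration();
*   MapredMongoConfigUtil.setInputURI(conf, "mongodb://localhost:27017/demo.in");
*   MapredMongoConfigUtil.setOutputURI(conf, "mongodb://localhost:27017/demo.out");
*   MapredMongoConfigUtil.setJobVerbose(conf, true);
*/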
public static boolean isJobVerbose(final Configuration conf) {
return conf.getBoolean(JOB_VERBOSE, false);
}
public static void setJobVerbose(final Configuration conf, final boolean val) {
conf.setBoolean(JOB_VERBOSE, val);
}
public static boolean isJobBackground(final Configuration conf) {
return conf.getBoolean(JOB_BACKGROUND, false);
}
public static void setJobBackground(final Configuration conf, final boolean val) {
conf.setBoolean(JOB_BACKGROUND, val);
}
// TODO - In light of key/value specifics should we have a base MongoMapper
// class?
public static Class<? extends Mapper> getMapper(final Configuration conf) {
/** TODO - Support multiple inputs via getClasses ? **/
return conf.getClass(JOB_MAPPER, null, Mapper.class);
}
public static void setMapper(final Configuration conf, final Class<? extends Mapper> val) {
conf.setClass(JOB_MAPPER, val, Mapper.class);
}
public static Class<?> getMapperOutputKey(final Configuration conf) {
return conf.getClass(JOB_MAPPER_OUTPUT_KEY, null);
}
public static void setMapperOutputKey(final Configuration conf, final Class<?> val) {
conf.setClass(JOB_MAPPER_OUTPUT_KEY, val, Object.class);
}
public static Class<?> getMapperOutputValue(final Configuration conf) {
return conf.getClass(JOB_MAPPER_OUTPUT_VALUE, null);
}
public static void setMapperOutputValue(final Configuration conf, final Class<?> val) {
conf.setClass(JOB_MAPPER_OUTPUT_VALUE, val, Object.class);
}
public static Class<? extends Reducer> getCombiner(final Configuration conf) {
return conf.getClass(JOB_COMBINER, null, Reducer.class);
}
public static void setCombiner(final Configuration conf, final Class<? extends Reducer> val) {
conf.setClass(JOB_COMBINER, val, Reducer.class);
}
// TODO - In light of key/value specifics should we have a base MongoReducer
// class?
public static Class<? extends Reducer> getReducer(final Configuration conf) {
/** TODO - Support multiple outputs via getClasses ? **/
return conf.getClass(JOB_REDUCER, null, Reducer.class);
}
public static void setReducer(final Configuration conf, final Class<? extends Reducer> val) {
conf.setClass(JOB_REDUCER, val, Reducer.class);
}
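/*
* Sketch of wiring job classes (WordCountMapper and WordCountReducer are hypothetical org.apache.hadoop.mapred implementations,
* not part of this library; conf is a Hadoop Configuration):
*
*   MapredMongoConfigUtil.setMapper(conf, WordCountMapper.class);
*   MapredMongoConfigUtil.setReducer(conf, WordCountReducer.class);
*   MapredMongoConfigUtil.setMapperOutputKey(conf, org.apache.hadoop.io.Text.class);
*   MapredMongoConfigUtil.setMapperOutputValue(conf, org.apache.hadoop.io.IntWritable.class);
*/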
public static Class<? extends Partitioner> getPartitioner(final Configuration conf) {
return conf.getClass(JOB_PARTITIONER, null, Partitioner.class);
}
public static void setPartitioner(final Configuration conf, final Class<? extends Partitioner> val) {
conf.setClass(JOB_PARTITIONER, val, Partitioner.class);
}
public static Class<? extends RawComparator> getSortComparator(final Configuration conf) {
return conf.getClass(JOB_SORT_COMPARATOR, null, RawComparator.class);
}
public static void setSortComparator(final Configuration conf, final Class<? extends RawComparator> val) {
conf.setClass(JOB_SORT_COMPARATOR, val, RawComparator.class);
}
public static Class<? extends OutputFormat> getOutputFormat(final Configuration conf) {
return conf.getClass(JOB_OUTPUT_FORMAT, null, OutputFormat.class);
}
public static void setOutputFormat(final Configuration conf, final Class<? extends OutputFormat> val) {
conf.setClass(JOB_OUTPUT_FORMAT, val, OutputFormat.class);
}
public static Class<?> getOutputKey(final Configuration conf) {
return conf.getClass(JOB_OUTPUT_KEY, null);
}
public static void setOutputKey(final Configuration conf, final Class<?> val) {
conf.setClass(JOB_OUTPUT_KEY, val, Object.class);
}
public static Class<?> getOutputValue(final Configuration conf) {
return conf.getClass(JOB_OUTPUT_VALUE, null);
}
public static void setOutputValue(final Configuration conf, final Class<?> val) {
conf.setClass(JOB_OUTPUT_VALUE, val, Object.class);
}
public static Class<? extends InputFormat> getInputFormat(final Configuration conf) {
return conf.getClass(JOB_INPUT_FORMAT, null, InputFormat.class);
}
public static void setInputFormat(final Configuration conf, final Class<? extends InputFormat> val) {
conf.setClass(JOB_INPUT_FORMAT, val, InputFormat.class);
}
public static List<MongoClientURI> getMongoURIs(final Configuration conf, final String key) {
final String raw = conf.get(key);
if (raw != null && !raw.trim().isEmpty()) {
List<MongoClientURI> result = new LinkedList<MongoClientURI>();
String[] split = StringUtils.split(raw, ", ");
for (String mongoURI : split) {
result.add(new MongoClientURI(mongoURI));
}
return result;
} else {
return Collections.emptyList();
}
}
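/*
* Example (hostnames are illustrative): the raw value is split on commas and whitespace, yielding one MongoClientURI per entry.
*
*   conf.set(MapredMongoConfigUtil.OUTPUT_URI,
*            "mongodb://host1:27017/db.c1, mongodb://host2:27017/db.c2");
*   List<MongoClientURI> uris = MapredMongoConfigUtil.getMongoURIs(conf, MapredMongoConfigUtil.OUTPUT_URI);
*   // uris.size() == 2
*/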
/**
* @deprecated use {@link #getMongoClientURI(Configuration, String)} instead
*/
@Deprecated
@SuppressWarnings("deprecation")
public static MongoURI getMongoURI(final Configuration conf, final String key) {
final String raw = conf.get(key);
if (raw != null && !raw.trim().isEmpty()) {
return new MongoURI(raw);
} else {
return null;
}
}
public static MongoClientURI getMongoClientURI(final Configuration conf, final String key) {
final String raw = conf.get(key);
return raw != null && !raw.trim().isEmpty() ? new MongoClientURI(raw) : null;
}
public static MongoClientURI getInputURI(final Configuration conf) {
return getMongoClientURI(conf, INPUT_URI);
}
public static MongoClientURI getAuthURI(final Configuration conf) {
return getMongoClientURI(conf, AUTH_URI);
}
public static List<DBCollection> getCollections(final List<MongoClientURI> uris, final MongoClientURI authURI) {
List<DBCollection> dbCollections = new LinkedList<DBCollection>();
for (MongoClientURI uri : uris) {
if (authURI != null) {
dbCollections.add(getCollectionWithAuth(uri, authURI));
} else {
dbCollections.add(getCollection(uri));
}
}
return dbCollections;
}
/**
* @deprecated use {@link #getCollection(MongoClientURI)}
*/
@Deprecated
public static DBCollection getCollection(final MongoURI uri) {
return getCollection(new MongoClientURI(uri.toString()));
}
public static DBCollection getCollection(final MongoClientURI uri) {
try {
return new MongoClient(uri).getDB(uri.getDatabase()).getCollection(uri.getCollection());
} catch (Exception e) {
throw new IllegalArgumentException("Couldn't connect and authenticate to get collection", e);
}
}
/**
* @deprecated use {@link #getCollectionWithAuth(MongoClientURI, MongoClientURI)} instead
*/
@Deprecated
public static DBCollection getCollectionWithAuth(final MongoURI uri, final MongoURI authURI) {
return getCollectionWithAuth(new MongoClientURI(uri.toString()), new MongoClientURI(authURI.toString()));
}
public static DBCollection getCollectionWithAuth(final MongoClientURI uri, final MongoClientURI authURI) {
//Make sure auth uri is valid and actually has a username/pw to use
if (authURI == null || authURI.getUsername() == null || authURI.getPassword() == null) {
throw new IllegalArgumentException("auth URI is empty or does not contain a valid username/password combination.");
}
DBCollection coll;
try {
Mongo mongo = new MongoClient(authURI);
coll = mongo.getDB(uri.getDatabase()).getCollection(uri.getCollection());
return coll;
} catch (Exception e) {
throw new IllegalArgumentException("Couldn't connect and authenticate to get collection", e);
}
}
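/*
* Sketch (hostnames and credentials are assumptions): the connection is made with the credentials from authURI, while the
* database and collection names are taken from the data URI.
*
*   MongoClientURI data = new MongoClientURI("mongodb://shardedcluster:27017/demo.in");
*   MongoClientURI auth = new MongoClientURI("mongodb://user:pw@shardedcluster:27017/admin");
*   DBCollection coll = MapredMongoConfigUtil.getCollectionWithAuth(data, auth);
*/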
public static DBCollection getOutputCollection(final Configuration conf) {
try {
return getCollection(getOutputURI(conf));
} catch (final Exception e) {
throw new IllegalArgumentException("Unable to connect to MongoDB Output Collection.", e);
}
}
public static List<DBCollection> getOutputCollections(final Configuration conf) {
try {
return getCollections(getOutputURIs(conf), getAuthURI(conf));
} catch (final Exception e) {
throw new IllegalArgumentException("Unable to connect to MongoDB Output Collection.", e);
}
}
public static DBCollection getInputCollection(final Configuration conf) {
try {
return getCollection(getInputURI(conf));
} catch (final Exception e) {
throw new IllegalArgumentException(
"Unable to connect to MongoDB Input Collection at '" + getInputURI(conf) + "'", e);
}
}
/**
* @deprecated use {@link #setMongoURI(Configuration, String, MongoClientURI)} instead
*/
@Deprecated
public static void setMongoURI(final Configuration conf, final String key, final MongoURI value) {
conf.set(key, value.toString()); // todo - verify you can toString a
// URI object
}
public static void setMongoURI(final Configuration conf, final String key, final MongoClientURI value) {
conf.set(key, value.toString()); // todo - verify you can toString a
// URI object
}
public static void setMongoURIString(final Configuration conf, final String key, final String value) {
setMongoURI(conf, key, new MongoClientURI(value));
}
public static void setAuthURI(final Configuration conf, final String uri) {
setMongoURIString(conf, AUTH_URI, uri);
}
public static void setInputURI(final Configuration conf, final String uri) {
setMongoURIString(conf, INPUT_URI, uri);
}
/**
* @deprecated use {@link #setInputURI(Configuration, MongoClientURI)} instead
*/
@Deprecated
@SuppressWarnings("deprecation")
public static void setInputURI(final Configuration conf, final MongoURI uri) {
setMongoURI(conf, INPUT_URI, uri);
}
public static void setInputURI(final Configuration conf, final MongoClientURI uri) {
setMongoURI(conf, INPUT_URI, uri);
}
public static List<MongoClientURI> getOutputURIs(final Configuration conf) {
return getMongoURIs(conf, OUTPUT_URI);
}
public static MongoClientURI getOutputURI(final Configuration conf) {
return getMongoClientURI(conf, OUTPUT_URI);
}
public static void setOutputURI(final Configuration conf, final String uri) {
setMongoURIString(conf, OUTPUT_URI, uri);
}
/**
* @deprecated use {@link #setOutputURI(Configuration, MongoClientURI)} instead
*/
@Deprecated
@SuppressWarnings("deprecation")
public static void setOutputURI(final Configuration conf, final MongoURI uri) {
setMongoURI(conf, OUTPUT_URI, uri);
}
public static void setOutputURI(final Configuration conf, final MongoClientURI uri) {
setMongoURI(conf, OUTPUT_URI, uri);
}
/**
* Sets JSON, but first validates that it is parseable into a DBObject.
*/
public static void setJSON(final Configuration conf, final String key, final String value) {
try {
final Object dbObj = JSON.parse(value);
setDBObject(conf, key, (DBObject) dbObj);
} catch (final Exception e) {
LOG.error("Cannot parse JSON...", e);
throw new IllegalArgumentException("Provided JSON String is not representable/parseable as a DBObject.",
e);
}
}
public static DBObject getDBObject(final Configuration conf, final String key) {
try {
final String json = conf.get(key);
final DBObject obj = (DBObject) JSON.parse(json);
if (obj == null) {
return new BasicDBObject();
} else {
return obj;
}
} catch (final Exception e) {
throw new IllegalArgumentException("Provided JSON String is not representable/parseable as a DBObject.",
e);
}
}
public static void setDBObject(final Configuration conf, final String key, final DBObject value) {
conf.set(key, JSON.serialize(value));
}
public static void setQuery(final Configuration conf, final String query) {
setJSON(conf, INPUT_QUERY, query);
}
public static void setQuery(final Configuration conf, final DBObject query) {
setDBObject(conf, INPUT_QUERY, query);
}
/**
* Returns the configured query as a DBObject. If you want a String, call toString() on the returned object or use JSON.serialize().
*/
public static DBObject getQuery(final Configuration conf) {
return getDBObject(conf, INPUT_QUERY);
}
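/*
* Sketch (the query document is an illustrative example; conf is a Hadoop Configuration): both forms store the same document
* under "mongo.input.query"; the String form is validated by parsing it first.
*
*   MapredMongoConfigUtil.setQuery(conf, "{ \"status\": \"active\" }");
*   // equivalently:
*   MapredMongoConfigUtil.setQuery(conf, new BasicDBObject("status", "active"));
*   DBObject query = MapredMongoConfigUtil.getQuery(conf);
*/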
public static void setFields(final Configuration conf, final String fields) {
setJSON(conf, INPUT_FIELDS, fields);
}
public static void setFields(final Configuration conf, final DBObject fields) {
setDBObject(conf, INPUT_FIELDS, fields);
}
/**
* Returns the configured fields as a DBObject. If you want a String, call toString() on the returned object or use JSON.serialize().
*/
public static DBObject getFields(final Configuration conf) {
return getDBObject(conf, INPUT_FIELDS);
}
public static void setSort(final Configuration conf, final String sort) {
setJSON(conf, INPUT_SORT, sort);
}
public static void setSort(final Configuration conf, final DBObject sort) {
setDBObject(conf, INPUT_SORT, sort);
}
/**
* Returns the configured sort as a DBObject. If you want a String, call toString() on the returned object or use JSON.serialize().
*/
public static DBObject getSort(final Configuration conf) {
return getDBObject(conf, INPUT_SORT);
}
public static int getLimit(final Configuration conf) {
return conf.getInt(INPUT_LIMIT, 0);
}
public static void setLimit(final Configuration conf, final int limit) {
conf.setInt(INPUT_LIMIT, limit);
}
public static int getSkip(final Configuration conf) {
return conf.getInt(INPUT_SKIP, 0);
}
public static void setSkip(final Configuration conf, final int skip) {
conf.setInt(INPUT_SKIP, skip);
}
public static boolean getLazyBSON(final Configuration conf) {
return conf.getBoolean(INPUT_LAZY_BSON, false);
}
public static void setLazyBSON(final Configuration conf, final boolean lazy) {
conf.setBoolean(INPUT_LAZY_BSON, lazy);
}
public static int getSplitSize(final Configuration conf) {
return conf.getInt(INPUT_SPLIT_SIZE, DEFAULT_SPLIT_SIZE);
}
public static void setSplitSize(final Configuration conf, final int value) {
conf.setInt(INPUT_SPLIT_SIZE, value);
}
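/*
* Sketch: raising the split size from the 8 MB default to 64 MB (the value is an illustrative choice, not a recommendation;
* conf is a Hadoop Configuration):
*
*   MapredMongoConfigUtil.setSplitSize(conf, 64);
*/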
/**
* If {@code true}, splits will be queried using $lt/$gt instead of $max and $min. This allows the database's query optimizer to choose
* the best index, instead of being forced to use the one in the $max/$min keys. This will only work if the key used for splitting is
* *not* a compound key. Make sure that all values under the splitting key are of the same type, or this will cause incomplete results.
*/
public static boolean isRangeQueryEnabled(final Configuration conf) {
return conf.getBoolean(SPLITS_USE_RANGEQUERY, false);
}
public static void setRangeQueryEnabled(final Configuration conf, final boolean value) {
conf.setBoolean(SPLITS_USE_RANGEQUERY, value);
}
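/*
* Sketch: enabling range queries for splits; per the notes above, this is only safe for a non-compound split key whose values
* all share a single type (conf is a Hadoop Configuration):
*
*   MapredMongoConfigUtil.setRangeQueryEnabled(conf, true);
*/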
/**
* If {@code true}, splits will be read by connecting directly to the individual shard servers. Only use this if you understand the
* risk: chunks moving or relocating during balancing phases can cause duplicate or missing data.
*/
public static boolean canReadSplitsFromShards(final Configuration conf) {
return conf.getBoolean(SPLITS_USE_SHARDS, false);
}
public static void setReadSplitsFromShards(final Configuration conf, final boolean value) {
conf.setBoolean(SPLITS_USE_SHARDS, value);
}
/**
* If sharding is enabled, use the sharding-configured chunks to split up data.
*/
public static boolean isShardChunkedSplittingEnabled(final Configuration conf) {
return conf.getBoolean(SPLITS_USE_CHUNKS, true);
}
public static void setShardChunkSplittingEnabled(final Configuration conf, final boolean value) {
conf.setBoolean(SPLITS_USE_CHUNKS, value);
}
public static boolean canReadSplitsFromSecondary(final Configuration conf) {
return conf.getBoolean(SPLITS_SLAVE_OK, false);
}
public static void setReadSplitsFromSecondary(final Configuration conf, final boolean value) {
conf.setBoolean(SPLITS_SLAVE_OK, value);
}
public static boolean createInputSplits(final Configuration conf) {
return conf.getBoolean(CREATE_INPUT_SPLITS, true);
}
public static void setCreateInputSplits(final Configuration conf, final boolean value) {
conf.setBoolean(CREATE_INPUT_SPLITS, value);
}
public static void setInputSplitKeyPattern(final Configuration conf, final String pattern) {
setJSON(conf, INPUT_SPLIT_KEY_PATTERN, pattern);
}
public static void setInputSplitKey(final Configuration conf, final DBObject key) {
setDBObject(conf, INPUT_SPLIT_KEY_PATTERN, key);
}
public static String getInputSplitKeyPattern(final Configuration conf) {
return conf.get(INPUT_SPLIT_KEY_PATTERN, "{ \"_id\": 1 }");
}
public static DBObject getInputSplitKey(final Configuration conf) {
try {
final String json = getInputSplitKeyPattern(conf);
final DBObject obj = (DBObject) JSON.parse(json);
if (obj == null) {
return new BasicDBObject("_id", 1);
} else {
return obj;
}
} catch (final Exception e) {
throw new IllegalArgumentException("Provided JSON String is not representable/parsable as a DBObject.", e);
}
}
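/*
* Sketch (the field name "customKey" is an illustrative assumption): the pattern must be a JSON document, not a bare field name.
*
*   MapredMongoConfigUtil.setInputSplitKeyPattern(conf, "{ \"customKey\": 1 }");
*   DBObject splitKey = MapredMongoConfigUtil.getInputSplitKey(conf); // { "customKey": 1 }
*/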
public static void setInputKey(final Configuration conf, final String fieldName) {
// TODO (bwm) - validate key rules?
conf.set(INPUT_KEY, fieldName);
}
public static String getInputKey(final Configuration conf) {
return conf.get(INPUT_KEY, "_id");
}
public static void setNoTimeout(final Configuration conf, final boolean value) {
conf.setBoolean(INPUT_NOTIMEOUT, value);
}
public static boolean isNoTimeout(final Configuration conf) {
return conf.getBoolean(INPUT_NOTIMEOUT, false);
}
//BSON-specific config functions.
public static boolean getBSONReadSplits(final Configuration conf) {
return conf.getBoolean(BSON_READ_SPLITS, true);
}
public static void setBSONReadSplits(final Configuration conf, final boolean val) {
conf.setBoolean(BSON_READ_SPLITS, val);
}
public static boolean getBSONWriteSplits(final Configuration conf) {
return conf.getBoolean(BSON_WRITE_SPLITS, true);
}
public static void setBSONWriteSplits(final Configuration conf, final boolean val) {
conf.setBoolean(BSON_WRITE_SPLITS, val);
}
public static boolean getBSONOutputBuildSplits(final Configuration conf) {
return conf.getBoolean(BSON_OUTPUT_BUILDSPLITS, false);
}
public static void setBSONOutputBuildSplits(final Configuration conf, final boolean val) {
conf.setBoolean(BSON_OUTPUT_BUILDSPLITS, val);
}
public static void setBSONPathFilter(final Configuration conf, final Class<? extends PathFilter> val) {
conf.setClass(BSON_PATHFILTER, val, PathFilter.class);
}
public static Class<?> getBSONPathFilter(final Configuration conf) {
return conf.getClass(BSON_PATHFILTER, null);
}
public static Class<? extends MongoSplitter> getSplitterClass(final Configuration conf) {
return conf.getClass(MONGO_SPLITTER_CLASS, null, MongoSplitter.class);
}
public static void setSplitterClass(final Configuration conf, final Class<? extends MongoSplitter> val) {
conf.setClass(MONGO_SPLITTER_CLASS, val, MongoSplitter.class);
}
public static List<String> getInputMongosHosts(final Configuration conf) {
String raw = conf.get(INPUT_MONGOS_HOSTS, null);
if (raw == null || raw.length() == 0) {
return Collections.emptyList(); // empty list - no mongos specified
}
// List of hostnames delimited by whitespace
return Arrays.asList(StringUtils.split(raw));
}
public static void setInputMongosHosts(final Configuration conf, final List<String> hostnames) {
String raw = "";
if (hostnames != null) {
raw = StringUtils.join(hostnames, ' ');
}
conf.set(INPUT_MONGOS_HOSTS, raw);
}
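/*
* Sketch (hostnames are assumptions): hosts are stored as a single whitespace-delimited string and split back into a list on read.
*
*   MapredMongoConfigUtil.setInputMongosHosts(conf,
*       java.util.Arrays.asList("mongos1:27017", "mongos2:27017"));
*   List<String> hosts = MapredMongoConfigUtil.getInputMongosHosts(conf); // two entries
*/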
/**
* Fetch a class by its actual class name, rather than by a key name in the configuration properties. We still need to pass in a
* Configuration object here, since the Configuration class maintains an internal cache of class names for performance on some Hadoop
* versions. It also ensures that the same classloader is used across all keys.
*/
public static <U> Class<? extends U> getClassByName(final Configuration conf,
final String className,
final Class<U> xface) {
if (className == null) {
return null;
}
try {
Class<?> theClass = conf.getClassByName(className);
if (theClass != null && !xface.isAssignableFrom(theClass)) {
throw new RuntimeException(theClass + " not " + xface.getName());
} else if (theClass != null) {
return theClass.asSubclass(xface);
} else {
return null;
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
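/*
* Sketch ("com.example.MyMapper" is a hypothetical class name): resolves a class through the Configuration's classloader and
* checks it against the expected interface.
*
*   Class<? extends Mapper> mapperClass =
*       MapredMongoConfigUtil.getClassByName(conf, "com.example.MyMapper", Mapper.class);
*/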
public static Configuration buildConfiguration(final Map<String, Object> data) {
Configuration newConf = new Configuration();
for (Map.Entry<String, Object> entry : data.entrySet()) {
String key = entry.getKey();
Object val = entry.getValue();
if (val instanceof String) {
newConf.set(key, (String) val);
} else if (val instanceof Boolean) {
newConf.setBoolean(key, (Boolean) val);
} else if (val instanceof Integer) {
newConf.setInt(key, (Integer) val);
} else if (val instanceof Float) {
newConf.setFloat(key, (Float) val);
} else if (val instanceof DBObject) {
setDBObject(newConf, key, (DBObject) val);
} else {
throw new RuntimeException("can't convert " + val.getClass() + " into any type for Configuration");
}
}
return newConf;
}
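/*
* Sketch (keys and values are illustrative): building a Configuration from a map of the supported value types
* (String, Boolean, Integer, Float, DBObject).
*
*   Map<String, Object> settings = new java.util.HashMap<String, Object>();
*   settings.put(MapredMongoConfigUtil.INPUT_URI, "mongodb://localhost:27017/demo.in");
*   settings.put(MapredMongoConfigUtil.INPUT_LIMIT, 100);
*   settings.put(MapredMongoConfigUtil.INPUT_NOTIMEOUT, true);
*   Configuration conf = MapredMongoConfigUtil.buildConfiguration(settings);
*/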
}