/*
* Copyright 2011 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.hadoop.pig;
import com.mongodb.MongoClientURI;
import com.mongodb.hadoop.MongoOutputFormat;
import com.mongodb.hadoop.io.MongoUpdateWritable;
import com.mongodb.hadoop.output.MongoRecordWriter;
import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceStatistics;
import org.apache.pig.StoreFunc;
import org.apache.pig.StoreMetadata;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.impl.util.Utils;
import org.bson.BasicBSONObject;
import java.io.IOException;
import java.util.Properties;
/*
 * MongoUpdateStorage: a Pig StoreFunc used to update documents in a MongoDB
 * collection, allowing MongoDB to serve as the output destination of a Pig job.
 */
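/*
 * Illustrative usage from a Pig script (the URI, relation, and field names
 * below are hypothetical, not prescribed by this class). The backslash before
 * each '$' keeps Pig's parameter substitution from consuming the placeholder,
 * which JSONPigReplace later replaces with the matching tuple field:
 *
 *   STORE data INTO 'mongodb://localhost:27017/demo.people'
 *       USING com.mongodb.hadoop.pig.MongoUpdateStorage(
 *           '{first: "\$first", last: "\$last"}',
 *           '{\$set: {age: "\$age"}}',
 *           'first:chararray, last:chararray, age:int');
 */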
public class MongoUpdateStorage extends StoreFunc implements StoreMetadata {
private static final Log LOG = LogFactory.getLog(MongoUpdateStorage.class);
// Pig specific settings
static final String SCHEMA_SIGNATURE = "mongoupdate.pig.output.schema";
private ResourceSchema schema = null;
private String signature = null;
// private final MongoStorageOptions options;
private final MongoOutputFormat outputFormat = new MongoOutputFormat();
// MongoRecordWriter to use for updating MongoDB documents
private MongoRecordWriter<?, MongoUpdateWritable> recordWriter = null;
// JSONPigReplace setup
private JSONPigReplace pigReplace;
private String schemaStr;
private String unnamedStr;
// Single instance of MongoUpdateWritable for result output.
private MongoUpdateWritable muw;
/**
* Construct MongoUpdateStorage from a query and an update document.
*
* @param query JSON string representing 'query' parameter in MongoDB update
* @param update JSON string representing 'update' parameter in MongoDB update
*/
public MongoUpdateStorage(final String query, final String update) {
this(query, update, null);
}
/**
* Construct MongoUpdateStorage, additionally specifying the schema of the Pig output.
*
* @param query JSON string representing 'query' parameter in MongoDB update
* @param update JSON string representing 'update' parameter in MongoDB update
* @param schema string representing schema of pig output
*/
public MongoUpdateStorage(final String query, final String update, final String schema) {
this(query, update, schema, "");
}
/**
* Construct MongoUpdateStorage, additionally specifying "unnamed" objects to ignore.
*
* @param query JSON string representing 'query' parameter in MongoDB update
* @param update JSON string representing 'update' parameter in MongoDB update
* @param schema string representing schema of pig output
* @param toIgnore string representing "unnamed" objects
*/
public MongoUpdateStorage(final String query, final String update, final String schema, final String toIgnore) {
this(query, update, schema, toIgnore, "");
}
/**
* Construct MongoUpdateStorage, additionally specifying extra MongoDB update options.
*
* @param query JSON string representing 'query' parameter in MongoDB update
* @param update JSON string representing 'update' parameter in MongoDB update
* @param schema string representing schema of pig output
* @param toIgnore string representing "unnamed" objects
* @param updateOptions JSON string representing 'extra' MongoDB update options
*/
public MongoUpdateStorage(final String query, final String update, final String schema, final String toIgnore,
final String updateOptions) {
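// 'updateOptions' is a JSON document of flags consumed in putNext(), e.g.
// '{upsert: true, multi: false, replace: false}' (the values shown are the
// defaults applied when an option is absent).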
pigReplace = new JSONPigReplace(new String[]{query, update, updateOptions});
schemaStr = schema;
unnamedStr = toIgnore.isEmpty() ? null : toIgnore;
muw = new MongoUpdateWritable();
}
@Override
public void checkSchema(final ResourceSchema s) throws IOException {
schema = s;
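// checkSchema runs on the Pig frontend, so stash the schema in the
// UDFContext; prepareToWrite() reads it back on the backend when no
// explicit schema string was supplied.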
UDFContext udfContext = UDFContext.getUDFContext();
Properties p = udfContext.getUDFProperties(getClass(), new String[]{signature});
p.setProperty(SCHEMA_SIGNATURE, schema.toString());
}
@Override
public void storeSchema(final ResourceSchema schema, final String location, final Job job) {
// not implemented
}
@Override
public void storeStatistics(final ResourceStatistics stats, final String location, final Job job) {
// not implemented
}
@Override
public void putNext(final Tuple tuple) throws IOException {
try {
// perform substitution on variables "marked" for replacements
BasicBSONObject[] toUpdate = pigReplace.substitute(tuple, schema, unnamedStr);
// 'query' JSON
BasicBSONObject q = toUpdate[0];
// 'update' JSON
BasicBSONObject u = toUpdate[1];
// update options
BasicBSONObject mu = toUpdate.length > 2 ? toUpdate[2] : null;
boolean isUpsert = true;
boolean isMulti = false;
boolean isReplace = false;
if (mu != null) {
isUpsert = mu.getBoolean("upsert", true);
isMulti = mu.getBoolean("multi", false);
isReplace = mu.getBoolean("replace", false);
}
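// The triple (q, u, options) roughly corresponds to the MongoDB shell call
//   db.collection.update(q, u, {upsert: isUpsert, multi: isMulti}),
// with 'replace' requesting whole-document replacement rather than
// modifier ($set, $push, ...) semantics.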
muw.setQuery(q);
muw.setModifiers(u);
muw.setUpsert(isUpsert);
muw.setMultiUpdate(isMulti);
muw.setReplace(isReplace);
recordWriter.write(null, muw);
} catch (Exception e) {
throw new IOException("Couldn't convert tuple to bson: ", e);
}
}
@Override
public void prepareToWrite(final RecordWriter writer) throws IOException {
//noinspection unchecked
recordWriter = (MongoRecordWriter<?, MongoUpdateWritable>) writer;
if (recordWriter == null) {
throw new IOException("Invalid Record Writer");
}
LOG.info("Preparing to write to " + recordWriter);
UDFContext context = UDFContext.getUDFContext();
Properties p = context.getUDFProperties(getClass(), new String[]{signature});
/*
* In determining the schema to use, the user-defined schema should take
* precedence over the "inferred" schema
*/
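// e.g. a user-supplied schema string such as
// 'first:chararray, last:chararray, age:int' (field names illustrative),
// parsed by Utils.getSchemaFromString below.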
if (schemaStr != null) {
try {
schema = new ResourceSchema(Utils.getSchemaFromString(schemaStr));
} catch (Exception e) {
LOG.error(e.getMessage(), e);
}
} else {
String s = p.getProperty(SCHEMA_SIGNATURE);
if (s == null) {
throw new IOException("Could not find schema in UDF context. You'd have to explicitly specify a Schema.");
}
schema = new ResourceSchema(Utils.getSchemaFromString(s));
}
}
@Override
public OutputFormat getOutputFormat() throws IOException {
return outputFormat;
}
@Override
public String relToAbsPathForStoreLocation(final String location, final Path curDir) throws IOException {
return location;
}
@Override
public void setStoreLocation(final String location, final Job job) throws IOException {
final Configuration config = job.getConfiguration();
if (!location.startsWith("mongodb://")) {
throw new IllegalArgumentException("Invalid URI format: URIs must begin with the mongodb:// protocol string.");
}
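// A valid location names both the database and the collection, e.g.
// 'mongodb://localhost:27017/mydb.mycollection' (host and names illustrative).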
MongoClientURI locURI = new MongoClientURI(location);
LOG.info(String.format(
"Store location config: %s; for namespace: %s.%s; hosts: %s",
config, locURI.getDatabase(), locURI.getCollection(),
locURI.getHosts()));
MongoConfigUtil.setOutputURI(config, locURI);
}
@Override
public void setStoreFuncUDFContextSignature(final String signature) {
this.signature = signature;
}
}