com.mongodb.hadoop.pig.MongoInsertStorage Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mongo-hadoop-pig Show documentation
Show all versions of mongo-hadoop-pig Show documentation
The MongoDB Connector for Hadoop is a plugin for Hadoop that provides the ability to use MongoDB as an input source and/or an output destination.
The newest version!
/*
* Copyright 2011 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.hadoop.pig;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.MongoClientURI;
import com.mongodb.hadoop.MongoOutputFormat;
import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.ResourceStatistics;
import org.apache.pig.StoreFunc;
import org.apache.pig.StoreMetadata;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.impl.util.Utils;
import java.io.IOException;
import java.util.Map;
import java.util.Properties;
/**
 * Pig {@link StoreFunc} that converts each tuple into a BSON document and passes it to the
 * {@link RecordWriter} supplied by Pig, using a {@link MongoOutputFormat} as the output format.
 *
 * <p>When a Pig schema is available, every schema field becomes one document field. The field
 * whose name equals the configured {@code idField} (if any) is stored under {@code _id}.
 * When no schema is available, each tuple must contain exactly one item that converts to a
 * {@link Map}; the entries of that map become the document fields.</p>
 */
@SuppressWarnings("unchecked")
public class MongoInsertStorage extends StoreFunc implements StoreMetadata {
    // Pig specific settings
    static final String SCHEMA_SIGNATURE = "mongoinsert.pig.output.schema";
    // Log under this class's own name so messages are attributed to the right store function.
    private static final Log LOG = LogFactory.getLog(MongoInsertStorage.class);

    private final MongoOutputFormat outputFormat = new MongoOutputFormat();
    //CHECKSTYLE:OFF
    protected ResourceSchema schema = null;
    //CHECKSTYLE:ON
    private RecordWriter out;
    private String udfcSignature = null;
    private String idField = null;

    /**
     * Create a new MongoInsertStorage with no field standing in for {@code _id}.
     */
    public MongoInsertStorage() {
    }

    /**
     * @param idField   the field standing in for {@code _id}
     * @param useUpsert this parameter is unused
     * @deprecated useUpsert is unused. Use {@link #MongoInsertStorage(String)}
     * instead.
     */
    @Deprecated
    @SuppressWarnings("UnusedParameters")
    public MongoInsertStorage(final String idField, final String useUpsert) {
        this(idField);
    }

    /**
     * Create a new MongoInsertStorage.
     *
     * @param idField the field standing in for {@code _id}
     */
    public MongoInsertStorage(final String idField) {
        this.idField = idField;
    }

    /**
     * Returns the store location unchanged.
     *
     * @param location the store location as given in the Pig script
     * @param curDir   the current working directory (ignored)
     * @return {@code location}, unmodified
     * @throws IOException never thrown by this implementation
     */
    @Override
    public String relToAbsPathForStoreLocation(final String location, final Path curDir) throws IOException {
        // Don't convert anything - override to keep base from messing with URI
        return location;
    }

    /**
     * @return the {@link MongoOutputFormat} instance owned by this store function
     * @throws IOException never thrown by this implementation
     */
    @Override
    public OutputFormat getOutputFormat() throws IOException {
        return outputFormat;
    }

    /**
     * Validates the store location and registers it as the output URI in the job configuration.
     *
     * @param location the store location; must begin with {@code mongodb://}
     * @param job      the job whose configuration receives the output URI
     * @throws IllegalArgumentException if {@code location} does not start with {@code mongodb://}
     * @throws IOException              declared by the overridden method
     */
    @Override
    public void setStoreLocation(final String location, final Job job) throws IOException {
        final Configuration config = job.getConfiguration();
        if (!location.startsWith("mongodb://")) {
            throw new IllegalArgumentException("Invalid URI Format. URIs must begin with a mongodb:// protocol string.");
        }
        final MongoClientURI locURI = new MongoClientURI(location);
        LOG.info(String.format(
            "Store location config: %s; for namespace: %s.%s; hosts: %s",
            config, locURI.getDatabase(), locURI.getCollection(),
            locURI.getHosts()));
        MongoConfigUtil.setOutputURI(config, locURI);
    }

    /**
     * Remembers the output schema and stores its string form in the UDF properties under
     * {@link #SCHEMA_SIGNATURE}, where {@link #prepareToWrite(RecordWriter)} reads it back.
     *
     * @param schema the schema of the data being stored
     * @throws IOException declared by the overridden method
     */
    @Override
    public void checkSchema(final ResourceSchema schema) throws IOException {
        this.schema = schema;
        final UDFContext udfc = UDFContext.getUDFContext();
        final Properties p = udfc.getUDFProperties(getClass(), new String[]{udfcSignature});
        p.setProperty(SCHEMA_SIGNATURE, schema.toString());
    }

    /**
     * Stores the record writer and restores the schema from the UDF properties.
     *
     * <p>If no schema string is found, or it cannot be parsed, the schema is left unset and
     * {@link #putNext(Tuple)} treats each tuple as containing a single map.</p>
     *
     * @param writer the record writer to send documents to; must not be {@code null}
     * @throws IOException if {@code writer} is {@code null}
     */
    @Override
    public void prepareToWrite(final RecordWriter writer) throws IOException {
        if (writer == null) {
            throw new IOException("Invalid Record Writer");
        }
        out = writer;

        final UDFContext udfc = UDFContext.getUDFContext();
        final Properties p = udfc.getUDFProperties(getClass(), new String[]{udfcSignature});
        final String strSchema = p.getProperty(SCHEMA_SIGNATURE);
        if (strSchema == null) {
            LOG.warn("Could not find schema in UDF context. Interpreting each tuple as containing a single map.");
            return;
        }

        try {
            // Parse the schema from the string stored in the properties object.
            schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
        } catch (Exception e) {
            // Best effort: fall back to schema-less mode, but keep the cause in the log.
            schema = null;
            LOG.warn("Could not parse schema from UDF context: " + e.getMessage(), e);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("GOT A SCHEMA " + schema + " " + strSchema);
        }
    }

    /**
     * Converts one tuple into a document and writes it to the record writer with a
     * {@code null} key.
     *
     * @param tuple the tuple to store
     * @throws IOException if the tuple cannot be converted or the write fails; the original
     *                     failure is attached as the cause
     */
    @Override
    public void putNext(final Tuple tuple) throws IOException {
        try {
            final BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
            final ResourceFieldSchema[] fields = schema == null ? null : schema.getFields();
            if (fields != null) {
                for (int i = 0; i < fields.length; i++) {
                    writeField(builder, fields[i], tuple.get(i));
                }
            } else {
                // Assume that the tuple contains only a map, as produced by
                // MongoLoader, for example.
                if (tuple.size() != 1) {
                    throw new IOException("Could not retrieve schema, but tuples did not contain a single item: " + tuple);
                }
                final Object result = BSONStorage.getTypeForBSON(
                    tuple.get(0), null, null);
                if (!(result instanceof Map)) {
                    throw new IOException("Could not retrieve schema, but tuples contained something other than a Map: " + tuple);
                }
                final Map<String, Object> documentMap = (Map<String, Object>) result;
                for (Map.Entry<String, Object> entry : documentMap.entrySet()) {
                    builder.add(entry.getKey(), entry.getValue());
                }
            }
            out.write(null, builder.get());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            throw new IOException("Couldn't convert tuple to bson: ", e);
        } catch (Exception e) {
            throw new IOException("Couldn't convert tuple to bson: ", e);
        }
    }

    /**
     * Records the signature used to look up this instance's UDF properties.
     *
     * @param signature the signature supplied by Pig
     */
    @Override
    public void setStoreFuncUDFContextSignature(final String signature) {
        udfcSignature = signature;
    }

    @Override
    public void storeStatistics(final ResourceStatistics stats, final String location, final Job job) {
        // not implemented
    }

    @Override
    public void storeSchema(final ResourceSchema schema, final String location, final Job job) {
        // not implemented
    }

    /**
     * Converts a single value and adds it to the document under the field's name, or under
     * {@code _id} when the field's name equals the configured id field.
     *
     * @param builder the document builder to add to
     * @param field   the schema of the field being written
     * @param d       the raw value taken from the tuple
     * @throws IOException if the value cannot be converted
     */
    protected void writeField(final BasicDBObjectBuilder builder,
                              final ResourceFieldSchema field,
                              final Object d) throws IOException {
        final Object convertedType = BSONStorage.getTypeForBSON(d, field, null);
        final String fieldName = field.getName();
        if (fieldName != null && fieldName.equals(idField)) {
            builder.add("_id", convertedType);
        } else {
            builder.add(fieldName, convertedType);
        }
    }
}