All downloads are free. Search and download functionality uses the official Maven repository.

com.mongodb.hadoop.pig.MongoUpdateStorage Maven / Gradle / Ivy

Go to download

The MongoDB Connector for Hadoop is a plugin for Hadoop that provides the ability to use MongoDB as an input source and/or an output destination.

The newest version!
/*
 * Copyright 2011 10gen Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.mongodb.hadoop.pig;

import com.mongodb.MongoClientURI;
import com.mongodb.hadoop.MongoOutputFormat;
import com.mongodb.hadoop.io.MongoUpdateWritable;
import com.mongodb.hadoop.output.MongoRecordWriter;
import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceStatistics;
import org.apache.pig.StoreFunc;
import org.apache.pig.StoreMetadata;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.impl.util.Utils;
import org.bson.BasicBSONObject;

import java.io.IOException;
import java.util.Properties;

/**
 * Pig {@link StoreFunc} used to update documents in a MongoDB collection.
 *
 * <p>Each tuple handed to {@link #putNext(Tuple)} is substituted into the 'query', 'update' and
 * (optional) update-options JSON templates given to the constructor, and the result is written
 * to the {@link MongoRecordWriter} as a {@link MongoUpdateWritable}.</p>
 */
public class MongoUpdateStorage extends StoreFunc implements StoreMetadata {

    private static final Log LOG = LogFactory.getLog(MongoUpdateStorage.class);

    // Pig specific settings
    static final String SCHEMA_SIGNATURE = "mongoupdate.pig.output.schema";
    private ResourceSchema schema = null;
    private String signature = null;

    private final MongoOutputFormat outputFormat = new MongoOutputFormat();

    // MongoRecordWriter to use for updating MongoDB documents
    private MongoRecordWriter recordWriter = null;

    // JSONPigReplace setup
    private final JSONPigReplace pigReplace;
    // User-specified schema string; null means "use the schema saved by checkSchema".
    private final String schemaStr;
    // String representing "unnamed" objects; null when none was supplied.
    private final String unnamedStr;

    // Single instance of MongoUpdateWritable, reused for every tuple written.
    private final MongoUpdateWritable muw;

    /**
     * First constructor
     *
     * @param query  JSON string representing 'query' parameter in MongoDB update
     * @param update JSON string representing 'update' parameter in MongoDB update
     */
    public MongoUpdateStorage(final String query, final String update) {
        this(query, update, null);
    }

    /**
     * Second constructor
     *
     * @param query  JSON string representing 'query' parameter in MongoDB update
     * @param update JSON string representing 'update' parameter in MongoDB update
     * @param schema string representing schema of pig output
     */
    public MongoUpdateStorage(final String query, final String update, final String schema) {
        this(query, update, schema, "");
    }

    /**
     * Third constructor
     *
     * @param query    JSON string representing 'query' parameter in MongoDB update
     * @param update   JSON string representing 'update' parameter in MongoDB update
     * @param schema   string representing schema of pig output
     * @param toIgnore string representing "unnamed" objects
     */
    public MongoUpdateStorage(final String query, final String update, final String schema, final String toIgnore) {
        this(query, update, schema, toIgnore, "");
    }

    /**
     * Fourth constructor
     *
     * @param query         JSON string representing 'query' parameter in MongoDB update
     * @param update        JSON string representing 'update' parameter in MongoDB update
     * @param schema        string representing schema of pig output; may be {@code null}, in which
     *                      case the schema recorded by {@link #checkSchema(ResourceSchema)} is used
     * @param toIgnore      string representing "unnamed" objects; {@code null} or empty means none
     * @param updateOptions JSON string representing 'extra' MongoDB update options
     */
    public MongoUpdateStorage(final String query, final String update, final String schema, final String toIgnore,
                              final String updateOptions) {
        pigReplace = new JSONPigReplace(new String[]{query, update, updateOptions});
        schemaStr = schema;
        // Treat null the same as the empty string instead of failing with a NullPointerException.
        unnamedStr = (toIgnore == null || toIgnore.isEmpty()) ? null : toIgnore;
        muw = new MongoUpdateWritable();
    }

    /**
     * Remembers the schema of the data being stored and saves its string form in the UDF
     * properties under {@link #SCHEMA_SIGNATURE}, where {@link #prepareToWrite(RecordWriter)}
     * looks it up when no explicit schema was given to the constructor.
     *
     * @param s schema of the data to be stored
     * @throws IOException declared by the overridden method
     */
    @Override
    public void checkSchema(final ResourceSchema s) throws IOException {
        schema = s;
        UDFContext udfContext = UDFContext.getUDFContext();

        Properties p = udfContext.getUDFProperties(getClass(), new String[]{signature});
        p.setProperty(SCHEMA_SIGNATURE, schema.toString());
    }

    @Override
    public void storeSchema(final ResourceSchema schema, final String location, final Job job) {
        // not implemented
    }

    @Override
    public void storeStatistics(final ResourceStatistics stats, final String location, final Job job) {
        // not implemented
    }

    /**
     * Substitutes the tuple's values into the query/update/options templates and writes the
     * resulting update to the record writer.
     *
     * <p>When the options document is absent, or omits a key, the defaults are
     * {@code upsert=true}, {@code multi=false} and {@code replace=false}.</p>
     *
     * @param tuple the tuple to turn into a MongoDB update
     * @throws IOException if anything fails while substituting or writing; the original
     *                     exception is attached as the cause
     */
    @Override
    public void putNext(final Tuple tuple) throws IOException {
        try {
            // perform substitution on variables "marked" for replacements
            BasicBSONObject[] toUpdate = pigReplace.substitute(tuple, schema, unnamedStr);
            // 'query' JSON
            BasicBSONObject q = toUpdate[0];
            // 'update' JSON
            BasicBSONObject u = toUpdate[1];
            // update options
            BasicBSONObject mu = toUpdate.length > 2 ? toUpdate[2] : null;
            boolean isUpsert = true;
            boolean isMulti = false;
            boolean isReplace = false;
            if (mu != null) {
                isUpsert = mu.getBoolean("upsert", true);
                isMulti = mu.getBoolean("multi", false);
                isReplace = mu.getBoolean("replace", false);
            }

            muw.setQuery(q);
            muw.setModifiers(u);
            muw.setUpsert(isUpsert);
            muw.setMultiUpdate(isMulti);
            muw.setReplace(isReplace);
            recordWriter.write(null, muw);
        } catch (Exception e) {
            throw new IOException("Couldn't convert tuple to bson: ", e);
        }
    }

    /**
     * Stores the record writer and resolves the schema used by {@link #putNext(Tuple)}.
     *
     * <p>The user-defined schema (constructor argument) takes precedence over the "inferred"
     * schema saved in the UDF properties by {@link #checkSchema(ResourceSchema)}.</p>
     *
     * @param writer the record writer to use; must be a {@link MongoRecordWriter}
     * @throws IOException if {@code writer} is {@code null} or not a {@link MongoRecordWriter},
     *                     if the user-defined schema cannot be parsed, or if no schema is
     *                     available at all
     */
    @Override
    public void prepareToWrite(final RecordWriter writer) throws IOException {
        // Validate before casting so that a wrong writer type is reported as the documented
        // IOException rather than a ClassCastException (instanceof is also false for null).
        if (!(writer instanceof MongoRecordWriter)) {
            throw new IOException("Invalid Record Writer");
        }
        //noinspection unchecked
        recordWriter = (MongoRecordWriter) writer;
        LOG.info("Preparing to write to " + recordWriter);

        /*
         * In determining the schema to use, the user-defined schema should take
         * precedence over the "inferred" schema
         */
        if (schemaStr != null) {
            try {
                schema = new ResourceSchema(Utils.getSchemaFromString(schemaStr));
            } catch (Exception e) {
                // Fail fast: continuing without a schema would only defer the failure to
                // every putNext call with a far less helpful message.
                throw new IOException("Could not parse user-specified schema: " + schemaStr, e);
            }
        } else {
            UDFContext context = UDFContext.getUDFContext();
            Properties p = context.getUDFProperties(getClass(), new String[]{signature});
            String s = p.getProperty(SCHEMA_SIGNATURE);
            if (s == null) {
                throw new IOException("Could not find schema in UDF context. You'd have to explicitly specify a Schema.");
            }
            schema = new ResourceSchema(Utils.getSchemaFromString(s));
        }
    }

    /**
     * @return the {@link MongoOutputFormat} instance owned by this store function
     */
    @Override
    public OutputFormat getOutputFormat() throws IOException {
        return outputFormat;
    }

    /**
     * Returns the location unchanged; it is not resolved against {@code curDir}.
     *
     * @param location the store location as given by the user
     * @param curDir   the current working directory (ignored)
     * @return {@code location}, unmodified
     */
    @Override
    public String relToAbsPathForStoreLocation(final String location, final Path curDir) throws IOException {
        return location;
    }

    /**
     * Validates the store location and records it as the output URI in the job configuration.
     *
     * @param location MongoDB connection URI; must start with {@code mongodb://}
     * @param job      the job whose configuration receives the output URI
     * @throws IllegalArgumentException if {@code location} does not start with {@code mongodb://}
     * @throws IOException              declared by the overridden method
     */
    @Override
    public void setStoreLocation(final String location, final Job job) throws IOException {
        final Configuration config = job.getConfiguration();
        if (!location.startsWith("mongodb://")) {
            throw new IllegalArgumentException("Invalid URI Format.  URIs must begin with a mongodb:// protocol string.");
        }
        MongoClientURI locURI = new MongoClientURI(location);
        LOG.info(String.format(
            "Store location config: %s; for namespace: %s.%s; hosts: %s",
            config, locURI.getDatabase(), locURI.getCollection(),
            locURI.getHosts()));
        MongoConfigUtil.setOutputURI(config, locURI);
    }

    /**
     * Records the signature used as the key when looking up this instance's UDF properties.
     *
     * @param signature the signature supplied by Pig
     */
    @Override
    public void setStoreFuncUDFContextSignature(final String signature) {
        this.signature = signature;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy