
com.mongodb.hadoop.pig.BSONStorage Maven / Gradle / Ivy
/*
* Copyright 2011 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.hadoop.pig;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.hadoop.BSONFileOutputFormat;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.pig.LoadFunc;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.ResourceStatistics;
import org.apache.pig.StoreFunc;
import org.apache.pig.StoreMetadata;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.UDFContext;
import org.apache.pig.impl.util.Utils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
public class BSONStorage extends StoreFunc implements StoreMetadata {
// Logger for this class. It must be keyed on BSONStorage (not MongoStorage) so that
// messages emitted here are attributed to the correct storage function.
private static final Log LOG = LogFactory.getLog(BSONStorage.class);
// Property-style key string; presumably the name under which the output schema is
// saved for later retrieval -- confirm against its uses elsewhere in the class.
static final String SCHEMA_SIGNATURE = "bson.pig.output.schema";
//CHECKSTYLE:OFF
// Schema of the data being stored; null until something assigns it. Declared protected,
// which is why Checkstyle is switched off around this declaration.
protected ResourceSchema schema = null;
//CHECKSTYLE:ON
// Writer that records are pushed to; not assigned at construction time.
private RecordWriter out;
// Presumably the UDF context signature handed over by Pig -- confirm against its setter.
private String udfcSignature = null;
// Value passed to the single-argument constructor; stays null when the no-argument
// constructor is used.
private String idField = null;
// One BSONFileOutputFormat instance is created per BSONStorage instance.
private final BSONFileOutputFormat outputFormat = new BSONFileOutputFormat();
/**
 * Creates a storage function with no id field configured; {@code idField} keeps its
 * initial {@code null} value.
 */
public BSONStorage() {
    super();
}
/**
 * Creates a storage function that remembers the given id field.
 *
 * @param idField value stored in this instance's {@code idField}; presumably the name of
 *                the Pig field to use as the document id -- confirm against where the
 *                field is read
 */
public BSONStorage(final String idField) {
    super();
    this.idField = idField;
}
/**
* Returns an object better suited for BSON storage. Object {@code o} corresponds to a field value in Pig.
*
* @param o object representing pig type to convert to BSON-like object
* @param field field to place o in
* @param toIgnore name of field in Object o to ignore
*/
public static Object getTypeForBSON(final Object o, final ResourceFieldSchema field, final String toIgnore)
throws IOException {
byte dataType = field != null ? field.getType() : DataType.UNKNOWN;
ResourceSchema s = null;
if (field == null) {
if (o instanceof Map) {
dataType = DataType.MAP;
} else if (o instanceof List) {
dataType = DataType.BAG;
} else {
dataType = DataType.UNKNOWN;
}
} else {
s = field.getSchema();
if (dataType == DataType.UNKNOWN) {
if (o instanceof Map) {
dataType = DataType.MAP;
}
if (o instanceof List) {
dataType = DataType.BAG;
}
}
}
if (dataType == DataType.BYTEARRAY && o instanceof Map) {
dataType = DataType.MAP;
}
switch (dataType) {
case DataType.NULL:
return null;
case DataType.INTEGER:
case DataType.LONG:
case DataType.FLOAT:
case DataType.DOUBLE:
return o;
case DataType.BYTEARRAY:
return o.toString();
case DataType.CHARARRAY:
return o;
//Given a TUPLE, create a Map so BSONEncoder will eat it
case DataType.TUPLE:
if (s == null) {
throw new IOException("Schemas must be fully specified to use this storage function. No schema found for field "
+ field.getName());
}
ResourceFieldSchema[] fs = s.getFields();
Map m = new LinkedHashMap();
for (int j = 0; j < fs.length; j++) {
m.put(fs[j].getName(), getTypeForBSON(((Tuple) o).get(j), fs[j], toIgnore));
}
return m;
// Given a BAG, create an Array so BSONEncoder will eat it.
case DataType.BAG:
if (s == null) {
throw new IOException("Schemas must be fully specified to use this storage function. No schema found for field "
+ field);
}
fs = s.getFields();
if (fs.length != 1 || fs[0].getType() != DataType.TUPLE) {
throw new IOException("Found a bag without a tuple inside!");
}
// Drill down the next level to the tuple's schema.
s = fs[0].getSchema();
if (s == null) {
throw new IOException("Schemas must be fully specified to use this storage function. No schema found for field "
+ field.getName());
}
fs = s.getFields();
ArrayList
© 2015 - 2025 Weber Informatics LLC | Privacy Policy