com.mongodb.hadoop.pig.BSONStorage Maven / Gradle / Ivy
The newest version!
/*
* Copyright 2011 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.hadoop.pig;
import org.bson.*;
import org.bson.types.*;
import com.mongodb.*;
import com.mongodb.hadoop.*;
import com.mongodb.hadoop.output.*;
import com.mongodb.hadoop.util.*;
import org.apache.commons.logging.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.pig.*;
import org.apache.pig.data.*;
import org.apache.pig.impl.util.*;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import java.io.*;
import java.text.ParseException;
import java.util.*;
public class BSONStorage extends StoreFunc implements StoreMetadata {
// Logger for this class. Keyed on BSONStorage itself so that log output is
// attributed to this storage function rather than to MongoStorage.
private static final Log log = LogFactory.getLog( BSONStorage.class );
// String key "bson.pig.output.schema".
// NOTE(review): presumably the property name under which the output schema is
// stashed between front end and back end - confirm against the code that uses it.
static final String SCHEMA_SIGNATURE = "bson.pig.output.schema";
// Schema of the data being stored; null until something assigns it.
protected ResourceSchema schema = null;
// Record writer for this store function; not assigned in the visible code.
private RecordWriter out;
// Signature string for this UDF instance; null until assigned.
private String udfcSignature = null;
// Id field name; set only by the single-argument constructor, otherwise null.
private String idField = null;
// Upsert flag; defaults to false.
private boolean useUpsert = false;
// Output format instance created once per BSONStorage instance.
private final BSONFileOutputFormat outputFormat = new BSONFileOutputFormat();
/**
 * Creates a BSONStorage with no id field configured; every field keeps its
 * declared default (in particular {@code idField} stays {@code null}).
 */
public BSONStorage() {
    // Intentionally empty: nothing to initialise beyond the field defaults.
}
/**
 * Creates a BSONStorage that remembers the given id field name.
 *
 * @param idField value stored as-is in the {@code idField} member; no
 *                validation is performed here, so {@code null} is accepted.
 *                NOTE(review): presumably the name of the Pig field used as
 *                the document id - confirm against the code that reads it.
 */
public BSONStorage(final String idField) {
    this.idField = idField;
}
public static Object getTypeForBSON(Object o, ResourceSchema.ResourceFieldSchema field) throws IOException{
byte dataType = field != null ? field.getType() : DataType.UNKNOWN;
ResourceSchema s = null;
if( field == null ){
if(o instanceof Map){
dataType = DataType.MAP;
}else if(o instanceof List){
dataType = DataType.BAG;
} else {
dataType = DataType.UNKNOWN;
}
}else{
s = field.getSchema();
if(dataType == DataType.UNKNOWN ){
if(o instanceof Map) dataType = DataType.MAP;
if(o instanceof List) dataType = DataType.BAG;
}
}
if(dataType == DataType.BYTEARRAY && o instanceof Map){
dataType = DataType.MAP;
}
switch (dataType) {
case DataType.NULL:
return null;
case DataType.INTEGER:
case DataType.LONG:
case DataType.FLOAT:
case DataType.DOUBLE:
return o;
case DataType.BYTEARRAY:
return o.toString();
case DataType.CHARARRAY:
return (String)o;
// Given a TUPLE, create a Map so BSONEncoder will eat it
case DataType.TUPLE:
if (s == null) {
throw new IOException("Schemas must be fully specified to use "
+ "this storage function. No schema found for field " +
field.getName());
}
ResourceSchema.ResourceFieldSchema[] fs = s.getFields();
LinkedHashMap m = new java.util.LinkedHashMap();
for (int j = 0; j < fs.length; j++) {
m.put(fs[j].getName(), getTypeForBSON(((Tuple) o).get(j), fs[j]));
}
return m;
// Given a BAG, create an Array so BSONEncoder will eat it.
case DataType.BAG:
if (s == null) {
throw new IOException("Schemas must be fully specified to use "
+ "this storage function. No schema found for field " +
field.getName());
}
fs = s.getFields();
if (fs.length != 1 || fs[0].getType() != DataType.TUPLE) {
throw new IOException("Found a bag without a tuple "
+ "inside!");
}
// Drill down the next level to the tuple's schema.
s = fs[0].getSchema();
if (s == null) {
throw new IOException("Schemas must be fully specified to use "
+ "this storage function. No schema found for field " +
field.getName());
}
fs = s.getFields();
ArrayList a = new ArrayList
© 2015 - 2025 Weber Informatics LLC | Privacy Policy