All downloads are FREE. The search and download features use the official Maven repository.
Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
com.mongodb.spark.pickle.BSONPickler Maven / Gradle / Ivy
package com.mongodb.spark.pickle;
import com.mongodb.DBRef;
import net.razorvine.pickle.IObjectPickler;
import net.razorvine.pickle.Opcodes;
import net.razorvine.pickle.PickleException;
import net.razorvine.pickle.Pickler;
import org.bson.types.BSONTimestamp;
import org.bson.types.Binary;
import org.bson.types.Code;
import org.bson.types.CodeWScope;
import org.bson.types.CodeWithScope;
import org.bson.types.MaxKey;
import org.bson.types.MinKey;
import org.bson.types.ObjectId;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.regex.Pattern;
/**
* Implementation of {@link net.razorvine.pickle.IObjectPickler} that pickles
* BSON types only, so that they can be correctly unpickled into PyMongo
* objects.
*
* For documentation on Python's pickle protocol, see the following:
* - https://docs.python.org/2/library/pickle.html
* - http://svn.python.org/projects/python/trunk/Lib/pickletools.py
*/
public class BSONPickler implements IObjectPickler {

    /**
     * Charset used for every String-to-bytes conversion in this class, so the
     * emitted pickle stream does not depend on the JVM's platform default
     * encoding.
     */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    // Flag values from Python's "re" module.
    private static final int PY_RE_IGNORECASE = 2;
    private static final int PY_RE_MULTILINE = 8;
    private static final int PY_RE_DOTALL = 16;
    private static final int PY_RE_UNICODE = 32;
    private static final int PY_RE_VERBOSE = 64;

    // 0x100 == Pattern.UNICODE_CHARACTER_CLASS in Java >= 7. The literal is
    // used so that this class does not reference the Java 7 constant.
    private static final int JAVA_UNICODE_CHARACTER_CLASS = 0x100;

    /**
     * Write a byte array as a pickle binary string.
     *
     * Arrays of at most 255 bytes use SHORT_BINSTRING with a one-byte length;
     * longer arrays use BINSTRING with a four-byte little-endian length.
     *
     * @param bytes the raw bytes to write
     * @param out the OutputStream to which to write
     * @throws IOException if writing to the OutputStream fails
     */
    private static void putBinstring(final byte[] bytes, final OutputStream out)
        throws IOException {
        final int binLen = bytes.length;
        if (binLen <= 0xff) {
            out.write(Opcodes.SHORT_BINSTRING);
            out.write(binLen);
        } else {
            out.write(Opcodes.BINSTRING);
            // Four-byte length, least significant byte first.
            out.write(binLen & 0xff);
            out.write(binLen >>> 8 & 0xff);
            out.write(binLen >>> 16 & 0xff);
            out.write(binLen >>> 24 & 0xff);
        }
        out.write(bytes);
    }

    /**
     * Write a String as a pickle binary string, encoded as UTF-8.
     *
     * @param string the text to write
     * @param out the OutputStream to which to write
     * @throws IOException if writing to the OutputStream fails
     */
    private static void putBinstring(final String string, final OutputStream out)
        throws IOException {
        putBinstring(string.getBytes(UTF_8), out);
    }

    /**
     * Write a GLOBAL opcode followed by its newline-terminated module and
     * attribute names.
     *
     * @param module the Python module name, e.g. "bson.objectid"
     * @param name the name to look up in that module, e.g. "ObjectId"
     * @param out the OutputStream to which to write
     * @throws IOException if writing to the OutputStream fails
     */
    private static void writeGlobal(final String module, final String name,
                                    final OutputStream out)
        throws IOException {
        out.write(Opcodes.GLOBAL);
        out.write((module + "\n" + name + "\n").getBytes(UTF_8));
    }

    /**
     * Write the opcodes that create an instance of a Python class with no
     * constructor arguments: GLOBAL, EMPTY_TUPLE, NEWOBJ. The caller is
     * expected to follow up with the object's state and a BUILD opcode.
     *
     * @param module the Python module name
     * @param name the Python class name
     * @param out the OutputStream to which to write
     * @throws IOException if writing to the OutputStream fails
     */
    private static void writeNewInstance(final String module, final String name,
                                         final OutputStream out)
        throws IOException {
        writeGlobal(module, name, out);
        out.write(Opcodes.EMPTY_TUPLE);
        out.write(Opcodes.NEWOBJ);
    }

    /**
     * Write an instance of a Python class that takes no constructor arguments
     * and whose state is an empty dict.
     *
     * @param module the Python module name
     * @param name the Python class name
     * @param out the OutputStream to which to write
     * @throws IOException if writing to the OutputStream fails
     */
    private static void writeStatelessInstance(final String module,
                                               final String name,
                                               final OutputStream out)
        throws IOException {
        writeNewInstance(module, name, out);
        out.write(Opcodes.EMPTY_DICT);
        out.write(Opcodes.BUILD);
    }

    /**
     * Translate flags from java.util.regex.Pattern into their respective
     * values in Python's "re" library.
     *
     * Java flags that are not checked below are dropped.
     *
     * @param javaFlags flags from a java.util.regex.Pattern
     * @return equivalent flags in Python
     */
    private static int translateRegexFlags(final int javaFlags) {
        int pyFlags = 0;
        if ((javaFlags & Pattern.CASE_INSENSITIVE) != 0) {
            pyFlags |= PY_RE_IGNORECASE;
        }
        if ((javaFlags & Pattern.COMMENTS) != 0) {
            pyFlags |= PY_RE_VERBOSE;
        }
        if ((javaFlags & Pattern.DOTALL) != 0) {
            pyFlags |= PY_RE_DOTALL;
        }
        if ((javaFlags & Pattern.MULTILINE) != 0) {
            pyFlags |= PY_RE_MULTILINE;
        }
        // Python doesn't have separate flags for UNICODE_CASE and
        // UNICODE_CHARACTER_CLASS. Even if only one of these is set, enabling
        // the UNICODE flag is still probably the closest approximation in
        // Python.
        if ((javaFlags
            & (Pattern.UNICODE_CASE | JAVA_UNICODE_CHARACTER_CLASS)) != 0) {
            pyFlags |= PY_RE_UNICODE;
        }
        return pyFlags;
    }

    /**
     * Pickle a regular expression as a bson.regex.Regex whose state dict
     * holds "pattern" and the translated "flags".
     *
     * @param pattern the compiled Java pattern
     * @param out the OutputStream to which to write
     * @param pickler the Pickler used to save the nested values
     * @throws IOException if writing to the OutputStream fails
     */
    private static void pickleRegex(final Pattern pattern,
                                    final OutputStream out,
                                    final Pickler pickler)
        throws IOException {
        writeNewInstance("bson.regex", "Regex", out);
        out.write(Opcodes.EMPTY_DICT);
        out.write(Opcodes.MARK);
        putBinstring("pattern", out);
        pickler.save(pattern.pattern());
        putBinstring("flags", out);
        pickler.save(translateRegexFlags(pattern.flags()));
        out.write(Opcodes.SETITEMS);
        out.write(Opcodes.BUILD);
    }

    /**
     * Pickle a BSONTimestamp as a bson.timestamp.Timestamp whose state dict
     * holds the time and increment values.
     *
     * @param timestamp the timestamp to pickle
     * @param out the OutputStream to which to write
     * @param pickler the Pickler used to save the nested values
     * @throws IOException if writing to the OutputStream fails
     */
    private static void pickleBSONTimestamp(final BSONTimestamp timestamp,
                                            final OutputStream out,
                                            final Pickler pickler)
        throws IOException {
        writeNewInstance("bson.timestamp", "Timestamp", out);
        out.write(Opcodes.EMPTY_DICT);
        out.write(Opcodes.MARK);
        putBinstring("_Timestamp__time", out);
        pickler.save(timestamp.getTime());
        putBinstring("_Timestamp__inc", out);
        pickler.save(timestamp.getInc());
        out.write(Opcodes.SETITEMS);
        out.write(Opcodes.BUILD);
    }

    /**
     * Pickle a Code (with or without scope) as a bson.code.Code. The code
     * text is the single constructor argument; the scope goes into the state
     * dict.
     *
     * @param code the code object to pickle
     * @param out the OutputStream to which to write
     * @param pickler the Pickler used to save the nested values
     * @throws IOException if writing to the OutputStream fails
     */
    private static void pickleCode(final Code code, final OutputStream out,
                                   final Pickler pickler)
        throws IOException {
        writeGlobal("bson.code", "Code", out);
        pickler.save(code.getCode());
        out.write(Opcodes.TUPLE1);
        out.write(Opcodes.NEWOBJ);
        // PyMongo's bson.code.Code always has a scope, even if it is empty.
        out.write(Opcodes.EMPTY_DICT);
        putBinstring("_Code__scope", out);
        if (code instanceof CodeWithScope) {
            pickler.save(((CodeWithScope) code).getScope());
        } else if (code instanceof CodeWScope) {
            pickler.save(((CodeWScope) code).getScope().toMap());
        } else {
            out.write(Opcodes.EMPTY_DICT);
        }
        out.write(Opcodes.SETITEM);
        out.write(Opcodes.BUILD);
    }

    /**
     * Write a bson.min_key.MinKey instance.
     *
     * @param out the OutputStream to which to write
     * @throws IOException if writing to the OutputStream fails
     */
    private static void writeMinKey(final OutputStream out)
        throws IOException {
        writeStatelessInstance("bson.min_key", "MinKey", out);
    }

    /**
     * Write a bson.max_key.MaxKey instance.
     *
     * @param out the OutputStream to which to write
     * @throws IOException if writing to the OutputStream fails
     */
    private static void writeMaxKey(final OutputStream out)
        throws IOException {
        writeStatelessInstance("bson.max_key", "MaxKey", out);
    }

    /**
     * Pickle a DBRef as a bson.dbref.DBRef. Only the collection name and id
     * are taken from the Java object; the database is written as None and
     * the extra kwargs as an empty dict.
     *
     * @param dbref the reference to pickle
     * @param out the OutputStream to which to write
     * @param pickler the Pickler used to save the nested values
     * @throws IOException if writing to the OutputStream fails
     */
    private static void pickleDBRef(final DBRef dbref, final OutputStream out,
                                    final Pickler pickler)
        throws IOException {
        writeNewInstance("bson.dbref", "DBRef", out);
        out.write(Opcodes.EMPTY_DICT);
        out.write(Opcodes.MARK);
        putBinstring("_DBRef__kwargs", out);
        out.write(Opcodes.EMPTY_DICT);
        putBinstring("_DBRef__collection", out);
        pickler.save(dbref.getCollectionName());
        putBinstring("_DBRef__database", out);
        // org.bson.types.DBRef stores neither database name nor extra "kwargs".
        // NOTE(review): this class imports com.mongodb.DBRef; confirm whether
        // the driver version in use exposes a database name.
        out.write(Opcodes.NONE);
        putBinstring("_DBRef__id", out);
        // Not saving this in memo, because a DBRef that uses itself as its own
        // id can't be saved to MongoDB anyway.
        pickler.save(dbref.getId());
        out.write(Opcodes.SETITEMS);
        out.write(Opcodes.BUILD);
    }

    /**
     * Pickle a Binary as a bson.binary.Binary. The data and subtype are the
     * constructor arguments; the subtype is written again into the state
     * dict.
     *
     * @param binary the binary value to pickle
     * @param out the OutputStream to which to write
     * @param pickler the Pickler used to save the nested values
     * @throws IOException if writing to the OutputStream fails
     */
    private static void pickleBinary(final Binary binary,
                                     final OutputStream out,
                                     final Pickler pickler)
        throws IOException {
        final byte subtype = binary.getType();
        writeGlobal("bson.binary", "Binary", out);
        putBinstring(binary.getData(), out);
        pickler.save(subtype);
        out.write(Opcodes.TUPLE2);
        out.write(Opcodes.NEWOBJ);
        out.write(Opcodes.EMPTY_DICT);
        putBinstring("_Binary__subtype", out);
        pickler.save(subtype);
        out.write(Opcodes.SETITEM);
        out.write(Opcodes.BUILD);
    }

    /**
     * Pickle an ObjectId as a bson.objectid.ObjectId whose state is the raw
     * bytes of the id.
     *
     * @param oid the ObjectId to pickle
     * @param out the OutputStream to which to write
     * @throws IOException if writing to the OutputStream fails
     */
    private static void pickleObjectId(final ObjectId oid,
                                       final OutputStream out)
        throws IOException {
        writeNewInstance("bson.objectid", "ObjectId", out);
        // The length prefix is derived from the array instead of being
        // hard-coded.
        putBinstring(oid.toByteArray(), out);
        out.write(Opcodes.BUILD);
    }

    /**
     * Write the Python "pickle" representation of a BSON type.
     *
     * If {@code obj} is a BSONValueBox, the value it wraps is pickled instead.
     *
     * @param obj the object to be pickled
     * @param out the OutputStream to which to write
     * @param currentPickler the current Pickler instance
     * @throws PickleException if the object is not one of the BSON types
     *                         handled by this class
     * @throws IOException if an issue is encountered writing to the
     *                     OutputStream
     */
    @Override
    public void pickle(
        final Object obj, final OutputStream out, final Pickler currentPickler)
        throws IOException {
        Object o = obj;
        if (obj instanceof BSONValueBox) {
            o = ((BSONValueBox) obj).get();
        }

        if (o instanceof ObjectId) {
            pickleObjectId((ObjectId) o, out);
        } else if (o instanceof Binary) {
            pickleBinary((Binary) o, out, currentPickler);
        } else if (o instanceof DBRef) {
            pickleDBRef((DBRef) o, out, currentPickler);
        } else if (o instanceof MaxKey) {
            writeMaxKey(out);
        } else if (o instanceof MinKey) {
            writeMinKey(out);
        } else if (o instanceof Code) {
            // Also covers CodeWithScope and CodeWScope; pickleCode
            // distinguishes them.
            pickleCode((Code) o, out, currentPickler);
        } else if (o instanceof BSONTimestamp) {
            pickleBSONTimestamp((BSONTimestamp) o, out, currentPickler);
        } else if (o instanceof Pattern) {
            // Since the Hadoop connector is in Java, regular expressions will
            // be of this class, rather than scala.util.matching.Regex.
            pickleRegex((Pattern) o, out, currentPickler);
        } else {
            throw new PickleException("Can't pickle this: " + o);
        }
    }
}