com.mongodb.hadoop.input.GridFSSplit Maven / Gradle / Ivy
The MongoDB Connector for Hadoop is a plugin for Hadoop that provides the ability to use MongoDB as an input source and/or an output destination.
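The GridFSSplit class below is the input split that map tasks receive when reading files out of GridFS: each split identifies a file by its ObjectId plus the chunk of that file it covers. For context, a driver program would select this path by pointing the connector at a GridFS bucket and using its GridFS-aware input format. The following is only a minimal sketch: the com.mongodb.hadoop.GridFSInputFormat class name and the example URI are assumptions (check the artifact's documentation for the exact class); MongoConfigUtil.setInputURI is the connector's standard way to name the input collection, and as getGridFS() below shows, the collection named in the URI is used as the GridFS bucket prefix.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import com.mongodb.hadoop.util.MongoConfigUtil;

public final class GridFSJobSketch {
    public static void main(final String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Placeholder URI: database "test", GridFS bucket "fs" (fs.files / fs.chunks).
        MongoConfigUtil.setInputURI(conf, "mongodb://localhost:27017/test.fs");

        Job job = Job.getInstance(conf, "gridfs-read-sketch");
        // Assumed class name for the connector's GridFS input format, which is
        // what produces the GridFSSplit instances shown below.
        job.setInputFormatClass(com.mongodb.hadoop.GridFSInputFormat.class);
        // ... set mapper, reducer, and output format as usual ...
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The source of the split class itself follows.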
package com.mongodb.hadoop.input;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClientURI;
import com.mongodb.gridfs.GridFS;
import com.mongodb.gridfs.GridFSDBFile;
import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.hadoop.mapreduce.InputSplit;
import org.bson.types.ObjectId;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
public class GridFSSplit extends InputSplit
    implements org.apache.hadoop.mapred.InputSplit {

    private ObjectId fileId;
    private int chunkSize;
    private long fileLength;
    private int chunkId;
    private MongoClientURI inputURI;

    private GridFS gridFS;
    private GridFSDBFile file;

    // Zero-args constructor required for mapred.InputSplit.
    public GridFSSplit() {}

    public GridFSSplit(
            final MongoClientURI inputURI,
            final ObjectId fileId,
            final int chunkSize,
            final long fileLength) {
        this(inputURI, fileId, chunkSize, fileLength, 0);
    }

    public GridFSSplit(
            final MongoClientURI inputURI,
            final ObjectId fileId,
            final int chunkSize,
            final long fileLength,
            final int chunkId) {
        this.inputURI = inputURI;
        this.fileId = fileId;
        this.chunkSize = chunkSize;
        this.fileLength = fileLength;
        this.chunkId = chunkId;
    }
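
    // Lazily build a GridFS handle; the collection named by the input URI is
    // used as the GridFS bucket name, so its ".files"/".chunks" collections back it.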
    private GridFS getGridFS() {
        if (null == gridFS) {
            DBCollection rootCollection =
                MongoConfigUtil.getCollection(inputURI);
            gridFS = new GridFS(
                rootCollection.getDB(), rootCollection.getName());
        }
        return gridFS;
    }
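
    // Resolve and cache the GridFSDBFile for fileId, failing fast if the file
    // no longer exists in the bucket.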
    private GridFSDBFile getFile() throws IOException {
        if (null == file) {
            file = getGridFS().find(fileId);
            if (null == file) {
                throw new IOException("No file found for id " + fileId);
            }
        }
        return file;
    }
    /**
     * Get the value for a key in this file.
     *
     * @param key the key to look up
     * @return the value for the key
     * @throws IOException if the file cannot be retrieved.
     */
    public Object get(final String key) throws IOException {
        return getFile().get(key);
    }
    /**
     * Get the metadata associated with this file.
     *
     * @return the metadata as a {@code DBObject}.
     * @throws IOException if the file cannot be retrieved.
     */
    public DBObject getMetadata() throws IOException {
        return getFile().getMetaData();
    }
    /**
     * Get the id of the chunk this split represents.
     *
     * @return the chunk id
     */
    public int getChunkId() {
        return chunkId;
    }

    /**
     * Get the size of the chunk this split represents, in bytes.
     *
     * @return the size in bytes of the chunk
     */
    public int getChunkSize() {
        return chunkSize;
    }
    /**
     * Read data from the GridFS chunk represented by this split.
     *
     * @return the decoded data from the chunk
     * @throws IOException if there is an exception reading the data.
     */
    public InputStream getData() throws IOException {
        InputStream fileStream = getFile().getInputStream();
        // Skip to the start of this chunk; the GridFS input stream handles the seek.
        // Cast to long so the byte offset cannot overflow for files over 2 GB.
        // noinspection ResultOfMethodCallIgnored
        fileStream.skip((long) chunkSize * chunkId);
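        // The returned stream is positioned at the chunk boundary but is not
        // length-limited; callers are expected to read at most chunkSize bytes.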
        return fileStream;
    }
    @Override
    public long getLength() throws IOException {
        return fileLength;
    }
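
    // Hadoop treats these "locations" as scheduling hints; here they are simply
    // the MongoDB hosts named in the input URI.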
    @Override
    public String[] getLocations() throws IOException {
        List<String> hosts = inputURI.getHosts();
        return hosts.toArray(new String[hosts.size()]);
    }
    // Implement mapred.InputSplit.
    @Override
    public void write(final DataOutput out) throws IOException {
        out.writeUTF(inputURI.toString());
        out.write(fileId.toByteArray());
        out.writeInt(chunkId);
        out.writeLong(fileLength);
        out.writeInt(chunkSize);
    }
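
    // readFields() must consume the fields in exactly the order write() emits
    // them: URI string, 12-byte ObjectId, chunkId, fileLength, chunkSize.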
    @Override
    public void readFields(final DataInput in) throws IOException {
        inputURI = new MongoClientURI(in.readUTF());
        byte[] oidBytes = new byte[12];
        in.readFully(oidBytes);
        fileId = new ObjectId(oidBytes);
        chunkId = in.readInt();
        fileLength = in.readLong();
        chunkSize = in.readInt();
    }
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("GridFSSplit{");
        if (inputURI != null) {
            sb.append("inputURI hosts=")
                .append(inputURI.getHosts())
                .append(", ");
        }
        if (fileId != null) {
            sb.append("fileId=").append(fileId).append(", ");
        }
        return sb
            .append("chunkSize=").append(chunkSize)
            .append(", fileLength=").append(fileLength)
            .append(", chunkId=").append(chunkId).append("}").toString();
    }
    @Override
    public int hashCode() {
        int hash = (null == inputURI ? 0 : inputURI.hashCode());
        hash = hash * 31 + (null == fileId ? 0 : fileId.hashCode());
        return hash * 31 + chunkId;
    }
    @Override
    public boolean equals(final Object obj) {
        if (obj instanceof GridFSSplit) {
            GridFSSplit other = (GridFSSplit) obj;
            return (null == inputURI
                ? null == other.inputURI : inputURI.equals(other.inputURI))
                && (null == fileId
                ? null == other.fileId : fileId.equals(other.fileId))
                && chunkId == other.chunkId;
        }
        return false;
    }
}
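
To make the split mechanics concrete, here is a rough, self-contained sketch that carves a GridFS file into one GridFSSplit per chunk and round-trips a split through its write()/readFields() methods, the Writable path Hadoop's classic (mapred) API uses when shipping splits to task JVMs. The URI, ObjectId, file length, and 255 KiB chunk size are made-up values, and "one split per chunk" illustrates how the constructor parameters fit together rather than describing what the connector's input format does internally.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.ArrayList;
import java.util.List;

import org.bson.types.ObjectId;

import com.mongodb.MongoClientURI;
import com.mongodb.hadoop.input.GridFSSplit;

public final class GridFSSplitSketch {
    public static void main(final String[] args) throws Exception {
        // Hypothetical inputs: a 1 MiB file stored with a 255 KiB chunk size.
        MongoClientURI uri = new MongoClientURI("mongodb://localhost:27017/test.fs");
        ObjectId fileId = new ObjectId();
        long fileLength = 1024L * 1024L;
        int chunkSize = 255 * 1024;

        // One split per chunk: ceil(fileLength / chunkSize) chunks in total.
        int numChunks = (int) Math.ceil((double) fileLength / chunkSize);
        List<GridFSSplit> splits = new ArrayList<GridFSSplit>();
        for (int chunkId = 0; chunkId < numChunks; chunkId++) {
            splits.add(new GridFSSplit(uri, fileId, chunkSize, fileLength, chunkId));
        }

        // Round-trip the first split through write()/readFields().
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        splits.get(0).write(new DataOutputStream(bytes));

        GridFSSplit copy = new GridFSSplit();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy);                            // same fileId, chunkSize, fileLength, chunkId
        System.out.println(copy.getLength() == fileLength);  // true
    }
}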