![JAR search and dependency download from the Maven repository](/logo.png)
com.martinkl.warc.WARCWritable Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of warc-hadoop Show documentation
Show all versions of warc-hadoop Show documentation
Java library for working with WARC (Web Archive) files in Hadoop MapReduce
The newest version!
package com.martinkl.warc;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
 * Hadoop {@link Writable} adapter that holds a single, replaceable {@link WARCRecord}.
 *
 * <p>Wrapping a record in this class lets it travel through Hadoop wherever a Writable is
 * expected (for example in sequence files, or in the shuffle between mappers and reducers).
 * Serialization is delegated to the wrapped record, which is encoded as one record in
 * standard WARC/1.0 format.
 *
 * <p>The wrapper is mutable: the held record can be swapped via {@link #setRecord} or
 * replaced by {@link #readFields}.
 */
public class WARCWritable implements Writable {

    /** The record currently held by this wrapper; {@code null} when the wrapper is empty. */
    private WARCRecord wrapped;

    /** Constructs an empty wrapper, i.e. one whose record is {@code null}. */
    public WARCWritable() {
        this(null);
    }

    /**
     * Constructs a wrapper holding the supplied record.
     *
     * @param record the record to wrap; stored as-is without any null check
     */
    public WARCWritable(WARCRecord record) {
        this.wrapped = record;
    }

    /**
     * Gives access to the wrapped record.
     *
     * @return the record held by this wrapper, or {@code null} if none has been set
     */
    public WARCRecord getRecord() {
        return this.wrapped;
    }

    /**
     * Replaces the wrapped record.
     *
     * @param record the new record to hold; stored as-is without any null check
     */
    public void setRecord(WARCRecord record) {
        this.wrapped = record;
    }

    /**
     * Serializes the wrapped record to the given {@link DataOutput} by delegating to the
     * record's own {@code write} method.
     *
     * <p>When no record is held, nothing at all is written to {@code out}.
     * NOTE(review): {@link #readFields} always attempts to parse a record, so an empty
     * wrapper does not round-trip symmetrically -- confirm callers never serialize one.
     *
     * @param out the stream to append the record to
     * @throws IOException if the delegated write fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        // An empty wrapper contributes no bytes to the stream.
        if (this.wrapped == null) {
            return;
        }
        this.wrapped.write(out);
    }

    /**
     * Builds a new {@link WARCRecord} from the given {@link DataInput} and makes it the
     * record held by this wrapper, discarding whatever record was held before.
     *
     * <p>If constructing the record throws, the previously held record is left in place.
     *
     * @param in the stream to read the record from
     * @throws IOException if constructing the record from the stream fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        WARCRecord parsed = new WARCRecord(in);
        this.wrapped = parsed;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy