gobblin.writer.SimpleDataWriter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gobblin-core Show documentation
Show all versions of gobblin-core Show documentation
Gobblin Ingestion Framework
/*
* Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package gobblin.writer;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Arrays;
import org.apache.commons.lang3.ArrayUtils;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Longs;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
/**
* An implementation of {@link DataWriter} that writes bytes directly to HDFS.
*
* This class accepts two new configuration parameters:
*
* - {@link ConfigurationKeys#SIMPLE_WRITER_PREPEND_SIZE} is a boolean configuration option. If true, for each record,
* it will write out a big endian long representing the record size and then write the record. i.e. the file format
* will be the following:
* {@code r := <long><record>}
* file := empty | r file
*
* - {@link ConfigurationKeys#SIMPLE_WRITER_DELIMITER} accepts a byte value. If specified, this byte is appended
* after every record (including the last one). If unspecified, no delimiter is written.
*
* @author [email protected]
*/
public class SimpleDataWriter extends FsDataWriter<byte[]> {

  /** Byte appended after every record; absent when no delimiter is configured. */
  private final Optional<Byte> recordDelimiter;

  /** When true, each record is preceded by its length encoded as an 8-byte big-endian long. */
  private final boolean prependSize;

  // Counters are long to match the accessors' return type; an int byte counter
  // would wrap to a negative value once more than 2^31 - 1 bytes were written.
  private long recordsWritten;
  private long bytesWritten;

  /** Stream over the staging file that all records are written to. */
  private final OutputStream stagingFileOutputStream;

  /**
   * Creates a writer configured from the given properties.
   *
   * <p>Reads {@link ConfigurationKeys#SIMPLE_WRITER_DELIMITER} and
   * {@link ConfigurationKeys#SIMPLE_WRITER_PREPEND_SIZE}. Only the first encoded byte of the delimiter
   * property is used; a missing or empty value means no delimiter is written.
   *
   * @param builder the builder passed through to the superclass constructor
   * @param properties the state the writer options are read from
   * @throws IOException if the superclass constructor or the staging file stream creation fails
   */
  public SimpleDataWriter(SimpleDataWriterBuilder builder, State properties) throws IOException {
    super(builder, properties);

    String delim = properties.getProp(ConfigurationKeys.SIMPLE_WRITER_DELIMITER, null);
    if (delim == null || delim.isEmpty()) {
      this.recordDelimiter = Optional.absent();
    } else {
      // Only the first byte of the encoded delimiter is kept; any further bytes are ignored.
      this.recordDelimiter = Optional.of(delim.getBytes(ConfigurationKeys.DEFAULT_CHARSET_ENCODING)[0]);
    }

    this.prependSize = properties.getPropAsBoolean(ConfigurationKeys.SIMPLE_WRITER_PREPEND_SIZE, false);
    this.recordsWritten = 0;
    this.bytesWritten = 0;
    this.stagingFileOutputStream = createStagingFileOutputStream();
    setStagingFileGroup();
  }

  /**
   * Writes a source record to the staging file.
   *
   * <p>The bytes emitted for one record are, in order: an optional 8-byte big-endian length (when size
   * prepending is enabled), the record itself, and an optional delimiter byte. The length value covers the
   * record plus the delimiter byte, but not the length field itself. Everything is sent to the stream in a
   * single {@code write} call.
   *
   * @param record data record to write; must not be null
   * @throws java.io.IOException if there is anything wrong writing the record
   */
  @Override
  public void write(byte[] record) throws IOException {
    Preconditions.checkNotNull(record);

    boolean hasDelimiter = this.recordDelimiter.isPresent();
    byte[] toWrite = record;

    if (hasDelimiter || this.prependSize) {
      int payloadLength = record.length + (hasDelimiter ? 1 : 0);
      int headerLength = this.prependSize ? Longs.BYTES : 0;

      // Assemble header, record and delimiter in one exactly-sized buffer.
      // A freshly allocated ByteBuffer is big-endian, which is the documented header format.
      ByteBuffer buf = ByteBuffer.allocate(headerLength + payloadLength);
      if (this.prependSize) {
        buf.putLong(payloadLength);
      }
      buf.put(record);
      if (hasDelimiter) {
        byte delimiter = this.recordDelimiter.get();
        buf.put(delimiter);
      }
      toWrite = buf.array();
    }

    this.stagingFileOutputStream.write(toWrite);
    this.bytesWritten += toWrite.length;
    this.recordsWritten++;
  }

  /**
   * Get the number of records written.
   *
   * @return number of records written
   */
  @Override
  public long recordsWritten() {
    return this.recordsWritten;
  }

  /**
   * Get the number of bytes written, including any length headers and delimiter bytes.
   *
   * @return number of bytes written
   */
  @Override
  public long bytesWritten() throws IOException {
    return this.bytesWritten;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy