All downloads are free. Search and download functionality uses the official Maven repository.

gobblin.writer.SimpleDataWriter Maven / Gradle / Ivy

There is a newer version: 0.8.0
Show newest version
/*
 * Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package gobblin.writer;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Arrays;

import org.apache.commons.lang3.ArrayUtils;

import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Longs;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;


/**
 * An implementation of {@link DataWriter} that writes bytes directly to HDFS.
 *
 * This class accepts two new configuration parameters:
 * 
    *
  • {@link ConfigurationKeys#SIMPLE_WRITER_PREPEND_SIZE} is a boolean configuration option. If true, for each record, * it will write out a big endian long representing the record size and then write the record. i.e. the file format * will be the following: * r := >long<>record< * file := empty | r file *
  • {@link ConfigurationKeys#SIMPLE_WRITER_DELIMITER} accepts a byte value. If specified, this byte will be used * as a separator between records. If unspecified, no delimiter will be used between records. *
* @author [email protected] */ public class SimpleDataWriter extends FsDataWriter { private final Optional recordDelimiter; // optional byte to place between each record write private final boolean prependSize; private int recordsWritten; private int bytesWritten; private final OutputStream stagingFileOutputStream; public SimpleDataWriter(SimpleDataWriterBuilder builder, State properties) throws IOException { super(builder, properties); String delim; if ((delim = properties.getProp(ConfigurationKeys.SIMPLE_WRITER_DELIMITER, null)) == null || delim.length() == 0) { this.recordDelimiter = Optional.absent(); } else { this.recordDelimiter = Optional.of(delim.getBytes(ConfigurationKeys.DEFAULT_CHARSET_ENCODING)[0]); } this.prependSize = properties.getPropAsBoolean(ConfigurationKeys.SIMPLE_WRITER_PREPEND_SIZE, false); this.recordsWritten = 0; this.bytesWritten = 0; this.stagingFileOutputStream = createStagingFileOutputStream(); setStagingFileGroup(); } /** * Write a source record to the staging file * * @param record data record to write * @throws java.io.IOException if there is anything wrong writing the record */ @Override public void write(byte[] record) throws IOException { Preconditions.checkNotNull(record); byte[] toWrite = record; if (this.recordDelimiter.isPresent()) { toWrite = Arrays.copyOf(record, record.length + 1); toWrite[toWrite.length - 1] = this.recordDelimiter.get(); } if (this.prependSize) { long recordSize = toWrite.length; ByteBuffer buf = ByteBuffer.allocate(Longs.BYTES); buf.putLong(recordSize); toWrite = ArrayUtils.addAll(buf.array(), toWrite); } this.stagingFileOutputStream.write(toWrite); this.bytesWritten += toWrite.length; this.recordsWritten++; } /** * Get the number of records written. * * @return number of records written */ @Override public long recordsWritten() { return this.recordsWritten; } /** * Get the number of bytes written. 
* * @return number of bytes written */ @Override public long bytesWritten() throws IOException { return this.bytesWritten; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy