gobblin.writer.PartitionAwareDataWriterBuilder Maven / Gradle / Ivy
Show all versions of gobblin-core Show documentation
/*
* Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package gobblin.writer;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import com.google.common.base.Optional;
import gobblin.writer.partitioner.WriterPartitioner;
/**
* A {@link DataWriterBuilder} used with a {@link WriterPartitioner}. When provided with a partitioner, Gobblin will create a
* {@link gobblin.writer.DataWriter} per partition. All partitions will be built with identical builders, except
* that {@link #forPartition} will specify the partition.
*
*
* <p>The contract with the {@link PartitionAwareDataWriterBuilder} is as follows:
* <ul>
* <li> Gobblin will call {@link #validatePartitionSchema(Schema)} before calling build(). </li>
* <li> Gobblin is guaranteed to call {@link #validatePartitionSchema(Schema)} for some instance of
* {@link PartitionAwareDataWriterBuilder} with the same class, but not necessarily for the specific instance
* that will be used to build the {@link DataWriter}. </li>
* <li> If !partition1.equals(partition2), then Gobblin may build a writer for partition1 and a writer for
* partition2 in the same job. This should not cause an exception. </li>
* <li> If partition1.equals(partition2), a single fork will not build writers for both partitions. </li>
* </ul>
*
*
*
* <p>The summary is:
* <ul>
* <li> Make sure {@link #validatePartitionSchema} returns false if the writer can't handle the schema. </li>
* <li> {@link #validatePartitionSchema} should not have any side effects on the {@link PartitionAwareDataWriterBuilder}. </li>
* <li> Different partitions should generate non-colliding writers. </li>
* </ul>
*
*/
// NOTE(review): DataWriterBuilder is referenced here without type arguments. If its declaration is generic,
// this class should declare and forward the same type parameters -- confirm against DataWriterBuilder.
public abstract class PartitionAwareDataWriterBuilder extends DataWriterBuilder {

  /**
   * The partition the built {@link DataWriter} will handle. Starts out absent and is only replaced by
   * {@link #forPartition}; it stays absent if {@link #forPartition} is given {@code null}.
   */
  protected Optional<GenericRecord> partition = Optional.absent();

  /**
   * Sets the partition that the built {@link DataWriter} will handle.
   *
   * @param partition A {@link GenericRecord} specifying the partition. A {@code null} value is stored as an
   *                  absent {@link Optional} rather than rejected.
   * @return This {@link PartitionAwareDataWriterBuilder}, to allow call chaining.
   */
  public PartitionAwareDataWriterBuilder forPartition(GenericRecord partition) {
    // fromNullable (not of) so that a null partition maps to Optional.absent() instead of throwing.
    this.partition = Optional.fromNullable(partition);
    return this;
  }

  /**
   * Checks whether the {@link PartitionAwareDataWriterBuilder} is compatible with a given partition {@link Schema}.
   * If this method returns false, the execution will crash with an error. If this method returns true, the
   * {@link DataWriterBuilder} is expected to be able to understand the partitioning schema and handle it correctly.
   *
   * @param partitionSchema {@link Schema} of {@link GenericRecord} objects that will be passed to {@link #forPartition}.
   * @return true if the {@link DataWriterBuilder} can understand the schema and is able to generate partitions from
   *         this schema.
   */
  public abstract boolean validatePartitionSchema(Schema partitionSchema);
}