// parquet.hadoop.api.WriteSupport Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of parquet-hadoop Show documentation
// There is a newer version: 1.6.0
/**
 * Copyright 2012 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package parquet.hadoop.api;

import java.util.Collections;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;

import parquet.io.api.RecordConsumer;
import parquet.schema.MessageType;


/**
 * Abstraction to use with {@link parquet.hadoop.ParquetOutputFormat} to convert incoming records
 * of type {@code T} into calls on a {@link RecordConsumer}.
 *
 * <p>Documented call order: {@link #init(Configuration)} first in the task,
 * {@link #prepareForWrite(RecordConsumer)} once per row group, and {@link #write} once per record.
 *
 * @author Julien Le Dem
 *
 * @param <T> the type of the incoming records
 */
public abstract class WriteSupport<T> {

  /**
   * Information to be persisted in the file: its schema plus application specific metadata.
   *
   * @author Julien Le Dem
   *
   */
  public static final class WriteContext {
    private final MessageType schema;
    private final Map<String, String> extraMetaData;

    /**
     * @param schema the schema of the file; must not be null
     * @param extraMetaData application specific metadata; must not be null
     * @throws NullPointerException if {@code schema} or {@code extraMetaData} is null
     *         (the exception message is the name of the offending argument)
     */
    public WriteContext(MessageType schema, Map<String, String> extraMetaData) {
      if (schema == null) {
        throw new NullPointerException("schema");
      }
      if (extraMetaData == null) {
        throw new NullPointerException("extraMetaData");
      }
      this.schema = schema;
      // Wrapped, not copied: the result is a read-only view, so later changes the caller
      // makes to the map passed in remain visible through getExtraMetaData().
      this.extraMetaData = Collections.unmodifiableMap(extraMetaData);
    }

    /**
     * @return the schema of the file
     */
    public MessageType getSchema() {
      return schema;
    }

    /**
     * @return application specific metadata, as an unmodifiable map
     */
    public Map<String, String> getExtraMetaData() {
      return extraMetaData;
    }

  }

  /**
   * called first in the task
   * @param configuration the job's configuration
   * @return the information needed to write the file
   */
  public abstract WriteContext init(Configuration configuration);

  /**
   * This will be called once per row group
   * @param recordConsumer the recordConsumer to write to
   */
  public abstract void prepareForWrite(RecordConsumer recordConsumer);

  /**
   * called once per record
   * @param record one record to write to the previously provided record consumer
   */
  public abstract void write(T record);

}