/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Reporter;
/**
* An extension for OutputFormats that want to implement ACID transactions.
* @param <V> the row type of the file
*/
public interface AcidOutputFormat<K extends WritableComparable, V> extends HiveOutputFormat<K, V> {
/**
* Options to control how the files are written
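*
* A minimal, hypothetical usage sketch; {@code conf}, {@code rowInspector} and
* {@code partitionDir} are placeholder names, and each setter returns {@code this}
* so the calls chain:
* <pre>{@code
* AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf)
*     .inspector(rowInspector)
*     .bucket(0)
*     .minimumWriteId(1)
*     .maximumWriteId(1)
*     .statementId(0)
*     .finalDestination(partitionDir);
* }</pre>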
*/
public static class Options implements Cloneable {
private final Configuration configuration;
private FileSystem fs;
private ObjectInspector inspector;
private boolean writingBase = false;
private boolean writingDeleteDelta = false;
private boolean isCompressed = false;
private Properties properties;
private Reporter reporter;
private long minimumWriteId;
private long maximumWriteId;
private int bucketId;
/**
* Based on {@link org.apache.hadoop.hive.ql.metadata.Hive#mvFile(HiveConf, FileSystem, Path, FileSystem, Path, boolean, boolean)}
* _copy_N starts with 1.
*/
private int copyNumber = 0;
private PrintStream dummyStream = null;
private boolean oldStyle = false;
private int recIdCol = -1; // Column the record identifier is in, -1 indicates no record id
//unique within a transaction
private int statementId = 0;
private Path finalDestination;
/**
* Create the options object.
* @param conf Use the given configuration
*/
public Options(Configuration conf) {
this.configuration = conf;
}
/**
* shallow clone
*/
@Override
public Options clone() {
try {
return (Options)super.clone();
}
catch(CloneNotSupportedException ex) {
throw new RuntimeException("clone() not properly implemented: " + ex.getMessage(), ex);
}
}
/**
* Use the given ObjectInspector for each record written.
* @param inspector the inspector to use.
* @return this
*/
public Options inspector(ObjectInspector inspector) {
this.inspector = inspector;
return this;
}
/**
* Is this writing a base directory? Should only be used by the compactor,
* or when implementing insert overwrite.
* @param val is this a base file?
* @return this
*/
public Options writingBase(boolean val) {
this.writingBase = val;
return this;
}
/**
* Is this writing a delete delta directory?
* @param val is this a delete delta file?
* @return this
*/
public Options writingDeleteDelta(boolean val) {
this.writingDeleteDelta = val;
return this;
}
/**
* Provide a file system to the writer. Otherwise, the filesystem for the
* path will be used.
* @param fs the file system that corresponds to the path
* @return this
*/
public Options filesystem(FileSystem fs) {
this.fs = fs;
return this;
}
/**
* Should the output be compressed?
* @param isCompressed is the output compressed?
* @return this
*/
public Options isCompressed(boolean isCompressed) {
this.isCompressed = isCompressed;
return this;
}
/**
* Provide the table properties for the table.
* @param properties the table's properties
* @return this
*/
public Options tableProperties(Properties properties) {
this.properties = properties;
return this;
}
/**
* Provide the MapReduce reporter.
* @param reporter the reporter object
* @return this
*/
public Options reporter(Reporter reporter) {
this.reporter = reporter;
return this;
}
/**
* The minimum write id that is included in this file.
* @param min minimum write id
* @return this
*/
public Options minimumWriteId(long min) {
this.minimumWriteId = min;
return this;
}
/**
* The maximum write id that is included in this file.
* @param max maximum write id
* @return this
*/
public Options maximumWriteId(long max) {
this.maximumWriteId = max;
return this;
}
/**
* The bucketId that is included in this file.
* @param bucket the bucketId number
* @return this
*/
public Options bucket(int bucket) {
this.bucketId = bucket;
return this;
}
/**
* Multiple inserts into legacy (pre-acid) tables can generate multiple copies of each bucket
* file.
* @see org.apache.hadoop.hive.ql.exec.Utilities#COPY_KEYWORD
* @param copyNumber the number of the copy ( > 0)
* @return this
*/
public Options copyNumber(int copyNumber) {
this.copyNumber = copyNumber;
return this;
}
/**
* Whether it should use the old style (0000000_0) filenames.
* @param value should use the old style names
* @return this
*/
Options setOldStyle(boolean value) {
oldStyle = value;
return this;
}
/**
* Which column the row id field is in.
* @param recIdCol the index of the column that holds the record identifier, or -1 if the rows have no record id
* @return this
*/
public Options recordIdColumn(int recIdCol) {
this.recIdCol = recIdCol;
return this;
}
/**
* Temporary switch while we are in development that replaces the
* implementation with a dummy one that just prints to the given stream.
* @param stream the stream to print to
* @return this
*/
public Options useDummy(PrintStream stream) {
this.dummyStream = stream;
return this;
}
/**
* This can be set to -1 to make the system generate old style (delta_xxxx_yyyy) file names.
* This is primarily needed for testing to make sure 1.3 code can still read files created
* by older code. Also used by the Compactor.
* @param id the statement id, unique within a transaction
* @return this
* @since 1.3.0
*/
public Options statementId(int id) {
if(id >= AcidUtils.MAX_STATEMENTS_PER_TXN) {
throw new RuntimeException("Too many statements for writeId: " + maximumWriteId);
}
if(id < -1) {
throw new IllegalArgumentException("Illegal statementId value: " + id);
}
this.statementId = id;
return this;
}
/**
* @param p where the data for this operation will eventually end up;
* basically the table or partition directory in the file system
* @return this
*/
public Options finalDestination(Path p) {
this.finalDestination = p;
return this;
}
public Configuration getConfiguration() {
return configuration;
}
public FileSystem getFilesystem() {
return fs;
}
public ObjectInspector getInspector() {
return inspector;
}
public boolean isCompressed() {
return isCompressed;
}
public Properties getTableProperties() {
return properties;
}
public Reporter getReporter() {
return reporter;
}
public long getMinimumWriteId() {
return minimumWriteId;
}
public long getMaximumWriteId() {
return maximumWriteId;
}
public boolean isWritingBase() {
return writingBase;
}
public boolean isWritingDeleteDelta() {
return writingDeleteDelta;
}
public int getBucketId() {
return bucketId;
}
public int getRecordIdColumn() {
return recIdCol;
}
public PrintStream getDummyStream() {
return dummyStream;
}
boolean getOldStyle() {
return oldStyle;
}
public int getStatementId() {
return statementId;
}
public int getCopyNumber() {
return copyNumber;
}
public Path getFinalDestination() {
return finalDestination;
}
}
/**
* Create a RecordUpdater for inserting, updating, or deleting records.
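*
* A hedged usage sketch; it assumes {@code RecordUpdater} exposes
* {@code insert(long, Object)} and {@code close(boolean)}, and that {@code format},
* {@code partitionDir}, {@code options}, {@code writeId} and {@code row} are supplied
* by the caller:
* <pre>{@code
* RecordUpdater updater = format.getRecordUpdater(partitionDir, options);
* updater.insert(writeId, row);   // append one new row under the given write id
* updater.close(false);           // false = normal close, not an abort
* }</pre>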
* @param path the partition directory name
* @param options the options for the writer
* @return the RecordUpdater for the output file
*/
public RecordUpdater getRecordUpdater(Path path,
Options options) throws IOException;
/**
* Create a raw writer for ACID events.
* This is only intended for the compactor.
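*
* A hedged sketch of compactor-style usage; {@code format}, {@code conf}, {@code highWaterMark},
* {@code tableDir} and {@code event} are placeholders, and it assumes the returned
* {@link RecordWriter} exposes {@code write(Writable)} and {@code close(boolean)}:
* <pre>{@code
* Options options = new Options(conf)
*     .writingBase(true)              // the compactor is producing a base directory
*     .maximumWriteId(highWaterMark)
*     .finalDestination(tableDir);
* RecordWriter writer = format.getRawRecordWriter(tableDir, options);
* writer.write(event);                // event is an already-encoded ACID row
* writer.close(false);
* }</pre>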
* @param path the root directory
* @param options options for writing the file
* @return a record writer
* @throws IOException if the writer cannot be created
*/
public RecordWriter getRawRecordWriter(Path path,
Options options) throws IOException;
}