org.apache.hcatalog.mapreduce.FileOutputCommitterContainer Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hcatalog.mapreduce;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hive.hcatalog.mapreduce.HCatMapRedUtil;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus.State;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hcatalog.common.ErrorType;
import org.apache.hcatalog.common.HCatConstants;
import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.common.HCatUtil;
import org.apache.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hcatalog.data.schema.HCatSchema;
import org.apache.hcatalog.data.schema.HCatSchemaUtils;
import org.apache.hcatalog.har.HarOutputCommitterPostProcessor;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Part of the FileOutput*Container classes
* See {@link FileOutputFormatContainer} for more information
* @deprecated Use/modify {@link org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer} instead
*/
class FileOutputCommitterContainer extends OutputCommitterContainer {
private static final String TEMP_DIR_NAME = "_temporary";
private static final String LOGS_DIR_NAME = "_logs";
private static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class);
private final boolean dynamicPartitioningUsed;
private boolean partitionsDiscovered;
private Map> partitionsDiscoveredByPath;
private Map contextDiscoveredByPath;
private final HCatStorageHandler cachedStorageHandler;
HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor();
private String ptnRootLocation = null;
private OutputJobInfo jobInfo = null;
/**
* @param context current JobContext
* @param baseCommitter OutputCommitter to contain
* @throws IOException
*/
public FileOutputCommitterContainer(JobContext context,
org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException {
super(context, baseCommitter);
jobInfo = HCatOutputFormat.getJobInfo(context);
dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed();
this.partitionsDiscovered = !dynamicPartitioningUsed;
cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
}
/**
 * Aborts the task through the wrapped committer.
 * Does nothing when dynamic partitioning is in use.
 */
@Override
public void abortTask(TaskAttemptContext context) throws IOException {
    if (dynamicPartitioningUsed) {
        return;
    }
    getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context));
}
/**
 * Commits the task through the wrapped committer after setting the work
 * output path. Does nothing when dynamic partitioning is in use.
 */
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    if (dynamicPartitioningUsed) {
        return;
    }
    // See HCATALOG-499
    FileOutputFormatContainer.setWorkOutputPath(context);
    getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context));
}
/**
 * Asks the wrapped committer whether the task needs a commit.
 * Always answers {@code false} when dynamic partitioning is in use.
 */
@Override
public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    if (dynamicPartitioningUsed) {
        // called explicitly through FileRecordWriterContainer.close() if dynamic - return false by default
        return false;
    }
    return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context));
}
/**
 * Sets the job up through the wrapped committer, provided one is present and
 * dynamic partitioning is not in use.
 */
@Override
public void setupJob(JobContext context) throws IOException {
    if (getBaseOutputCommitter() == null || dynamicPartitioningUsed) {
        // in dynamic usecase, called through FileRecordWriterContainer
        return;
    }
    getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context));
}
/**
 * Sets the task up through the wrapped committer.
 * Does nothing when dynamic partitioning is in use.
 */
@Override
public void setupTask(TaskAttemptContext context) throws IOException {
    if (dynamicPartitioningUsed) {
        return;
    }
    getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context));
}
/**
 * Aborts the job and removes its output location.
 *
 * With dynamic partitioning, partitions are discovered first and every
 * discovered per-partition context is aborted through the committer obtained
 * from its own JobConf. Otherwise the wrapped committer (if any) is asked to
 * abort. Afterwards the job location - or, for dynamic partitioning, the
 * ancestor directory computed by getPartitionRootLocation() - is deleted
 * recursively. Delegation tokens are cancelled in all cases.
 *
 * @param jobContext context of the job being aborted
 * @param state state passed through to the underlying committers
 * @throws IOException on failure of any step; exceptions from per-partition
 *         aborts are wrapped in an IOException
 */
@Override
public void abortJob(JobContext jobContext, State state) throws IOException {
    try {
        if (dynamicPartitioningUsed) {
            discoverPartitions(jobContext);
        }
        final org.apache.hadoop.mapred.JobContext legacyContext =
            HCatMapRedUtil.createJobContext(jobContext);
        final boolean hasBaseCommitter = getBaseOutputCommitter() != null;

        if (dynamicPartitioningUsed) {
            for (JobContext partitionContext : contextDiscoveredByPath.values()) {
                try {
                    JobConf partitionConf = new JobConf(partitionContext.getConfiguration());
                    partitionConf.getOutputCommitter().abortJob(partitionContext, state);
                } catch (Exception e) {
                    throw new IOException(e);
                }
            }
        } else if (hasBaseCommitter) {
            getBaseOutputCommitter().abortJob(legacyContext, state);
        }

        final OutputJobInfo outputInfo = HCatOutputFormat.getJobInfo(jobContext);
        final String cleanupLocation = dynamicPartitioningUsed
            ? getPartitionRootLocation(outputInfo.getLocation(),
                outputInfo.getTableInfo().getTable().getPartitionKeysSize())
            : outputInfo.getLocation();
        final Path cleanupDir = new Path(cleanupLocation);
        final FileSystem cleanupFs = cleanupDir.getFileSystem(jobContext.getConfiguration());
        LOG.info("Job failed. Cleaning up temporary directory [{}].", cleanupDir);
        cleanupFs.delete(cleanupDir, true);
    } finally {
        cancelDelegationTokens(jobContext);
    }
}
/** Name of the marker file that commitJob() creates in the output directory. */
public static final String SUCCEEDED_FILE_NAME = "_SUCCESS";
/** Configuration key that switches the marker file on. */
static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER =
    "mapreduce.fileoutputcommitter.marksuccessfuljobs";

/**
 * Reads the marker-file switch from the configuration.
 *
 * @param conf configuration to consult
 * @return the configured value, or {@code false} when the key is unset
 */
private static boolean getOutputDirMarking(Configuration conf) {
    final boolean markingEnabled = conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, false);
    return markingEnabled;
}
/**
 * Commits the job, registers its partitions and optionally drops a
 * {@code _SUCCESS} marker into the output location.
 *
 * With dynamic partitioning, partitions are discovered and each discovered
 * per-partition context is committed through the committer obtained from its
 * own JobConf; otherwise the wrapped committer (if any) commits the job.
 * Delegation tokens are cancelled in all cases.
 *
 * @param jobContext context of the job being committed
 * @throws IOException on failure of any step
 */
@Override
public void commitJob(JobContext jobContext) throws IOException {
    try {
        if (dynamicPartitioningUsed) {
            discoverPartitions(jobContext);
            // Commit each partition so it gets moved out of the job work dir
            for (JobContext partitionContext : contextDiscoveredByPath.values()) {
                JobConf partitionConf = new JobConf(partitionContext.getConfiguration());
                partitionConf.getOutputCommitter().commitJob(partitionContext);
            }
        }
        if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
            getBaseOutputCommitter().commitJob(HCatMapRedUtil.createJobContext(jobContext));
        }
        registerPartitions(jobContext);

        // create _SUCCESS FILE if so requested.
        final OutputJobInfo outputInfo = HCatOutputFormat.getJobInfo(jobContext);
        if (!getOutputDirMarking(jobContext.getConfiguration())) {
            return;
        }
        final Path outputDir = new Path(outputInfo.getLocation());
        final FileSystem outputFs = outputDir.getFileSystem(jobContext.getConfiguration());
        if (!outputFs.exists(outputDir)) {
            return;
        }
        // create a file in the folder to mark it
        final Path marker = new Path(outputDir, SUCCEEDED_FILE_NAME);
        if (!outputFs.exists(marker)) {
            // may have been created by baseCommitter.commitJob()
            outputFs.create(marker).close();
        }
    } finally {
        cancelDelegationTokens(jobContext);
    }
}
/**
 * Not supported: always fails.
 *
 * @throws IOException unconditionally
 */
@Override
public void cleanupJob(JobContext context) throws IOException {
    final String message = "The method cleanupJob is deprecated and should not be called.";
    throw new IOException(message);
}
/**
 * Returns the directory that lies {@code numPtnKeys} levels above
 * {@code ptnLocn}. The result of the first call is cached in
 * {@code ptnRootLocation} and returned by every later call, whatever the
 * arguments.
 *
 * @param ptnLocn location of a partition
 * @param numPtnKeys number of parent steps to take from {@code ptnLocn}
 * @return the cached root location as a string
 */
private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) {
    if (ptnRootLocation != null) {
        return ptnRootLocation;
    }
    // we only need to calculate it once, it'll be the same for other partitions in this job.
    Path ancestor = new Path(ptnLocn);
    int levelsLeft = numPtnKeys;
    while (levelsLeft > 0) {
        ancestor = ancestor.getParent();
        levelsLeft--;
    }
    ptnRootLocation = ancestor.toString();
    return ptnRootLocation;
}
/**
 * Generate partition metadata object to be used to add to metadata.
 *
 * The partition copies the table's storage descriptor, takes its columns from
 * {@code outputSchema}, and gets its values and parameters from
 * {@code partKVs} and {@code params}. Permissions are applied to the
 * partition directories along the way, and finally the storage descriptor
 * location is set.
 *
 * @param context The job context.
 * @param jobInfo The OutputJobInfo.
 * @param partLocnRoot The table-equivalent location root of the partition
 *                     (temporary dir if dynamic partition, table dir if static)
 * @param partKVs The keyvalue pairs that form the partition
 * @param outputSchema The output schema for the partition
 * @param params The parameters to store inside the partition
 * @param table The Table metadata object under which this Partition will reside
 * @param fs FileSystem object to operate on the underlying filesystem
 * @param grpName Group name that owns the table dir
 * @param perms FsPermission that's the default permission of the table dir.
 * @return Constructed Partition metadata object
 * @throws java.io.IOException if a filesystem operation or the HAR post-processing fails
 */
private Partition constructPartition(
    JobContext context, OutputJobInfo jobInfo,
    String partLocnRoot, Map<String, String> partKVs,
    HCatSchema outputSchema, Map<String, String> params,
    Table table, FileSystem fs,
    String grpName, FsPermission perms) throws IOException {
    Partition partition = new Partition();
    partition.setDbName(table.getDbName());
    partition.setTableName(table.getTableName());
    // Copy the table's storage descriptor so the partition can be changed independently.
    partition.setSd(new StorageDescriptor(table.getTTable().getSd()));

    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    for (HCatFieldSchema fieldSchema : outputSchema.getFields()) {
        fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema));
    }
    partition.getSd().setCols(fields);
    partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));
    partition.setParameters(params);

    // Sets permissions on partition dirs and files.
    Path partPath;
    String jobLocation = jobInfo.getLocation();
    boolean external = Boolean.parseBoolean((String) table.getProperty("EXTERNAL"));
    if (external && jobLocation != null && jobLocation.length() > 0) {
        // honor external table that specifies the location
        partPath = new Path(jobLocation);
    } else {
        partPath = new Path(partLocnRoot);
        boolean firstKey = true;
        for (FieldSchema partKey : table.getPartitionKeys()) {
            // Every intermediate partition directory gets the permissions;
            // partLocnRoot itself is skipped.
            if (!firstKey) {
                applyGroupAndPerms(fs, partPath, perms, grpName, false);
            }
            firstKey = false;
            partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
        }
    }

    // Apply the group and permissions to the leaf partition and files.
    // Need not bother in case of HDFS as permission is taken care of by setting UMask
    if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) {
        applyGroupAndPerms(fs, partPath, perms, grpName, true);
    }

    // Set the location in the StorageDescriptor
    if (dynamicPartitioningUsed) {
        String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs);
        if (harProcessor.isEnabled()) {
            harProcessor.exec(context, partition, partPath);
            partition.getSd().setLocation(
                harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination)));
        } else {
            partition.getSd().setLocation(dynamicPartitionDestination);
        }
    } else {
        partition.getSd().setLocation(partPath.toString());
    }
    return partition;
}
/**
 * Sets {@code permission} on {@code dir} and, when {@code recursive} is set,
 * on everything beneath it.
 *
 * @param fs filesystem that holds {@code dir}
 * @param dir path to update
 * @param permission permission to apply
 * @param group accepted and passed down the recursion, but not applied to
 *              any path by this method
 * @param recursive whether to descend into {@code dir}
 * @throws IOException if a filesystem call fails
 */
private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission,
                                String group, boolean recursive)
    throws IOException {
    fs.setPermission(dir, permission);
    if (!recursive) {
        return;
    }
    for (FileStatus child : fs.listStatus(dir)) {
        // A plain file only gets its own permission set; a directory is descended into.
        applyGroupAndPerms(fs, child.getPath(), permission, group, child.isDir());
    }
}
/**
 * Builds the final location of a dynamic partition by appending one path
 * component per partition key (lower-cased key name) to the table's
 * storage-descriptor location.
 *
 * @param table table whose location and partition keys are used
 * @param partKVs key/value pairs of the partition
 * @return the resulting partition path as a string
 */
private String getFinalDynamicPartitionDestination(Table table, Map<String, String> partKVs) {
    // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA  ->
    // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA
    Path partPath = new Path(table.getTTable().getSd().getLocation());
    for (FieldSchema partKey : table.getPartitionKeys()) {
        partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
    }
    return partPath.toString();
}
private Map getStorerParameterMap(StorerInfo storer) {
Map params = new HashMap();
//Copy table level hcat.* keys to the partition
for (Entry
© 2015 - 2025 Weber Informatics LLC | Privacy Policy