/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.io;

import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodiePartitionMetadata;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.model.HoodieWriteStat;
import com.uber.hoodie.common.model.HoodieWriteStat.RuntimeStats;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.common.util.ReflectionUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieInsertException;
import com.uber.hoodie.io.storage.HoodieStorageWriter;
import com.uber.hoodie.io.storage.HoodieStorageWriterFactory;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.TaskContext;
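
/**
* IO handle that creates a brand-new data file for a single partition, identified by a freshly
* generated UUID file id, and writes the incoming records into it.
* <p>
* A minimal usage sketch; the config, commitTime, table, schema and record iterator below are
* assumptions supplied by the caller, not part of this class:
* <pre>
* HoodieCreateHandle handle = new HoodieCreateHandle(config, commitTime, table, partitionPath);
* while (records.hasNext()) {
*   HoodieRecord record = records.next();
*   if (!handle.canWrite(record)) {
*     break; // file is full, or the record belongs to another partition: open a new handle
*   }
*   // getInsertValue(schema) may throw IOException and yields Optional.empty() for deletes
*   handle.write(record, record.getData().getInsertValue(schema));
* }
* WriteStatus writeStatus = handle.close();
* </pre>
*/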
public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOHandle<T> {

private static Logger logger = LogManager.getLogger(HoodieCreateHandle.class);
private final WriteStatus status;
private final HoodieStorageWriter<IndexedRecord> storageWriter;
private final Path path;
private Path tempPath = null;
private long recordsWritten = 0;
private long recordsDeleted = 0;

public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
String partitionPath) {
super(config, commitTime, hoodieTable);
this.status = ReflectionUtils.loadClass(config.getWriteStatusClassName());
status.setFileId(UUID.randomUUID().toString());
status.setPartitionPath(partitionPath);
final int sparkPartitionId = TaskContext.getPartitionId();
this.path = makeNewPath(partitionPath, sparkPartitionId, status.getFileId());
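// If configured, stage the write under a temporary path (unique per stage and task attempt)
// instead of writing directly into the final partition location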
if (config.shouldUseTempFolderForCopyOnWriteForCreate()) {
this.tempPath = makeTempPath(partitionPath, sparkPartitionId, status.getFileId(),
TaskContext.get().stageId(), TaskContext.get().taskAttemptId());
}
try {
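// Ensure the partition metadata file exists for this partition before writing any data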
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, commitTime,
new Path(config.getBasePath()), new Path(config.getBasePath(), partitionPath));
partitionMetadata.trySave(TaskContext.getPartitionId());
this.storageWriter = HoodieStorageWriterFactory
.getStorageWriter(commitTime, getStorageWriterPath(), hoodieTable, config, schema);
} catch (IOException e) {
throw new HoodieInsertException(
"Failed to initialize HoodieStorageWriter for path " + getStorageWriterPath(), e);
}
logger.info("New InsertHandle for partition :" + partitionPath);
}
/**
* Determines whether we can accept the incoming record into the current file, depending on:
* <p>
* - Whether it belongs to the same partitionPath as the records already written
* - Whether the bytes written to the current file are still below the configured max file size
*/
public boolean canWrite(HoodieRecord record) {
return storageWriter.canWrite() && record.getPartitionPath().equals(status.getPartitionPath());
}

/**
* Perform the actual writing of the given record into the backing file.
*/
public void write(HoodieRecord record, Optional<IndexedRecord> avroRecord) {
Optional<Map<String, String>> recordMetadata = record.getData().getMetadata();
try {
if (avroRecord.isPresent()) {
storageWriter.writeAvroWithMetadata(avroRecord.get(), record);
// update the new location of record, so we know where to find it next
record.setNewLocation(new HoodieRecordLocation(commitTime, status.getFileId()));
recordsWritten++;
} else {
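// The payload produced no avro record, signaling a delete: count it but write nothing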
recordsDeleted++;
}
status.markSuccess(record, recordMetadata);
// Deflate the record payload only after marking success, so users can still access
// the payload while the record is being marked successful.
record.deflate();
} catch (Throwable t) {
// Not throwing exception from here, since we don't want to fail the entire job
// for a single record
status.markFailure(record, t, recordMetadata);
logger.error("Error writing record " + record, t);
}
}

/**
* Performs actions to durably persist the current changes and returns a WriteStatus object.
*/
public WriteStatus close() {
logger.info("Closing the file " + status.getFileId() + " as we are done with all the records "
+ recordsWritten);
try {
storageWriter.close();
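// Roll up the statistics for this file (record counts, bytes written, errors, timing)
// into a HoodieWriteStat attached to the returned WriteStatus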
HoodieWriteStat stat = new HoodieWriteStat();
stat.setNumWrites(recordsWritten);
stat.setNumDeletes(recordsDeleted);
stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
stat.setFileId(status.getFileId());
stat.setPaths(new Path(config.getBasePath()), path, tempPath);
stat.setTotalWriteBytes(FSUtils.getFileSize(fs, getStorageWriterPath()));
stat.setTotalWriteErrors(status.getFailedRecords().size());
RuntimeStats runtimeStats = new RuntimeStats();
runtimeStats.setTotalCreateTime(timer.endTimer());
stat.setRuntimeStats(runtimeStats);
status.setStat(stat);
return status;
} catch (IOException e) {
throw new HoodieInsertException("Failed to close the Insert Handle for path " + path, e);
}
}

private Path getStorageWriterPath() {
// Use tempPath for storage writer if possible
return (this.tempPath == null) ? this.path : this.tempPath;
}
}