com.uber.hoodie.io.HoodieAppendHandle
/*
 * Copyright (c) 2016 Uber Technologies, Inc. ([email protected])
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.io;

import com.clearspring.analytics.util.Lists;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieDeltaWriteStat;
import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.table.log.HoodieLogFormat;
import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.common.util.HoodieAvroUtils;
import com.uber.hoodie.common.util.ReflectionUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieAppendException;
import com.uber.hoodie.exception.HoodieUpsertException;
import com.uber.hoodie.table.HoodieTable;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.TaskContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;

/**
 * IO Operation to append data onto an existing file.
 *
 * @param <T> payload type of the records being appended
 */
public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOHandle<T> {
    private static Logger logger = LogManager.getLogger(HoodieAppendHandle.class);
    private static AtomicLong recordIndex = new AtomicLong(1);

    private final WriteStatus writeStatus;
    private final String fileId;
    private String partitionPath;
    private List<HoodieRecord<T>> records;
    private long recordsWritten = 0;
    private long recordsDeleted = 0;
    private HoodieLogFile currentLogFile;
    private Writer writer;

    public HoodieAppendHandle(HoodieWriteConfig config,
                              String commitTime,
                              HoodieTable<T> hoodieTable,
                              String fileId,
                              Iterator<HoodieRecord<T>> recordItr) {
        super(config, commitTime, hoodieTable);
        WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
        writeStatus.setStat(new HoodieDeltaWriteStat());
        this.writeStatus = writeStatus;
        this.fileId = fileId;
        init(recordItr);
    }
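    /*
     * A minimal usage sketch (hypothetical caller; `config`, `commitTime`, `table`,
     * `fileId` and `recordItr` are assumed to already be in scope, and `MyPayload`
     * stands in for a concrete HoodieRecordPayload implementation):
     *
     *   HoodieAppendHandle<MyPayload> handle =
     *       new HoodieAppendHandle<>(config, commitTime, table, fileId, recordItr);
     *   handle.doAppend();                            // flush buffered records as log blocks
     *   handle.close();                               // close the log writer, finalize stats
     *   WriteStatus status = handle.getWriteStatus(); // inspect write/delete/error counts
     */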

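    /**
     * Buffers the incoming records; on the first record, resolves the partition path,
     * locates the latest base file for this fileId, and opens a log writer over its commit.
     */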
    private void init(Iterator<HoodieRecord<T>> recordItr) {
        List<HoodieRecord<T>> records = Lists.newArrayList();
        recordItr.forEachRemaining(record -> {
            records.add(record);
            // extract some information from the first record
            if (partitionPath == null) {
                partitionPath = record.getPartitionPath();
                // HACK(vc) This also assumes a base file. It will break, if appending without one.
                String latestValidFilePath =
                    fileSystemView.getLatestDataFiles(record.getPartitionPath())
                            .filter(dataFile -> dataFile.getFileId().equals(fileId))
                            .findFirst().get().getFileName();
                String baseCommitTime = FSUtils.getCommitTime(latestValidFilePath);
                writeStatus.getStat().setPrevCommit(baseCommitTime);
                writeStatus.setFileId(fileId);
                writeStatus.setPartitionPath(record.getPartitionPath());
                writeStatus.getStat().setFileId(fileId);

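                // Open (or roll over) the delta log file for this fileId, anchored on the base commit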
                try {
                    this.writer = HoodieLogFormat.newWriterBuilder()
                        .onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath))
                        .withFileId(fileId).overBaseCommit(baseCommitTime)
                        .withFs(fs).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
                    this.currentLogFile = writer.getLogFile();
                    HoodieDeltaWriteStat deltaWriteStat = (HoodieDeltaWriteStat) writeStatus.getStat();
                    deltaWriteStat.setLogVersion(currentLogFile.getLogVersion());
                    deltaWriteStat.setLogOffset(writer.getCurrentSize());
                } catch (Exception e) {
                    logger.error("Error initializing append handle at commit " + commitTime, e);
                    writeStatus.setGlobalError(e);
                    throw new HoodieUpsertException(
                        "Failed to initialize HoodieAppendHandle for FileId: " + fileId
                            + " on commit " + commitTime + " on HDFS path " + hoodieTable
                            .getMetaClient().getBasePath() + "/" + partitionPath, e);
                }
                Path path = new Path(record.getPartitionPath(),
                        FSUtils.makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId));
                writeStatus.getStat().setPath(path.toString());
            }
            // update the new location of the record, so we know where to find it next
            record.setNewLocation(new HoodieRecordLocation(commitTime, fileId));
        });
        this.records = records;
    }

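    /**
     * Converts the payload into an Avro {@link IndexedRecord}, stamping the Hoodie key and
     * commit metadata into it. An empty result means the payload resolved to a delete.
     */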
    private Optional<IndexedRecord> getIndexedRecord(HoodieRecord<T> hoodieRecord) {
        Optional<Map<String, String>> recordMetadata = hoodieRecord.getData().getMetadata();
        try {
            Optional<IndexedRecord> avroRecord = hoodieRecord.getData().getInsertValue(schema);

            if (avroRecord.isPresent()) {
                String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(),
                        recordIndex.getAndIncrement());
                HoodieAvroUtils
                        .addHoodieKeyToRecord((GenericRecord) avroRecord.get(), hoodieRecord.getRecordKey(),
                                hoodieRecord.getPartitionPath(), fileId);
                HoodieAvroUtils
                        .addCommitMetadataToRecord((GenericRecord) avroRecord.get(), commitTime, seqId);
                recordsWritten++;
            } else {
                recordsDeleted++;
            }

            hoodieRecord.deflate();
            writeStatus.markSuccess(hoodieRecord, recordMetadata);
            return avroRecord;
        } catch (Exception e) {
            logger.error("Error writing record " + hoodieRecord, e);
            writeStatus.markFailure(hoodieRecord, e, recordMetadata);
        }
        return Optional.empty();
    }

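    /**
     * Materializes all buffered records and appends them to the log: live records go into
     * an Avro data block, and keys whose payload resolved to empty go into a delete block.
     */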
    public void doAppend() {
        List<IndexedRecord> recordList = new ArrayList<>();
        List<String> keysToDelete = new ArrayList<>();
        records.forEach(record -> {
            Optional<IndexedRecord> indexedRecord = getIndexedRecord(record);
            if (indexedRecord.isPresent()) {
                recordList.add(indexedRecord.get());
            } else {
                keysToDelete.add(record.getRecordKey());
            }
        });
        try {
            writer = writer.appendBlock(new HoodieAvroDataBlock(recordList, schema));
            if (!keysToDelete.isEmpty()) {
                writer = writer.appendBlock(new HoodieDeleteBlock(keysToDelete.stream().toArray(String[]::new)));
            }
        } catch (Exception e) {
            throw new HoodieAppendException(
                "Failed while appending records to " + currentLogFile.getPath(), e);
        }
    }

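    /**
     * Closes the underlying log writer and rolls the final write/delete/error counts into
     * the {@link WriteStatus} stat.
     */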
    public void close() {
        try {
            if (writer != null) {
                writer.close();
            }
            writeStatus.getStat().setNumWrites(recordsWritten);
            writeStatus.getStat().setNumDeletes(recordsDeleted);
            writeStatus.getStat().setTotalWriteErrors(writeStatus.getFailedRecords().size());
        } catch (IOException e) {
            throw new HoodieUpsertException("Failed to close AppendHandle", e);
        }
    }

    public WriteStatus getWriteStatus() {
        return writeStatus;
    }
}