
org.apache.hudi.io.FlinkMergeAndReplaceHandleWithChangeLog

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.io;

import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieOperation;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.table.cdc.HoodieCDCUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.table.HoodieTable;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

/**
 * A flink merge and replace handle that supports logging change logs.
 *
 * <p>The CDC-related logic is copied from {@link HoodieMergeHandleWithChangeLog};
 * we should refactor it out once a good abstraction exists.
 */
public class FlinkMergeAndReplaceHandleWithChangeLog<T, I, K, O>
    extends FlinkMergeAndReplaceHandle<T, I, K, O> {
  private static final Logger LOG = LoggerFactory.getLogger(FlinkMergeAndReplaceHandleWithChangeLog.class);

  private final HoodieCDCLogger cdcLogger;

  public FlinkMergeAndReplaceHandleWithChangeLog(HoodieWriteConfig config, String instantTime, HoodieTable<T, I, K, O> hoodieTable,
                                                 Iterator<HoodieRecord<T>> recordItr, String partitionPath, String fileId,
                                                 TaskContextSupplier taskContextSupplier, StoragePath basePath) {
    super(config, instantTime, hoodieTable, recordItr, partitionPath, fileId, taskContextSupplier, basePath);
    this.cdcLogger = new HoodieCDCLogger(
        instantTime,
        config,
        hoodieTable.getMetaClient().getTableConfig(),
        partitionPath,
        getStorage(),
        getWriterSchema(),
        createLogWriter(instantTime, HoodieCDCUtils.CDC_LOGFILE_SUFFIX),
        IOUtils.getMaxMemoryPerPartitionMerge(taskContextSupplier, config));
  }

  protected boolean writeUpdateRecord(HoodieRecord<T> newRecord,
                                      HoodieRecord<T> oldRecord,
                                      Option<HoodieRecord> combineRecordOpt,
                                      Schema writerSchema) throws IOException {
    // TODO [HUDI-5019] Remove these unnecessary newInstance invocations
    Option<HoodieRecord> savedCombineRecordOp = combineRecordOpt.map(HoodieRecord::newInstance);
    final boolean result = super.writeUpdateRecord(newRecord, oldRecord, combineRecordOpt, writerSchema);
    if (result) {
      boolean isDelete = HoodieOperation.isDelete(newRecord.getOperation());
      Option<IndexedRecord> avroRecordOpt = savedCombineRecordOp.flatMap(
          r -> toAvroRecord(r, writerSchema, config.getPayloadConfig().getProps()));
      // Log the before image (the old record) and, unless this is a delete, the after image.
      cdcLogger.put(newRecord, (GenericRecord) oldRecord.getData(), isDelete ? Option.empty() : avroRecordOpt);
    }
    return result;
  }

  protected void writeInsertRecord(HoodieRecord<T> newRecord) throws IOException {
    Schema schema = preserveMetadata ? writeSchemaWithMetaFields : writeSchema;
    // TODO Remove these unnecessary newInstance invocations
    HoodieRecord<T> savedRecord = newRecord.newInstance();
    super.writeInsertRecord(newRecord);
    if (!HoodieOperation.isDelete(newRecord.getOperation())) {
      // Inserts have no before image, so only the after image is logged.
      cdcLogger.put(newRecord, null, savedRecord.toIndexedRecord(schema, config.getPayloadConfig().getProps()).map(HoodieAvroIndexedRecord::getData));
      newRecord.deflate();
    }
  }

  @Override
  public List<WriteStatus> close() {
    List<WriteStatus> writeStatuses = super.close();
    cdcLogger.close();
    // Attach the CDC log-file stats to the write status of the base file.
    HoodieWriteStat stat = writeStatuses.get(0).getStat();
    stat.setCdcStats(cdcLogger.getCDCWriteStats());
    return writeStatuses;
  }
}
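For context, here is a minimal sketch of how a handle like this might be driven. Only the constructor and close() signatures are taken from the class above; the wrapper class, the method name, and the assumption that the surrounding Flink write task supplies the config, table, record iterator, and paths are all hypothetical, and the real pipeline additionally replays the old base file through the handle between construction and close().

// Hypothetical driver, for illustration only. The ChangeLogHandleExample
// class and mergeWithChangeLog method are assumptions; only the handle's
// public constructor and close() come from the source above.
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.FlinkMergeAndReplaceHandleWithChangeLog;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.table.HoodieTable;

import java.util.Iterator;
import java.util.List;

public class ChangeLogHandleExample {

  // All parameters are assumed to be handed in by the surrounding write task;
  // this method only wires them into the handle.
  public static <T, I, K, O> List<WriteStatus> mergeWithChangeLog(
      HoodieWriteConfig config,
      String instantTime,
      HoodieTable<T, I, K, O> hoodieTable,
      Iterator<HoodieRecord<T>> recordItr,
      String partitionPath,
      String fileId,
      TaskContextSupplier taskContextSupplier,
      StoragePath basePath) {
    FlinkMergeAndReplaceHandleWithChangeLog<T, I, K, O> handle =
        new FlinkMergeAndReplaceHandleWithChangeLog<>(
            config, instantTime, hoodieTable, recordItr,
            partitionPath, fileId, taskContextSupplier, basePath);
    // In the real pipeline a merge runner would feed the old base-file
    // records through the handle here, triggering writeUpdateRecord /
    // writeInsertRecord and the CDC logging shown above.
    // close() flushes the new base file, closes the CDC logger, and attaches
    // the CDC write stats to the first WriteStatus.
    return handle.close();
  }
}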