All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.client.utils.TransactionUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.client.utils;

import org.apache.hudi.client.transaction.ConcurrentOperation;
import org.apache.hudi.client.transaction.ConflictResolutionStrategy;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieWriteConflictException;
import org.apache.hudi.table.HoodieTable;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class TransactionUtils {

  private static final Logger LOG = LoggerFactory.getLogger(TransactionUtils.class);

  /**
   * Resolve any write conflicts when committing data.
   *
   * @param table
   * @param currentTxnOwnerInstant
   * @param thisCommitMetadata
   * @param config
   * @param lastCompletedTxnOwnerInstant
   * @param pendingInstants
   *
   * @return
   * @throws HoodieWriteConflictException
   */
  public static Option resolveWriteConflictIfAny(
      final HoodieTable table,
      final Option currentTxnOwnerInstant,
      final Option thisCommitMetadata,
      final HoodieWriteConfig config,
      Option lastCompletedTxnOwnerInstant,
      boolean reloadActiveTimeline,
      Set pendingInstants) throws HoodieWriteConflictException {
    WriteOperationType operationType = thisCommitMetadata.map(HoodieCommitMetadata::getOperationType).orElse(null);
    if (config.needResolveWriteConflict(operationType)) {
      // deal with pendingInstants
      Stream completedInstantsDuringCurrentWriteOperation = getCompletedInstantsDuringCurrentWriteOperation(table.getMetaClient(), pendingInstants);

      ConflictResolutionStrategy resolutionStrategy = config.getWriteConflictResolutionStrategy();
      if (reloadActiveTimeline) {
        table.getMetaClient().reloadActiveTimeline();
      }
      Stream instantStream = Stream.concat(resolutionStrategy.getCandidateInstants(
          table.getMetaClient(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant),
              completedInstantsDuringCurrentWriteOperation);

      final ConcurrentOperation thisOperation = new ConcurrentOperation(currentTxnOwnerInstant.get(), thisCommitMetadata.orElseGet(HoodieCommitMetadata::new));
      instantStream.forEach(instant -> {
        try {
          ConcurrentOperation otherOperation = new ConcurrentOperation(instant, table.getMetaClient());
          if (resolutionStrategy.hasConflict(thisOperation, otherOperation)) {
            LOG.info("Conflict encountered between current instant = " + thisOperation + " and instant = "
                + otherOperation + ", attempting to resolve it...");
            resolutionStrategy.resolveConflict(table, thisOperation, otherOperation);
          }
        } catch (IOException io) {
          throw new HoodieWriteConflictException("Unable to resolve conflict, if present", io);
        }
      });
      LOG.info("Successfully resolved conflicts, if any");

      return thisOperation.getCommitMetadataOption();
    }
    return thisCommitMetadata;
  }

  /**
   * Get the last completed transaction hoodie instant and {@link HoodieCommitMetadata#getExtraMetadata()}.
   *
   * @param metaClient
   * @return
   */
  public static Option>> getLastCompletedTxnInstantAndMetadata(
      HoodieTableMetaClient metaClient) {
    Option hoodieInstantOption = metaClient.getActiveTimeline().getCommitsTimeline()
        .filterCompletedInstants().lastInstant();
    return getHoodieInstantAndMetaDataPair(metaClient, hoodieInstantOption);
  }

  private static Option>> getHoodieInstantAndMetaDataPair(HoodieTableMetaClient metaClient, Option hoodieInstantOption) {
    try {
      if (hoodieInstantOption.isPresent()) {
        HoodieCommitMetadata commitMetadata = TimelineUtils.getCommitMetadata(hoodieInstantOption.get(), metaClient.getActiveTimeline());
        return Option.of(Pair.of(hoodieInstantOption.get(), commitMetadata.getExtraMetadata()));
      } else {
        return Option.empty();
      }
    } catch (IOException io) {
      throw new HoodieIOException("Unable to read metadata for instant " + hoodieInstantOption.get(), io);
    }
  }

  /**
   * Get InflightAndRequest instants.
   *
   * @param metaClient
   * @return
   */
  public static Set getInflightAndRequestedInstants(HoodieTableMetaClient metaClient) {
    // collect InflightAndRequest instants for deltaCommit/commit/compaction/clustering
    Set timelineActions = CollectionUtils
        .createImmutableSet(HoodieTimeline.REPLACE_COMMIT_ACTION, HoodieTimeline.COMPACTION_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.COMMIT_ACTION);
    return metaClient
        .getActiveTimeline()
        .getTimelineOfActions(timelineActions)
        .filterInflightsAndRequested()
        .getInstantsAsStream()
        .map(HoodieInstant::getTimestamp)
        .collect(Collectors.toSet());
  }

  public static Stream getCompletedInstantsDuringCurrentWriteOperation(HoodieTableMetaClient metaClient, Set pendingInstants) {
    // deal with pendingInstants
    // some pending instants maybe finished during current write operation,
    // we should check the conflict of those pending operation
    return metaClient
        .reloadActiveTimeline()
        .getCommitsTimeline()
        .filterCompletedInstants()
        .getInstantsAsStream()
        .filter(f -> pendingInstants.contains(f.getTimestamp()));
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy