All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hudi.cli.commands.HoodieSyncValidateCommand Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.cli.commands;

import org.apache.hudi.cli.HoodieCLI;
import org.apache.hudi.cli.utils.HiveUtil;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.exception.HoodieException;

import org.springframework.shell.standard.ShellComponent;
import org.springframework.shell.standard.ShellMethod;
import org.springframework.shell.standard.ShellOption;

import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;

import static org.apache.hudi.cli.utils.CommitUtil.countNewRecords;
import static org.apache.hudi.common.table.timeline.InstantComparison.GREATER_THAN;
import static org.apache.hudi.common.table.timeline.InstantComparison.compareTimestamps;

/**
 * CLI command that validates a sync between two Hudi tables by comparing their record counts.
 *
 * <p>The "source" table is the one returned by {@link HoodieCLI#getTableMetaClient()}; the "target" table is
 * {@code HoodieCLI.syncTableMetadata}. Record counts are obtained through {@link HiveUtil#countRecords}.
 */
@ShellComponent
public class HoodieSyncValidateCommand {

  /** Mode that counts all records of both tables. */
  private static final String MODE_COMPLETE = "complete";

  /** Mode that restricts counting using the {@code --partitionCount} option. */
  private static final String MODE_LATEST_PARTITIONS = "latestPartitions";

  /** Commit time used for a timeline that has no instants. */
  private static final String NO_COMMIT_TIME = "0";

  /**
   * Counts the records of the source and target tables through Hive and reports the difference, together with
   * the catch-up count for commits that exist only on the table whose latest commit is newer.
   *
   * @param mode           check mode, either {@code complete} or {@code latestPartitions}
   * @param srcDb          Hive database of the source table
   * @param tgtDb          Hive database of the target table
   * @param partitionCount number of recent partitions to validate; only used in {@code latestPartitions} mode
   * @param hiveServerUrl  Hive server URL to connect to
   * @param hiveUser       Hive user name
   * @param hivePass       Hive password
   * @return a human-readable description of the count difference
   * @throws HoodieException if no sync target table is set or {@code mode} is not a supported mode
   * @throws Exception       if counting records or reading commit information fails
   */
  @ShellMethod(key = "sync validate", value = "Validate the sync by counting the number of records")
  public String validateSync(
      @ShellOption(value = {"--mode"}, defaultValue = "complete", help = "Check mode") final String mode,
      @ShellOption(value = {"--sourceDb"}, defaultValue = "rawdata", help = "source database") final String srcDb,
      @ShellOption(value = {"--targetDb"}, defaultValue = "dwh_hoodie",
          help = "target database") final String tgtDb,
      @ShellOption(value = {"--partitionCount"}, defaultValue = "5",
          help = "total number of recent partitions to validate") final int partitionCount,
      @ShellOption(value = {"--hiveServerUrl"},
          help = "hiveServerURL to connect to") final String hiveServerUrl,
      @ShellOption(value = {"--hiveUser"}, defaultValue = "",
          help = "hive username to connect to") final String hiveUser,
      @ShellOption(value = {"--hivePass"}, defaultValue = "",
          help = "hive password to connect to") final String hivePass)
      throws Exception {
    if (HoodieCLI.syncTableMetadata == null) {
      throw new HoodieException("Sync validate request target table not null.");
    }
    HoodieTableMetaClient target = HoodieCLI.syncTableMetadata;
    HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline();
    HoodieTableMetaClient source = HoodieCLI.getTableMetaClient();
    HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline();

    final long sourceCount;
    final long targetCount;
    if (MODE_COMPLETE.equals(mode)) {
      sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, hiveUser, hivePass);
      targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, hiveUser, hivePass);
    } else if (MODE_LATEST_PARTITIONS.equals(mode)) {
      sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass);
      targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass);
    } else {
      // Fail loudly: falling through would compare 0 with 0 and report a misleading "no difference".
      throw new HoodieException("Unsupported sync validate mode '" + mode + "'. Supported modes are '"
          + MODE_COMPLETE + "' and '" + MODE_LATEST_PARTITIONS + "'.");
    }

    String targetLatestCommit = latestCommitTime(targetTimeline);
    String sourceLatestCommit = latestCommitTime(sourceTimeline);

    if (sourceLatestCommit != null
        && compareTimestamps(targetLatestCommit, GREATER_THAN, sourceLatestCommit)) {
      // source is behind the target
      return getString(target, targetTimeline, source, sourceCount, targetCount, sourceLatestCommit);
    } else {
      // target is not ahead of the source, so the roles are swapped
      return getString(source, sourceTimeline, target, targetCount, sourceCount, targetLatestCommit);
    }
  }

  /**
   * Returns the requested time of the last instant of the given timeline, or {@code "0"} if the timeline
   * has no instants.
   */
  private static String latestCommitTime(HoodieTimeline timeline) {
    return timeline.getInstants().iterator().hasNext()
        ? timeline.lastInstant().get().requestedTime()
        : NO_COMMIT_TIME;
  }

  /**
   * Builds the report for a pair of tables where {@code ahead} is the table whose timeline is inspected for
   * instants after the latest commit of {@code behind}.
   *
   * @param ahead              table whose timeline may contain commits the other table does not have yet
   * @param aheadTimeline      commits timeline of {@code ahead}
   * @param behind             the other table
   * @param behindCount        record count of {@code behind}
   * @param aheadCount         record count of {@code ahead}
   * @param behindLatestCommit latest commit time of {@code behind}, or {@code "0"} if it has none
   * @return the count difference {@code aheadCount - behindCount}; when {@code aheadTimeline} has instants
   *         after {@code behindLatestCommit}, the result of {@code countNewRecords} for them is appended
   * @throws IOException if {@code countNewRecords} fails
   */
  private String getString(HoodieTableMetaClient ahead, HoodieTimeline aheadTimeline, HoodieTableMetaClient behind,
                           long behindCount, long aheadCount, String behindLatestCommit)
      throws IOException {
    List<HoodieInstant> commitsToCatchup = aheadTimeline.findInstantsAfter(behindLatestCommit, Integer.MAX_VALUE)
        .getInstants();
    String countDifference = "Count difference now is (count(" + ahead.getTableConfig().getTableName()
        + ") - count(" + behind.getTableConfig().getTableName() + ") == " + (aheadCount - behindCount);
    if (commitsToCatchup.isEmpty()) {
      return countDifference;
    }
    long newInserts = countNewRecords(ahead,
        commitsToCatchup.stream().map(HoodieInstant::requestedTime).collect(Collectors.toList()));
    return countDifference + ". Catch up count is " + newInserts;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy