All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.common.table.timeline.CompletionTimeQueryView Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.common.table.timeline;

import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.InstantRange;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.VisibleForTesting;

import org.apache.avro.generic.GenericRecord;

import java.io.Serializable;
import java.time.Instant;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.function.Function;
import java.util.stream.Collectors;

import static org.apache.hudi.common.table.read.IncrementalQueryAnalyzer.START_COMMIT_EARLIEST;
import static org.apache.hudi.common.table.timeline.HoodieArchivedTimeline.COMPLETION_TIME_ARCHIVED_META_FIELD;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;

/**
 * Query view for instant completion time.
 */
public class CompletionTimeQueryView implements AutoCloseable, Serializable {
  private static final long serialVersionUID = 1L;

  private static final long MILLI_SECONDS_IN_THREE_DAYS = 3 * 24 * 3600 * 1000;

  private static final long MILLI_SECONDS_IN_ONE_DAY = 24 * 3600 * 1000;

  private final HoodieTableMetaClient metaClient;

  /**
   * Mapping from instant start time -> completion time.
   * Should be thread-safe data structure.
   */
  private final ConcurrentMap beginToCompletionInstantTimeMap;

  /**
   * The cursor instant time to eagerly load from, by default load last N days of completed instants.
   * It can grow dynamically with lazy loading. e.g. assuming an initial cursor instant as t10,
   * a completion query for t5 would trigger lazy loading with this cursor instant updated to t5.
   * This sliding window model amortizes redundant loading from different queries.
   */
  private volatile String cursorInstant;

  /**
   * The first write instant on the active timeline, used for query optimization.
   */
  private final String firstNonSavepointCommit;

  /**
   * The constructor.
   *
   * @param metaClient   The table meta client.
   */
  public CompletionTimeQueryView(HoodieTableMetaClient metaClient) {
    this(metaClient, HoodieInstantTimeGenerator.formatDate(new Date(Instant.now().minusMillis(MILLI_SECONDS_IN_THREE_DAYS).toEpochMilli())));
  }

  /**
   * The constructor.
   *
   * @param metaClient   The table meta client.
   * @param eagerLoadInstant The earliest instant time to eagerly load from, by default load last N days of completed instants.
   */
  public CompletionTimeQueryView(HoodieTableMetaClient metaClient, String eagerLoadInstant) {
    this.metaClient = metaClient;
    this.beginToCompletionInstantTimeMap = new ConcurrentHashMap<>();
    this.cursorInstant = HoodieTimeline.minInstant(eagerLoadInstant, metaClient.getActiveTimeline().firstInstant().map(HoodieInstant::getTimestamp).orElse(""));
    // Note: use getWriteTimeline() to keep sync with the fs view visibleCommitsAndCompactionTimeline, see AbstractTableFileSystemView.refreshTimeline.
    this.firstNonSavepointCommit = metaClient.getActiveTimeline().getWriteTimeline().getFirstNonSavepointCommit().map(HoodieInstant::getTimestamp).orElse("");
    load();
  }

  /**
   * Returns whether the instant is completed.
   */
  public boolean isCompleted(String beginInstantTime) {
    // archival does not proceed beyond the first savepoint, so any instant before that is completed.
    return this.beginToCompletionInstantTimeMap.containsKey(beginInstantTime) || isArchived(beginInstantTime);
  }

  /**
   * Returns whether the instant is archived.
   */
  public boolean isArchived(String instantTime) {
    return HoodieTimeline.compareTimestamps(instantTime, LESSER_THAN, this.firstNonSavepointCommit);
  }

  /**
   * Returns whether the give instant time {@code instantTime} completed before the base instant {@code baseInstant}.
   */
  public boolean isCompletedBefore(String baseInstant, String instantTime) {
    Option completionTimeOpt = getCompletionTime(baseInstant, instantTime);
    if (completionTimeOpt.isPresent()) {
      return HoodieTimeline.compareTimestamps(completionTimeOpt.get(), LESSER_THAN, baseInstant);
    }
    return false;
  }

  /**
   * Returns whether the given instant time {@code instantTime} is sliced after or on the base instant {@code baseInstant}.
   */
  public boolean isSlicedAfterOrOn(String baseInstant, String instantTime) {
    Option completionTimeOpt = getCompletionTime(baseInstant, instantTime);
    if (completionTimeOpt.isPresent()) {
      return HoodieTimeline.compareTimestamps(completionTimeOpt.get(), GREATER_THAN_OR_EQUALS, baseInstant);
    }
    return true;
  }

  /**
   * Get completion time with a base instant time as a reference to fix the compatibility.
   *
   * @param baseInstant The base instant
   * @param instantTime The instant time to query the completion time with
   *
   * @return Probability fixed completion time.
   */
  public Option getCompletionTime(String baseInstant, String instantTime) {
    Option completionTimeOpt = getCompletionTime(instantTime);
    if (completionTimeOpt.isPresent()) {
      String completionTime = completionTimeOpt.get();
      if (completionTime.length() != baseInstant.length()) {
        // ==============================================================
        // LEGACY CODE
        // ==============================================================
        // Fixes the completion time to reflect the completion sequence correctly
        // if the file slice base instant time is not in datetime format.
        // For example, many test cases just use integer string as the instant time.
        // CAUTION: this fix only works for OCC(Optimistic Concurrency Control).
        // for NB-CC(Non-blocking Concurrency Control), the file slicing may be incorrect.
        return Option.of(instantTime);
      }
    }
    return completionTimeOpt;
  }

  /**
   * Queries the instant completion time with given start time.
   *
   * @param beginTime The start time.
   *
   * @return The completion time if the instant finished or empty if it is still pending.
   */
  public Option getCompletionTime(String beginTime) {
    String completionTime = this.beginToCompletionInstantTimeMap.get(beginTime);
    if (completionTime != null) {
      return Option.of(completionTime);
    }
    if (HoodieTimeline.compareTimestamps(beginTime, GREATER_THAN_OR_EQUALS, this.cursorInstant)) {
      // the instant is still pending
      return Option.empty();
    }
    loadCompletionTimeIncrementally(beginTime);
    return Option.ofNullable(this.beginToCompletionInstantTimeMap.get(beginTime));
  }

  /**
   * Queries the instant start time with given completion time range.
   *
   * 

By default, assumes there is at most 1 day time of duration for an instant to accelerate the queries. * * @param timeline The timeline. * @param rangeStart The query range start completion time. * @param rangeEnd The query range end completion time. * @param rangeType The range type. * * @return The sorted instant time list. */ public List getStartTimes( HoodieTimeline timeline, Option rangeStart, Option rangeEnd, InstantRange.RangeType rangeType) { // assumes any instant/transaction lasts at most 1 day to optimize the query efficiency. return getStartTimes(timeline, rangeStart, rangeEnd, rangeType, s -> HoodieInstantTimeGenerator.instantTimeMinusMillis(s, MILLI_SECONDS_IN_ONE_DAY)); } /** * Queries the instant start time with given completion time range. * * @param rangeStart The query range start completion time. * @param rangeEnd The query range end completion time. * @param earliestInstantTimeFunc The function to generate the earliest start time boundary * with the minimum completion time. * * @return The sorted instant time list. */ @VisibleForTesting public List getStartTimes( String rangeStart, String rangeEnd, Function earliestInstantTimeFunc) { return getStartTimes(metaClient.getCommitsTimeline().filterCompletedInstants(), Option.ofNullable(rangeStart), Option.ofNullable(rangeEnd), InstantRange.RangeType.CLOSED_CLOSED, earliestInstantTimeFunc); } /** * Queries the instant start time with given completion time range. * * @param timeline The timeline. * @param rangeStart The query range start completion time. * @param rangeEnd The query range end completion time. * @param rangeType The range type. * @param earliestInstantTimeFunc The function to generate the earliest start time boundary * with the minimum completion time. * * @return The sorted instant time list. */ public List getStartTimes( HoodieTimeline timeline, Option rangeStart, Option rangeEnd, InstantRange.RangeType rangeType, Function earliestInstantTimeFunc) { final boolean startFromEarliest = START_COMMIT_EARLIEST.equalsIgnoreCase(rangeStart.orElse(null)); String earliestInstantToLoad = null; if (rangeStart.isPresent() && !startFromEarliest) { earliestInstantToLoad = earliestInstantTimeFunc.apply(rangeStart.get()); } else if (rangeEnd.isPresent()) { earliestInstantToLoad = earliestInstantTimeFunc.apply(rangeEnd.get()); } // ensure the earliest instant boundary be loaded. if (earliestInstantToLoad != null && HoodieTimeline.compareTimestamps(this.cursorInstant, GREATER_THAN, earliestInstantToLoad)) { loadCompletionTimeIncrementally(earliestInstantToLoad); } if (rangeStart.isEmpty() && rangeEnd.isPresent()) { // returns the last instant that finished at or before the given completion time 'endTime'. String maxInstantTime = timeline.getInstantsAsStream() .filter(instant -> instant.isCompleted() && HoodieTimeline.compareTimestamps(instant.getCompletionTime(), LESSER_THAN_OR_EQUALS, rangeEnd.get())) .max(Comparator.comparing(HoodieInstant::getCompletionTime)).map(HoodieInstant::getTimestamp).orElse(null); if (maxInstantTime != null) { return Collections.singletonList(maxInstantTime); } // fallback to archived timeline return this.beginToCompletionInstantTimeMap.entrySet().stream() .filter(entry -> HoodieTimeline.compareTimestamps(entry.getValue(), LESSER_THAN_OR_EQUALS, rangeEnd.get())) .map(Map.Entry::getKey).collect(Collectors.toList()); } if (startFromEarliest) { // expedience for snapshot read: ['earliest', _) to avoid loading unnecessary instants. rangeStart = Option.empty(); } if (rangeStart.isEmpty() && rangeEnd.isEmpty()) { // (_, _): read the latest snapshot. return timeline.filterCompletedInstants().lastInstant().map(instant -> Collections.singletonList(instant.getTimestamp())).orElse(Collections.emptyList()); } final InstantRange instantRange = InstantRange.builder() .rangeType(rangeType) .startInstant(rangeStart.orElse(null)) .endInstant(rangeEnd.orElse(null)) .nullableBoundary(true) .build(); return this.beginToCompletionInstantTimeMap.entrySet().stream() .filter(entry -> instantRange.isInRange(entry.getValue())) .map(Map.Entry::getKey).sorted().collect(Collectors.toList()); } // ------------------------------------------------------------------------- // Utilities // ------------------------------------------------------------------------- private void loadCompletionTimeIncrementally(String startTime) { // the 'startTime' should be out of the eager loading range, switch to a lazy loading. // This operation is resource costly. synchronized (this) { if (HoodieTimeline.compareTimestamps(startTime, LESSER_THAN, this.cursorInstant)) { HoodieArchivedTimeline.loadInstants(metaClient, new HoodieArchivedTimeline.ClosedOpenTimeRangeFilter(startTime, this.cursorInstant), HoodieArchivedTimeline.LoadMode.TIME, r -> true, this::readCompletionTime); } // refresh the start instant this.cursorInstant = startTime; } } /** * This is method to read instant completion time. * This would also update 'startToCompletionInstantTimeMap' map with start time/completion time pairs. * Only instants starts from 'startInstant' (inclusive) are considered. */ private void load() { // load active instants first. this.metaClient.getActiveTimeline() .filterCompletedInstants().getInstantsAsStream() .forEach(instant -> setCompletionTime(instant.getTimestamp(), instant.getCompletionTime())); // then load the archived instants. HoodieArchivedTimeline.loadInstants(metaClient, new HoodieArchivedTimeline.StartTsFilter(this.cursorInstant), HoodieArchivedTimeline.LoadMode.TIME, r -> true, this::readCompletionTime); } private void readCompletionTime(String instantTime, GenericRecord record) { final String completionTime = record.get(COMPLETION_TIME_ARCHIVED_META_FIELD).toString(); setCompletionTime(instantTime, completionTime); } private void setCompletionTime(String beginInstantTime, String completionTime) { if (completionTime == null) { // the meta-server instant does not have completion time completionTime = beginInstantTime; } this.beginToCompletionInstantTimeMap.putIfAbsent(beginInstantTime, completionTime); } public String getCursorInstant() { return cursorInstant; } public boolean isEmptyTable() { return this.beginToCompletionInstantTimeMap.isEmpty(); } @Override public void close() { this.beginToCompletionInstantTimeMap.clear(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy