

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.common.table.timeline;

import org.apache.hudi.avro.model.HoodieLSMTimelineInstant;
import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.io.storage.HoodieAvroFileReader;
import org.apache.hudi.io.storage.HoodieIOFactory;
import org.apache.hudi.storage.StoragePath;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BiConsumer;
import java.util.function.Function;

import static org.apache.hudi.common.util.ConfigUtils.DEFAULT_HUDI_CONFIG_FOR_READER;

/**
 * Represents the Archived Timeline for the Hoodie table.
 *
 * <h2>Timeline Refresh</h2>
 *
 * <p>Instants are read from the archive file during initialization and never refreshed.
 * To refresh, clients need to call {@link #reload()}.
 *
 * <h2>Serialization/De-serialization</h2>
 *
 * <p>This class can be serialized and de-serialized, and on de-serialization the FileSystem is re-initialized.
 */
public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
  public static final String INSTANT_TIME_ARCHIVED_META_FIELD = "instantTime";
  public static final String COMPLETION_TIME_ARCHIVED_META_FIELD = "completionTime";
  private static final String ACTION_ARCHIVED_META_FIELD = "action";
  private static final String METADATA_ARCHIVED_META_FIELD = "metadata";
  private static final String PLAN_ARCHIVED_META_FIELD = "plan";

  private HoodieTableMetaClient metaClient;
  private final Map<String, byte[]> readCommits = new ConcurrentHashMap<>();

  private static final Logger LOG = LoggerFactory.getLogger(HoodieArchivedTimeline.class);

  /**
   * Used for loading the archived timeline incrementally: the earliest loaded instant time gets memorized
   * each time the timeline is loaded. That instant time is then used as the end boundary
   * of the next loading.
   */
  private String cursorInstant;

  /**
   * Loads all the archived instants.
   * Note that there is no lazy loading, so this may not work if the archived timeline range is really long.
   * TBD: Should we enforce a maximum time range?
   */
  public HoodieArchivedTimeline(HoodieTableMetaClient metaClient) {
    this.metaClient = metaClient;
    setInstants(this.loadInstants());
    this.cursorInstant = firstInstant().map(HoodieInstant::getTimestamp).orElse(null);
    // multiple casts will make this lambda serializable -
    // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
    this.details = (Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
  }

  /**
   * Loads completed instants from startTs (inclusive).
   * Note that there is no lazy loading, so this may not work if a really early startTs is specified.
   */
  public HoodieArchivedTimeline(HoodieTableMetaClient metaClient, String startTs) {
    this.metaClient = metaClient;
    setInstants(loadInstants(new StartTsFilter(startTs), LoadMode.METADATA));
    this.cursorInstant = startTs;
    // multiple casts will make this lambda serializable -
    // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
    this.details = (Function<HoodieInstant, Option<byte[]>> & Serializable) this::getInstantDetails;
  }

  /**
   * For serialization and de-serialization only.
   *
   * @deprecated
   */
  public HoodieArchivedTimeline() {
  }
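  // Illustrative usage sketch (not part of the upstream source): constructing the archived
  // timeline from a start instant and inspecting what was loaded. The meta client setup,
  // configuration object, base path, and instant time literal are assumptions for the example.
  //
  //   HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
  //       .setConf(storageConf)            // hypothetical storage configuration
  //       .setBasePath("/tmp/hudi_table")  // hypothetical table base path
  //       .build();
  //   HoodieArchivedTimeline archived = new HoodieArchivedTimeline(metaClient, "20240101000000000");
  //   archived.getInstants().forEach(instant ->
  //       System.out.println(instant.getTimestamp() + " " + instant.getAction()));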
  /**
   * This method is only used when this object is deserialized in a spark executor.
   *
   * @deprecated
   */
  private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
    in.defaultReadObject();
  }

  public void loadInstantDetailsInMemory(String startTs, String endTs) {
    loadInstants(startTs, endTs);
  }

  public void loadCompletedInstantDetailsInMemory() {
    loadInstants(null, LoadMode.METADATA);
  }

  public void loadCompactionDetailsInMemory(String compactionInstantTime) {
    loadCompactionDetailsInMemory(compactionInstantTime, compactionInstantTime);
  }

  public void loadCompactionDetailsInMemory(String startTs, String endTs) {
    // load compactionPlan
    loadInstants(new TimeRangeFilter(startTs, endTs), LoadMode.PLAN,
        record -> record.get(ACTION_ARCHIVED_META_FIELD).toString().equals(HoodieTimeline.COMMIT_ACTION)
            && record.get(PLAN_ARCHIVED_META_FIELD) != null
    );
  }

  public void clearInstantDetailsFromMemory(String instantTime) {
    this.readCommits.remove(instantTime);
  }

  public void clearInstantDetailsFromMemory(String startTs, String endTs) {
    this.findInstantsInRange(startTs, endTs).getInstants().forEach(instant ->
        this.readCommits.remove(instant.getTimestamp()));
  }

  @Override
  public Option<byte[]> getInstantDetails(HoodieInstant instant) {
    return Option.ofNullable(readCommits.get(instant.getTimestamp()));
  }

  public HoodieArchivedTimeline reload() {
    return new HoodieArchivedTimeline(metaClient);
  }

  /**
   * Reloads the archived timeline incrementally with the given beginning timestamp {@code startTs}.
   * This method is not thread safe.
   *
   * <p>IMPORTANT: this is for multiple loadings of one static snapshot of the timeline; if new
   * instants got archived, use {@link #reload()} instead.
   */
  public HoodieArchivedTimeline reload(String startTs) {
    if (this.cursorInstant != null) {
      if (HoodieTimeline.compareTimestamps(startTs, LESSER_THAN, this.cursorInstant)) {
        appendInstants(loadInstants(new ClosedOpenTimeRangeFilter(startTs, this.cursorInstant), LoadMode.METADATA));
        this.cursorInstant = startTs;
      }
      return this;
    } else {
      // a null cursor instant indicates an empty timeline
      return new HoodieArchivedTimeline(metaClient, startTs);
    }
  }
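  // Illustrative sketch (not part of the upstream source): widening an already loaded snapshot
  // backwards in time with the incremental reload(String) above. Instants in the range
  // [newStartTs, previousCursor) are appended; the instant time literals are assumptions.
  //
  //   HoodieArchivedTimeline archived = new HoodieArchivedTimeline(metaClient, "20240201000000000");
  //   // later: extend the loaded window back to an earlier boundary
  //   archived = archived.reload("20240101000000000");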
  private HoodieInstant readCommit(String instantTime, GenericRecord record,
                                   Option<BiConsumer<String, GenericRecord>> instantDetailsConsumer) {
    final String action = record.get(ACTION_ARCHIVED_META_FIELD).toString();
    final String completionTime = record.get(COMPLETION_TIME_ARCHIVED_META_FIELD).toString();
    instantDetailsConsumer.ifPresent(consumer -> consumer.accept(instantTime, record));
    return new HoodieInstant(HoodieInstant.State.COMPLETED, action, instantTime, completionTime);
  }

  @Nullable
  private BiConsumer<String, GenericRecord> getInstantDetailsFunc(LoadMode loadMode) {
    switch (loadMode) {
      case METADATA:
        return (instant, record) -> {
          ByteBuffer commitMeta = (ByteBuffer) record.get(METADATA_ARCHIVED_META_FIELD);
          if (commitMeta != null) {
            // in case the entry comes from an empty completed meta file
            this.readCommits.put(instant, commitMeta.array());
          }
        };
      case PLAN:
        return (instant, record) -> {
          ByteBuffer plan = (ByteBuffer) record.get(PLAN_ARCHIVED_META_FIELD);
          if (plan != null) {
            // in case the entry comes from an empty completed meta file
            this.readCommits.put(instant, plan.array());
          }
        };
      default:
        return null;
    }
  }

  private List<HoodieInstant> loadInstants() {
    return loadInstants(null, LoadMode.ACTION);
  }

  private List<HoodieInstant> loadInstants(String startTs, String endTs) {
    return loadInstants(new TimeRangeFilter(startTs, endTs), LoadMode.METADATA);
  }

  private List<HoodieInstant> loadInstants(TimeRangeFilter filter, LoadMode loadMode) {
    return loadInstants(filter, loadMode, r -> true);
  }

  /**
   * This method reads selected instants; do NOT use it directly, use one of the helper methods above.
   * If instant details are loaded (METADATA or PLAN mode), this also updates the 'readCommits' map with the details.
   * If a filter is specified, only the filtered instants are loaded.
   * If a commitsFilter is specified, only the filtered records are loaded.
   */
  private List<HoodieInstant> loadInstants(
      @Nullable TimeRangeFilter filter,
      LoadMode loadMode,
      Function<GenericRecord, Boolean> commitsFilter) {
    Map<String, HoodieInstant> instantsInRange = new ConcurrentHashMap<>();
    Option<BiConsumer<String, GenericRecord>> instantDetailsConsumer = Option.ofNullable(getInstantDetailsFunc(loadMode));
    loadInstants(metaClient, filter, loadMode, commitsFilter,
        (instantTime, avroRecord) -> instantsInRange.putIfAbsent(instantTime, readCommit(instantTime, avroRecord, instantDetailsConsumer)));
    ArrayList<HoodieInstant> result = new ArrayList<>(instantsInRange.values());
    Collections.sort(result);
    return result;
  }
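  // Illustrative sketch (not part of the upstream source): loading archived compaction plans
  // into memory and then fetching the serialized plan bytes through getInstantDetails. The
  // instant time literal is an assumption for the example.
  //
  //   archived.loadCompactionDetailsInMemory("20240115103000000");
  //   archived.getInstants().forEach(instant -> {
  //     Option<byte[]> planBytes = archived.getInstantDetails(instant);
  //     // planBytes is empty for instants whose details were not loaded
  //   });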
  /**
   * Loads the instants from the timeline.
   *
   * @param metaClient     The meta client.
   * @param filter         The time range filter where the target instant belongs to.
   * @param loadMode       The load mode.
   * @param commitsFilter  Filter of the instant type.
   * @param recordConsumer Consumer of the instant record payload.
   */
  public static void loadInstants(
      HoodieTableMetaClient metaClient,
      @Nullable TimeRangeFilter filter,
      LoadMode loadMode,
      Function<GenericRecord, Boolean> commitsFilter,
      BiConsumer<String, GenericRecord> recordConsumer) {
    try {
      // List all files
      List<String> fileNames = LSMTimeline.latestSnapshotManifest(metaClient).getFileNames();

      Schema readSchema = LSMTimeline.getReadSchema(loadMode);
      fileNames.stream()
          .filter(fileName -> filter == null || LSMTimeline.isFileInRange(filter, fileName))
          .parallel().forEach(fileName -> {
            // Read the archived file
            try (HoodieAvroFileReader reader = (HoodieAvroFileReader) HoodieIOFactory.getIOFactory(metaClient.getStorage())
                .getReaderFactory(HoodieRecordType.AVRO)
                .getFileReader(DEFAULT_HUDI_CONFIG_FOR_READER, new StoragePath(metaClient.getArchivePath(), fileName))) {
              try (ClosableIterator<IndexedRecord> iterator = reader.getIndexedRecordIterator(HoodieLSMTimelineInstant.getClassSchema(), readSchema)) {
                while (iterator.hasNext()) {
                  GenericRecord record = (GenericRecord) iterator.next();
                  String instantTime = record.get(INSTANT_TIME_ARCHIVED_META_FIELD).toString();
                  if ((filter == null || filter.isInRange(instantTime))
                      && commitsFilter.apply(record)) {
                    recordConsumer.accept(instantTime, record);
                  }
                }
              }
            } catch (IOException ioException) {
              throw new HoodieIOException("Error opening file reader for path: "
                  + new StoragePath(metaClient.getArchivePath(), fileName));
            }
          });
    } catch (IOException e) {
      throw new HoodieIOException(
          "Could not load archived commit timeline from path " + metaClient.getArchivePath(), e);
    }
  }

  @Override
  public HoodieDefaultTimeline getWriteTimeline() {
    // filter in-memory instants
    Set<String> validActions = CollectionUtils.createSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, COMPACTION_ACTION,
        LOG_COMPACTION_ACTION, REPLACE_COMMIT_ACTION);
    return new HoodieDefaultTimeline(getInstantsAsStream()
        .filter(i -> readCommits.containsKey(i.getTimestamp()))
        .filter(s -> validActions.contains(s.getAction())), details);
  }

  // -------------------------------------------------------------------------
  //  Inner Class
  // -------------------------------------------------------------------------

  /**
   * Different modes for loading the archived instant metadata.
   */
  public enum LoadMode {
    /**
     * Loads the instantTime, completionTime.
     */
    TIME,
    /**
     * Loads the instantTime, completionTime, action.
     */
    ACTION,
    /**
     * Loads the instantTime, completionTime, action, metadata.
     */
    METADATA,
    /**
     * Loads the instantTime, completionTime, action, plan.
     */
    PLAN
  }

  /**
   * A time based filter with range (startTs, endTs].
   */
  public static class TimeRangeFilter {
    protected final String startTs;
    protected final String endTs;

    public TimeRangeFilter(String startTs, String endTs) {
      this.startTs = startTs;
      this.endTs = endTs;
    }

    public boolean isInRange(String instantTime) {
      return HoodieTimeline.isInRange(instantTime, this.startTs, this.endTs);
    }
  }

  /**
   * A time based filter with range [startTs, endTs).
   */
  public static class ClosedOpenTimeRangeFilter extends TimeRangeFilter {

    public ClosedOpenTimeRangeFilter(String startTs, String endTs) {
      super(startTs, endTs);
    }

    public boolean isInRange(String instantTime) {
      return HoodieTimeline.isInClosedOpenRange(instantTime, this.startTs, this.endTs);
    }
  }

  /**
   * A time based filter with range [startTs, +∞).
   */
  public static class StartTsFilter extends TimeRangeFilter {

    public StartTsFilter(String startTs) {
      super(startTs, null); // endTs is never used
    }

    public boolean isInRange(String instantTime) {
      return HoodieTimeline.compareTimestamps(instantTime, GREATER_THAN_OR_EQUALS, startTs);
    }
  }
}
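// Illustrative sketch (not part of the upstream source): scanning archived records directly
// with the static loadInstants, collecting each instant's action at or after a start time.
// The record consumer may run in parallel, hence the concurrent map; the start time literal
// is an assumption for the example.
//
//   Map<String, String> actions = new ConcurrentHashMap<>();
//   HoodieArchivedTimeline.loadInstants(
//       metaClient,
//       new HoodieArchivedTimeline.StartTsFilter("20240101000000000"),
//       HoodieArchivedTimeline.LoadMode.ACTION,
//       record -> true,
//       (instantTime, record) -> actions.put(instantTime, record.get("action").toString()));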