org.apache.hudi.client.utils.LegacyArchivedMetaEntryReader Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.client.utils;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.model.HoodieArchivedMetaEntry;
import org.apache.hudi.client.timeline.ActiveActionWithDetails;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.table.timeline.ActiveAction;
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.ClosableIterator;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.storage.StoragePathInfo;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.apache.hudi.common.util.StringUtils.getUTF8Bytes;
/**
* Tools used for migrating to new LSM tree style archived timeline.
*/
public class LegacyArchivedMetaEntryReader {
private static final Logger LOG = LoggerFactory.getLogger(LegacyArchivedMetaEntryReader.class);
private static final Pattern ARCHIVE_FILE_PATTERN =
Pattern.compile("^\\.commits_\\.archive\\.([0-9]+).*");
public static final String MERGE_ARCHIVE_PLAN_NAME = "mergeArchivePlan";
private static final String ACTION_TYPE_KEY = "actionType";
private static final String ACTION_STATE = "actionState";
private static final String STATE_TRANSITION_TIME = "stateTransitionTime";
private final HoodieTableMetaClient metaClient;
public LegacyArchivedMetaEntryReader(HoodieTableMetaClient metaClient) {
this.metaClient = metaClient;
}
public ClosableIterator getActiveActionsIterator() {
return loadInstants(null);
}
/**
* Reads the avro record for instant and details.
*/
private Pair> readInstant(GenericRecord record) {
final String instantTime = record.get(HoodiePartitionMetadata.COMMIT_TIME_KEY).toString();
final String action = record.get(ACTION_TYPE_KEY).toString();
final String stateTransitionTime = (String) record.get(STATE_TRANSITION_TIME);
final Option details = getMetadataKey(action).map(key -> {
Object actionData = record.get(key);
if (actionData != null) {
if (action.equals(HoodieTimeline.COMPACTION_ACTION)) {
return HoodieAvroUtils.indexedRecordToBytes((IndexedRecord) actionData);
} else {
return getUTF8Bytes(actionData.toString());
}
}
return null;
});
HoodieInstant instant = new HoodieInstant(HoodieInstant.State.valueOf(record.get(ACTION_STATE).toString()), action,
instantTime, stateTransitionTime);
return Pair.of(instant,details);
}
@Nonnull
private Option getMetadataKey(String action) {
switch (action) {
case HoodieTimeline.CLEAN_ACTION:
return Option.of("hoodieCleanMetadata");
case HoodieTimeline.COMMIT_ACTION:
case HoodieTimeline.DELTA_COMMIT_ACTION:
return Option.of("hoodieCommitMetadata");
case HoodieTimeline.ROLLBACK_ACTION:
return Option.of("hoodieRollbackMetadata");
case HoodieTimeline.SAVEPOINT_ACTION:
return Option.of("hoodieSavePointMetadata");
case HoodieTimeline.COMPACTION_ACTION:
case HoodieTimeline.LOG_COMPACTION_ACTION:
return Option.of("hoodieCompactionPlan");
case HoodieTimeline.REPLACE_COMMIT_ACTION:
return Option.of("hoodieReplaceCommitMetadata");
case HoodieTimeline.INDEXING_ACTION:
return Option.of("hoodieIndexCommitMetadata");
default:
LOG.error(String.format("Unknown action in metadata (%s)", action));
return Option.empty();
}
}
/**
* This is method to read selected instants. Do NOT use this directly use one of the helper methods above
* If loadInstantDetails is set to true, this would also update 'readCommits' map with commit details
* If filter is specified, only the filtered instants are loaded
* If commitsFilter is specified, only the filtered records are loaded.
*/
private ClosableIterator loadInstants(HoodieArchivedTimeline.TimeRangeFilter filter) {
try {
// List all files
List pathInfoList = metaClient.getStorage().globEntries(
new StoragePath(metaClient.getArchivePath(), ".commits_.archive*"));
// Sort files by version suffix in reverse (implies reverse chronological order)
pathInfoList.sort(new ArchiveLogVersionComparator());
ClosableIterator> itr = getRecordIterator(pathInfoList);
return new ClosableIterator() {
private ActiveAction activeAction;
private Pair> nextInstantAndDetail;
@Override
public void close() {
itr.close();
}
@Override
public boolean hasNext() {
List>> instantAndDetails = new ArrayList<>();
String lastInstantTime = null;
if (nextInstantAndDetail != null) {
instantAndDetails.add(nextInstantAndDetail);
lastInstantTime = nextInstantAndDetail.getKey().getTimestamp();
nextInstantAndDetail = null;
}
while (itr.hasNext()) {
HoodieRecord record = itr.next();
Pair> instantAndDetail = readInstant((GenericRecord) record.getData());
String instantTime = instantAndDetail.getKey().getTimestamp();
if (filter == null || filter.isInRange(instantTime)) {
if (lastInstantTime == null) {
instantAndDetails.add(instantAndDetail);
lastInstantTime = instantTime;
} else if (lastInstantTime.equals(instantTime)) {
instantAndDetails.add(instantAndDetail);
} else {
nextInstantAndDetail = instantAndDetail;
break;
}
}
}
if (!instantAndDetails.isEmpty()) {
this.activeAction = ActiveActionWithDetails.fromInstantAndDetails(instantAndDetails);
return true;
}
return false;
}
@Override
public ActiveAction next() {
return this.activeAction;
}
};
} catch (IOException e) {
throw new HoodieIOException(
"Could not load archived commit timeline from path " + metaClient.getArchivePath(), e);
}
}
/**
* Returns the avro record iterator with given file statuses.
*/
private ClosableIterator> getRecordIterator(
List pathInfoList) throws IOException {
return new ClosableIterator>() {
final Iterator pathInfoIterator = pathInfoList.iterator();
HoodieLogFormat.Reader reader;
ClosableIterator> recordItr;
@Override
public void close() {
if (this.reader != null) {
closeLogFormatReader(reader);
}
}
@Override
public boolean hasNext() {
if (recordItr != null && recordItr.hasNext()) {
return true;
}
// new reader if possible
if (reader != null) {
while (reader.hasNext()) {
HoodieLogBlock block = reader.next();
if (block instanceof HoodieAvroDataBlock) {
HoodieAvroDataBlock avroBlock = (HoodieAvroDataBlock) block;
recordItr = avroBlock.getRecordIterator(HoodieRecord.HoodieRecordType.AVRO);
if (recordItr.hasNext()) {
return true;
}
}
}
// no records in the reader, close the reader
closeLogFormatReader(reader);
reader = null;
}
// new reader
while (pathInfoIterator.hasNext()) {
StoragePathInfo pathInfo = pathInfoIterator.next();
try {
reader = HoodieLogFormat.newReader(
metaClient.getStorage(),
new HoodieLogFile(pathInfo.getPath()),
HoodieArchivedMetaEntry.getClassSchema());
} catch (IOException ioe) {
throw new HoodieIOException(
"Error initializing the reader for archived log: " + pathInfo.getPath(), ioe);
}
while (reader.hasNext()) {
HoodieLogBlock block = reader.next();
if (block instanceof HoodieAvroDataBlock) {
HoodieAvroDataBlock avroBlock = (HoodieAvroDataBlock) block;
recordItr = avroBlock.getRecordIterator(HoodieRecord.HoodieRecordType.AVRO);
if (recordItr.hasNext()) {
return true;
}
}
}
if (!reader.hasNext()) {
try {
reader.close();
} catch (IOException e) {
throw new HoodieIOException("Failed to close log reader " + pathInfo.getPath());
}
}
}
return false;
}
@Override
public HoodieRecord next() {
return this.recordItr.next();
}
};
}
private void closeLogFormatReader(HoodieLogFormat.Reader reader) {
try {
reader.close();
} catch (IOException ioe) {
throw new HoodieIOException("Error closing log format reader", ioe);
}
}
/**
* Sort files by reverse order of version suffix in file name.
*/
public static class ArchiveLogVersionComparator
implements Comparator, Serializable {
@Override
public int compare(StoragePathInfo f1, StoragePathInfo f2) {
return Integer.compare(getArchivedFileSuffix(f2), getArchivedFileSuffix(f1));
}
}
private static int getArchivedFileSuffix(StoragePathInfo f) {
try {
Matcher fileMatcher = ARCHIVE_FILE_PATTERN.matcher(f.getPath().getName());
if (fileMatcher.matches()) {
return Integer.parseInt(fileMatcher.group(1));
}
} catch (NumberFormatException e) {
// log and ignore any format warnings
LOG.warn("error getting suffix for archived file: " + f.getPath());
}
// return default value in case of any errors
return 0;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy