org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.table.view;
import org.apache.hudi.common.model.CompactionOperation;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.dto.BaseFileDTO;
import org.apache.hudi.common.table.timeline.dto.ClusteringOpDTO;
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
import org.apache.hudi.common.table.timeline.dto.DTOUtils;
import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
import org.apache.hudi.common.table.timeline.dto.InstantDTO;
import org.apache.hudi.common.table.timeline.dto.TimelineDTO;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.RetryHelper;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieRemoteException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.http.Consts;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.client.utils.URIBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* A proxy for table file-system view which translates local View API calls to REST calls to remote timeline service.
*/
public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView, Serializable {
// Root path shared by every endpoint constant below; each endpoint is BASE_URL joined to a relative path with "/".
private static final String BASE_URL = "/v1/hoodie/view";
// File-slice endpoints (relative paths under "slices/").
public static final String LATEST_PARTITION_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/partition/latest/");
public static final String LATEST_PARTITION_SLICE_URL = String.format("%s/%s", BASE_URL, "slices/file/latest/");
public static final String LATEST_PARTITION_UNCOMPACTED_SLICES_URL =
String.format("%s/%s", BASE_URL, "slices/uncompacted/partition/latest/");
public static final String ALL_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/all");
public static final String LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/merged/beforeoron/latest/");
public static final String LATEST_SLICES_RANGE_INSTANT_URL = String.format("%s/%s", BASE_URL, "slices/range/latest/");
public static final String LATEST_SLICES_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/beforeoron/latest/");
public static final String ALL_LATEST_SLICES_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/all/beforeoron/latest/");
// Pending compaction and log-compaction endpoints.
public static final String PENDING_COMPACTION_OPS = String.format("%s/%s", BASE_URL, "compactions/pending/");
public static final String PENDING_LOG_COMPACTION_OPS = String.format("%s/%s", BASE_URL, "logcompactions/pending/");
// Data-file / base-file endpoints (relative paths under "datafile(s)/" and "basefiles/").
public static final String LATEST_PARTITION_DATA_FILES_URL =
String.format("%s/%s", BASE_URL, "datafiles/latest/partition");
public static final String LATEST_PARTITION_DATA_FILE_URL =
String.format("%s/%s", BASE_URL, "datafile/latest/partition");
public static final String ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all");
public static final String LATEST_ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all/latest/");
public static final String LATEST_DATA_FILE_ON_INSTANT_URL = String.format("%s/%s", BASE_URL, "datafile/on/latest/");
public static final String LATEST_DATA_FILES_RANGE_INSTANT_URL =
String.format("%s/%s", BASE_URL, "datafiles/range/latest/");
public static final String LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "datafiles/beforeoron/latest/");
public static final String ALL_LATEST_BASE_FILES_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "basefiles/all/beforeoron/");
// File-group endpoints, including the replaced-file-group variants.
public static final String ALL_FILEGROUPS_FOR_PARTITION_URL =
String.format("%s/%s", BASE_URL, "filegroups/all/partition/");
public static final String ALL_REPLACED_FILEGROUPS_BEFORE_OR_ON =
String.format("%s/%s", BASE_URL, "filegroups/replaced/beforeoron/");
public static final String ALL_REPLACED_FILEGROUPS_BEFORE =
String.format("%s/%s", BASE_URL, "filegroups/replaced/before/");
public static final String ALL_REPLACED_FILEGROUPS_AFTER_OR_ON =
String.format("%s/%s", BASE_URL, "filegroups/replaced/afteroron/");
public static final String ALL_REPLACED_FILEGROUPS_PARTITION =
String.format("%s/%s", BASE_URL, "filegroups/replaced/partition/");
// Pending clustering endpoint.
public static final String PENDING_CLUSTERING_FILEGROUPS = String.format("%s/%s", BASE_URL, "clustering/pending/");
// Timeline endpoints.
public static final String LAST_INSTANT = String.format("%s/%s", BASE_URL, "timeline/instant/last");
public static final String LAST_INSTANTS = String.format("%s/%s", BASE_URL, "timeline/instants/last");
public static final String TIMELINE = String.format("%s/%s", BASE_URL, "timeline/instants/all");
// POST Requests
// NOTE(review): only the two endpoints below are marked as POST; the endpoints above are presumably
// requested with GET (the base-file lookups in this class are) — confirm against the callers.
public static final String REFRESH_TABLE = String.format("%s/%s", BASE_URL, "refresh/");
public static final String LOAD_ALL_PARTITIONS_URL = String.format("%s/%s", BASE_URL, "loadallpartitions/");
// Query-parameter names. The param-map helpers in this class put BASEPATH_PARAM and PARTITION_PARAM
// into the request parameters.
public static final String PARTITION_PARAM = "partition";
public static final String BASEPATH_PARAM = "basepath";
// NOTE(review): the instant/file-id names below are presumably query-parameter names as well; most of
// their uses are outside the visible part of the class.
public static final String INSTANT_PARAM = "instant";
public static final String MAX_INSTANT_PARAM = "maxinstant";
public static final String MIN_INSTANT_PARAM = "mininstant";
public static final String INSTANTS_PARAM = "instants";
public static final String FILEID_PARAM = "fileid";
// executeRequest appends these two to every request: the timestamp of the local timeline's last
// instant (when one exists) and the local timeline hash.
public static final String LAST_INSTANT_TS = "lastinstantts";
public static final String TIMELINE_HASH = "timelinehash";
public static final String REFRESH_OFF = "refreshoff";
public static final String INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM = "includependingcompaction";
private static final Logger LOG = LoggerFactory.getLogger(RemoteHoodieTableFileSystemView.class);
// Host and port of the remote server; executeRequest builds every request URL from them.
private final String serverHost;
private final int serverPort;
// Base path read from the meta client at construction; sent as the "basepath" query parameter.
private final String basePath;
private final HoodieTableMetaClient metaClient;
// Local timeline snapshot; its last instant and hash are attached to every request.
// NOTE(review): not final, so it is presumably replaced later (e.g. on refresh/sync) — confirm in the rest of the class.
private HoodieTimeline timeline;
// Parses response bodies into the type requested by each call.
private final ObjectMapper mapper;
// Timeout in milliseconds (configured value in seconds * 1000); passed along with each request.
private final int timeoutMs;
// While true, executeRequest fails its "View already closed" check.
private boolean closed = false;
// Stays null unless retries are enabled in the view config; executeRequest then issues the request directly.
// NOTE(review): declared as a raw type here — confirm whether type arguments were lost from this declaration.
private RetryHelper retryHelper;
/**
 * HTTP methods that a request issued by this view can use.
 */
private enum RequestMethod {
  GET,
  POST
}
/**
 * Creates a remote view from an explicit server address.
 *
 * <p>Builds a {@link FileSystemViewStorageConfig} carrying only the given host and port and
 * delegates to the config-based constructor.
 *
 * @param server host name of the remote server
 * @param port port of the remote server
 * @param metaClient meta client of the table this view serves
 */
public RemoteHoodieTableFileSystemView(String server, int port, HoodieTableMetaClient metaClient) {
  this(
      metaClient,
      FileSystemViewStorageConfig.newBuilder()
          .withRemoteServerHost(server)
          .withRemoteServerPort(port)
          .build());
}
/**
 * Creates a remote view configured from a view storage config.
 *
 * <p>Captures the table base path and the completed-and-compaction timeline from the meta client,
 * reads the server host, port and client timeout from the config, and sets up a retry helper only
 * when the config enables retries.
 *
 * @param metaClient meta client of the table this view serves
 * @param viewConf storage config supplying server address, timeout and retry settings
 */
public RemoteHoodieTableFileSystemView(HoodieTableMetaClient metaClient, FileSystemViewStorageConfig viewConf) {
  this.metaClient = metaClient;
  this.mapper = new ObjectMapper();
  this.basePath = metaClient.getBasePath();
  this.timeline = metaClient.getActiveTimeline().filterCompletedAndCompactionInstants();

  this.serverHost = viewConf.getRemoteViewServerHost();
  this.serverPort = viewConf.getRemoteViewServerPort();
  // The config value is in seconds; the field holds milliseconds.
  this.timeoutMs = viewConf.getRemoteTimelineClientTimeoutSecs() * 1000;

  if (!viewConf.isRemoteTimelineClientRetryEnabled()) {
    // Retries disabled: retryHelper keeps its default null and requests are sent directly.
    return;
  }
  this.retryHelper = new RetryHelper(
      viewConf.getRemoteTimelineClientMaxRetryIntervalMs(),
      viewConf.getRemoteTimelineClientMaxRetryNumbers(),
      viewConf.getRemoteTimelineInitialRetryIntervalMs(),
      viewConf.getRemoteTimelineClientRetryExceptions(),
      "Sending request");
}
/**
 * Sends one request to the remote server and parses the response body.
 *
 * <p>The URL is built from the configured host and port, the given path and the given query
 * parameters. The timestamp of the local timeline's last instant (when one exists) and the local
 * timeline hash are always appended. The request goes through the retry helper when one is
 * configured, otherwise it is sent directly.
 *
 * @param requestPath endpoint path to call
 * @param queryParameters query parameters to add to the URL
 * @param reference type descriptor used to parse the response body
 * @param method HTTP method to use
 * @param <T> type the caller expects the response to be parsed into
 * @return the parsed response body
 * @throws IOException if sending the request or reading/parsing the response fails
 */
private <T> T executeRequest(String requestPath, Map<String, String> queryParameters, TypeReference reference,
    RequestMethod method) throws IOException {
  // Fails this validation once the view has been closed.
  ValidationUtils.checkArgument(!closed, "View already closed");
  URIBuilder builder =
      new URIBuilder().setHost(serverHost).setPort(serverPort).setPath(requestPath).setScheme("http");
  queryParameters.forEach(builder::addParameter);
  // Adding mandatory parameters - Last instants affecting file-slice
  timeline.lastInstant().ifPresent(instant -> builder.addParameter(LAST_INSTANT_TS, instant.getTimestamp()));
  builder.addParameter(TIMELINE_HASH, timeline.getTimelineHash());
  String url = builder.toString();
  LOG.info("Sending request : ({})", url);
  Response response = retryHelper != null ? retryHelper.start(() -> get(timeoutMs, url, method)) : get(timeoutMs, url, method);
  String content = response.returnContent().asString(Consts.UTF_8);
  // The caller chooses T through the type reference it passes in, so the cast cannot be checked here.
  @SuppressWarnings("unchecked")
  T parsed = (T) mapper.readValue(content, reference);
  return parsed;
}
/**
 * Builds the query parameters for a request scoped to a single partition.
 *
 * @param partitionPath value stored under the {@code partition} parameter
 * @return a new mutable map holding the base path and the partition path
 */
private Map<String, String> getParamsWithPartitionPath(String partitionPath) {
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  paramsMap.put(PARTITION_PARAM, partitionPath);
  return paramsMap;
}
/**
 * Builds the query parameters for a request that needs only the table base path.
 *
 * @return a new mutable map holding the base path
 */
private Map<String, String> getParams() {
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  return paramsMap;
}
/**
 * Builds the query parameters for a request that needs the base path plus one extra parameter.
 *
 * @param paramName name of the extra parameter
 * @param instant value stored under {@code paramName}
 * @return a new mutable map holding the base path and the extra parameter
 */
private Map<String, String> getParams(String paramName, String instant) {
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  paramsMap.put(paramName, instant);
  return paramsMap;
}
/**
 * Builds the query parameters for a partition-scoped request that carries one extra parameter.
 *
 * @param partitionPath value stored under the {@code partition} parameter
 * @param paramName name of the extra parameter
 * @param paramVal value stored under {@code paramName}
 * @return a new mutable map holding the base path, the partition path and the extra parameter
 */
private Map<String, String> getParamsWithAdditionalParam(String partitionPath, String paramName, String paramVal) {
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  paramsMap.put(PARTITION_PARAM, partitionPath);
  paramsMap.put(paramName, paramVal);
  return paramsMap;
}
/**
 * Builds the query parameters for a partition-scoped request that carries several extra parameters.
 *
 * <p>{@code paramNames[i]} is paired with {@code paramVals[i]}; the two arrays must have the same
 * length. A later entry overwrites an earlier one with the same name, including the base path and
 * partition entries.
 *
 * @param partitionPath value stored under the {@code partition} parameter
 * @param paramNames names of the extra parameters
 * @param paramVals values of the extra parameters, in the same order as {@code paramNames}
 * @return a new mutable map holding the base path, the partition path and the extra parameters
 */
private Map<String, String> getParamsWithAdditionalParams(String partitionPath, String[] paramNames,
    String[] paramVals) {
  // Validate before building anything so a mismatched call fails immediately.
  ValidationUtils.checkArgument(paramNames.length == paramVals.length,
      "paramNames and paramVals must have the same length");
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  paramsMap.put(PARTITION_PARAM, partitionPath);
  for (int i = 0; i < paramNames.length; i++) {
    paramsMap.put(paramNames[i], paramVals[i]);
  }
  return paramsMap;
}
/**
 * Requests the latest base files of one partition from the remote server.
 *
 * @param partitionPath partition to query
 * @return stream of the base files returned by the server
 */
@Override
public Stream<HoodieBaseFile> getLatestBaseFiles(String partitionPath) {
  Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
  return getLatestBaseFilesFromParams(paramsMap, LATEST_PARTITION_DATA_FILES_URL);
}
/**
 * Requests the latest base files from the remote server without restricting to a partition.
 *
 * @return stream of the base files returned by the server
 */
@Override
public Stream<HoodieBaseFile> getLatestBaseFiles() {
  Map<String, String> paramsMap = getParams();
  return getLatestBaseFilesFromParams(paramsMap, LATEST_ALL_DATA_FILES);
}
private Stream getLatestBaseFilesFromParams(Map paramsMap, String requestPath) {
try {
List dataFiles = executeRequest(requestPath, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
/**
 * Requests the latest base files of one partition from the remote server, bounded by a maximum
 * commit time that is sent as the {@code maxinstant} parameter.
 *
 * @param partitionPath partition to query
 * @param maxCommitTime upper bound sent to the server
 * @return stream of the base files returned by the server
 */
@Override
public Stream<HoodieBaseFile> getLatestBaseFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
  Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
  return getLatestBaseFilesFromParams(paramsMap, LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL);
}
@Override
public Map> getAllLatestBaseFilesBeforeOrOn(String maxCommitTime) {
Map paramsMap = new HashMap<>();
paramsMap.put(BASEPATH_PARAM, basePath);
paramsMap.put(MAX_INSTANT_PARAM, maxCommitTime);
try {
Map> dataFileMap = executeRequest(
ALL_LATEST_BASE_FILES_BEFORE_ON_INSTANT_URL,
paramsMap,
new TypeReference
© 2015 - 2025 Weber Informatics LLC | Privacy Policy