Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
org.apache.hudi.common.table.view.RemoteHoodieTableFileSystemView Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.table.view;
import org.apache.hudi.common.model.CompactionOperation;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.dto.BaseFileDTO;
import org.apache.hudi.common.table.timeline.dto.ClusteringOpDTO;
import org.apache.hudi.common.table.timeline.dto.CompactionOpDTO;
import org.apache.hudi.common.table.timeline.dto.FileGroupDTO;
import org.apache.hudi.common.table.timeline.dto.FileSliceDTO;
import org.apache.hudi.common.table.timeline.dto.InstantDTO;
import org.apache.hudi.common.table.timeline.dto.TimelineDTO;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.RetryHelper;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieRemoteException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.http.Consts;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.client.utils.URIBuilder;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
/**
* A proxy for table file-system view which translates local View API calls to REST calls to remote timeline service.
*/
public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView, Serializable {
// Common path prefix of every endpoint constant declared below.
private static final String BASE_URL = "/v1/hoodie/view";
// ---- File-slice endpoints ----
public static final String LATEST_PARTITION_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/partition/latest/");
public static final String LATEST_PARTITION_SLICE_URL = String.format("%s/%s", BASE_URL, "slices/file/latest/");
public static final String LATEST_PARTITION_UNCOMPACTED_SLICES_URL =
String.format("%s/%s", BASE_URL, "slices/uncompacted/partition/latest/");
public static final String ALL_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/all");
public static final String LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/merged/beforeoron/latest/");
public static final String LATEST_SLICES_RANGE_INSTANT_URL = String.format("%s/%s", BASE_URL, "slices/range/latest/");
public static final String LATEST_SLICES_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/beforeoron/latest/");
// ---- Pending compaction endpoint ----
public static final String PENDING_COMPACTION_OPS = String.format("%s/%s", BASE_URL, "compactions/pending/");
// ---- Base-file ("datafile") endpoints ----
public static final String LATEST_PARTITION_DATA_FILES_URL =
String.format("%s/%s", BASE_URL, "datafiles/latest/partition");
public static final String LATEST_PARTITION_DATA_FILE_URL =
String.format("%s/%s", BASE_URL, "datafile/latest/partition");
public static final String ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all");
public static final String LATEST_ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all/latest/");
public static final String LATEST_DATA_FILE_ON_INSTANT_URL = String.format("%s/%s", BASE_URL, "datafile/on/latest/");
public static final String LATEST_DATA_FILES_RANGE_INSTANT_URL =
String.format("%s/%s", BASE_URL, "datafiles/range/latest/");
public static final String LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "datafiles/beforeoron/latest/");
// ---- File-group endpoints (all / replaced) ----
public static final String ALL_FILEGROUPS_FOR_PARTITION_URL =
String.format("%s/%s", BASE_URL, "filegroups/all/partition/");
public static final String ALL_REPLACED_FILEGROUPS_BEFORE_OR_ON =
String.format("%s/%s", BASE_URL, "filegroups/replaced/beforeoron/");
public static final String ALL_REPLACED_FILEGROUPS_BEFORE =
String.format("%s/%s", BASE_URL, "filegroups/replaced/before/");
public static final String ALL_REPLACED_FILEGROUPS_PARTITION =
String.format("%s/%s", BASE_URL, "filegroups/replaced/partition/");
// ---- Pending clustering endpoint ----
public static final String PENDING_CLUSTERING_FILEGROUPS = String.format("%s/%s", BASE_URL, "clustering/pending/");
// ---- Timeline endpoints ----
// NOTE(review): LAST_INSTANTS is not referenced anywhere in this class; presumably used by the server side - confirm.
public static final String LAST_INSTANT = String.format("%s/%s", BASE_URL, "timeline/instant/last");
public static final String LAST_INSTANTS = String.format("%s/%s", BASE_URL, "timeline/instants/last");
public static final String TIMELINE = String.format("%s/%s", BASE_URL, "timeline/instants/all");
// POST Requests
public static final String REFRESH_TABLE = String.format("%s/%s", BASE_URL, "refresh/");
// ---- Query-parameter names ----
public static final String PARTITION_PARAM = "partition";
public static final String BASEPATH_PARAM = "basepath";
public static final String INSTANT_PARAM = "instant";
public static final String MAX_INSTANT_PARAM = "maxinstant";
public static final String INSTANTS_PARAM = "instants";
public static final String FILEID_PARAM = "fileid";
// LAST_INSTANT_TS and TIMELINE_HASH are appended to every request by executeRequest.
public static final String LAST_INSTANT_TS = "lastinstantts";
public static final String TIMELINE_HASH = "timelinehash";
// NOTE(review): REFRESH_OFF is not referenced in this class - confirm where it is consumed.
public static final String REFRESH_OFF = "refreshoff";
public static final String INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM = "includependingcompaction";
private static final Logger LOG = LogManager.getLogger(RemoteHoodieTableFileSystemView.class);
// Host and port of the remote timeline service every request is sent to.
private final String serverHost;
private final int serverPort;
// Base path of the table; sent with every request as the "basepath" parameter.
private final String basePath;
private final HoodieTableMetaClient metaClient;
// Local timeline; its last instant and hash are attached to every request. Replaced by refresh().
private HoodieTimeline timeline;
private final ObjectMapper mapper;
// Used as both connect and socket timeout of each HTTP call, in milliseconds.
private final int timeoutMs;
// Once true, executeRequest rejects any further request.
private boolean closed = false;
// Null when client-side retries are disabled in the view configuration.
// Parameterized on Response because executeRequest assigns the retried call's result to a Response.
private RetryHelper<Response> retryHelper;
/**
 * HTTP verbs this client can issue against the timeline service.
 */
private enum RequestMethod {
  GET,
  POST
}
/**
 * Creates a view that talks to the given server, using an otherwise default storage configuration.
 *
 * @param server host of the remote timeline service
 * @param port port of the remote timeline service
 * @param metaClient meta client of the table this view serves
 */
public RemoteHoodieTableFileSystemView(String server, int port, HoodieTableMetaClient metaClient) {
  this(
      metaClient,
      FileSystemViewStorageConfig.newBuilder()
          .withRemoteServerHost(server)
          .withRemoteServerPort(port)
          .build());
}
/**
 * Creates a view configured from {@code viewConf}.
 *
 * <p>The local timeline is initialised from the meta client's active timeline. A retry helper is only
 * created when the configuration enables client retries; otherwise requests are sent exactly once.
 *
 * @param metaClient meta client of the table this view serves
 * @param viewConf supplies server host/port, client timeout (seconds) and retry settings
 */
public RemoteHoodieTableFileSystemView(HoodieTableMetaClient metaClient, FileSystemViewStorageConfig viewConf) {
  this.basePath = metaClient.getBasePath();
  this.mapper = new ObjectMapper();
  this.metaClient = metaClient;
  this.timeline = metaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
  this.serverHost = viewConf.getRemoteViewServerHost();
  this.serverPort = viewConf.getRemoteViewServerPort();
  // Configuration is in seconds, the HTTP client expects milliseconds.
  this.timeoutMs = viewConf.getRemoteTimelineClientTimeoutSecs() * 1000;
  if (viewConf.isRemoteTimelineClientRetryEnabled()) {
    // Diamond instead of a raw instantiation so the helper's result type is inferred from the field.
    this.retryHelper = new RetryHelper<>(
        viewConf.getRemoteTimelineClientMaxRetryIntervalMs(),
        viewConf.getRemoteTimelineClientMaxRetryNumbers(),
        viewConf.getRemoteTimelineInitialRetryIntervalMs(),
        viewConf.getRemoteTimelineClientRetryExceptions(),
        "Sending request");
  }
}
/**
 * Sends one HTTP request to the remote timeline service and deserializes the JSON response.
 *
 * <p>Besides the caller-supplied parameters, the timestamp of the local timeline's last instant (when
 * present) and the local timeline hash are always appended to the query string.
 *
 * @param requestPath endpoint path, e.g. one of the {@code *_URL} constants
 * @param queryParameters query parameters to add to the request URL
 * @param reference Jackson type token describing the expected response type
 * @param method HTTP method to use
 * @param <T> type the response body is deserialized into
 * @return the deserialized response
 * @throws IOException if the HTTP call or the JSON parsing fails
 * @throws IllegalArgumentException if this view has already been closed
 */
private <T> T executeRequest(String requestPath, Map<String, String> queryParameters, TypeReference<?> reference,
    RequestMethod method) throws IOException {
  ValidationUtils.checkArgument(!closed, "View already closed");
  URIBuilder builder =
      new URIBuilder().setHost(serverHost).setPort(serverPort).setPath(requestPath).setScheme("http");
  queryParameters.forEach(builder::addParameter);
  // Adding mandatory parameters - Last instants affecting file-slice
  timeline.lastInstant().ifPresent(instant -> builder.addParameter(LAST_INSTANT_TS, instant.getTimestamp()));
  builder.addParameter(TIMELINE_HASH, timeline.getTimelineHash());
  String url = builder.toString();
  LOG.info("Sending request : (" + url + ")");
  // Go through the retry helper only when retries were enabled at construction time.
  Response response = retryHelper != null ? retryHelper.start(() -> get(timeoutMs, url, method)) : get(timeoutMs, url, method);
  String content = response.returnContent().asString(Consts.UTF_8);
  // The caller's type token and T always describe the same type, so the cast is safe.
  @SuppressWarnings("unchecked")
  T result = (T) mapper.readValue(content, reference);
  return result;
}
/**
 * Builds the query parameters for a partition-scoped request.
 *
 * @param partitionPath value of the "partition" parameter
 * @return a new mutable map holding the base path and the partition path
 */
private Map<String, String> getParamsWithPartitionPath(String partitionPath) {
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  paramsMap.put(PARTITION_PARAM, partitionPath);
  return paramsMap;
}
/**
 * Builds the query parameters for a table-scoped request.
 *
 * @return a new mutable map holding only the base path
 */
private Map<String, String> getParams() {
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  return paramsMap;
}
/**
 * Builds the query parameters for a table-scoped request that carries one extra parameter.
 *
 * @param paramName name of the extra parameter
 * @param instant value of the extra parameter
 * @return a new mutable map holding the base path and the extra parameter
 */
private Map<String, String> getParams(String paramName, String instant) {
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  paramsMap.put(paramName, instant);
  return paramsMap;
}
/**
 * Builds the query parameters for a partition-scoped request that carries one extra parameter.
 *
 * @param partitionPath value of the "partition" parameter
 * @param paramName name of the extra parameter
 * @param paramVal value of the extra parameter
 * @return a new mutable map holding the base path, the partition path and the extra parameter
 */
private Map<String, String> getParamsWithAdditionalParam(String partitionPath, String paramName, String paramVal) {
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  paramsMap.put(PARTITION_PARAM, partitionPath);
  paramsMap.put(paramName, paramVal);
  return paramsMap;
}
/**
 * Builds the query parameters for a partition-scoped request that carries several extra parameters.
 *
 * @param partitionPath value of the "partition" parameter
 * @param paramNames names of the extra parameters
 * @param paramVals values of the extra parameters, positionally matching {@code paramNames}
 * @return a new mutable map holding the base path, the partition path and all extra parameters
 * @throws IllegalArgumentException if the two arrays differ in length
 */
private Map<String, String> getParamsWithAdditionalParams(String partitionPath, String[] paramNames,
    String[] paramVals) {
  // Reject mismatched arrays before building anything.
  ValidationUtils.checkArgument(paramNames.length == paramVals.length);
  Map<String, String> paramsMap = new HashMap<>();
  paramsMap.put(BASEPATH_PARAM, basePath);
  paramsMap.put(PARTITION_PARAM, partitionPath);
  for (int i = 0; i < paramNames.length; i++) {
    paramsMap.put(paramNames[i], paramVals[i]);
  }
  return paramsMap;
}
/**
 * Requests the base files served by the "datafiles/latest/partition" endpoint for one partition.
 *
 * @param partitionPath partition to query
 * @return stream of base files converted from the response
 */
@Override
public Stream<HoodieBaseFile> getLatestBaseFiles(String partitionPath) {
  Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
  return getLatestBaseFilesFromParams(paramsMap, LATEST_PARTITION_DATA_FILES_URL);
}
/**
 * Requests the base files served by the "datafiles/all/latest/" endpoint for the whole table.
 *
 * @return stream of base files converted from the response
 */
@Override
public Stream<HoodieBaseFile> getLatestBaseFiles() {
  Map<String, String> paramsMap = getParams();
  return getLatestBaseFilesFromParams(paramsMap, LATEST_ALL_DATA_FILES);
}
private Stream getLatestBaseFilesFromParams(Map paramsMap, String requestPath) {
try {
List dataFiles = executeRequest(requestPath, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
/**
 * Requests the base files served by the "datafiles/beforeoron/latest/" endpoint for one partition.
 *
 * @param partitionPath partition to query
 * @param maxCommitTime sent as the "maxinstant" parameter
 * @return stream of base files converted from the response
 */
@Override
public Stream<HoodieBaseFile> getLatestBaseFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
  Map<String, String> paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
  return getLatestBaseFilesFromParams(paramsMap, LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL);
}
@Override
public Option getBaseFileOn(String partitionPath, String instantTime, String fileId) {
Map paramsMap = getParamsWithAdditionalParams(partitionPath,
new String[] {INSTANT_PARAM, FILEID_PARAM}, new String[] {instantTime, fileId});
try {
List dataFiles = executeRequest(LATEST_DATA_FILE_ON_INSTANT_URL, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return Option.fromJavaOptional(dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
/**
 * Requests the base files served by the "datafiles/range/latest/" endpoint.
 *
 * @param commitsToReturn instants to query; sent comma-joined as the "instants" parameter
 * @return stream of base files converted from the response
 */
@Override
public Stream<HoodieBaseFile> getLatestBaseFilesInRange(List<String> commitsToReturn) {
  Map<String, String> paramsMap =
      getParams(INSTANTS_PARAM, StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
  return getLatestBaseFilesFromParams(paramsMap, LATEST_DATA_FILES_RANGE_INSTANT_URL);
}
/**
 * Requests the base files served by the "datafiles/all" endpoint for one partition.
 *
 * @param partitionPath partition to query
 * @return stream of base files converted from the response
 */
@Override
public Stream<HoodieBaseFile> getAllBaseFiles(String partitionPath) {
  Map<String, String> paramsMap = getParamsWithPartitionPath(partitionPath);
  return getLatestBaseFilesFromParams(paramsMap, ALL_DATA_FILES);
}
@Override
public Stream getLatestFileSlices(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List dataFiles = executeRequest(LATEST_PARTITION_SLICES_URL, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Option getLatestFileSlice(String partitionPath, String fileId) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
try {
List dataFiles = executeRequest(LATEST_PARTITION_SLICE_URL, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return Option.fromJavaOptional(dataFiles.stream().map(FileSliceDTO::toFileSlice).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestUnCompactedFileSlices(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List dataFiles = executeRequest(LATEST_PARTITION_UNCOMPACTED_SLICES_URL, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime,
boolean includeFileSlicesInPendingCompaction) {
Map paramsMap = getParamsWithAdditionalParams(partitionPath,
new String[] {MAX_INSTANT_PARAM, INCLUDE_FILES_IN_PENDING_COMPACTION_PARAM},
new String[] {maxCommitTime, String.valueOf(includeFileSlicesInPendingCompaction)});
try {
List dataFiles = executeRequest(LATEST_SLICES_BEFORE_ON_INSTANT_URL, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestMergedFileSlicesBeforeOrOn(String partitionPath, String maxInstantTime) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxInstantTime);
try {
List dataFiles = executeRequest(LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestFileSliceInRange(List commitsToReturn) {
Map paramsMap =
getParams(INSTANTS_PARAM, StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
try {
List dataFiles = executeRequest(LATEST_SLICES_RANGE_INSTANT_URL, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getAllFileSlices(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List dataFiles =
executeRequest(ALL_SLICES_URL, paramsMap, new TypeReference>() {}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getAllFileGroups(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List fileGroups = executeRequest(ALL_FILEGROUPS_FOR_PARTITION_URL, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getReplacedFileGroupsBeforeOrOn(String maxCommitTime, String partitionPath) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
try {
List fileGroups = executeRequest(ALL_REPLACED_FILEGROUPS_BEFORE_OR_ON, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getReplacedFileGroupsBefore(String maxCommitTime, String partitionPath) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
try {
List fileGroups = executeRequest(ALL_REPLACED_FILEGROUPS_BEFORE, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getAllReplacedFileGroups(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List fileGroups = executeRequest(ALL_REPLACED_FILEGROUPS_PARTITION, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
/**
 * Reloads the local timeline and then asks the remote service to refresh its view of the table.
 *
 * <p>The local timeline is replaced before the POST is sent, so it stays updated even if the
 * request fails.
 *
 * @return the boolean returned by the "refresh/" endpoint
 * @throws HoodieRemoteException if the request or response parsing fails with an I/O error
 */
public boolean refresh() {
  Map<String, String> paramsMap = getParams();
  try {
    // refresh the local timeline first.
    this.timeline = metaClient.reloadActiveTimeline().filterCompletedAndCompactionInstants();
    // The type token must carry its type argument: Jackson rejects a raw TypeReference at construction.
    return executeRequest(REFRESH_TABLE, paramsMap, new TypeReference<Boolean>() {}, RequestMethod.POST);
  } catch (IOException e) {
    throw new HoodieRemoteException(e);
  }
}
@Override
public Stream> getPendingCompactionOperations() {
Map paramsMap = getParams();
try {
List dtos = executeRequest(PENDING_COMPACTION_OPS, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return dtos.stream().map(CompactionOpDTO::toCompactionOperation);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream> getFileGroupsInPendingClustering() {
Map paramsMap = getParams();
try {
List dtos = executeRequest(PENDING_CLUSTERING_FILEGROUPS, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return dtos.stream().map(ClusteringOpDTO::toClusteringOperation);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
/**
 * Marks this view as closed; any later request is rejected by the closed-check in executeRequest.
 */
@Override
public void close() {
  this.closed = true;
}
/**
 * Resets the view by delegating to {@link #refresh()}; the refresh result is discarded.
 */
@Override
public void reset() {
  this.refresh();
}
@Override
public Option getLastInstant() {
Map paramsMap = getParams();
try {
List instants =
executeRequest(LAST_INSTANT, paramsMap, new TypeReference>() {}, RequestMethod.GET);
return Option.fromJavaOptional(instants.stream().map(InstantDTO::toInstant).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
/**
 * Requests the timeline held by the remote service ("timeline/instants/all" endpoint).
 *
 * <p>This returns the remote timeline converted from its DTO, not the locally cached
 * {@code timeline} field.
 *
 * @return the timeline built from the returned DTO and this view's meta client
 * @throws HoodieRemoteException if the request or response parsing fails with an I/O error
 */
@Override
public HoodieTimeline getTimeline() {
  Map<String, String> paramsMap = getParams();
  try {
    // Named timelineDto so it does not shadow the timeline field.
    TimelineDTO timelineDto =
        executeRequest(TIMELINE, paramsMap, new TypeReference<TimelineDTO>() {}, RequestMethod.GET);
    return TimelineDTO.toTimeline(timelineDto, metaClient);
  } catch (IOException e) {
    throw new HoodieRemoteException(e);
  }
}
/**
 * Synchronizes the view by delegating to {@link #refresh()}; the refresh result is discarded.
 */
@Override
public void sync() {
  this.refresh();
}
@Override
public Option getLatestBaseFile(String partitionPath, String fileId) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
try {
List dataFiles = executeRequest(LATEST_PARTITION_DATA_FILE_URL, paramsMap,
new TypeReference>() {}, RequestMethod.GET);
return Option.fromJavaOptional(dataFiles.stream().map(BaseFileDTO::toHoodieBaseFile).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
/**
 * Executes a single HTTP call with the given timeout applied to both connect and socket phases.
 *
 * @param timeoutMs connect and socket timeout in milliseconds
 * @param url fully built request URL
 * @param method HTTP verb; anything other than GET is sent as POST
 * @return the raw HTTP response
 * @throws IOException if executing the request fails
 */
private Response get(int timeoutMs, String url, RequestMethod method) throws IOException {
  final Request request;
  switch (method) {
    case GET:
      request = Request.Get(url);
      break;
    case POST:
    default:
      request = Request.Post(url);
      break;
  }
  return request.connectTimeout(timeoutMs).socketTimeout(timeoutMs).execute();
}
}