// com.uber.hoodie.common.table.view.RemoteHoodieTableFileSystemView (Maven / Gradle / Ivy)
// The newest version!
/*
* Copyright (c) 2019 Uber Technologies, Inc. ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.table.view;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.uber.hoodie.common.model.CompactionOperation;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieFileGroup;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.SyncableFileSystemView;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.table.timeline.dto.CompactionOpDTO;
import com.uber.hoodie.common.table.timeline.dto.DataFileDTO;
import com.uber.hoodie.common.table.timeline.dto.FileGroupDTO;
import com.uber.hoodie.common.table.timeline.dto.FileSliceDTO;
import com.uber.hoodie.common.table.timeline.dto.InstantDTO;
import com.uber.hoodie.common.table.timeline.dto.TimelineDTO;
import com.uber.hoodie.common.util.Option;
import com.uber.hoodie.common.util.StringUtils;
import com.uber.hoodie.common.util.collection.Pair;
import com.uber.hoodie.exception.HoodieRemoteException;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.client.utils.URIBuilder;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* A proxy for table file-system view which translates local View API calls to REST calls to remote timeline service
*/
public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView, Serializable {
private static final String BASE_URL = "/v1/hoodie/view";
public static final String LATEST_PARTITION_SLICES_URL = String.format("%s/%s", BASE_URL,
"slices/partition/latest/");
public static final String LATEST_PARTITION_SLICE_URL = String.format("%s/%s", BASE_URL,
"slices/file/latest/");
public static final String LATEST_PARTITION_UNCOMPACTED_SLICES_URL = String.format("%s/%s", BASE_URL,
"slices/uncompacted/partition/latest/");
public static final String ALL_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/all");
public static final String LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/merged/beforeoron/latest/");
public static final String LATEST_SLICES_RANGE_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/range/latest/");
public static final String LATEST_SLICES_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "slices/beforeoron/latest/");
public static final String PENDING_COMPACTION_OPS =
String.format("%s/%s", BASE_URL, "compactions/pending/");
public static final String LATEST_PARTITION_DATA_FILES_URL = String.format("%s/%s", BASE_URL,
"datafiles/latest/partition");
public static final String LATEST_PARTITION_DATA_FILE_URL = String.format("%s/%s", BASE_URL,
"datafile/latest/partition");
public static final String ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all");
public static final String LATEST_ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all/latest/");
public static final String LATEST_DATA_FILE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "datafile/on/latest/");
public static final String LATEST_DATA_FILES_RANGE_INSTANT_URL =
String.format("%s/%s", BASE_URL, "datafiles/range/latest/");
public static final String LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL =
String.format("%s/%s", BASE_URL, "datafiles/beforeoron/latest/");
public static final String ALL_FILEGROUPS_FOR_PARTITION_URL =
String.format("%s/%s", BASE_URL, "filegroups/all/partition/");
public static final String LAST_INSTANT = String.format("%s/%s", BASE_URL, "timeline/instant/last");
public static final String LAST_INSTANTS = String.format("%s/%s", BASE_URL, "timeline/instants/last");
public static final String TIMELINE = String.format("%s/%s", BASE_URL, "timeline/instants/all");
// POST Requests
public static final String REFRESH_DATASET = String.format("%s/%s", BASE_URL, "refresh/");
public static final String PARTITION_PARAM = "partition";
public static final String BASEPATH_PARAM = "basepath";
public static final String INSTANT_PARAM = "instant";
public static final String MAX_INSTANT_PARAM = "maxinstant";
public static final String INSTANTS_PARAM = "instants";
public static final String FILEID_PARAM = "fileid";
public static final String LAST_INSTANT_TS = "lastinstantts";
public static final String TIMELINE_HASH = "timelinehash";
public static final String REFRESH_OFF = "refreshoff";
private static Logger log = LogManager.getLogger(RemoteHoodieTableFileSystemView.class);
private final String serverHost;
private final int serverPort;
private final String basePath;
private final HoodieTableMetaClient metaClient;
private final HoodieTimeline timeline;
private final ObjectMapper mapper;
private boolean closed = false;
private enum RequestMethod {
GET,
POST
}
public RemoteHoodieTableFileSystemView(String server, int port, HoodieTableMetaClient metaClient) {
this.basePath = metaClient.getBasePath();
this.serverHost = server;
this.serverPort = port;
this.mapper = new ObjectMapper();
this.metaClient = metaClient;
this.timeline = metaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
}
private T executeRequest(String requestPath, Map queryParameters, TypeReference reference,
RequestMethod method) throws IOException {
Preconditions.checkArgument(!closed, "View already closed");
URIBuilder builder = new URIBuilder().setHost(serverHost).setPort(serverPort).setPath(requestPath)
.setScheme("http");
queryParameters.entrySet().stream().forEach(entry -> {
builder.addParameter(entry.getKey(), entry.getValue());
});
// Adding mandatory parameters - Last instants affecting file-slice
timeline.lastInstant().ifPresent(instant -> builder.addParameter(LAST_INSTANT_TS, instant.getTimestamp()));
builder.addParameter(TIMELINE_HASH, timeline.getTimelineHash());
String url = builder.toString();
log.info("Sending request : (" + url + ")");
Response response = null;
int timeout = 1000 * 300; // 5 min timeout
switch (method) {
case GET:
response = Request.Get(url).connectTimeout(timeout).socketTimeout(timeout).execute();
break;
case POST:
default:
response = Request.Post(url).connectTimeout(timeout).socketTimeout(timeout).execute();
break;
}
String content = response.returnContent().asString();
return mapper.readValue(content, reference);
}
private Map getParamsWithPartitionPath(String partitionPath) {
Map paramsMap = new HashMap<>();
paramsMap.put(BASEPATH_PARAM, basePath);
paramsMap.put(PARTITION_PARAM, partitionPath);
return paramsMap;
}
private Map getParams() {
Map paramsMap = new HashMap<>();
paramsMap.put(BASEPATH_PARAM, basePath);
return paramsMap;
}
private Map getParams(String paramName, String instant) {
Map paramsMap = new HashMap<>();
paramsMap.put(BASEPATH_PARAM, basePath);
paramsMap.put(paramName, instant);
return paramsMap;
}
private Map getParamsWithAdditionalParam(String partitionPath, String paramName, String paramVal) {
Map paramsMap = new HashMap<>();
paramsMap.put(BASEPATH_PARAM, basePath);
paramsMap.put(PARTITION_PARAM, partitionPath);
paramsMap.put(paramName, paramVal);
return paramsMap;
}
private Map getParamsWithAdditionalParams(String partitionPath, String[] paramNames,
String[] paramVals) {
Map paramsMap = new HashMap<>();
paramsMap.put(BASEPATH_PARAM, basePath);
paramsMap.put(PARTITION_PARAM, partitionPath);
Preconditions.checkArgument(paramNames.length == paramVals.length);
for (int i = 0; i < paramNames.length; i++) {
paramsMap.put(paramNames[i], paramVals[i]);
}
return paramsMap;
}
@Override
public Stream getLatestDataFiles(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List dataFiles = executeRequest(LATEST_PARTITION_DATA_FILES_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestDataFiles() {
Map paramsMap = getParams();
try {
List dataFiles = executeRequest(LATEST_ALL_DATA_FILES, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestDataFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
try {
List dataFiles = executeRequest(LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Option getDataFileOn(String partitionPath, String instantTime, String fileId) {
Map paramsMap = getParamsWithAdditionalParams(partitionPath,
new String[]{INSTANT_PARAM, FILEID_PARAM},
new String[]{instantTime, fileId});
try {
List dataFiles = executeRequest(LATEST_DATA_FILE_ON_INSTANT_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestDataFilesInRange(List commitsToReturn) {
Map paramsMap = getParams(INSTANTS_PARAM,
StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
try {
List dataFiles = executeRequest(LATEST_DATA_FILES_RANGE_INSTANT_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getAllDataFiles(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List dataFiles = executeRequest(ALL_DATA_FILES, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestFileSlices(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List dataFiles = executeRequest(LATEST_PARTITION_SLICES_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Option getLatestFileSlice(String partitionPath, String fileId) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
try {
List dataFiles = executeRequest(LATEST_PARTITION_SLICE_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return Option.fromJavaOptional(dataFiles.stream().map(FileSliceDTO::toFileSlice).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestUnCompactedFileSlices(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List dataFiles = executeRequest(LATEST_PARTITION_UNCOMPACTED_SLICES_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
try {
List dataFiles = executeRequest(LATEST_SLICES_BEFORE_ON_INSTANT_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestMergedFileSlicesBeforeOrOn(String partitionPath, String maxInstantTime) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxInstantTime);
try {
List dataFiles = executeRequest(LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getLatestFileSliceInRange(List commitsToReturn) {
Map paramsMap = getParams(INSTANTS_PARAM,
StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
try {
List dataFiles = executeRequest(LATEST_SLICES_RANGE_INSTANT_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getAllFileSlices(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List dataFiles = executeRequest(ALL_SLICES_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dataFiles.stream().map(FileSliceDTO::toFileSlice);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream getAllFileGroups(String partitionPath) {
Map paramsMap = getParamsWithPartitionPath(partitionPath);
try {
List fileGroups = executeRequest(ALL_FILEGROUPS_FOR_PARTITION_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
public boolean refresh() {
Map paramsMap = getParams();
try {
return executeRequest(REFRESH_DATASET, paramsMap, new TypeReference() {
}, RequestMethod.POST);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public Stream> getPendingCompactionOperations() {
Map paramsMap = getParams();
try {
List dtos = executeRequest(PENDING_COMPACTION_OPS, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return dtos.stream().map(CompactionOpDTO::toCompactionOperation);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public void close() {
closed = true;
}
@Override
public void reset() {
refresh();
}
@Override
public Option getLastInstant() {
Map paramsMap = getParams();
try {
List instants = executeRequest(LAST_INSTANT, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return Option.fromJavaOptional(instants.stream().map(InstantDTO::toInstant).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public HoodieTimeline getTimeline() {
Map paramsMap = getParams();
try {
TimelineDTO timeline = executeRequest(TIMELINE, paramsMap,
new TypeReference() {
}, RequestMethod.GET);
return TimelineDTO.toTimeline(timeline, metaClient);
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
@Override
public void sync() {
//noop
}
@Override
public Option getLatestDataFile(String partitionPath, String fileId) {
Map paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
try {
List dataFiles = executeRequest(LATEST_PARTITION_DATA_FILE_URL, paramsMap,
new TypeReference>() {
}, RequestMethod.GET);
return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
} catch (IOException e) {
throw new HoodieRemoteException(e);
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy