All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.uber.hoodie.common.table.view.RemoteHoodieTableFileSystemView Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2019 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *          http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.uber.hoodie.common.table.view;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.uber.hoodie.common.model.CompactionOperation;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieFileGroup;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.SyncableFileSystemView;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.table.timeline.dto.CompactionOpDTO;
import com.uber.hoodie.common.table.timeline.dto.DataFileDTO;
import com.uber.hoodie.common.table.timeline.dto.FileGroupDTO;
import com.uber.hoodie.common.table.timeline.dto.FileSliceDTO;
import com.uber.hoodie.common.table.timeline.dto.InstantDTO;
import com.uber.hoodie.common.table.timeline.dto.TimelineDTO;
import com.uber.hoodie.common.util.Option;
import com.uber.hoodie.common.util.StringUtils;
import com.uber.hoodie.common.util.collection.Pair;
import com.uber.hoodie.exception.HoodieRemoteException;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.client.utils.URIBuilder;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/**
 * A proxy for table file-system view which translates local View API calls to REST calls to remote timeline service
 */
public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView, Serializable {

  private static final String BASE_URL = "/v1/hoodie/view";
  public static final String LATEST_PARTITION_SLICES_URL = String.format("%s/%s", BASE_URL,
      "slices/partition/latest/");
  public static final String LATEST_PARTITION_SLICE_URL = String.format("%s/%s", BASE_URL,
      "slices/file/latest/");
  public static final String LATEST_PARTITION_UNCOMPACTED_SLICES_URL = String.format("%s/%s", BASE_URL,
      "slices/uncompacted/partition/latest/");
  public static final String ALL_SLICES_URL = String.format("%s/%s", BASE_URL, "slices/all");
  public static final String LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL =
      String.format("%s/%s", BASE_URL, "slices/merged/beforeoron/latest/");
  public static final String LATEST_SLICES_RANGE_INSTANT_URL =
      String.format("%s/%s", BASE_URL, "slices/range/latest/");
  public static final String LATEST_SLICES_BEFORE_ON_INSTANT_URL =
      String.format("%s/%s", BASE_URL, "slices/beforeoron/latest/");

  public static final String PENDING_COMPACTION_OPS =
      String.format("%s/%s", BASE_URL, "compactions/pending/");

  public static final String LATEST_PARTITION_DATA_FILES_URL = String.format("%s/%s", BASE_URL,
      "datafiles/latest/partition");
  public static final String LATEST_PARTITION_DATA_FILE_URL = String.format("%s/%s", BASE_URL,
      "datafile/latest/partition");
  public static final String ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all");
  public static final String LATEST_ALL_DATA_FILES = String.format("%s/%s", BASE_URL, "datafiles/all/latest/");
  public static final String LATEST_DATA_FILE_ON_INSTANT_URL =
      String.format("%s/%s", BASE_URL, "datafile/on/latest/");

  public static final String LATEST_DATA_FILES_RANGE_INSTANT_URL =
      String.format("%s/%s", BASE_URL, "datafiles/range/latest/");
  public static final String LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL =
      String.format("%s/%s", BASE_URL, "datafiles/beforeoron/latest/");

  public static final String ALL_FILEGROUPS_FOR_PARTITION_URL =
      String.format("%s/%s", BASE_URL, "filegroups/all/partition/");

  public static final String LAST_INSTANT = String.format("%s/%s", BASE_URL, "timeline/instant/last");
  public static final String LAST_INSTANTS = String.format("%s/%s", BASE_URL, "timeline/instants/last");

  public static final String TIMELINE = String.format("%s/%s", BASE_URL, "timeline/instants/all");

  // POST Requests
  public static final String REFRESH_DATASET = String.format("%s/%s", BASE_URL, "refresh/");

  public static final String PARTITION_PARAM = "partition";
  public static final String BASEPATH_PARAM = "basepath";
  public static final String INSTANT_PARAM = "instant";
  public static final String MAX_INSTANT_PARAM = "maxinstant";
  public static final String INSTANTS_PARAM = "instants";
  public static final String FILEID_PARAM = "fileid";
  public static final String LAST_INSTANT_TS = "lastinstantts";
  public static final String TIMELINE_HASH = "timelinehash";
  public static final String REFRESH_OFF = "refreshoff";

  private static Logger log = LogManager.getLogger(RemoteHoodieTableFileSystemView.class);

  private final String serverHost;
  private final int serverPort;
  private final String basePath;
  private final HoodieTableMetaClient metaClient;
  private final HoodieTimeline timeline;
  private final ObjectMapper mapper;

  private boolean closed = false;

  private enum RequestMethod {
    GET,
    POST
  }

  public RemoteHoodieTableFileSystemView(String server, int port, HoodieTableMetaClient metaClient) {
    this.basePath = metaClient.getBasePath();
    this.serverHost = server;
    this.serverPort = port;
    this.mapper = new ObjectMapper();
    this.metaClient = metaClient;
    this.timeline = metaClient.getActiveTimeline().filterCompletedAndCompactionInstants();
  }

  private  T executeRequest(String requestPath, Map queryParameters, TypeReference reference,
      RequestMethod method) throws IOException {
    Preconditions.checkArgument(!closed, "View already closed");

    URIBuilder builder = new URIBuilder().setHost(serverHost).setPort(serverPort).setPath(requestPath)
        .setScheme("http");

    queryParameters.entrySet().stream().forEach(entry -> {
      builder.addParameter(entry.getKey(), entry.getValue());
    });

    // Adding mandatory parameters - Last instants affecting file-slice
    timeline.lastInstant().ifPresent(instant -> builder.addParameter(LAST_INSTANT_TS, instant.getTimestamp()));
    builder.addParameter(TIMELINE_HASH, timeline.getTimelineHash());

    String url = builder.toString();
    log.info("Sending request : (" + url + ")");
    Response response = null;
    int timeout = 1000 * 300; // 5 min timeout
    switch (method) {
      case GET:
        response = Request.Get(url).connectTimeout(timeout).socketTimeout(timeout).execute();
        break;
      case POST:
      default:
        response = Request.Post(url).connectTimeout(timeout).socketTimeout(timeout).execute();
        break;
    }
    String content = response.returnContent().asString();
    return mapper.readValue(content, reference);
  }

  private Map getParamsWithPartitionPath(String partitionPath) {
    Map paramsMap = new HashMap<>();
    paramsMap.put(BASEPATH_PARAM, basePath);
    paramsMap.put(PARTITION_PARAM, partitionPath);
    return paramsMap;
  }

  private Map getParams() {
    Map paramsMap = new HashMap<>();
    paramsMap.put(BASEPATH_PARAM, basePath);
    return paramsMap;
  }

  private Map getParams(String paramName, String instant) {
    Map paramsMap = new HashMap<>();
    paramsMap.put(BASEPATH_PARAM, basePath);
    paramsMap.put(paramName, instant);
    return paramsMap;
  }

  private Map getParamsWithAdditionalParam(String partitionPath, String paramName, String paramVal) {
    Map paramsMap = new HashMap<>();
    paramsMap.put(BASEPATH_PARAM, basePath);
    paramsMap.put(PARTITION_PARAM, partitionPath);
    paramsMap.put(paramName, paramVal);
    return paramsMap;
  }

  private Map getParamsWithAdditionalParams(String partitionPath, String[] paramNames,
      String[] paramVals) {
    Map paramsMap = new HashMap<>();
    paramsMap.put(BASEPATH_PARAM, basePath);
    paramsMap.put(PARTITION_PARAM, partitionPath);
    Preconditions.checkArgument(paramNames.length == paramVals.length);
    for (int i = 0; i < paramNames.length; i++) {
      paramsMap.put(paramNames[i], paramVals[i]);
    }
    return paramsMap;
  }

  @Override
  public Stream getLatestDataFiles(String partitionPath) {
    Map paramsMap = getParamsWithPartitionPath(partitionPath);
    try {
      List dataFiles = executeRequest(LATEST_PARTITION_DATA_FILES_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getLatestDataFiles() {
    Map paramsMap = getParams();
    try {
      List dataFiles = executeRequest(LATEST_ALL_DATA_FILES, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getLatestDataFilesBeforeOrOn(String partitionPath, String maxCommitTime) {
    Map paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
    try {
      List dataFiles = executeRequest(LATEST_DATA_FILES_BEFORE_ON_INSTANT_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Option getDataFileOn(String partitionPath, String instantTime, String fileId) {
    Map paramsMap = getParamsWithAdditionalParams(partitionPath,
        new String[]{INSTANT_PARAM, FILEID_PARAM},
        new String[]{instantTime, fileId});
    try {
      List dataFiles = executeRequest(LATEST_DATA_FILE_ON_INSTANT_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getLatestDataFilesInRange(List commitsToReturn) {
    Map paramsMap = getParams(INSTANTS_PARAM,
        StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
    try {
      List dataFiles = executeRequest(LATEST_DATA_FILES_RANGE_INSTANT_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getAllDataFiles(String partitionPath) {
    Map paramsMap = getParamsWithPartitionPath(partitionPath);
    try {
      List dataFiles = executeRequest(ALL_DATA_FILES, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(DataFileDTO::toHoodieDataFile);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getLatestFileSlices(String partitionPath) {
    Map paramsMap = getParamsWithPartitionPath(partitionPath);
    try {
      List dataFiles = executeRequest(LATEST_PARTITION_SLICES_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(FileSliceDTO::toFileSlice);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Option getLatestFileSlice(String partitionPath, String fileId) {
    Map paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
    try {
      List dataFiles = executeRequest(LATEST_PARTITION_SLICE_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return Option.fromJavaOptional(dataFiles.stream().map(FileSliceDTO::toFileSlice).findFirst());
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getLatestUnCompactedFileSlices(String partitionPath) {
    Map paramsMap = getParamsWithPartitionPath(partitionPath);
    try {
      List dataFiles = executeRequest(LATEST_PARTITION_UNCOMPACTED_SLICES_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(FileSliceDTO::toFileSlice);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime) {
    Map paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxCommitTime);
    try {
      List dataFiles = executeRequest(LATEST_SLICES_BEFORE_ON_INSTANT_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(FileSliceDTO::toFileSlice);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getLatestMergedFileSlicesBeforeOrOn(String partitionPath, String maxInstantTime) {
    Map paramsMap = getParamsWithAdditionalParam(partitionPath, MAX_INSTANT_PARAM, maxInstantTime);
    try {
      List dataFiles = executeRequest(LATEST_SLICES_MERGED_BEFORE_ON_INSTANT_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(FileSliceDTO::toFileSlice);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getLatestFileSliceInRange(List commitsToReturn) {
    Map paramsMap = getParams(INSTANTS_PARAM,
        StringUtils.join(commitsToReturn.toArray(new String[0]), ","));
    try {
      List dataFiles = executeRequest(LATEST_SLICES_RANGE_INSTANT_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(FileSliceDTO::toFileSlice);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getAllFileSlices(String partitionPath) {
    Map paramsMap = getParamsWithPartitionPath(partitionPath);
    try {
      List dataFiles = executeRequest(ALL_SLICES_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dataFiles.stream().map(FileSliceDTO::toFileSlice);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream getAllFileGroups(String partitionPath) {
    Map paramsMap = getParamsWithPartitionPath(partitionPath);
    try {
      List fileGroups = executeRequest(ALL_FILEGROUPS_FOR_PARTITION_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return fileGroups.stream().map(dto -> FileGroupDTO.toFileGroup(dto, metaClient));
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  public boolean refresh() {
    Map paramsMap = getParams();
    try {
      return executeRequest(REFRESH_DATASET, paramsMap, new TypeReference() {
      }, RequestMethod.POST);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public Stream> getPendingCompactionOperations() {
    Map paramsMap = getParams();
    try {
      List dtos = executeRequest(PENDING_COMPACTION_OPS, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return dtos.stream().map(CompactionOpDTO::toCompactionOperation);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public void close() {
    closed = true;
  }

  @Override
  public void reset() {
    refresh();
  }

  @Override
  public Option getLastInstant() {
    Map paramsMap = getParams();
    try {
      List instants = executeRequest(LAST_INSTANT, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return Option.fromJavaOptional(instants.stream().map(InstantDTO::toInstant).findFirst());
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public HoodieTimeline getTimeline() {
    Map paramsMap = getParams();
    try {
      TimelineDTO timeline = executeRequest(TIMELINE, paramsMap,
          new TypeReference() {
          }, RequestMethod.GET);
      return TimelineDTO.toTimeline(timeline, metaClient);
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }

  @Override
  public void sync() {
    //noop
  }

  @Override
  public Option getLatestDataFile(String partitionPath, String fileId) {
    Map paramsMap = getParamsWithAdditionalParam(partitionPath, FILEID_PARAM, fileId);
    try {
      List dataFiles = executeRequest(LATEST_PARTITION_DATA_FILE_URL, paramsMap,
          new TypeReference>() {
          }, RequestMethod.GET);
      return Option.fromJavaOptional(dataFiles.stream().map(DataFileDTO::toHoodieDataFile).findFirst());
    } catch (IOException e) {
      throw new HoodieRemoteException(e);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy