All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.hudi.client.embedded.EmbeddedTimelineService Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.client.embedded;

import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.metrics.Registry;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.util.NetworkUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.HoodieStorageUtils;
import org.apache.hudi.storage.StorageConfiguration;
import org.apache.hudi.timeline.service.TimelineService;

import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Timeline Service that runs as part of write client.
 *
 * <p>Instances are obtained through {@code getOrStartEmbeddedTimelineService}. When server reuse is enabled in the
 * write config, the started instance is registered in a static map keyed by {@link TimelineServiceIdentifier} so that
 * later callers with an equal identifier share it. The registry, the per-instance set of base paths and the
 * detaching of the underlying server are all guarded by {@code SERVICE_LOCK}.
 */
public class EmbeddedTimelineService {
  // lock used when starting/stopping/modifying embedded services
  private static final Object SERVICE_LOCK = new Object();

  private static final Logger LOG = LoggerFactory.getLogger(EmbeddedTimelineService.class);
  private static final AtomicInteger NUM_SERVERS_RUNNING = new AtomicInteger(0);
  // Map of TimelineServiceIdentifier to existing timeline service running
  private static final Map<TimelineServiceIdentifier, EmbeddedTimelineService> RUNNING_SERVICES = new HashMap<>();
  private static final Registry METRICS_REGISTRY = Registry.getRegistry("TimelineService");
  private static final String NUM_EMBEDDED_TIMELINE_SERVERS = "numEmbeddedTimelineServers";
  private int serverPort;
  private String hostAddr;
  private final HoodieEngineContext context;
  private final StorageConfiguration storageConf;
  private final HoodieWriteConfig writeConfig;
  private TimelineService.Config serviceConfig;
  private final TimelineServiceIdentifier timelineServiceIdentifier;
  private final Set<String> basePaths; // the set of base paths using this EmbeddedTimelineService

  private transient FileSystemViewManager viewManager;
  private transient TimelineService server;

  /**
   * Creates a service instance (without starting the server) and records the write config's base path as its first user.
   * @param context The {@link HoodieEngineContext} for the client
   * @param embeddedTimelineServiceHostAddr The host address to use for the service (nullable)
   * @param writeConfig The {@link HoodieWriteConfig} for the client
   * @param timelineServiceIdentifier The key under which this instance may be registered for reuse
   */
  private EmbeddedTimelineService(HoodieEngineContext context, String embeddedTimelineServiceHostAddr, HoodieWriteConfig writeConfig,
                                  TimelineServiceIdentifier timelineServiceIdentifier) {
    setHostAddr(embeddedTimelineServiceHostAddr);
    this.context = context;
    this.writeConfig = writeConfig;
    this.timelineServiceIdentifier = timelineServiceIdentifier;
    this.basePaths = new HashSet<>();
    this.basePaths.add(writeConfig.getBasePath());
    this.storageConf = context.getStorageConf();
    this.viewManager = createViewManager();
  }

  /**
   * Returns an existing embedded timeline service if one is running for the given configuration and reuse is enabled, or starts a new one.
   * @param context The {@link HoodieEngineContext} for the client
   * @param embeddedTimelineServiceHostAddr The host address to use for the service (nullable)
   * @param writeConfig The {@link HoodieWriteConfig} for the client
   * @return A running {@link EmbeddedTimelineService}
   * @throws IOException if an error occurs while starting the service
   */
  public static EmbeddedTimelineService getOrStartEmbeddedTimelineService(HoodieEngineContext context, String embeddedTimelineServiceHostAddr, HoodieWriteConfig writeConfig) throws IOException {
    return getOrStartEmbeddedTimelineService(context, embeddedTimelineServiceHostAddr, writeConfig, TimelineService::new);
  }

  /**
   * Same as the public overload, but lets the caller supply the factory used to build the underlying {@link TimelineService}.
   * @param context The {@link HoodieEngineContext} for the client
   * @param embeddedTimelineServiceHostAddr The host address to use for the service (nullable)
   * @param writeConfig The {@link HoodieWriteConfig} for the client
   * @param timelineServiceCreator Factory that builds the {@link TimelineService} to start
   * @return A running {@link EmbeddedTimelineService}
   * @throws IOException if an error occurs while starting the service
   */
  static EmbeddedTimelineService getOrStartEmbeddedTimelineService(HoodieEngineContext context, String embeddedTimelineServiceHostAddr, HoodieWriteConfig writeConfig,
                                                                   TimelineServiceCreator timelineServiceCreator) throws IOException {
    TimelineServiceIdentifier timelineServiceIdentifier = getTimelineServiceIdentifier(embeddedTimelineServiceHostAddr, writeConfig);
    if (!writeConfig.isEmbeddedTimelineServerReuseEnabled()) {
      // If reuse is not enabled, there is no need to add the new instance to RUNNING_SERVICES
      return createAndStartService(context, embeddedTimelineServiceHostAddr, writeConfig, timelineServiceCreator, timelineServiceIdentifier);
    }
    synchronized (SERVICE_LOCK) {
      // reuse is enabled, check if an existing instance is compatible
      EmbeddedTimelineService existing = RUNNING_SERVICES.get(timelineServiceIdentifier);
      if (existing != null) {
        existing.addBasePath(writeConfig.getBasePath());
        LOG.info("Reusing existing embedded timeline server with configuration: {}", existing.serviceConfig);
        return existing;
      }
      // if no compatible instance is found, create a new one and register it
      EmbeddedTimelineService service = createAndStartService(context, embeddedTimelineServiceHostAddr, writeConfig,
          timelineServiceCreator, timelineServiceIdentifier);
      RUNNING_SERVICES.put(timelineServiceIdentifier, service);
      return service;
    }
  }

  /**
   * Builds a new instance, starts its server and bumps the running-servers metric.
   * @throws IOException if an error occurs while starting the service
   */
  private static EmbeddedTimelineService createAndStartService(HoodieEngineContext context, String embeddedTimelineServiceHostAddr, HoodieWriteConfig writeConfig,
                                                               TimelineServiceCreator timelineServiceCreator,
                                                               TimelineServiceIdentifier timelineServiceIdentifier) throws IOException {
    EmbeddedTimelineService service = new EmbeddedTimelineService(context, embeddedTimelineServiceHostAddr, writeConfig, timelineServiceIdentifier);
    service.startServer(timelineServiceCreator);
    METRICS_REGISTRY.set(NUM_EMBEDDED_TIMELINE_SERVERS, NUM_SERVERS_RUNNING.incrementAndGet());
    return service;
  }

  /**
   * Closes every timeline server currently registered for reuse and empties the registry.
   * Services started with reuse disabled are never registered and are therefore not affected.
   */
  public static void shutdownAllTimelineServers() {
    final EmbeddedTimelineService[] services;
    synchronized (SERVICE_LOCK) {
      // snapshot and clear under the lock; the actual close happens outside to avoid excess blocking
      services = RUNNING_SERVICES.values().toArray(new EmbeddedTimelineService[0]);
      RUNNING_SERVICES.clear();
    }
    for (EmbeddedTimelineService service : services) {
      service.closeServerIfRunning();
    }
  }

  /**
   * Builds the view manager handed to the server from the client-specified view storage configs.
   */
  private FileSystemViewManager createViewManager() {
    // Using passed-in configs to build view storage configs
    FileSystemViewStorageConfig.Builder builder =
        FileSystemViewStorageConfig.newBuilder().fromProperties(writeConfig.getClientSpecifiedViewStorageConfig().getProps());
    FileSystemViewStorageType storageType = builder.build().getStorageType();
    if (storageType.equals(FileSystemViewStorageType.REMOTE_ONLY)
        || storageType.equals(FileSystemViewStorageType.REMOTE_FIRST)) {
      // Reset to default if set to Remote
      builder.withStorageType(FileSystemViewStorageType.MEMORY);
    }
    return FileSystemViewManager.createViewManagerWithTableMetadata(context, writeConfig.getMetadataConfig(), builder.build(), writeConfig.getCommonConfig());
  }

  /**
   * Translates the write config into a {@link TimelineService.Config}, creates the server through the given factory
   * and starts it, remembering the port returned by {@code startService()}.
   * @param timelineServiceCreator Factory that builds the {@link TimelineService} to start
   * @throws IOException if an error occurs while creating or starting the server
   */
  private void startServer(TimelineServiceCreator timelineServiceCreator) throws IOException {
    TimelineService.Config.Builder timelineServiceConfBuilder = TimelineService.Config.builder()
        .serverPort(writeConfig.getEmbeddedTimelineServerPort())
        .numThreads(writeConfig.getEmbeddedTimelineServerThreads())
        .compress(writeConfig.getEmbeddedTimelineServerCompressOutput())
        .async(writeConfig.getEmbeddedTimelineServerUseAsync());
    // Only passing marker-related write configs to timeline server
    // if timeline-server-based markers are used.
    if (writeConfig.getMarkersType() == MarkerType.TIMELINE_SERVER_BASED) {
      timelineServiceConfBuilder
          .enableMarkerRequests(true)
          .markerBatchNumThreads(writeConfig.getMarkersTimelineServerBasedBatchNumThreads())
          .markerBatchIntervalMs(writeConfig.getMarkersTimelineServerBasedBatchIntervalMs())
          .markerParallelism(writeConfig.getMarkersDeleteParallelism());
    }

    if (writeConfig.isEarlyConflictDetectionEnable()) {
      timelineServiceConfBuilder.earlyConflictDetectionEnable(true)
          .earlyConflictDetectionStrategy(writeConfig.getEarlyConflictDetectionStrategyClassName())
          .earlyConflictDetectionCheckCommitConflict(writeConfig.earlyConflictDetectionCheckCommitConflict())
          .asyncConflictDetectorInitialDelayMs(writeConfig.getAsyncConflictDetectorInitialDelayMs())
          .asyncConflictDetectorPeriodMs(writeConfig.getAsyncConflictDetectorPeriodMs())
          .earlyConflictDetectionMaxAllowableHeartbeatIntervalInMs(
              writeConfig.getHoodieClientHeartbeatIntervalInMs()
                  * writeConfig.getHoodieClientHeartbeatTolerableMisses());
    }

    if (writeConfig.isTimelineServerBasedInstantStateEnabled()) {
      timelineServiceConfBuilder
          .instantStateForceRefreshRequestNumber(writeConfig.getTimelineServerBasedInstantStateForceRefreshRequestNumber())
          .enableInstantStateRequests(true);
    }

    this.serviceConfig = timelineServiceConfBuilder.build();

    server = timelineServiceCreator.create(context, storageConf.unwrapCopyAs(Configuration.class), serviceConfig,
        HoodieStorageUtils.getStorage(writeConfig.getBasePath(), storageConf.newInstance()), viewManager);
    serverPort = server.startService();
    LOG.info("Started embedded timeline server at {}:{}", hostAddr, serverPort);
  }

  /**
   * Factory for the underlying {@link TimelineService}; the public entry point uses {@code TimelineService::new}.
   */
  @FunctionalInterface
  interface TimelineServiceCreator {
    TimelineService create(HoodieEngineContext context, Configuration hadoopConf, TimelineService.Config timelineServerConf,
                           HoodieStorage storage, FileSystemViewManager globalFileSystemViewManager) throws IOException;
  }

  /**
   * Sets the host address advertised to remote clients: the given address when non-null,
   * otherwise the hostname reported by {@link NetworkUtils#getHostname()}.
   */
  private void setHostAddr(String embeddedTimelineServiceHostAddr) {
    if (embeddedTimelineServiceHostAddr != null) {
      LOG.info("Overriding hostIp to ({}) found in spark-conf. It was {}", embeddedTimelineServiceHostAddr, this.hostAddr);
      this.hostAddr = embeddedTimelineServiceHostAddr;
    } else {
      LOG.warn("Unable to find driver bind address from spark config");
      this.hostAddr = NetworkUtils.getHostname();
    }
  }

  /**
   * Retrieves proper view storage configs for remote clients to access this service.
   */
  public FileSystemViewStorageConfig getRemoteFileSystemViewConfig() {
    // REMOTE_FIRST when the client asked for a backup view, REMOTE_ONLY otherwise
    FileSystemViewStorageType viewStorageType = writeConfig.getClientSpecifiedViewStorageConfig()
        .shouldEnableBackupForRemoteFileSystemView()
        ? FileSystemViewStorageType.REMOTE_FIRST : FileSystemViewStorageType.REMOTE_ONLY;
    return FileSystemViewStorageConfig.newBuilder()
        .withStorageType(viewStorageType)
        .withRemoteServerHost(hostAddr)
        .withRemoteServerPort(serverPort)
        .withRemoteTimelineClientTimeoutSecs(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineClientTimeoutSecs())
        .withRemoteTimelineClientRetry(writeConfig.getClientSpecifiedViewStorageConfig().isRemoteTimelineClientRetryEnabled())
        .withRemoteTimelineClientMaxRetryNumbers(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineClientMaxRetryNumbers())
        .withRemoteTimelineInitialRetryIntervalMs(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineInitialRetryIntervalMs())
        .withRemoteTimelineClientMaxRetryIntervalMs(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineClientMaxRetryIntervalMs())
        .withRemoteTimelineClientRetryExceptions(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineClientRetryExceptions())
        .build();
  }

  /**
   * Returns the view manager backing this service, or {@code null} once the service has been stopped
   * through {@link #stopForBasePath(String)}.
   */
  public FileSystemViewManager getViewManager() {
    return viewManager;
  }

  /**
   * Adds a new base path to the set that are managed by this instance.
   * Callers must hold {@code SERVICE_LOCK}.
   * @param basePath the new base path to add
   */
  private void addBasePath(String basePath) {
    basePaths.add(basePath);
  }

  /**
   * Stops the embedded timeline service for the given base path. If a timeline service is managing multiple tables, it will only be shutdown once all tables have been stopped.
   * @param basePath For the table to stop the service for
   */
  public void stopForBasePath(String basePath) {
    final boolean noBasePathsLeft;
    final TimelineService currentServer;
    synchronized (SERVICE_LOCK) {
      basePaths.remove(basePath);
      // decide under the lock whether this call is the one that shuts the server down
      noBasePathsLeft = basePaths.isEmpty();
      if (noBasePathsLeft) {
        // two-argument remove: only drop the registry entry if it is this very instance, so that stopping an
        // unregistered (or already replaced) service never evicts another service with an equal identifier
        RUNNING_SERVICES.remove(timelineServiceIdentifier, this);
      }
      currentServer = this.server;
    }
    if (currentServer != null) {
      currentServer.unregisterBasePath(basePath);
    }
    // continue rest of shutdown outside of the synchronized block to avoid excess blocking
    if (noBasePathsLeft && closeServerIfRunning()) {
      this.viewManager = null;
    }
  }

  /**
   * Closes the underlying server at most once and updates the running-servers metric.
   * @return {@code true} if this call closed the server, {@code false} if there was no server left to close
   */
  private boolean closeServerIfRunning() {
    final TimelineService toClose;
    synchronized (SERVICE_LOCK) {
      // detach under the lock so that concurrent callers cannot both close the same server
      toClose = this.server;
      this.server = null;
    }
    if (toClose == null) {
      return false;
    }
    LOG.info("Closing Timeline server");
    toClose.close();
    METRICS_REGISTRY.set(NUM_EMBEDDED_TIMELINE_SERVERS, NUM_SERVERS_RUNNING.decrementAndGet());
    LOG.info("Closed Timeline server");
    return true;
  }

  /**
   * Derives the reuse key for a service from the host address and the relevant write config settings.
   */
  private static TimelineServiceIdentifier getTimelineServiceIdentifier(String hostAddr, HoodieWriteConfig writeConfig) {
    return new TimelineServiceIdentifier(hostAddr, writeConfig.getMarkersType(), writeConfig.isMetadataTableEnabled(),
        writeConfig.isEarlyConflictDetectionEnable(), writeConfig.isTimelineServerBasedInstantStateEnabled());
  }

  /**
   * Key used to decide whether an already running service can be reused: two identifiers are equal
   * only when every field matches (a {@code null} host address matches only another {@code null}).
   */
  static class TimelineServiceIdentifier {
    private final String hostAddr;
    private final MarkerType markerType;
    private final boolean isMetadataEnabled;
    private final boolean isEarlyConflictDetectionEnable;
    private final boolean isTimelineServerBasedInstantStateEnabled;

    public TimelineServiceIdentifier(String hostAddr, MarkerType markerType, boolean isMetadataEnabled, boolean isEarlyConflictDetectionEnable,
                                     boolean isTimelineServerBasedInstantStateEnabled) {
      this.hostAddr = hostAddr;
      this.markerType = markerType;
      this.isMetadataEnabled = isMetadataEnabled;
      this.isEarlyConflictDetectionEnable = isEarlyConflictDetectionEnable;
      this.isTimelineServerBasedInstantStateEnabled = isTimelineServerBasedInstantStateEnabled;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (!(o instanceof TimelineServiceIdentifier)) {
        return false;
      }
      TimelineServiceIdentifier that = (TimelineServiceIdentifier) o;
      // compare the same fields that hashCode() hashes, so equal objects always share a hash code
      return isMetadataEnabled == that.isMetadataEnabled
          && isEarlyConflictDetectionEnable == that.isEarlyConflictDetectionEnable
          && isTimelineServerBasedInstantStateEnabled == that.isTimelineServerBasedInstantStateEnabled
          && markerType == that.markerType
          && Objects.equals(hostAddr, that.hostAddr);
    }

    @Override
    public int hashCode() {
      return Objects.hash(hostAddr, markerType, isMetadataEnabled, isEarlyConflictDetectionEnable, isTimelineServerBasedInstantStateEnabled);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy