package com.clickzetta.platform.client;

import com.clickzetta.platform.arrow.ArrowIGSTableMeta;
import com.clickzetta.platform.arrow.ArrowTable;
import com.clickzetta.platform.bulkload.BulkLoadMetaData;
import com.clickzetta.platform.bulkload.BulkLoadStreamImpl;
import com.clickzetta.platform.client.api.*;
import com.clickzetta.platform.client.api.multi.*;
import com.clickzetta.platform.client.proxy.RpcProxy;
import com.clickzetta.platform.common.Constant;
import com.clickzetta.platform.common.IGSTableMeta;
import com.clickzetta.platform.util.Pair;
import com.clickzetta.platform.util.Util;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.protobuf.AbstractMessage;
import cz.proto.ingestion.Ingestion;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;

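// Client implementation backed by an RpcProxy. The proxy is created lazily on first use,
// single-table streams are built for the V1 (Kudu partial-row) and V2 (Arrow) protocols,
// multi-table and bulk-load streams are supported, and a small internal thread pool is used
// to initialize multi-table streams in parallel.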
public final class CZClient implements RpcProxy, Client {

  private static final Logger LOG = LoggerFactory.getLogger(CZClient.class);

  String clientId;
  ClientContext clientContext;
  RpcProxy rpcProxy;
  volatile boolean initialized = false;
  volatile ExecutorService executorService;

  CZClient() {
  }

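  // Opens the client by lazily creating and opening the underlying RPC proxy.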
  @Override
  public synchronized void open() throws IOException {
    validRpcProxyInit();
  }

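  // Closes the RPC proxy, then shuts down the internal executor (waiting up to wait_time_ms
  // before forcing shutdown) and clears all state so the client can be opened again.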
  @Override
  public synchronized void close(long wait_time_ms) throws IOException {
    if (!initialized) {
      return;
    }
    Util.tryWithFinally(() -> {
      if (rpcProxy != null) {
        rpcProxy.close(wait_time_ms);
      }
    }, () -> {
      if (executorService != null) {
        executorService.shutdown();
        try {
          boolean closed = executorService.awaitTermination(wait_time_ms, TimeUnit.MILLISECONDS);
          if (!closed) {
            executorService.shutdownNow();
          }
        } catch (InterruptedException ite) {
          // ignore
        }
      }
      executorService = null;
      rpcProxy = null;
      initialized = false;
    });
  }

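  // Lazily creates the shared executor (double-checked locking) used to initialize multi-table
  // streams in parallel; the pool size is clamped to [1, 64] and its threads are daemons.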
  private void initInternalExecutorService(int parallelSize) {
    if (this.executorService == null) {
      synchronized (this) {
        if (this.executorService == null) {
          int corePoolSize = Math.min(Math.max(1, parallelSize), 64);
          this.executorService = new ThreadPoolExecutor(corePoolSize, corePoolSize, 10, TimeUnit.MINUTES,
              new LinkedBlockingQueue<>(), new ThreadFactoryBuilder()
              .setNameFormat("multiStream-create-%d").setDaemon(true).build());
        }
      }
    }
  }

  private void closeInternalExecutorService() {
    if (this.executorService != null) {
      synchronized (this) {
        if (this.executorService != null) {
          executorService.shutdown();
          try {
            boolean closed = executorService.awaitTermination(5 * 1000, TimeUnit.MILLISECONDS);
            if (!closed) {
              executorService.shutdownNow();
            }
          } catch (InterruptedException ite) {
            // ignore
          }
          this.executorService = null;
        }
      }
    }
  }

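  // Releases server-side resources of a single-table stream: drops its tablets for V1 streams,
  // otherwise closes the V2 stream.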
  @Override
  public void releaseResource(Stream stream) throws IOException {
    validRpcProxyInit();
    if (stream.getProtocolType() == ProtocolType.V1) {
      rpcProxy.dropTablets(clientContext.instanceId(), clientContext.workspace(), stream.getTable().getSchemaName(),
          stream.getTable().getTableName(), stream.getTable().getTableId(), null);
    } else {
      rpcProxy.closeStreamV2(clientContext.instanceId(), clientContext.workspace(),
          stream.getTable().getSchemaName(), stream.getTable().getTableName());
    }
  }

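  // Releases one table of a multi-table stream: closes its V2 stream and, even if that fails,
  // removes the table from the stream options and the cached schemas.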
  @Override
  public void releaseResource(MultiStream multiStream, String schemaName, String tableName) throws IOException {
    ProtocolType protocolType = multiStream.getProtocolType();
    Preconditions.checkArgument(protocolType == ProtocolType.V2, "multiStream only support v2.");
    MultiTable multiTable = multiStream.getTable();
    validRpcProxyInit();
    try {
      rpcProxy.closeStreamV2(clientContext.instanceId(), clientContext.workspace(), schemaName, tableName);
    } finally {
      multiStream.getMultiTablesOptions().removeTable(schemaName, tableName);
      multiTable.removeTableSchema(schemaName, tableName);
    }
  }

  @Override
  public ClientContext getClientContext() {
    return clientContext;
  }

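  // Creates a single-table stream, dispatching on the protocol type of the options:
  // V1 uses the Kudu partial-row path, V2 the Arrow path.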
  @Override
  public Stream createStream(String schemaName, String tableName, Integer tabletNum, Options options) throws IOException {
    LOG.info("start to create stream for schema [{}.{}] extra properties {}.", schemaName, tableName, options.getProperties());
    if (tabletNum != null) {
      options.getProperties().put(IGS_TABLET_NUM, tabletNum);
    }
    switch (options.getProtocolType()) {
      case V1:
        return createKuduPartialRowStream(schemaName, tableName, tabletNum, options);
      case V2:
        return createArrowRowStream(schemaName, tableName, tabletNum, options);
      default:
        throw new UnsupportedOperationException(
            "not support createStream with unknown options protocolType. " + options.getProtocolType());
    }
  }

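  // V1 path: fetches the table meta, creates tablets, builds a CZStream and connects it to the
  // mutate workers, retrying until every tablet reports a worker host. ACID tables are forced
  // from AUTO_FLUSH_BACKGROUND to AUTO_FLUSH_SYNC.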
  private Stream createKuduPartialRowStream(String schemaName, String tableName, Integer tabletNum, Options options) throws IOException {
    CZStream stream = null;
    try {
      validRpcProxyInit();
      IGSTableMeta igsTableMeta = rpcProxy.getTableMeta(clientContext.clientId(),
          clientContext.instanceId(), clientContext.workspace(), schemaName, tableName);
      // if we need to refresh all properties.
      refreshInstanceId(igsTableMeta.getInstanceId());
      CZTable czTable = new CZTable(igsTableMeta);
      rpcProxy.createTablet(clientContext.instanceId(), clientContext.workspace(), czTable, tabletNum);

      // check pk table not use async flusher.
      if (igsTableMeta.getTableType() == Ingestion.IGSTableType.ACID &&
          options.getFlushMode() == FlushMode.AUTO_FLUSH_BACKGROUND) {
        LOG.warn("acid table not support flushMode with AUTO_FLUSH_BACKGROUND. reset to AUTO_FLUSH_SYNC.");
        options = options.toBuilder().withFlushMode(FlushMode.AUTO_FLUSH_SYNC).build();
      }
      stream = new CZStream(this, czTable, options);

      // try to get tablet workers with retry.
      List<Tuple2<String, Integer>> workers;
      boolean waitForLoaded;
      int retryTime = 0;
      do {
        workers = rpcProxy.getMutateWorkers(clientContext.instanceId(), czTable.getTableId(), schemaName, tableName,
            clientContext.igsConnectMode());
        waitForLoaded = workers.stream().anyMatch(w -> w._1 == null || w._1.length() == 0);
        if (waitForLoaded) {
          LOG.info("sleep & retry to get all tablet loaded. times {}", ++retryTime);
          try {
            Thread.sleep(2 * 1000);
          } catch (InterruptedException ite) {
            throw new IOException(ite);
          }
        }
      } while (waitForLoaded);

      // support user define worker address. such as privateLink or internal test.
      if (clientContext.workerAddrs() != null && !clientContext.workerAddrs().isEmpty()) {
        LOG.info("User define worker address {}. overwrite which get from controller.", clientContext.workerAddrs());
        workers.clear();
        for (Pair<String, Integer> pair : clientContext.workerAddrs()) {
          String host = pair.getFirst();
          int port = pair.getSecond() != null ? pair.getSecond() : -1;
          workers.add(new Tuple2<>(host, port));
        }
      }
      stream.getSession().initRpcConnection(workers);
    } finally {
      close();
    }
    return stream;
  }

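  // V2 path: creates (or reuses) the server-side stream, builds an ArrowStream and connects it
  // to the route workers. ACID tables are forced from AUTO_FLUSH_BACKGROUND to AUTO_FLUSH_SYNC.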
  private Stream createArrowRowStream(String schemaName, String tableName, Integer tabletNum, Options options) throws IOException {
    ArrowStream stream = null;
    try {
      validRpcProxyInit();
      ArrowIGSTableMeta igsTableMeta = rpcProxy.createOrGetStreamV2(clientContext.instanceId(),
          clientContext.workspace(), schemaName, tableName, tabletNum);
      // if we need to refresh all properties.
      refreshInstanceId(igsTableMeta.getInstanceId());

      // check pk table not use async flusher.
      if (igsTableMeta.getTableType() == Ingestion.IGSTableType.ACID &&
          options.getFlushMode() == FlushMode.AUTO_FLUSH_BACKGROUND) {
        LOG.warn("acid table not support flushMode with AUTO_FLUSH_BACKGROUND. reset to AUTO_FLUSH_SYNC.");
        options = options.toBuilder().withFlushMode(FlushMode.AUTO_FLUSH_SYNC).build();
      }

      ArrowTable arrowTable = new ArrowTable(igsTableMeta);
      stream = new ArrowStream(this, arrowTable, options);

      // try to get tablet workers with retry.
      List<Tuple2<String, Integer>> workers = getRouteWorkers(schemaName, tableName);
      stream.getSession().initRpcConnection(workers);
    } finally {
      close();
    }
    return stream;
  }

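  // Polls for route workers until every tablet reports a host, sleeping 2 seconds between
  // attempts and giving up after GET_WORKER_MAX_RETRY_TIMES retries (1000 by default).
  // User-defined worker addresses in the client context, if any, override the returned list.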
  private List<Tuple2<String, Integer>> getRouteWorkers(String schemaName, String tableName) throws IOException {
    return getRouteWorkers(schemaName, tableName, rpcProxy);
  }

  private List<Tuple2<String, Integer>> getRouteWorkers(String schemaName, String tableName, RpcProxy proxy) throws IOException {
    // try to get tablet workers with retry.
    List<Tuple2<String, Integer>> workers;
    boolean waitForLoaded;
    int retryTime = 0;
    int GET_WORKER_MAX_TIMES = 1000;
    Properties properties = getClientContext().getProperties();
    if (properties.get(Constant.GET_WORKER_MAX_RETRY_TIMES) != null) {
      GET_WORKER_MAX_TIMES = properties.get(Constant.GET_WORKER_MAX_RETRY_TIMES) instanceof Integer ?
          (Integer) properties.get(Constant.GET_WORKER_MAX_RETRY_TIMES) :
          Integer.parseInt(properties.getProperty(Constant.GET_WORKER_MAX_RETRY_TIMES));
    }
    do {
      workers = proxy.getRouteWorkers(clientContext.instanceId(), clientContext.workspace(), schemaName, tableName,
          clientContext.igsConnectMode());
      waitForLoaded = workers.stream().anyMatch(w -> w._1 == null || w._1.length() == 0);
      if (waitForLoaded) {
        LOG.info("sleep & retry to get all tablet loaded for table {}.{}. times {}", schemaName, tableName, ++retryTime);
        try {
          Thread.sleep(2 * 1000);
        } catch (InterruptedException ite) {
          throw new IOException(ite);
        }
      }
    } while (waitForLoaded && retryTime <= GET_WORKER_MAX_TIMES);
    if (waitForLoaded) {
      throw new IOException(String.format("tablet loaded failed & getRouteWorkers failed reach max retry %s times.", GET_WORKER_MAX_TIMES));
    }

    // support user define worker address. such as privateLink or internal test.
    if (clientContext.workerAddrs() != null && !clientContext.workerAddrs().isEmpty()) {
      LOG.info("User define worker address {}. overwrite which get from controller.", clientContext.workerAddrs());
      workers.clear();
      for (Pair<String, Integer> pair : clientContext.workerAddrs()) {
        String host = pair.getFirst();
        int port = pair.getSecond() != null ? pair.getSecond() : -1;
        workers.add(new Tuple2<>(host, port));
      }
    }
    return workers;
  }

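  // Initializes server-side streams for all tables in the options in parallel. Each worker thread
  // uses its own RpcProxy (closed again afterwards), route workers are collected per table, the
  // first failure is recorded and rethrown, and the overall wait is capped at 10 minutes.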
  private Map<TableIdentifier, List<Tuple2<String, Integer>>> initMultiStreams(MultiTable multiTable,
                                                                               MultiTablesOptions options,
                                                                               boolean closeParallelPool) throws IOException {
    Map<TableIdentifier, List<Tuple2<String, Integer>>> tableWorkers = new ConcurrentHashMap<>();
    AtomicReference<Throwable> exception = new AtomicReference<>();
    CountDownLatch latch = new CountDownLatch(options.getAllTableIdentifiers().size());

    // init executor service to create streams of multiple tables in parallel.
    // only init once.
    initInternalExecutorService(options.getAllTableIdentifiers().size());
    final Map<Long, RpcProxy> threadLocalMap = new ConcurrentHashMap<>();

    long startTimeMs = System.currentTimeMillis();
    for (TableIdentifier identifier : options.getAllTableIdentifiers()) {
      executorService.execute(() -> {
        if (exception.get() != null) {
          latch.countDown();
          return;
        }

        long startInternalTimeMs = System.currentTimeMillis();
        String schemaName = identifier.schemaName;
        String tableName = identifier.tableName;
        LOG.info("start to init table {}.{}", schemaName, tableName);

        long threadId = Thread.currentThread().getId();
        RpcProxy internalProxy = threadLocalMap.computeIfAbsent(threadId, id -> {
          try {
            return createRpcProxy();
          } catch (IOException e) {
            LOG.error("failed to init internal proxy with {}.{}: {}", schemaName, tableName, e);
            exception.compareAndSet(null, e);
            return null;
          }
        });
        if (exception.get() != null || internalProxy == null) {
          latch.countDown();
          return;
        }
        try {
          int tabletNum = options.getTabletNum(schemaName, tableName);
          ArrowIGSTableMeta igsTableMeta = internalProxy.createOrGetStreamV2(clientContext.instanceId(),
              clientContext.workspace(), schemaName, tableName, tabletNum);
          // if we need to refresh all properties.
          refreshInstanceId(igsTableMeta.getInstanceId());
          multiTable.addSchema(igsTableMeta);
          LOG.info("created stream for table {}.{} cost {} ms", schemaName, tableName,
              System.currentTimeMillis() - startInternalTimeMs);

          // try to get all workers host port.
          List<Tuple2<String, Integer>> workers = getRouteWorkers(schemaName, tableName, internalProxy);
          tableWorkers.put(identifier, workers);
          LOG.info("end to init table {}.{} cost {} ms", schemaName, tableName,
              System.currentTimeMillis() - startInternalTimeMs);
        } catch (Throwable t) {
          LOG.error("failed to init table {}.{}: {}", schemaName, tableName, t);
          exception.compareAndSet(null, t);
        } finally {
          latch.countDown();
        }
      });
    }

    Function<IOException, IOException> function = e -> {
      if (exception.get() != null) {
        return new IOException(exception.get());
      } else {
        return e;
      }
    };
    try {
      boolean result = latch.await(10, TimeUnit.MINUTES);
      if (!result) {
        String errMsg = String.format("create multi stream failed to wait latch with init size %s reverse size %s.",
            options.getAllTableIdentifiers().size(), latch.getCount());
        LOG.error(errMsg);
        throw function.apply(new IOException(errMsg));
      }
    } catch (InterruptedException ex) {
      String errMsg = String.format("create multi stream timeout with init size %s reverse size %s.",
          options.getAllTableIdentifiers().size(), latch.getCount());
      LOG.error(errMsg);
      throw function.apply(new IOException(errMsg, ex));
    } finally {
      if (closeParallelPool) {
        closeInternalExecutorService();
      }
      // try to release all rpc proxy used in thread local.
      threadLocalMap.values().stream().parallel().forEach(rpcProxy -> {
        try {
          rpcProxy.close(1 * 500);
        } catch (IOException ioe) {
          // ignore
        }
      });
    }

    LOG.info("parallel to init stream cost {} ms", System.currentTimeMillis() - startTimeMs);
    // check parallel exception.
    if (exception.get() != null) {
      throw new IOException(exception.get());
    }
    return tableWorkers;
  }

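  // Creates a multi-table stream (V2 only): initializes all tables in parallel, forces
  // AUTO_FLUSH_SYNC, and connects the session to the deduplicated set of route workers.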
  @Override
  public MultiStream createMultiStream(MultiTablesOptions multiTablesOptions, Options options) throws IOException {
    LOG.info("start to create multi stream with extra properties {}.", options.getProperties());
    Preconditions.checkArgument(options.getProtocolType() == ProtocolType.V2,
        "multiStream only support protocolType with V2.");

    Object obj = options.getProperties().getOrDefault(Constant.MULTI_TABLE_CACHE_SIZE, 1024);
    int maximumSize = obj instanceof String ? Integer.parseInt((String) obj) : (int) obj;
    MultiTable multiTable = new MultiTable(maximumSize);
    Map<TableIdentifier, List<Tuple2<String, Integer>>> tableWorkers = initMultiStreams(multiTable, multiTablesOptions, true);

    if (options.getFlushMode() == FlushMode.AUTO_FLUSH_BACKGROUND) {
      LOG.warn("multi table not support flushMode with AUTO_FLUSH_BACKGROUND. reset to AUTO_FLUSH_SYNC.");
      options = options.toBuilder().withFlushMode(FlushMode.AUTO_FLUSH_SYNC).build();
    }
    MultiStream multiStream = new MultiStreamImpl(this, multiTable, multiTablesOptions, options);
    // try to get all workers host port.
    // Deduplicate workers.
    Set<Tuple2<String, Integer>> workerSet = new HashSet<>();
    for (List<Tuple2<String, Integer>> list : tableWorkers.values()) {
      workerSet.addAll(list);
    }
    multiStream.getSession().initRpcConnection(new ArrayList<>(workerSet));
    return multiStream;
  }

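  // Adds a single table to an existing multi-table stream and connects its route workers.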
  @Override
  public MultiStream appendStream(MultiStream multiStream, String schemaName, String tableName) throws IOException {
    validRpcProxyInit();
    Preconditions.checkArgument(multiStream.getSession().getOptions().getProtocolType() == ProtocolType.V2,
        "multiStream only support protocolType with V2.");
    List<Tuple2<String, Integer>> workers = null;

    MultiTable multiTable = multiStream.getTable();
    int tabletNum = multiStream.getMultiTablesOptions().getTabletNum(schemaName, tableName);
    ArrowIGSTableMeta igsTableMeta = rpcProxy.createOrGetStreamV2(clientContext.instanceId(),
        clientContext.workspace(), schemaName, tableName, tabletNum);
    // if we need to refresh all properties.
    refreshInstanceId(igsTableMeta.getInstanceId());
    multiTable.addSchema(igsTableMeta);

    // try to get tablet workers with retry.
    workers = getRouteWorkers(schemaName, tableName);

    // try to get all workers host port.
    multiStream.getSession().initRpcConnection(workers);
    return multiStream;
  }

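  // Adds a batch of tables to an existing multi-table stream via the parallel initialization
  // path, then merges the new table options into the stream.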
  @Override
  public MultiStream batchAppendStream(MultiStream multiStream, MultiTablesOptions newTableOptions) throws IOException {
    MultiTable multiTable = multiStream.getTable();
    Map<TableIdentifier, List<Tuple2<String, Integer>>> tableWorkers = initMultiStreams(multiTable, newTableOptions, false);
    // try to get all workers host port.
    // Deduplicate workers.
    Set<Tuple2<String, Integer>> workerSet = new HashSet<>();
    for (List<Tuple2<String, Integer>> list : tableWorkers.values()) {
      workerSet.addAll(list);
    }
    multiStream.getSession().initRpcConnection(new ArrayList<>(workerSet));
    multiStream.getMultiTablesOptions().mergeFrom(newTableOptions);
    return multiStream;
  }

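  // Creates a new server-side bulk-load stream and wraps its metadata; the two getters below
  // fetch an existing stream (with or without building a BulkLoadStreamImpl around it).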
  @Override
  public BulkLoadStream createBulkLoadStream(String schemaName,
                                             String tableName,
                                             BulkLoadOptions options) throws IOException {
    validRpcProxyInit();
    BulkLoadMetaData metaData = rpcProxy.createBulkLoadStreamV2(
        clientContext.instanceId(), clientContext.workspace(), schemaName, tableName, options);
    // Refresh all properties if necessary.
    refreshInstanceId(metaData.getInstanceId());
    return new BulkLoadStreamImpl(this, metaData);
  }

  @Override
  public BulkLoadStream getBulkLoadStream(String schemaName, String tableName, String streamId) throws IOException {
    validRpcProxyInit();
    BulkLoadMetaData metaData = rpcProxy.getBulkLoadStreamV2(
        clientContext.instanceId(), clientContext.workspace(), schemaName, tableName, streamId, true);
    // Refresh all properties if necessary.
    refreshInstanceId(metaData.getInstanceId());
    return new BulkLoadStreamImpl(this, metaData);
  }

  @Override
  public BulkLoadMetaData getBulkLoadStreamMetaData(String schemaName, String tableName, String streamId) throws IOException {
    validRpcProxyInit();
    BulkLoadMetaData metaData = rpcProxy.getBulkLoadStreamV2(
        clientContext.instanceId(), clientContext.workspace(), schemaName, tableName, streamId, false);
    // Refresh all properties if necessary.
    refreshInstanceId(metaData.getInstanceId());
    return metaData;
  }

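  // Builds and opens a new RpcProxy for the configured router mode, closing it again if open() fails.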
  private RpcProxy createRpcProxy() throws IOException {
    // open rpc proxy to call rpc message.
    RpcProxy proxy = RpcProxy.Builder.build(clientContext, clientContext.igsRouterMode());
    try {
      proxy.open();
    } catch (Exception e) {
      if (proxy != null) {
        try {
          proxy.close();
        } catch (Exception ce) {
          LOG.warn("fallback rpc proxy init to close rpc proxy failed.", ce);
        }
      }
      throw e;
    }
    return proxy;
  }

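  // Ensures the shared RpcProxy has been created and opened; a no-op once initialization succeeded.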
  private synchronized void validRpcProxyInit() throws IOException {
    if (initialized) {
      return;
    }
    if (rpcProxy == null) {
      rpcProxy = createRpcProxy();
    }
    initialized = true;
  }

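  // Stores the server-reported instance id in the context properties (only when not already
  // present) and refreshes the context so dependent values pick it up.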
  private void refreshInstanceId(long instanceId) {
    if (!clientContext.getProperties().containsKey("instanceId")) {
      LOG.info("reset instanceId from {} to {}", clientContext.instanceId(), instanceId);
      clientContext.getProperties().put("instanceId", instanceId);
      clientContext.refreshAll(clientContext);
    }
  }

  @Override
  public <T extends AbstractMessage, R extends AbstractMessage>
  CompletableFuture<R> rpcProxyCallAsync(Ingestion.MethodEnum method, T request) throws IOException {
    validRpcProxyInit();
    return rpcProxy.rpcProxyCallAsync(method, request);
  }

  @Override
  public CompletableFuture dummyValidAsync(String request) throws IOException {
    validRpcProxyInit();
    return rpcProxy.dummyValidAsync(request);
  }
}