com.clickzetta.platform.client.CZClient
The Java SDK for ClickZetta's Lakehouse
package com.clickzetta.platform.client;
import com.clickzetta.platform.arrow.ArrowIGSTableMeta;
import com.clickzetta.platform.arrow.ArrowTable;
import com.clickzetta.platform.bulkload.BulkLoadMetaData;
import com.clickzetta.platform.bulkload.BulkLoadStreamImpl;
import com.clickzetta.platform.client.api.*;
import com.clickzetta.platform.client.api.multi.*;
import com.clickzetta.platform.client.proxy.RpcProxy;
import com.clickzetta.platform.common.Constant;
import com.clickzetta.platform.common.IGSTableMeta;
import com.clickzetta.platform.util.Pair;
import com.clickzetta.platform.util.Util;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.protobuf.AbstractMessage;
import cz.proto.ingestion.Ingestion;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
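/**
 * Default {@link Client} implementation of the ClickZetta ingestion SDK. It drives the
 * server through an {@link RpcProxy} and is responsible for creating single-table streams
 * (protocol V1, kudu partial-row based, and V2, arrow based), multi-table streams, and
 * bulk-load streams, as well as for the lifecycle of the RPC proxy and of the internal
 * thread pool used to initialize multi-table streams in parallel.
 *
 * <p>A minimal usage sketch, assuming a {@code Client} instance has already been obtained
 * through the SDK's builder facilities (which are not part of this file):
 * <pre>{@code
 * Stream stream = client.createStream("my_schema", "my_table", null, options);
 * // ... write rows through the stream ...
 * client.close(5000L);
 * }</pre>
 */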
public final class CZClient implements RpcProxy, Client {
private static final Logger LOG = LoggerFactory.getLogger(CZClient.class);
String clientId;
ClientContext clientContext;
RpcProxy rpcProxy;
volatile boolean initialized = false;
volatile ExecutorService executorService;
CZClient() {
}
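/**
 * Lazily initializes the underlying RPC proxy; subsequent calls are no-ops once the
 * client is initialized.
 */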
@Override
public synchronized void open() throws IOException {
validRpcProxyInit();
}
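/**
 * Closes the RPC proxy and shuts down the internal executor service, waiting up to
 * {@code wait_time_ms} milliseconds for pending tasks before forcing shutdown.
 */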
@Override
public synchronized void close(long wait_time_ms) throws IOException {
if (!initialized) {
return;
}
Util.tryWithFinally(() -> {
if (rpcProxy != null) {
rpcProxy.close(wait_time_ms);
}
}, () -> {
if (executorService != null) {
executorService.shutdown();
try {
boolean closed = executorService.awaitTermination(wait_time_ms, TimeUnit.MILLISECONDS);
if (!closed) {
executorService.shutdownNow();
}
} catch (InterruptedException ite) {
// ignore
}
}
executorService = null;
rpcProxy = null;
initialized = false;
});
}
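/** Lazily creates the thread pool (capped at 64 threads) used to initialize multi-table streams in parallel. */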
private void initInternalExecutorService(int parallelSize) {
if (this.executorService == null) {
synchronized (this) {
if (this.executorService == null) {
int corePoolSize = Math.min(Math.max(1, parallelSize), 64);
this.executorService = new ThreadPoolExecutor(corePoolSize, corePoolSize, 10, TimeUnit.MINUTES,
new LinkedBlockingQueue<>(), new ThreadFactoryBuilder()
.setNameFormat("multiStream-create-%d").setDaemon(true).build());
}
}
}
}
private void closeInternalExecutorService() {
if (this.executorService != null) {
synchronized (this) {
if (this.executorService != null) {
executorService.shutdown();
try {
boolean closed = executorService.awaitTermination(5 * 1000, TimeUnit.MILLISECONDS);
if (!closed) {
executorService.shutdownNow();
}
} catch (InterruptedException ite) {
// ignore
}
this.executorService = null;
}
}
}
}
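/** Releases the server-side resources of a single-table stream: drops the tablets for V1 streams, closes the stream for V2. */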
@Override
public void releaseResource(Stream stream) throws IOException {
validRpcProxyInit();
if (stream.getProtocolType() == ProtocolType.V1) {
rpcProxy.dropTablets(clientContext.instanceId(), clientContext.workspace(), stream.getTable().getSchemaName(),
stream.getTable().getTableName(), stream.getTable().getTableId(), null);
} else {
rpcProxy.closeStreamV2(clientContext.instanceId(), clientContext.workspace(),
stream.getTable().getSchemaName(), stream.getTable().getTableName());
}
}
@Override
public void releaseResource(MultiStream multiStream, String schemaName, String tableName) throws IOException {
ProtocolType protocolType = multiStream.getProtocolType();
Preconditions.checkArgument(protocolType == ProtocolType.V2, "multiStream only support v2.");
MultiTable multiTable = multiStream.getTable();
validRpcProxyInit();
try {
rpcProxy.closeStreamV2(clientContext.instanceId(), clientContext.workspace(), schemaName, tableName);
} finally {
multiStream.getMultiTablesOptions().removeTable(schemaName, tableName);
multiTable.removeTableSchema(schemaName, tableName);
}
}
@Override
public ClientContext getClientContext() {
return clientContext;
}
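/**
 * Creates a single-table ingestion stream, dispatching to the V1 (kudu partial-row)
 * or V2 (arrow) implementation according to the protocol type in {@code options}.
 */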
@Override
public Stream createStream(String schemaName, String tableName, Integer tabletNum, Options options) throws IOException {
LOG.info("start to create stream for schema [{}.{}] extra properties {}.", schemaName, tableName, options.getProperties());
if (tabletNum != null) {
options.getProperties().put(Constant.IGS_TABLET_NUM, tabletNum);
}
switch (options.getProtocolType()) {
case V1:
return createKuduPartialRowStream(schemaName, tableName, tabletNum, options);
case V2:
return createArrowRowStream(schemaName, tableName, tabletNum, options);
default:
throw new UnsupportedOperationException(
"not support createStream with unknown options protocolType. " + options.getProtocolType());
}
}
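/**
 * Creates a V1 stream: fetches the table metadata, creates the tablets, waits until all
 * tablets are loaded, then opens RPC connections to the mutate workers.
 */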
private Stream createKuduPartialRowStream(String schemaName, String tableName, Integer tabletNum, Options options) throws IOException {
CZStream stream = null;
try {
validRpcProxyInit();
IGSTableMeta igsTableMeta = rpcProxy.getTableMeta(clientContext.clientId(),
clientContext.instanceId(), clientContext.workspace(), schemaName, tableName);
// Refresh all client properties if needed.
refreshInstanceId(igsTableMeta.getInstanceId());
CZTable czTable = new CZTable(igsTableMeta);
rpcProxy.createTablet(clientContext.instanceId(), clientContext.workspace(), czTable, tabletNum);
// ACID (primary-key) tables must not use the asynchronous background flusher.
if (igsTableMeta.getTableType() == Ingestion.IGSTableType.ACID &&
options.getFlushMode() == FlushMode.AUTO_FLUSH_BACKGROUND) {
LOG.warn("acid table not support flushMode with AUTO_FLUSH_BACKGROUND. reset to AUTO_FLUSH_SYNC.");
options = options.toBuilder().withFlushMode(FlushMode.AUTO_FLUSH_SYNC).build();
}
stream = new CZStream(this, czTable, options);
// try to get tablet workers with retry.
List<Tuple2<String, Integer>> workers;
boolean waitForLoaded;
int retryTime = 0;
do {
workers = rpcProxy.getMutateWorkers(clientContext.instanceId(), czTable.getTableId(), schemaName, tableName,
clientContext.igsConnectMode());
waitForLoaded = workers.stream().anyMatch(w -> w._1 == null || w._1.length() == 0);
if (waitForLoaded) {
LOG.info("sleep & retry to get all tablet loaded. times {}", ++retryTime);
try {
Thread.sleep(2 * 1000);
} catch (InterruptedException ite) {
throw new IOException(ite);
}
}
} while (waitForLoaded);
// Support user-defined worker addresses, e.g. for privateLink or internal testing.
if (clientContext.workerAddrs() != null && !clientContext.workerAddrs().isEmpty()) {
LOG.info("User define worker address {}. overwrite which get from controller.", clientContext.workerAddrs());
workers.clear();
for (Pair<String, Integer> pair : clientContext.workerAddrs()) {
String host = pair.getFirst();
int port = pair.getSecond() != null ? pair.getSecond() : -1;
workers.add(new Tuple2<>(host, port));
}
}
stream.getSession().initRpcConnection(workers);
} finally {
close();
}
return stream;
}
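/**
 * Creates a V2 (arrow) stream: creates or reuses the server-side stream, resolves the
 * route workers with retry, then opens the RPC connections.
 */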
private Stream createArrowRowStream(String schemaName, String tableName, Integer tabletNum, Options options) throws IOException {
ArrowStream stream = null;
try {
validRpcProxyInit();
ArrowIGSTableMeta igsTableMeta = rpcProxy.createOrGetStreamV2(clientContext.instanceId(),
clientContext.workspace(), schemaName, tableName, tabletNum);
// Refresh all client properties if needed.
refreshInstanceId(igsTableMeta.getInstanceId());
// ACID (primary-key) tables must not use the asynchronous background flusher.
if (igsTableMeta.getTableType() == Ingestion.IGSTableType.ACID &&
options.getFlushMode() == FlushMode.AUTO_FLUSH_BACKGROUND) {
LOG.warn("acid table not support flushMode with AUTO_FLUSH_BACKGROUND. reset to AUTO_FLUSH_SYNC.");
options = options.toBuilder().withFlushMode(FlushMode.AUTO_FLUSH_SYNC).build();
}
ArrowTable arrowTable = new ArrowTable(igsTableMeta);
stream = new ArrowStream(this, arrowTable, options);
// try to get tablet workers with retry.
List<Tuple2<String, Integer>> workers = getRouteWorkers(schemaName, tableName);
stream.getSession().initRpcConnection(workers);
} finally {
close();
}
return stream;
}
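/** Resolves the worker endpoints for a table using the client's default RPC proxy. */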
private List<Tuple2<String, Integer>> getRouteWorkers(String schemaName, String tableName) throws IOException {
return getRouteWorkers(schemaName, tableName, rpcProxy);
}
private List<Tuple2<String, Integer>> getRouteWorkers(String schemaName, String tableName, RpcProxy proxy) throws IOException {
// try to get tablet workers with retry.
List<Tuple2<String, Integer>> workers;
boolean waitForLoaded;
int retryTime = 0;
int GET_WORKER_MAX_TIMES = 1000;
Properties properties = getClientContext().getProperties();
if (properties.get(Constant.GET_WORKER_MAX_RETRY_TIMES) != null) {
GET_WORKER_MAX_TIMES = properties.get(Constant.GET_WORKER_MAX_RETRY_TIMES) instanceof Integer ?
(Integer) properties.get(Constant.GET_WORKER_MAX_RETRY_TIMES) :
Integer.parseInt(properties.getProperty(Constant.GET_WORKER_MAX_RETRY_TIMES));
}
do {
workers = proxy.getRouteWorkers(clientContext.instanceId(), clientContext.workspace(), schemaName, tableName,
clientContext.igsConnectMode());
waitForLoaded = workers.stream().anyMatch(w -> w._1 == null || w._1.length() == 0);
if (waitForLoaded) {
LOG.info("sleep & retry to get all tablet loaded for table {}.{}. times {}", schemaName, tableName, ++retryTime);
try {
Thread.sleep(2 * 1000);
} catch (InterruptedException ite) {
throw new IOException(ite);
}
}
} while (waitForLoaded && retryTime <= GET_WORKER_MAX_TIMES);
if (waitForLoaded) {
throw new IOException(String.format("tablet loaded failed & getRouteWorkers failed reach max retry %s times.", GET_WORKER_MAX_TIMES));
}
// Support user-defined worker addresses, e.g. for privateLink or internal testing.
if (clientContext.workerAddrs() != null && !clientContext.workerAddrs().isEmpty()) {
LOG.info("User define worker address {}. overwrite which get from controller.", clientContext.workerAddrs());
workers.clear();
for (Pair<String, Integer> pair : clientContext.workerAddrs()) {
String host = pair.getFirst();
int port = pair.getSecond() != null ? pair.getSecond() : -1;
workers.add(new Tuple2<>(host, port));
}
}
return workers;
}
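/**
 * Initializes the streams of all tables referenced by a multi-table stream in parallel,
 * using one RPC proxy per worker thread. Returns the route workers per table and rethrows
 * the first failure as an {@link IOException}.
 */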
private Map<TableIdentifier, List<Tuple2<String, Integer>>> initMultiStreams(MultiTable multiTable,
MultiTablesOptions options,
boolean closeParallelPool) throws IOException {
Map<TableIdentifier, List<Tuple2<String, Integer>>> tableWorkers = new ConcurrentHashMap<>();
AtomicReference<Throwable> exception = new AtomicReference<>();
CountDownLatch latch = new CountDownLatch(options.getAllTableIdentifiers().size());
// init executor service to create streams of multiple tables in parallel.
// only init once.
initInternalExecutorService(options.getAllTableIdentifiers().size());
final Map<Long, RpcProxy> threadLocalMap = new ConcurrentHashMap<>();
long startTimeMs = System.currentTimeMillis();
for (TableIdentifier identifier : options.getAllTableIdentifiers()) {
executorService.execute(() -> {
if (exception.get() != null) {
latch.countDown();
return;
}
long startInternalTimeMs = System.currentTimeMillis();
String schemaName = identifier.schemaName;
String tableName = identifier.tableName;
LOG.info("start to init table {}.{}", schemaName, tableName);
long threadId = Thread.currentThread().getId();
RpcProxy internalProxy = threadLocalMap.computeIfAbsent(threadId, id -> {
try {
return createRpcProxy();
} catch (IOException e) {
LOG.error("failed to init internal proxy with {}.{}: {}", schemaName, tableName, e);
exception.compareAndSet(null, e);
return null;
}
});
if (exception.get() != null || internalProxy == null) {
latch.countDown();
return;
}
try {
int tabletNum = options.getTabletNum(schemaName, tableName);
ArrowIGSTableMeta igsTableMeta = internalProxy.createOrGetStreamV2(clientContext.instanceId(),
clientContext.workspace(), schemaName, tableName, tabletNum);
// Refresh all client properties if needed.
refreshInstanceId(igsTableMeta.getInstanceId());
multiTable.addSchema(igsTableMeta);
LOG.info("created stream for table {}.{} cost {} ms", schemaName, tableName,
System.currentTimeMillis() - startInternalTimeMs);
// try to get all workers host port.
List<Tuple2<String, Integer>> workers = getRouteWorkers(schemaName, tableName, internalProxy);
tableWorkers.put(identifier, workers);
LOG.info("end to init table {}.{} cost {} ms", schemaName, tableName,
System.currentTimeMillis() - startInternalTimeMs);
} catch (Throwable t) {
LOG.error("failed to init table {}.{}: {}", schemaName, tableName, t);
exception.compareAndSet(null, t);
} finally {
latch.countDown();
}
});
}
Function<IOException, IOException> function = e -> {
if (exception.get() != null) {
return new IOException(exception.get());
} else {
return e;
}
};
try {
boolean result = latch.await(10, TimeUnit.MINUTES);
if (!result) {
String errMsg = String.format("create multi stream failed to wait latch with init size %s reverse size %s.",
options.getAllTableIdentifiers().size(), latch.getCount());
LOG.error(errMsg);
throw function.apply(new IOException(errMsg));
}
} catch (InterruptedException ex) {
String errMsg = String.format("create multi stream timeout with init size %s reverse size %s.",
options.getAllTableIdentifiers().size(), latch.getCount());
LOG.error(errMsg);
throw function.apply(new IOException(errMsg, ex));
} finally {
if (closeParallelPool) {
closeInternalExecutorService();
}
// Release the RPC proxies created per worker thread.
threadLocalMap.values().stream().parallel().forEach(rpcProxy -> {
try {
rpcProxy.close(1 * 500);
} catch (IOException ioe) {
// ignore
}
});
}
LOG.info("parallel to init stream cost {} ms", System.currentTimeMillis() - startTimeMs);
// check parallel exception.
if (exception.get() != null) {
throw new IOException(exception.get());
}
return tableWorkers;
}
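/** Creates a multi-table stream (V2 only), deduplicates the workers of all tables and opens the RPC connections. */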
@Override
public MultiStream createMultiStream(MultiTablesOptions multiTablesOptions, Options options) throws IOException {
LOG.info("start to create multi stream with extra properties {}.", options.getProperties());
Preconditions.checkArgument(options.getProtocolType() == ProtocolType.V2,
"multiStream only support protocolType with V2.");
Object obj = options.getProperties().getOrDefault(Constant.MULTI_TABLE_CACHE_SIZE, 1024);
int maximumSize = obj instanceof String ? Integer.parseInt((String) obj) : (int) obj;
MultiTable multiTable = new MultiTable(maximumSize);
Map<TableIdentifier, List<Tuple2<String, Integer>>> tableWorkers = initMultiStreams(multiTable, multiTablesOptions, true);
if (options.getFlushMode() == FlushMode.AUTO_FLUSH_BACKGROUND) {
LOG.warn("multi table not support flushMode with AUTO_FLUSH_BACKGROUND. reset to AUTO_FLUSH_SYNC.");
options = options.toBuilder().withFlushMode(FlushMode.AUTO_FLUSH_SYNC).build();
}
MultiStream multiStream = new MultiStreamImpl(this, multiTable, multiTablesOptions, options);
// try to get all workers host port.
// Deduplicate workers.
Set<Tuple2<String, Integer>> workerSet = new HashSet<>();
for (List<Tuple2<String, Integer>> list : tableWorkers.values()) {
workerSet.addAll(list);
}
multiStream.getSession().initRpcConnection(new ArrayList<>(workerSet));
return multiStream;
}
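/** Adds a single table to an existing multi-table stream and connects to its route workers. */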
@Override
public MultiStream appendStream(MultiStream multiStream, String schemaName, String tableName) throws IOException {
validRpcProxyInit();
Preconditions.checkArgument(multiStream.getSession().getOptions().getProtocolType() == ProtocolType.V2,
"multiStream only support protocolType with V2.");
List<Tuple2<String, Integer>> workers = null;
MultiTable multiTable = multiStream.getTable();
int tabletNum = multiStream.getMultiTablesOptions().getTabletNum(schemaName, tableName);
ArrowIGSTableMeta igsTableMeta = rpcProxy.createOrGetStreamV2(clientContext.instanceId(),
clientContext.workspace(), schemaName, tableName, tabletNum);
// Refresh all client properties if needed.
refreshInstanceId(igsTableMeta.getInstanceId());
multiTable.addSchema(igsTableMeta);
// try to get tablet workers with retry.
workers = getRouteWorkers(schemaName, tableName);
// try to get all workers host port.
multiStream.getSession().initRpcConnection(workers);
return multiStream;
}
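/** Adds a batch of tables to an existing multi-table stream and merges the new table options into it. */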
@Override
public MultiStream batchAppendStream(MultiStream multiStream, MultiTablesOptions newTableOptions) throws IOException {
MultiTable multiTable = multiStream.getTable();
Map<TableIdentifier, List<Tuple2<String, Integer>>> tableWorkers = initMultiStreams(multiTable, newTableOptions, false);
// try to get all workers host port.
// Deduplicate workers.
Set<Tuple2<String, Integer>> workerSet = new HashSet<>();
for (List<Tuple2<String, Integer>> list : tableWorkers.values()) {
workerSet.addAll(list);
}
multiStream.getSession().initRpcConnection(new ArrayList<>(workerSet));
multiStream.getMultiTablesOptions().mergeFrom(newTableOptions);
return multiStream;
}
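/** Creates a new bulk-load stream for the given table. */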
@Override
public BulkLoadStream createBulkLoadStream(String schemaName,
String tableName,
BulkLoadOptions options) throws IOException {
validRpcProxyInit();
BulkLoadMetaData metaData = rpcProxy.createBulkLoadStreamV2(
clientContext.instanceId(), clientContext.workspace(), schemaName, tableName, options);
// Refresh all properties if necessary.
refreshInstanceId(metaData.getInstanceId());
return new BulkLoadStreamImpl(this, metaData);
}
@Override
public BulkLoadStream getBulkLoadStream(String schemaName, String tableName, String streamId) throws IOException {
validRpcProxyInit();
BulkLoadMetaData metaData = rpcProxy.getBulkLoadStreamV2(
clientContext.instanceId(), clientContext.workspace(), schemaName, tableName, streamId, true);
// Refresh all properties if necessary.
refreshInstanceId(metaData.getInstanceId());
return new BulkLoadStreamImpl(this, metaData);
}
@Override
public BulkLoadMetaData getBulkLoadStreamMetaData(String schemaName, String tableName, String streamId) throws IOException {
validRpcProxyInit();
BulkLoadMetaData metaData = rpcProxy.getBulkLoadStreamV2(
clientContext.instanceId(), clientContext.workspace(), schemaName, tableName, streamId, false);
// Refresh all properties if necessary.
refreshInstanceId(metaData.getInstanceId());
return metaData;
}
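/** Builds and opens a new RPC proxy, closing it again if opening fails. */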
private RpcProxy createRpcProxy() throws IOException {
// Build and open an RPC proxy for issuing RPC calls.
RpcProxy proxy = RpcProxy.Builder.build(clientContext, clientContext.igsRouterMode());
try {
proxy.open();
} catch (Exception e) {
if (proxy != null) {
try {
proxy.close();
} catch (Exception ce) {
LOG.warn("fallback rpc proxy init to close rpc proxy failed.", ce);
}
}
throw e;
}
return proxy;
}
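/** Ensures the client's default RPC proxy is created and opened. */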
private synchronized void validRpcProxyInit() throws IOException {
if (initialized) {
return;
}
if (rpcProxy == null) {
rpcProxy = createRpcProxy();
}
initialized = true;
}
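/** Stores the instance id returned by the server in the client context when the caller did not configure one explicitly. */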
private void refreshInstanceId(long instanceId) {
if (!clientContext.getProperties().containsKey("instanceId")) {
LOG.info("reset instanceId from {} to {}", clientContext.instanceId(), instanceId);
clientContext.getProperties().put("instanceId", instanceId);
clientContext.refreshAll(clientContext);
}
}
@Override
// Note: the generic type parameters below are reconstructed; the AbstractMessage bounds are an assumption.
public <T extends AbstractMessage, R extends AbstractMessage>
CompletableFuture<R> rpcProxyCallAsync(Ingestion.MethodEnum method, T request) throws IOException {
validRpcProxyInit();
return rpcProxy.rpcProxyCallAsync(method, request);
}
@Override
public CompletableFuture dummyValidAsync(String request) throws IOException {
validRpcProxyInit();
return rpcProxy.dummyValidAsync(request);
}
}