package io.github.shanqiang.sp.output;
import com.clickhouse.jdbc.ClickHouseConnection;
import com.clickhouse.jdbc.ClickHouseDataSource;
import io.github.shanqiang.Threads;
import io.github.shanqiang.exception.UnknownTypeException;
import io.github.shanqiang.sp.StreamProcessing;
import io.github.shanqiang.table.Column;
import io.github.shanqiang.table.Table;
import io.github.shanqiang.table.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import static com.google.common.base.Throwables.getStackTraceAsString;
import static io.github.shanqiang.util.ScalarUtil.toDouble;
import static io.github.shanqiang.util.ScalarUtil.toInteger;
import static io.github.shanqiang.util.ScalarUtil.toLong;
import static io.github.shanqiang.util.ScalarUtil.toStr;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor;
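/**
 * Output table that writes stream rows into a ClickHouse cluster.
 * On construction it creates (if absent) the target database, a local MergeTree-family table on
 * every node of the cluster and a Distributed table named {@code tableName + "_all"} on top of it,
 * then flushes batched rows to ClickHouse from multiple writer threads, one JDBC connection per thread.
 */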
public class ClickhouseOutputTable extends AbstractOutputTable {
private static final Logger logger = LoggerFactory.getLogger(ClickhouseOutputTable.class);
private final String jdbcUrl;
private final String httpPort;
private final String tableName;
private final String userName;
private final String password;
private final String clusterName;
private final String databaseName;
private final int maxRetryTimes;
private final int multiple;
private final int batchSize;
private final long flushInterval;
private final String ttl_expr;
protected final Map<String, Type> columnTypeMap;
private final String insertPrefix;
private final String engine;
private final boolean replica;
private final String storagePolicy;
private final String[] indexColumns;
private final String timeColumn;
private final ScheduledExecutorService partitionsDetector;
private Map<Integer, String> shardMap = new ConcurrentHashMap<>();
private final Map<String, List<Thread>> threadMap = new ConcurrentHashMap<>();
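/**
 * Convenience constructor that delegates to the full constructor with the defaults visible below:
 * batches of 40000 rows, a 10 second flush interval, maxRetryTimes = 1, a "30 day" TTL,
 * the ReplicatedMergeTree engine, the "ssd_hdd" storage policy and replication enabled.
 */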
public ClickhouseOutputTable(
int multiple,
String jdbcUrl,
String userName,
String password,
String clusterName,
String databaseName,
String tableName,
String[] indexColumns,
String timeColumn,
Map<String, Type> columnTypeMap) throws IOException {
this(
multiple,
40000,
Duration.ofSeconds(10),
jdbcUrl,
userName,
password,
clusterName,
databaseName,
tableName,
indexColumns,
timeColumn,
1,
"30 day",
"ReplicatedMergeTree",
"ssd_hdd",
true,
columnTypeMap
);
}
/**
* @param multiple start (multiple × number of shards) writer threads; each thread writes to ClickHouse over its own connection
* @param batchSize
* @param jdbcUrl
* @param userName
* @param password
* @param clusterName
* @param databaseName
* @param tableName
* @param indexColumns
* @param timeColumn
* @param maxRetryTimes
* @param ttl_expr
* @param engine
* @param storagePolicy
* @param replica
* @param columnTypeMap
* @throws IOException
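*
* <p>A minimal usage sketch with purely illustrative values (hosts, credentials and column names
* below are assumptions, not taken from a real deployment):
* <pre>{@code
* Map<String, Type> columnTypeMap = new LinkedHashMap<>();
* columnTypeMap.put("uid", Type.BIGINT);
* columnTypeMap.put("event_time", Type.BIGINT); // time column, created as DateTime for the TTL
* ClickhouseOutputTable outputTable = new ClickhouseOutputTable(
*         2, 40000, Duration.ofSeconds(10),
*         "host1:8123,host2:8123", "default", "password",
*         "my_cluster", "my_db", "events",
*         new String[]{"uid"}, "event_time",
*         3, "30 day", "ReplicatedMergeTree", "default", true,
*         columnTypeMap);
* }</pre>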
*/
public ClickhouseOutputTable(
int multiple,
int batchSize,
Duration flushInterval,
String jdbcUrl,
String userName,
String password,
String clusterName,
String databaseName,
String tableName,
String[] indexColumns,
String timeColumn,
int maxRetryTimes,
String ttl_expr,
String engine,
String storagePolicy,
boolean replica,
Map<String, Type> columnTypeMap) throws IOException {
super(10, "|ClickhouseOutputTable|" + tableName);
this.multiple = multiple;
this.jdbcUrl = requireNonNull(jdbcUrl);
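// the HTTP port is taken from the first "host:port" entry of the comma-separated jdbcUrl
// and reused when a connection to one specific host is opened in connect(String ip)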
this.httpPort = jdbcUrl.split(",", 2)[0].split(":", 2)[1];
this.userName = requireNonNull(userName);
this.password = requireNonNull(password);
this.clusterName = requireNonNull(clusterName);
this.databaseName = requireNonNull(databaseName);
this.tableName = requireNonNull(tableName);
this.indexColumns = indexColumns;
this.timeColumn = timeColumn;
this.maxRetryTimes = maxRetryTimes;
this.batchSize = batchSize;
this.flushInterval = requireNonNull(flushInterval).toMillis();
this.ttl_expr = ttl_expr;
this.engine = engine;
this.storagePolicy = storagePolicy;
this.replica = replica;
this.columnTypeMap = requireNonNull(columnTypeMap);
this.insertPrefix = "insert into " + databaseName + "." + tableName;
this.partitionsDetector = newSingleThreadScheduledExecutor(Threads.threadsNamed("shards_detector"));
createTable();
}
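/**
 * Opens a connection that round-robins across every host listed in the comma-separated jdbcUrl;
 * the single-host overload below targets one specific node instead.
 */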
private ClickHouseConnection connect() throws SQLException {
String connString = format("jdbc:ch://%s/%s?health_check_interval=5000&load_balancing_policy=roundRobin&failover=2",
jdbcUrl, databaseName);
ClickHouseDataSource ds = new ClickHouseDataSource(connString);
ClickHouseConnection conn = ds.getConnection(userName, password);
return conn;
}
private ClickHouseConnection connect(String ip) throws SQLException {
String connString = format("jdbc:ch://%s:%s/%s?health_check_interval=5000&failover=2", ip, httpPort, databaseName);
ClickHouseDataSource ds = new ClickHouseDataSource(connString);
ClickHouseConnection conn = ds.getConnection(userName, password);
return conn;
}
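/**
 * Maps the framework's column {@link Type} to the ClickHouse column type used in the CREATE TABLE
 * statements; VARBYTE and BIGDECIMAL values are stored as String.
 */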
private static String toClickhouseType(Type type) {
switch (type) {
case VARBYTE:
case BIGDECIMAL:
return "String";
case INT:
return "Int32";
case BIGINT:
return "Int64";
case DOUBLE:
return "Double";
default:
throw new UnknownTypeException(null == type ? "null" : type.name());
}
}
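/*
 * For illustration only (assumed example values): with clusterName = my_cluster, databaseName = my_db,
 * tableName = events, indexColumns = [uid], timeColumn = event_time, ttl_expr = "30 day",
 * engine = ReplicatedMergeTree, storagePolicy = ssd_hdd and replica = true, createTable() issues roughly:
 *
 *   CREATE DATABASE IF NOT EXISTS my_db ON CLUSTER my_cluster
 *
 *   CREATE TABLE IF NOT EXISTS events ON CLUSTER my_cluster (...)
 *   ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/my_db/events', '{replica}')
 *   ORDER BY (uid) TTL event_time + INTERVAL 30 day
 *   SETTINGS index_granularity = 8192, storage_policy = 'ssd_hdd'
 *
 *   CREATE TABLE IF NOT EXISTS events_all ON CLUSTER my_cluster (...)
 *   ENGINE = Distributed('my_cluster', 'my_db', 'events', rand())
 */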
private void createTable() {
StringBuilder fieldsBuilder = new StringBuilder();
for (String columnName : columnTypeMap.keySet()) {
Type type = columnTypeMap.get(columnName);
if (columnName.equals(timeColumn)) {
// the time column is used for the TTL clause, so it must be created as a ClickHouse DateTime column
fieldsBuilder.append("`").append(columnName).append("`").append(" ").append("DateTime").append(",");
} else {
fieldsBuilder.append("`").append(columnName).append("`").append(" ").append(toClickhouseType(type)).append(",");
}
}
String fieldsSchema = fieldsBuilder.toString();
if (fieldsSchema.length() > 0) {
fieldsSchema = fieldsSchema.substring(0, fieldsSchema.length() - 1);
}
String createDatabaseSql = format("CREATE DATABASE IF NOT EXISTS %s ON CLUSTER %s", databaseName, clusterName);
/* create the table only if it does not already exist */
String createLocalTableSql = format("CREATE TABLE IF NOT EXISTS %s ON CLUSTER %s (%s) ",
tableName, clusterName, fieldsSchema);
if (replica) {
// a replicated cluster needs replica metadata, which ClickHouse stores in ZooKeeper
createLocalTableSql += format("ENGINE = %s('/clickhouse/tables/{shard}/%s/%s', '{replica}') " +
"ORDER BY (%s) " +
"TTL %s + INTERVAL %s " +
"SETTINGS index_granularity = 8192",
engine,
databaseName,
tableName,
String.join(",", indexColumns),
timeColumn,
ttl_expr);
} else {
// CREATE TABLE statement for a cluster without replicas
createLocalTableSql += format("ENGINE = %s " +
"ORDER BY (%s) " +
"TTL %s + INTERVAL %s " +
"SETTINGS index_granularity = 8192",
engine,
String.join(",", indexColumns),
timeColumn,
ttl_expr);
}
if (storagePolicy != null && !storagePolicy.isEmpty()) {
createLocalTableSql += format(", storage_policy = '%s'", storagePolicy);
}
logger.info(">>> create local table sql: " + createLocalTableSql);
String createDistributedTableSql = format("CREATE TABLE IF NOT EXISTS %s ON CLUSTER %s (%s) " +
"ENGINE = Distributed('%s', '%s', '%s', rand())",
tableName + "_all",
clusterName,
fieldsSchema,
clusterName,
databaseName,
tableName
);
logger.info(">>> create distributed table sql: " + createDistributedTableSql);
int retryCount = 0;
while (retryCount < maxRetryTimes) {
try {
try (ClickHouseConnection connection = connect();
Statement statement = connection.createStatement()) {
statement.execute(createDatabaseSql);
statement.execute(createLocalTableSql);
statement.execute(createDistributedTableSql);
}
return;
} catch (Throwable t) {
logger.error(">>> create table error: {}, retried {} times", getStackTraceAsString(t), retryCount);
retryCount++;
if (retryCount >= maxRetryTimes) {
throw new IllegalStateException(t);
}
try {
Thread.sleep(1 * 1000L);
} catch (Throwable t2) {
logger.error("retry sleep error!", t2);
}
}
}
throw new IllegalStateException(">>> failed to create clickhouse table " + tableName + " after " + maxRetryTimes + " attempts");
}
private void setValues(PreparedStatement preparedStatement, List