/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.kyligence.kap.clickhouse.job;

import static io.kyligence.kap.clickhouse.job.DataLoader.columns;
import static io.kyligence.kap.clickhouse.job.DataLoader.getPrefixColumn;
import static io.kyligence.kap.clickhouse.job.DataLoader.orderColumns;

import java.sql.Date;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.commons.collections.CollectionUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.metadata.cube.model.LayoutEntity;
import org.apache.kylin.metadata.cube.model.NDataflow;
import org.apache.kylin.metadata.cube.model.NDataflowManager;
import org.apache.kylin.metadata.model.NDataModel;

import io.kyligence.kap.clickhouse.ClickHouseNameUtil;
import io.kyligence.kap.clickhouse.ddl.ClickHouseCreateTable;
import io.kyligence.kap.clickhouse.ddl.ClickHouseRender;
import io.kyligence.kap.clickhouse.ddl.TableSetting;
import io.kyligence.kap.clickhouse.parser.DescQueryParser;
import io.kyligence.kap.clickhouse.parser.ExistsQueryParser;
import io.kyligence.kap.secondstorage.ddl.AlterTable;
import io.kyligence.kap.secondstorage.ddl.CreateDatabase;
import io.kyligence.kap.secondstorage.ddl.Desc;
import io.kyligence.kap.secondstorage.ddl.DropTable;
import io.kyligence.kap.secondstorage.ddl.ExistsTable;
import io.kyligence.kap.secondstorage.ddl.Select;
import io.kyligence.kap.secondstorage.ddl.SkippingIndexChooser;
import io.kyligence.kap.secondstorage.ddl.exp.ColumnWithAlias;
import io.kyligence.kap.secondstorage.ddl.exp.TableIdentifier;
import io.kyligence.kap.secondstorage.metadata.TableEntity;

import lombok.Builder;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
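
/**
 * Loads the parquet files of one layout/segment into a single ClickHouse shard:
 * it prepares the target database and temp tables, hands each file to a
 * per-file loader, and drops the temp tables when the job finishes.
 * <p>
 * Illustrative usage sketch; the builder values below are hypothetical, not
 * taken from this codebase:
 * <pre>{@code
 * ShardLoader.ShardLoadContext context = ShardLoader.ShardLoadContext.builder()
 *         .jdbcURL("jdbc:clickhouse://ck-node:8123") // hypothetical shard URL
 *         .database("ke_example_project")            // hypothetical database
 *         // ... remaining fields as supplied by the load job ...
 *         .build();
 * ShardLoader loader = new ShardLoader(context);
 * loader.setup(true);                  // create database and temp tables
 * // execute the loaders from loader.toSingleFileLoader(), then:
 * loader.createDestTableIgnoreExist();
 * loader.cleanUpQuietly(false);
 * }</pre>
 */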
@Getter
@Slf4j
public class ShardLoader {
    private final ClickHouse clickHouse;
    private final String database;
    private final ClickHouseRender render = new ClickHouseRender();
    private final Engine tableEngine;
    private final LayoutEntity layout;
    private final TableEntity tableEntity;
    private final String partitionColumn;
    private final String partitionFormat;
    private final List<String> parquetFiles;
    private final String destTableName;
    private final String insertTempTableName;
    private final String destTempTableName;
    private final String likeTempTableName;
    private final boolean incremental;
    private final List<Date> targetPartitions;
    private final List<Date> committedPartition = new ArrayList<>();
    private final Set<Date> needDropPartition;
    private final Set<String> needDropTable;
    private final String jdbcURL;
    private final String nodeName;
    private final LoadContext loadContext;

    public ShardLoader(ShardLoadContext context) {
        this.clickHouse = new ClickHouse(context.jdbcURL);
        this.database = context.database;
        this.tableEngine = context.tableEngine;
        this.layout = context.layout;
        this.tableEntity = context.tableEntity;
        this.parquetFiles = context.parquetFiles;
        this.partitionColumn = context.partitionColumn;
        this.partitionFormat = context.partitionFormat;
        this.incremental = partitionColumn != null;
        this.destTableName = context.destTableName;
        this.insertTempTableName = ClickHouseNameUtil.getInsertTempTableName(context.executableId, context.segmentId,
                context.layout.getId());
        this.destTempTableName = ClickHouseNameUtil.getDestTempTableName(context.executableId, context.segmentId,
                context.layout.getId());
        this.likeTempTableName = ClickHouseNameUtil.getLikeTempTableName(context.executableId, context.segmentId,
                context.layout.getId());
        this.targetPartitions = context.targetPartitions;
        this.needDropPartition = context.needDropPartition;
        this.needDropTable = context.needDropTable;
        this.jdbcURL = context.jdbcURL;
        this.nodeName = context.nodeName;
        this.loadContext = context.loadContext;
    }
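
    /**
     * Creates the final destination table if it does not exist yet, cloning the
     * schema of the insert temp table via a CREATE TABLE ... LIKE statement.
     */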
    public void createDestTableIgnoreExist() throws SQLException {
        final ClickHouseCreateTable likeTable = ClickHouseCreateTable.createCKTableIgnoreExist(database, destTableName)
                .likeTable(database, insertTempTableName);
        clickHouse.apply(likeTable.toSql(render));
    }
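
    /**
     * Queries the distinct partition-column values currently present in the
     * insert temp table, parsed with {@code partitionFormat}.
     */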
    public List<Date> getInsertTempTablePartition() throws SQLException {
        Select queryPartition = new Select(TableIdentifier.table(database, insertTempTableName))
                .column(ColumnWithAlias.builder().name(getPrefixColumn(partitionColumn)).distinct(true).build());
        return clickHouse.queryPartition(queryPartition.toSql(render), partitionFormat);
    }
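
    /**
     * Prepares the shard for loading: creates the database, reads the destination
     * table's column types when it already exists, and (re)creates the temp tables.
     * The insert temp table is only recreated when {@code newJob} is true.
     */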
    public void setup(boolean newJob) throws SQLException {
        // 1. prepare database
        final CreateDatabase createDb = CreateDatabase.createDatabase(database);
        clickHouse.apply(createDb.toSql(render));
        // 2. desc dest table
        int existCode = clickHouse.query(new ExistsTable(TableIdentifier.table(database, destTableName)).toSql(),
                ExistsQueryParser.EXISTS).get(0);
        Map<String, String> columnTypeMap = new HashMap<>();
        if (existCode == 1) {
            columnTypeMap = clickHouse
                    .query(new Desc(TableIdentifier.table(database, destTableName)).toSql(render), DescQueryParser.Desc)
                    .stream().collect(Collectors.toMap(ClickHouseSystemQuery.DescTable::getColumn,
                            ClickHouseSystemQuery.DescTable::getDatatype));
        }
        // 3. prepare temp table
        if (newJob) {
            createTable(insertTempTableName, columnTypeMap, layout, partitionColumn, true);
        }
        createTable(likeTempTableName, columnTypeMap, layout, partitionColumn, false);
    }
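
    /**
     * Wraps each parquet file in its own {@code ClickhouseLoadFileLoad} so the
     * files can be loaded one by one.
     */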
    public List<ClickhouseLoadFileLoad> toSingleFileLoader() {
        List<ClickhouseLoadFileLoad> loaders = new ArrayList<>(parquetFiles.size());
        for (int index = 0; index < parquetFiles.size(); index++) {
            String sourceTable = ClickHouseNameUtil.getFileSourceTableName(insertTempTableName, index);
            loaders.add(new ClickhouseLoadFileLoad(this, sourceTable, parquetFiles.get(index)));
        }
        return loaders;
    }
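
    /**
     * Value object carrying everything a {@link ShardLoader} needs for one
     * layout/segment on one node; construct it with the generated builder.
     */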
    @Builder
    public static class ShardLoadContext {
        String executableId;
        String jdbcURL;
        String database;
        LayoutEntity layout;
        TableEntity tableEntity;
        List<String> parquetFiles;
        String destTableName;
        Engine tableEngine;
        String partitionColumn;
        String partitionFormat;
        List<Date> targetPartitions;
        Set<Date> needDropPartition;
        Set<String> needDropTable;
        String segmentId;
        String nodeName;
        LoadContext loadContext;
    }
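
    /**
     * Drops the temp tables and, for non-paused jobs, any tables queued in
     * {@code needDropTable}; a paused job keeps its insert temp table so the
     * load can be resumed.
     */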
    public void cleanUp(boolean isPaused) throws SQLException {
        if (!isPaused) {
            dropTable(insertTempTableName);
        }
        dropTable(destTempTableName);
        dropTable(likeTempTableName);
        if (!isPaused && needDropTable != null) {
            for (String table : needDropTable) {
                dropTable(table);
            }
        }
    }
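
    /**
     * Same as {@link #cleanUp(boolean)} but swallows {@link SQLException},
     * logging the failure instead of propagating it.
     */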
    public void cleanUpQuietly(boolean isPaused) {
        try {
            this.cleanUp(isPaused);
        } catch (SQLException e) {
            log.error("Cleaning temp tables on {} failed.", clickHouse.getPreprocessedUrl(), e);
        }
    }
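
    /**
     * Drops and recreates {@code table} with the layout's columns, order-by and
     * partition-by clauses, then adds skipping indexes for the configured
     * secondary index columns when building the prefixed (insert) variant.
     */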
    private void createTable(String table, Map<String, String> columnTypeMap, LayoutEntity layout, String partitionBy,
            boolean addPrefix) throws SQLException {
        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        NDataModel model = getLayout().getModel();
        NDataflow dataflow = NDataflowManager.getInstance(kylinConfig, model.getProject())
                .getDataflow(model.getId());
        dropTable(table);
        final ClickHouseCreateTable mergeTable = ClickHouseCreateTable.createCKTable(database, table)
                .columns(columns(columnTypeMap, layout, partitionBy, addPrefix))
                .orderBy(orderColumns(layout, tableEntity.getPrimaryIndexColumns(), addPrefix))
                .partitionBy(addPrefix && partitionBy != null ? getPrefixColumn(partitionBy) : partitionBy)
                .engine(Engine.DEFAULT)
                .tableSettings(TableSetting.NON_REPLICATED_DEDUPLICATION_WINDOW,
                        String.valueOf(kylinConfig.getSecondStorageLoadDeduplicationWindow()))
                .tableSettings(TableSetting.ALLOW_NULLABLE_KEY,
                        dataflow.getConfig().getSecondStorageIndexAllowNullableKey() ? "1" : "0");
        clickHouse.apply(mergeTable.toSql(render));
        if (addPrefix && CollectionUtils.isNotEmpty(tableEntity.getSecondaryIndexColumns())) {
            addSkippingIndex(table, layout, tableEntity.getSecondaryIndexColumns());
        }
    }
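
    /**
     * Adds one ClickHouse data-skipping index per secondary index column,
     * choosing the index type from the column's data type and the granularity
     * from the model-level config.
     */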
    private void addSkippingIndex(String table, LayoutEntity layoutEntity, Set<Integer> secondaryIndexColumns)
            throws SQLException {
        NDataModel model = layoutEntity.getModel();
        KylinConfig modelConfig = NDataflowManager.getInstance(KylinConfig.getInstanceFromEnv(), model.getProject())
                .getDataflow(model.getId()).getConfig();
        int granularity = modelConfig.getSecondStorageSkippingIndexGranularity();
        AlterTable alterTable;
        TableIdentifier tableIdentifier = TableIdentifier.table(database, table);
        for (Integer col : secondaryIndexColumns) {
            String columnName = getPrefixColumn(String.valueOf(col));
            String name = ClickHouseNameUtil.getSkippingIndexName(destTableName, columnName);
            String expr = SkippingIndexChooser
                    .getSkippingIndexType(layoutEntity.getOrderedDimensions().get(col).getType()).toSql(modelConfig);
            alterTable = new AlterTable(tableIdentifier,
                    new AlterTable.ManipulateIndex(name, columnName, expr, granularity));
            clickHouse.apply(alterTable.toSql(render));
        }
    }
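
    /** Drops {@code table} from the shard's database. */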
    private void dropTable(String table) throws SQLException {
        final String dropSQL = DropTable.dropTable(database, table).toSql(render);
        clickHouse.apply(dropSQL);
    }
}