/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.kyligence.kap.clickhouse.job;

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.kylin.guava30.shaded.common.base.Preconditions;
import org.apache.kylin.guava30.shaded.common.collect.ImmutableList;
import org.apache.kylin.metadata.cube.model.LayoutEntity;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.NDataModel;

import io.kyligence.kap.secondstorage.ColumnMapping;
import io.kyligence.kap.secondstorage.SecondStorageNodeHelper;
import io.kyligence.kap.secondstorage.ddl.exp.ColumnWithType;
import io.kyligence.kap.secondstorage.metadata.TableData;
import io.kyligence.kap.secondstorage.util.SecondStorageDateUtils;

import lombok.Getter;
import lombok.val;
import lombok.extern.slf4j.Slf4j;
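
/**
 * Builds the per-node ClickHouse load plan for a batch of segment layouts:
 * it translates Kylin layouts into ClickHouse column definitions, creates one
 * {@link ShardLoader} per (load info, node) pair, and derives from those
 * loaders the partition operations needed to commit or roll back a load.
 */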
@Slf4j
public class DataLoader {
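
    /**
     * Maps a Kylin {@link DataType} to its ClickHouse column type, e.g.
     * BOOLEAN to {@code UInt8}, BIGINT to {@code Int64}, and DECIMAL to
     * {@code Decimal(precision,scale)}. Types with no ClickHouse counterpart
     * (time, binary, array, ...) are rejected. Illustrative usage:
     * <pre>{@code
     * // illustrative only: "decimal(19,4)" maps to "Decimal(19,4)"
     * String chType = DataLoader.clickHouseType(DataType.getType("decimal(19,4)"));
     * }</pre>
     */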
    static String clickHouseType(DataType dt) {
        switch (dt.getName()) {
        case DataType.BOOLEAN:
            return "UInt8";
        case DataType.BYTE:
        case DataType.TINY_INT:
            return "Int8";
        case DataType.SHORT:
        case DataType.SMALL_INT:
            return "Int16";
        case DataType.INT:
        case DataType.INT4:
        case DataType.INTEGER:
            return "Int32";
        case DataType.LONG:
        case DataType.LONG8:
        case DataType.BIGINT:
            return "Int64";
        case DataType.FLOAT:
            return "Float32";
        case DataType.DOUBLE:
            return "Float64";
        case DataType.DECIMAL:
        case DataType.NUMERIC:
            return String.format(Locale.ROOT, "Decimal(%d,%d)", dt.getPrecision(), dt.getScale());
        case DataType.VARCHAR:
        case DataType.CHAR:
        case DataType.STRING:
            return "String";
        case DataType.DATE:
            return "Date";
        case DataType.TIMESTAMP:
        case DataType.DATETIME:
            return "DateTime";
        case DataType.TIME:
        case DataType.REAL:
        case DataType.ANY_STR:
        case DataType.BINARY:
        case DataType.ARRAY:
            throw new UnsupportedOperationException(
                    String.format(Locale.ROOT, "data type %s is not supported yet", dt.getName()));
        default:
            throw new UnsupportedOperationException(
                    String.format(Locale.ROOT, "unknown data type %s", dt.getName()));
        }
    }
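
    /**
     * Converts the ordered dimensions of a layout into ClickHouse column
     * definitions. An explicit type override in {@code columnTypeMap} wins
     * over the mapped Kylin type, and the partition column is always declared
     * non-nullable regardless of the source column's nullability.
     */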
    static List<ColumnWithType> columns(Map<String, String> columnTypeMap, LayoutEntity layout, String partitionCol,
            boolean addPrefix) {
        List<ColumnWithType> cols = new ArrayList<>();
        layout.getOrderedDimensions().forEach((k, v) -> {
            String colType = columnTypeMap.get(getPrefixColumn(String.valueOf(k)));
            cols.add(new ColumnWithType(
                    addPrefix ? getPrefixColumn(String.valueOf(k)) : String.valueOf(k),
                    colType == null ? clickHouseType(v.getType()) : colType,
                    // partition column must not be null
                    colType == null && v.getColumnDesc().isNullable() && !String.valueOf(k).equals(partitionCol),
                    true));
        });
        return cols;
    }
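
    /**
     * Keeps only the order-by columns that are dimensions of the layout and
     * renders them as (optionally prefixed) column identifiers.
     */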
    static List<String> orderColumns(LayoutEntity layout, List<Integer> orderCols, boolean addPrefix) {
        val orderedDimensions = layout.getOrderedDimensions();
        return orderCols.stream().filter(orderedDimensions::containsKey)
                .map(column -> addPrefix ? getPrefixColumn(String.valueOf(column)) : String.valueOf(column))
                .collect(Collectors.toList());
    }
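
    /** Renders a Kylin column id as its second-storage (prefixed) column name. */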
    static String getPrefixColumn(String col) {
        return ColumnMapping.kapColumnToSecondStorageColumn(col);
    }

    private final String executableId;
    private final String database;
    private final Engine tableEngine;
    private final boolean isIncremental;
    private final List<LoadInfo> loadInfoBatch;
    private final LoadContext loadContext;
    private final List<ShardLoader> shardLoaders;
    private final Map<String, List<ClickhouseLoadFileLoad>> singleFileLoaderPerNode;

    @Getter
    private final LoadContext.CompletedSegmentKeyUtil segmentKey;
    @Getter
    private final String segmentId;
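
    /**
     * The batch is assumed to target a single segment and layout: the first
     * entry of {@code loadInfoBatch} supplies {@link #segmentId} and the
     * completed-segment key. A minimal construction sketch, with hypothetical
     * arguments assumed to exist in the caller:
     * <pre>{@code
     * // jobId, database, engine, loadInfos and context are illustrative names
     * DataLoader loader = new DataLoader(jobId, database, engine, true, loadInfos, context);
     * }</pre>
     */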
    public DataLoader(String executableId, String database, Engine tableEngine, boolean isIncremental,
            List<LoadInfo> loadInfoBatch, LoadContext loadContext) {
        this.executableId = executableId;
        this.database = database;
        this.tableEngine = tableEngine;
        this.isIncremental = isIncremental;
        this.loadInfoBatch = loadInfoBatch;
        this.loadContext = loadContext;
        int totalJdbcNum = loadInfoBatch.stream().mapToInt(item -> item.getNodeNames().length).sum();
        this.shardLoaders = new ArrayList<>(totalJdbcNum + 2);
        this.segmentId = loadInfoBatch.get(0).getSegmentId();
        this.segmentKey = new LoadContext.CompletedSegmentKeyUtil(loadInfoBatch.get(0).getLayout().getId());
        this.singleFileLoaderPerNode = new HashMap<>();
        toSingleFileLoaderPerNode();
    }

    public List<ShardLoader> getShardLoaders() {
        return shardLoaders == null ? Collections.emptyList() : shardLoaders;
    }

    public Map<String, List<ClickhouseLoadFileLoad>> getSingleFileLoaderPerNode() {
        return singleFileLoaderPerNode;
    }
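
    /**
     * Expands every (load info, node) pair into a {@link ShardLoader} plus its
     * per-file load tasks, skipping segments already committed in the load
     * context. For incremental loads the model's partition column is resolved
     * and the segment range is split into daily target partitions.
     */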
    private void toSingleFileLoaderPerNode() {
        // skip the segment if it has already been committed
        if (loadContext.getHistorySegments(segmentKey).contains(this.segmentId)) {
            return;
        }
        // after the new shard is created, the JDBC connection is ready
        for (val loadInfo : loadInfoBatch) {
            String[] nodeNames = loadInfo.getNodeNames();
            Preconditions.checkArgument(nodeNames.length == loadInfo.getShardFiles().size());
            for (int idx = 0; idx < nodeNames.length; idx++) {
                final String nodeName = nodeNames[idx];
                final List<String> listParquet = loadInfo.getShardFiles().get(idx);
                val builder = ShardLoader.ShardLoadContext.builder().nodeName(nodeName)
                        .jdbcURL(SecondStorageNodeHelper.resolve(nodeName)).executableId(executableId)
                        .segmentId(loadInfo.segmentId).database(database).layout(loadInfo.getLayout())
                        .parquetFiles(listParquet).tableEngine(tableEngine).destTableName(loadInfo.getTargetTable())
                        .loadContext(loadContext).tableEntity(loadInfo.getTableEntity());
                if (isIncremental) {
                    String partitionColName = loadInfo.model.getPartitionDesc().getPartitionDateColumn();
                    val dateCol = loadInfo.model.getAllNamedColumns().stream()
                            .filter(column -> column.getAliasDotColumn().equals(partitionColName)
                                    && NDataModel.ColumnStatus.DIMENSION.equals(column.getStatus()))
                            .findFirst().orElseThrow(
                                    () -> new IllegalStateException("can't find partition column " + partitionColName));
                    Preconditions.checkState(loadInfo.getLayout().getColumns().stream()
                            .anyMatch(col -> col.getAliasDotName().equals(dateCol.getAliasDotColumn())));
                    Preconditions.checkArgument(loadInfo.segment.getSegRange().getStart() instanceof Long);
                    builder.partitionFormat(loadInfo.model.getPartitionDesc().getPartitionDateFormat())
                            .partitionColumn(Objects.toString(dateCol.getId()))
                            .targetPartitions(SecondStorageDateUtils.splitByDay(loadInfo.segment.getSegRange()));
                }
                val needDropTable = getNeedDropTable(loadInfo);
                builder.needDropTable(needDropTable);
                builder.needDropPartition(getNeedDropPartition(loadInfo, needDropTable));
                ShardLoader.ShardLoadContext context = builder.build();
                List<ClickhouseLoadFileLoad> clickhouseLoadFileLoads = singleFileLoaderPerNode
                        .computeIfAbsent(nodeName, k -> new ArrayList<>());
                ShardLoader shardLoader = new ShardLoader(context);
                clickhouseLoadFileLoads.addAll(shardLoader.toSingleFileLoader());
                shardLoaders.add(shardLoader);
            }
        }
    }
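
    /**
     * Returns, per node, the drop operations that remove each target partition
     * from the destination table and from any stale layout tables before the
     * newly loaded data is committed. Only incremental shard loads contribute.
     */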
    public Map<String, List<ClickhouseLoadPartitionDrop>> getLoadCommitDropPartitions() {
        Map<String, List<ClickhouseLoadPartitionDrop>> dropPartitions = new HashMap<>();
        for (ShardLoader shardLoader : this.getShardLoaders()) {
            ImmutableList.Builder<String> needDropPartitionTableBuilder = ImmutableList.builder();
            needDropPartitionTableBuilder.add(shardLoader.getDestTableName());
            if (shardLoader.getNeedDropPartition() != null) {
                needDropPartitionTableBuilder.addAll(shardLoader.getNeedDropPartition());
            }
            val needDropPartitionTables = needDropPartitionTableBuilder.build();
            if (shardLoader.isIncremental()) {
                List<ClickhouseLoadPartitionDrop> dropPartitionShard = dropPartitions
                        .computeIfAbsent(shardLoader.getNodeName(), k -> new ArrayList<>());
                dropPartitionShard.addAll(shardLoader.getTargetPartitions().stream()
                        .map(partition -> new ClickhouseLoadPartitionDrop(needDropPartitionTables, partition,
                                shardLoader))
                        .collect(Collectors.toList()));
            }
        }
        return dropPartitions;
    }
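
    /**
     * Returns, per node, the commit operations that move loaded partitions
     * from the temporary insert tables into the destination table; a full
     * (non-incremental) load commits in a single whole-table operation.
     */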
    public Map<String, List<ClickhouseLoadPartitionCommit>> getLoadCommitMovePartitions() throws SQLException {
        Map<String, List<ClickhouseLoadPartitionCommit>> movePartitions = new HashMap<>();
        for (ShardLoader shardLoader : this.getShardLoaders()) {
            List<ClickhouseLoadPartitionCommit> movePartitionNode = movePartitions
                    .computeIfAbsent(shardLoader.getNodeName(), k -> new ArrayList<>());
            if (shardLoader.isIncremental()) {
                movePartitionNode.addAll(shardLoader.getInsertTempTablePartition().stream()
                        .map(partition -> new ClickhouseLoadPartitionCommit(partition, shardLoader))
                        .collect(Collectors.toList()));
            } else {
                movePartitionNode.add(new ClickhouseLoadPartitionCommit(null, shardLoader));
            }
        }
        return movePartitions;
    }
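
    /**
     * Rollback counterpart of {@link #getLoadCommitMovePartitions()}: on
     * failure, drops from each incremental shard's destination table the
     * partitions that had already been moved in.
     */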
    public Map<String, List<ClickhouseLoadPartitionDrop>> getLoadCommitExceptionPartitions() throws SQLException {
        Map<String, List<ClickhouseLoadPartitionDrop>> exceptionPartitions = new HashMap<>();
        for (ShardLoader shardLoader : this.getShardLoaders()) {
            if (shardLoader.isIncremental()) {
                List<ClickhouseLoadPartitionDrop> dropPartitionShard = exceptionPartitions
                        .computeIfAbsent(shardLoader.getNodeName(), k -> new ArrayList<>());
                dropPartitionShard.addAll(shardLoader.getInsertTempTablePartition().stream()
                        .map(partition -> new ClickhouseLoadPartitionDrop(
                                Collections.singletonList(shardLoader.getDestTableName()), partition, shardLoader))
                        .collect(Collectors.toList()));
            }
        }
        return exceptionPartitions;
    }
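
    /**
     * Tables of other layouts whose only remaining segment is the old segment
     * being replaced; such tables can be dropped outright.
     */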
    private Set<String> getNeedDropTable(LoadInfo loadInfo) {
        return loadInfo.containsOldSegmentTableData.stream().filter(tableData -> {
            val all = tableData.getAllSegments();
            return tableData.getLayoutID() != loadInfo.getLayout().getId() && all.size() == 1
                    && all.contains(loadInfo.oldSegmentId);
        }).map(TableData::getTable).collect(Collectors.toSet());
    }
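
    /**
     * Tables of other layouts that also contain the old segment but cannot be
     * dropped whole; for these only the affected partitions are dropped.
     */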
    private Set<String> getNeedDropPartition(LoadInfo loadInfo, Set<String> needDropTable) {
        return loadInfo.containsOldSegmentTableData.stream()
                .filter(tableData -> tableData.getLayoutID() != loadInfo.getLayout().getId()
                        && !needDropTable.contains(tableData.getTable()))
                .map(TableData::getTable)
                .collect(Collectors.toSet());
    }
}