// Source: com.clickzetta.platform.bulkload.BulkLoadWriterImpl
// (from the clickzetta-java SDK for ClickZetta's Lakehouse)
package com.clickzetta.platform.bulkload;
import com.clickzetta.platform.client.CZClient;
import com.clickzetta.platform.client.Table;
import com.clickzetta.platform.client.api.ArrowRow;
import com.clickzetta.platform.client.api.BulkLoadOperation;
import com.clickzetta.platform.client.api.BulkLoadState;
import com.clickzetta.platform.client.api.BulkLoadWriter;
import com.clickzetta.platform.client.api.Row;
import com.clickzetta.platform.common.CZException;
import com.clickzetta.platform.common.Constant;
import com.clickzetta.platform.common.Schema;
import com.google.common.base.Preconditions;
import cz.proto.ingestion.v2.IngestionV2;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.OutputFile;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
/**
 * Writer for a single partition of a bulk-load stream.
 *
 * <p>Rows are appended to staging files (via Iceberg {@link FileIO}); a file is
 * rotated once it grows past {@code maxFileSize} bytes or {@code maxFileRecordCount}
 * records. On {@link #close()} the list of finished files is committed back to the
 * server. Instances are not thread-safe; each parallel stream should own its own
 * writer (file names are disambiguated by a per-writer UUID).
 */
class BulkLoadWriterImpl implements BulkLoadWriter {
  private static final Logger LOG = LoggerFactory.getLogger(BulkLoadWriterImpl.class);

  private final CZClient client;
  private final BulkLoadMetaData metaData;
  // Whether the ingestion endpoint is plain HTTP; forwarded when creating the staging FileIO.
  private final boolean useHttp;
  private final int partitionId;
  // File format to write; its lowercase name is also used as the file extension.
  private final FileFormatType fileFormat;
  // Rotate to a new file once the current one exceeds this many bytes.
  private final long maxFileSize;
  // Rotate to a new file once the current one holds this many records.
  private final long maxFileRecordCount;
  // Static partition values filled into every written row.
  // NOTE(review): declared raw because the element types are not visible here;
  // confirm against PartitionSpecUtils.parsePartitionSpec and restore generics.
  private final Map partitionValues;
  // Staging credentials/location; reset to null after each file so the next
  // file forces a fresh STS token from the server.
  private StagingConfig stagingConfig;
  private FileIO fileIO;
  // Root dir to write files.
  private String location;
  // File writer responsible for the single currently-open file; null between files.
  private RowAppender applier;
  // Total number of rows written across all files.
  private long totalNumRows = 0;
  // Names of finished (closed) staging files, reported to the server on close().
  private List<String> finishedFiles = new ArrayList<>();
  // Sizes in bytes of the finished files, parallel to finishedFiles.
  private List<Long> finishedFileSizes = new ArrayList<>();
  // File name uuid to differentiate files from parallel streams.
  private String fileNameUuid = UUID.randomUUID().toString();
  // Sequential file id used to build the next file name.
  private int fileId = 0;
  private boolean closed = false;
  // Whether ArrowRow should pre-validate complex-type values on append.
  private boolean complexTypePreCheck;

  /**
   * Creates a writer for one partition of a bulk-load stream.
   *
   * @param client      RPC client used to fetch STS tokens and commit finished files
   * @param metaData    stream metadata; must be in state {@link BulkLoadState#CREATED}
   * @param config      bulk-load configuration (staging, file format, rotation limits)
   * @param partitionId id of the partition this writer is responsible for
   * @throws IOException if the staging FileIO cannot be created
   * @throws IllegalArgumentException if the stream is not in the CREATED state
   */
  public BulkLoadWriterImpl(CZClient client,
                            BulkLoadMetaData metaData,
                            BulkLoadConfig config,
                            int partitionId) throws IOException {
    Preconditions.checkArgument(metaData.getState() == BulkLoadState.CREATED,
        "Failed to create BulkLoadWriter due to invalid state: " + metaData.getState());
    this.client = client;
    this.metaData = metaData;
    this.useHttp = client.getClientContext().url().startsWith("http://");
    this.partitionId = partitionId;
    this.stagingConfig = config.getStagingConfig();
    this.fileIO = stagingConfig.createFileIO(metaData.preferInternalEndpoint(), useHttp, metaData.getEncryptionOptions());
    this.location = stagingConfig.getPath();
    this.fileFormat = config.getFileFormat();
    this.maxFileRecordCount = config.getMaxNumRowsPerFile();
    this.maxFileSize = config.getMaxFileSizeInBytesPerFile();
    this.partitionValues = PartitionSpecUtils.parsePartitionSpec(
        metaData.getArrowTable().getStreamSchema(), metaData.getPartitionSpecs());
    // Property may arrive as a String (from config files) or a Boolean; default true.
    Object obj = client.getClientContext().getProperties().getOrDefault(Constant.ARROW_ROW_FORMAT_CHECK, true);
    this.complexTypePreCheck = obj instanceof String ? Boolean.parseBoolean((String) obj) : (boolean) obj;
  }

  @Override
  public String getStreamId() {
    return metaData.getStreamId();
  }

  @Override
  public BulkLoadOperation getOperation() {
    return metaData.getOperation();
  }

  @Override
  public Schema getSchema() {
    return getTable().getSchema();
  }

  @Override
  public Table getTable() {
    return metaData.getArrowTable();
  }

  @Override
  public Optional getPartitionSpecs() {
    return metaData.getPartitionSpecs();
  }

  @Override
  public long getPartitionId() {
    return partitionId;
  }

  /**
   * Creates an empty row bound to this stream's table schema.
   *
   * @throws CZException if the stream's operation type is not supported
   */
  @Override
  public Row createRow() {
    switch (getOperation()) {
      case APPEND:
      case OVERWRITE:
      case UPSERT:
        // All supported bulk-load operations write rows as INSERTs.
        return new ArrowRow(metaData.getArrowTable(), IngestionV2.OperationType.INSERT, complexTypePreCheck);
      default:
        throw new CZException("Unsupported bulk load operation " + getOperation());
    }
  }

  /**
   * Appends rows to the current staging file, rotating the file first if it is full.
   *
   * @throws IOException if rotating or appending fails
   */
  @Override
  public void write(Row... rows) throws IOException {
    checkFileStatus();
    PartitionSpecUtils.fillPartitionValues(partitionValues, rows);
    applier.append(rows);
    totalNumRows += rows.length;
  }

  /**
   * Finishes the current file (if any) and commits the list of finished files
   * to the server. Idempotent: subsequent calls are no-ops.
   *
   * @throws IOException if closing the current file or the commit RPC fails
   */
  @Override
  public void close() throws IOException {
    if (closed) {
      return;
    }
    closeCurrentFile();
    // If no file was in flight, the FileIO created in the constructor is still
    // open; release it here so the writer never leaks it.
    if (fileIO != null) {
      fileIO.close();
      fileIO = null;
    }
    client.finishBulkLoadStreamWriterV2(
        metaData.getInstanceId(),
        metaData.getWorkspace(),
        metaData.getSchemaName(),
        metaData.getTableName(),
        metaData.getStreamId(),
        partitionId,
        finishedFiles,
        finishedFileSizes);
    LOG.info("Flush bulk load stream {} partitionId {} with {} files",
        metaData.getStreamId(), partitionId, finishedFiles.size());
    finishedFiles.clear();
    finishedFileSizes.clear();
    closed = true;
  }

  /** Builds the full path of the file currently being written. */
  private String getCurrentFileName() {
    // remove trailing slash if exists
    String path = location.endsWith("/") ? location.substring(0, location.length() - 1) : location;
    return String.format("%s/%s-%06d.%s", path, fileNameUuid, fileId, fileFormat.name().toLowerCase(Locale.ENGLISH));
  }

  /**
   * Closes the in-flight file (if any), records its name and size, and drops
   * the staging config so the next file refreshes the STS token.
   */
  private void closeCurrentFile() throws IOException {
    if (applier != null) {
      applier.close();
      String fileName = getCurrentFileName();
      finishedFiles.add(fileName);
      finishedFileSizes.add(applier.length());
      fileId++;
      applier = null;
      // Reset staging config to force refreshing token.
      fileIO.close();
      fileIO = null;
      stagingConfig = null;
    }
  }

  /**
   * Opens the next staging output file, first refreshing the STS token and
   * FileIO if they were dropped by {@link #closeCurrentFile()}.
   */
  private OutputFile createNextFile() throws IOException {
    if (stagingConfig == null) {
      stagingConfig = client.getBulkLoadStreamStsTokenV2(
          metaData.getInstanceId(),
          metaData.getWorkspace(),
          metaData.getSchemaName(),
          metaData.getTableName(),
          metaData.getStreamId());
      fileIO = stagingConfig.createFileIO(metaData.preferInternalEndpoint(), useHttp, metaData.getEncryptionOptions());
      location = stagingConfig.getPath();
    }
    String fileName = getCurrentFileName();
    return fileIO.newOutputFile(fileName);
  }

  /** Rotates the current file if it is full, then ensures a file is open for writing. */
  private void checkFileStatus() throws IOException {
    if (applier != null) {
      // Check whether the current file is too full.
      if (applier.length() > maxFileSize || applier.recordCount() >= maxFileRecordCount) {
        closeCurrentFile();
      }
    }
    if (applier == null) {
      OutputFile outputFile = createNextFile();
      applier = RowAppenderFactory.create(outputFile, metaData.getArrowTable(), fileFormat);
    }
  }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy