// com.clickzetta.platform.tools.IngestLonghaul (Maven / Gradle / Ivy artifact: clickzetta-java)
// The Java SDK for ClickZetta's Lakehouse.
// Source-browser navigation text from the original listing: "Go to download",
// "Show more of this group", "Show more artifacts with this name",
// "Show all versions of clickzetta-java", "Show documentation".
package com.clickzetta.platform.tools;
import com.clickzetta.platform.client.Table;
import com.clickzetta.platform.client.api.*;
import com.clickzetta.platform.common.Type;
import com.clickzetta.platform.tools.datagen.DataGen;
import com.clickzetta.platform.util.JsonParser;
import com.google.common.base.Preconditions;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.*;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.*;
/**
 * Command-line long-haul ingestion driver.
 *
 * <p>Reads a JSON {@code LonghaulConf}, opens a {@code Stream} on the configured table and
 * keeps applying generated rows (insert / update / upsert / delete) until a target row count
 * and/or a target duration is reached, then logs the elapsed time and throughput.
 */
public class IngestLonghaul {

  private static final Logger LOG = LoggerFactory.getLogger(IngestLonghaul.class);

  /**
   * Parses the command line into a {@code LonghaulConf}.
   *
   * <p>The configuration is loaded from the JSON file given by {@code -c/--conf}; the optional
   * {@code -s}, {@code -p}, {@code -tn} and {@code -u} switches then override the host, port,
   * table name and stream url read from that file.
   *
   * @param args raw command-line arguments
   * @return the configuration loaded from the conf file with command-line overrides applied
   * @throws IOException if the command line cannot be parsed (the usage text is printed first)
   */
  public static LonghaulConf parameterParser(String[] args) throws IOException {
    Options options = new Options();
    options.addOption("h", "help", false, "Print help.");
    options.addOption(Option.builder("c").longOpt("conf").type(String.class)
        .hasArg(true).required().desc("The json conf user define.")
        .build());
    options.addOption(Option.builder("s").longOpt("host").type(String.class)
        .hasArg(true).desc("The hostname/ip of the ingestion server")
        .build());
    options.addOption(Option.builder("p").longOpt("port").type(Integer.class)
        .hasArg(true).desc("The tcp listening port of ingestion server.")
        .build());
    options.addOption(Option.builder("tn").longOpt("tableName").type(String.class)
        .hasArg(true).desc("If your tableName is passed from script,you can specify it here.")
        .build());
    options.addOption(Option.builder("u").longOpt("url").type(String.class)
        .hasArg(true).desc("The stream url for gateway conf.")
        .build());

    HelpFormatter hf = new HelpFormatter();
    hf.setWidth(110);
    CommandLineParser parser = new DefaultParser();
    try {
      CommandLine commandLine = parser.parse(options, args);
      if (commandLine.hasOption('h')) {
        hf.printHelp("WrapExample", options, true);
        System.exit(1);
      }
      // "-c" is declared required, so parse() is expected to have rejected a command line
      // without it already; this guard is kept as a defensive fallback.
      if (!commandLine.hasOption("c")) {
        throw new ParseException("The Conf File parse fail or not set.");
      }

      String confPath = commandLine.getOptionValue("c");
      LonghaulConf conf = new JsonParser().parserLonghaulConf(confPath);
      // Command-line switches take precedence over the values read from the conf file.
      if (commandLine.hasOption("s")) {
        conf.crlHost = commandLine.getOptionValue("s");
      }
      if (commandLine.hasOption("p")) {
        conf.crlPort = Integer.parseInt(commandLine.getOptionValue("p"));
      }
      if (commandLine.hasOption("tn")) {
        conf.tableName = commandLine.getOptionValue("tn");
      }
      if (commandLine.hasOption("u")) {
        conf.streamUrl = commandLine.getOptionValue("u");
      }
      return conf;
    } catch (ParseException e) {
      hf.printHelp("WrapExample", options, true);
      throw new IOException(e);
    }
  }

  /**
   * Verifies that every user-supplied field exists in the table schema and resolves its type.
   *
   * @param table     table whose schema is checked
   * @param fieldList column names the user wants to populate
   * @return field name to column type, in the same order as {@code fieldList}
   * @throws IllegalArgumentException if a field is not a column of the table schema
   */
  private static Map<String, Type> fieldMapAndSchemaCheck(Table table, List<String> fieldList) {
    Map<String, Type> fieldMap = new LinkedHashMap<>();
    for (String fieldName : fieldList) {
      Preconditions.checkArgument(table.getSchema().hasColumn(fieldName),
          "User field [%s] is not found in schema.", fieldName);
      Type type = table.getSchema().getColumn(fieldName).getType();
      fieldMap.put(fieldName, type);
      LOG.info("get field {} with type {}", fieldName, type);
    }
    return fieldMap;
  }

  /**
   * Produces one generated value suitable for a column of the given type.
   *
   * @param dataGen  generator the value is drawn from
   * @param dataType column type to generate a value for
   * @return the generated value
   * @throws UnsupportedOperationException if no generator is mapped for {@code dataType}
   */
  private static Object genDateWithType(DataGen dataGen, Type dataType) {
    switch (dataType) {
      case BOOL:
        return dataGen.nextBoolean();
      case INT8:
        return dataGen.nextByte();
      case INT16:
        return dataGen.nextShort();
      case INT32:
      case DATE:
        // DATE columns are fed with a generated int, exactly like INT32.
        return dataGen.nextInt();
      case INT64:
        return dataGen.nextLong();
      case FLOAT:
        return dataGen.nextFloat();
      case DOUBLE:
        return dataGen.nextDouble();
      case STRING:
      case VARCHAR:
        return dataGen.nextString();
      case BINARY:
        return dataGen.nextBinary();
      case DECIMAL:
        return dataGen.nextBigDecimal();
      case UNIXTIME_MICROS:
        return dataGen.nextTimestamp();
      default:
        throw new UnsupportedOperationException("Unsupported dataType: " + dataType);
    }
  }

  /**
   * Checks that the configuration carries everything the run needs.
   *
   * @throws IllegalArgumentException on the first missing or invalid setting
   */
  private static void validateConf(LonghaulConf conf) {
    Preconditions.checkArgument(!StringUtils.isEmpty(conf.crlHost) || !StringUtils.isEmpty(conf.streamUrl),
        "crlHost or streamUrl can not be empty");
    if (!StringUtils.isEmpty(conf.crlHost)) {
      Preconditions.checkArgument(conf.crlPort > 0, "crlPort must be greater than 0");
    }
    Preconditions.checkArgument(!StringUtils.isEmpty(conf.schemaName), "schemaName can not be empty");
    Preconditions.checkArgument(!StringUtils.isEmpty(conf.tableName), "tableName can not be empty");
    Preconditions.checkArgument(!StringUtils.isEmpty(conf.operator), "operator can not be empty");
    Preconditions.checkArgument(!CollectionUtils.isEmpty(conf.fieldList), "fieldList can not be empty");
    Preconditions.checkArgument(!StringUtils.isEmpty(conf.dataGenMode), "dataGenMode can not be empty");
    Preconditions.checkArgument((conf.dataGenTimeMs != null || conf.dataGenCount != null),
        "dataGenTimeMs & dataGenCount can not be both empty");
  }

  /**
   * Builds the client from the connection-related settings that are present in the conf.
   * Declared {@code throws Exception} because the checked exceptions of the client API are
   * not visible here.
   */
  private static Client buildClient(LonghaulConf conf) throws Exception {
    ClientBuilder builder = Client.getBuilder();
    if (!StringUtils.isEmpty(conf.crlHost) && conf.crlPort != 0) {
      builder.crlAddr(conf.crlHost, conf.crlPort);
    }
    if (conf.instanceId != null) {
      builder.instanceId(conf.getInstanceId());
    }
    if (conf.workspace != null) {
      builder.workspace(conf.getWorkspace());
    }
    if (!StringUtils.isEmpty(conf.getStreamUrl())) {
      builder.streamUrl(conf.getStreamUrl());
    }
    if (conf.getProperties() != null) {
      Properties properties = new Properties();
      properties.putAll(conf.getProperties());
      builder.properties(properties);
    }
    return builder.build();
  }

  /**
   * Creates an empty row for the configured operator.
   *
   * @throws UnsupportedOperationException if the operator is not insert, update, upsert or delete
   */
  private static Row newRow(Stream stream, String operator) throws Exception {
    switch (operator) {
      case "insert":
        return stream.createInsertRow();
      case "update":
        return stream.createUpdateRow();
      case "upsert":
        return stream.createUpsertRow();
      case "delete":
        return stream.createDeleteRow();
      default:
        throw new UnsupportedOperationException(String.format("not support operator with [%s] . " +
            "use insert|update|upsert|delete instead.", operator));
    }
  }

  /**
   * Entry point: validates the configuration, feeds generated rows into the target stream
   * until the configured count and/or duration is reached, then reports cost and throughput.
   *
   * @param args command-line arguments, see {@link #parameterParser(String[])}
   * @throws Exception if parsing, validation or ingestion fails
   */
  public static void main(String[] args) throws Exception {
    LonghaulConf conf = parameterParser(args);
    validateConf(conf);

    long startTime;
    long endTime;
    long dataGenSize = 0;

    Client client = buildClient(conf);
    try {
      conf.options.setErrorTypeHandler(new ErrorTypeHandler() {
        @Override
        public void onSuccess(Message message) {
          LOG.info("mutate data with batch id {} cost {} ms successfully.", message.getBatchId(),
              System.currentTimeMillis() - message.getTimestamp());
        }

        @Override
        public void onFailure(Message message, Throwable e) {
          LOG.error(String.format("mutate data with batch id %s failed.", message.getBatchId()), e);
        }

        @Override
        public boolean getTerminateIfFailure() {
          return false;
        }
      });
      com.clickzetta.platform.client.api.Options options = conf.options.toCZSessionOptions();
      Stream stream = client.createStream(conf.schemaName, conf.tableName, conf.getTabletNum(), options);
      try {
        Map<String, Type> fieldMap = fieldMapAndSchemaCheck(stream.getTable(), conf.fieldList);
        DataGen dataGen = DataGen.build(conf.dataGenMode);

        // An unset duration leaves the deadline at "now"; an unset count leaves the target at 0.
        long totalTimestamp = System.currentTimeMillis() + (conf.dataGenTimeMs != null ? conf.dataGenTimeMs : 0);
        long totalWriteNum = conf.dataGenCount != null ? conf.dataGenCount : 0;
        // NOTE(review): presumably lets the wall clock move past the deadline computed above
        // when no duration is configured - confirm before removing.
        Thread.sleep(1);

        // gen data with target size or time.
        startTime = System.nanoTime();
        LOG.info("start to feed data with target totalCnt {} until totalTime {}.", totalWriteNum, new Timestamp(totalTimestamp));
        while (dataGenSize < totalWriteNum || System.currentTimeMillis() < totalTimestamp) {
          Row op = newRow(stream, conf.operator);
          for (Map.Entry<String, Type> entry : fieldMap.entrySet()) {
            try {
              Object value = genDateWithType(dataGen, entry.getValue());
              op.setValue(entry.getKey(), value);
            } catch (Throwable t) {
              throw new RuntimeException("Failed to set value for field [" + entry.getKey() + "]", t);
            }
          }
          stream.apply(op);
          dataGenSize++;
          if (conf.dataGenInternalMs != null && conf.dataGenInternalMs > 0) {
            Thread.sleep(conf.dataGenInternalMs);
          }
          // Stop as soon as either configured target (row count or deadline) is reached.
          boolean countReached = conf.dataGenCount != null && dataGenSize >= totalWriteNum;
          if (countReached || (conf.dataGenTimeMs != null && System.currentTimeMillis() >= totalTimestamp)) {
            break;
          }
        }
        // Taken after the loop so the measurement is correct whichever way the loop ended
        // (explicit break, or the while condition turning false).
        endTime = System.nanoTime();
        LOG.info("end to feed data with actual Cnt {} until time {}.", dataGenSize, new Timestamp(System.currentTimeMillis()));
      } finally {
        // Close the stream even when feeding fails, so it is not leaked.
        stream.close();
      }
      if (conf.isReleaseResource()) {
        LOG.info("client call release resource with target stream");
        client.releaseResource(stream);
      }
    } finally {
      client.close();
    }

    long totalNaNoTime = endTime - startTime;
    LOG.info("end to feed data size: {} cost: {} ms", dataGenSize, totalNaNoTime / 1000 / 1000);
    LOG.info("data tps: {} r/s", dataGenSize / (totalNaNoTime / 1000.0 / 1000.0 / 1000.0));
    // NOTE(review): one-second pause before the final log line; presumably gives asynchronous
    // callbacks/log output time to drain - confirm.
    Thread.sleep(1 * 1000);
    LOG.info("finish success");
  }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy