All downloads are free. Search and download functionality uses the official Maven repository.

com.clickzetta.platform.tools.IngestLonghaul Maven / Gradle / Ivy

There is a newer version: 2.0.0
Show newest version
package com.clickzetta.platform.tools;

import com.clickzetta.platform.client.Table;
import com.clickzetta.platform.client.api.*;
import com.clickzetta.platform.common.Type;
import com.clickzetta.platform.tools.datagen.DataGen;
import com.clickzetta.platform.util.JsonParser;
import com.google.common.base.Preconditions;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.*;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.sql.Timestamp;
import java.util.*;

/**
 * Command-line long-haul ingestion tool.
 *
 * <p>Reads a JSON configuration, opens a stream on the configured table, and keeps applying rows
 * filled with generated values until the configured row count and/or duration is reached. It then
 * closes the stream and client and logs the elapsed time and throughput.
 */
public class IngestLonghaul {

  private static final Logger LOG = LoggerFactory.getLogger(IngestLonghaul.class);

  /** Program name shown in the usage text printed by {@link #parameterParser(String[])}. */
  private static final String TOOL_NAME = "IngestLonghaul";

  /**
   * Parses the command line into a {@link LonghaulConf}.
   *
   * <p>The configuration is loaded from the JSON file given with {@code -c/--conf} (required).
   * The optional {@code -s/--host}, {@code -p/--port}, {@code -tn/--tableName} and
   * {@code -u/--url} arguments then override {@code crlHost}, {@code crlPort},
   * {@code tableName} and {@code streamUrl} of the loaded configuration.
   *
   * <p>When {@code -h} is present in a successfully parsed command line, the usage text is
   * printed and the JVM exits with status 1.
   *
   * @param args raw command-line arguments
   * @return the loaded configuration with command-line overrides applied
   * @throws IOException if the command line cannot be parsed; the usage text is printed first
   *     and the {@link ParseException} is attached as the cause
   * @throws NumberFormatException if the value given for {@code -p} is not an integer
   */
  public static LonghaulConf parameterParser(String[] args) throws IOException {
    Options options = new Options();
    options.addOption("h", "help", false, "Print help.");
    options.addOption(Option.builder("c").longOpt("conf").type(String.class)
        .hasArg(true).required().desc("The json conf user define.")
        .build());

    options.addOption(Option.builder("s").longOpt("host").type(String.class)
        .hasArg(true).desc("The hostname/ip of the ingestion server")
        .build());

    options.addOption(Option.builder("p").longOpt("port").type(Integer.class)
        .hasArg(true).desc("The tcp listening port of ingestion server.")
        .build());

    options.addOption(Option.builder("tn").longOpt("tableName").type(String.class)
        .hasArg(true).desc("If your tableName is passed from script,you can specify it here.")
        .build());

    options.addOption(Option.builder("u").longOpt("url").type(String.class)
        .hasArg(true).desc("The stream url for gateway conf.")
        .build());

    HelpFormatter hf = new HelpFormatter();
    hf.setWidth(110);
    CommandLineParser parser = new DefaultParser();
    try {
      CommandLine commandLine = parser.parse(options, args);
      if (commandLine.hasOption('h')) {
        hf.printHelp(TOOL_NAME, options, true);
        System.exit(1);
      }

      if (commandLine.hasOption("c")) {
        String confPath = commandLine.getOptionValue("c");
        LonghaulConf conf = new JsonParser().parserLonghaulConf(confPath);

        // Command-line values take precedence over what the JSON file provided.
        if (commandLine.hasOption("s")) {
          conf.crlHost = commandLine.getOptionValue("s");
        }
        if (commandLine.hasOption("p")) {
          conf.crlPort = Integer.parseInt(commandLine.getOptionValue("p"));
        }
        if (commandLine.hasOption("tn")) {
          conf.tableName = commandLine.getOptionValue("tn");
        }
        if (commandLine.hasOption("u")) {
          conf.streamUrl = commandLine.getOptionValue("u");
        }
        return conf;
      }
      // not reach here if all right.
      throw new ParseException("The Conf File parse fail or not set.");
    } catch (ParseException e) {
      hf.printHelp(TOOL_NAME, options, true);
      throw new IOException(e);
    }
  }

  /**
   * Verifies that every requested field exists in the table schema and resolves its type.
   *
   * @param table table whose schema is consulted
   * @param fieldList names of the fields to populate
   * @return field name to column type, in the iteration order of {@code fieldList}
   * @throws IllegalArgumentException if a field is not a column of the table schema
   */
  private static Map<String, Type> fieldMapAndSchemaCheck(Table table, List<String> fieldList) {
    // LinkedHashMap keeps the order in which the user listed the fields.
    Map<String, Type> fieldMap = new LinkedHashMap<>();
    for (String fieldName : fieldList) {
      Preconditions.checkArgument(table.getSchema().hasColumn(fieldName),
          "User field [%s] is not found in schema.", fieldName);
      Type type = table.getSchema().getColumn(fieldName).getType();
      fieldMap.put(fieldName, type);
      LOG.info("get field {} with type {}", fieldName, type);
    }
    return fieldMap;
  }

  /**
   * Produces one generated value for the given column type.
   *
   * @param dataGen generator the value is drawn from
   * @param dataType column type selecting which generator method is called
   * @return the generated value
   * @throws UnsupportedOperationException if no generator is mapped to {@code dataType}
   */
  private static Object genDateWithType(DataGen dataGen, Type dataType) {
    switch (dataType) {
      case BOOL:
        return dataGen.nextBoolean();
      case INT8:
        return dataGen.nextByte();
      case INT16:
        return dataGen.nextShort();
      case INT32:
      case DATE:
        return dataGen.nextInt();
      case INT64:
        return dataGen.nextLong();
      case FLOAT:
        return dataGen.nextFloat();
      case DOUBLE:
        return dataGen.nextDouble();
      case STRING:
      case VARCHAR:
        return dataGen.nextString();
      case BINARY:
        return dataGen.nextBinary();
      case DECIMAL:
        return dataGen.nextBigDecimal();
      case UNIXTIME_MICROS:
        return dataGen.nextTimestamp();
      default:
        throw new UnsupportedOperationException("Unsupported dataType: " + dataType);
    }
  }

  /**
   * Checks that the configuration carries everything {@link #main(String[])} needs.
   *
   * @param conf configuration to validate
   * @throws IllegalArgumentException if a required setting is missing or invalid
   */
  private static void validateConf(LonghaulConf conf) {
    boolean hasCrlHost = !StringUtils.isEmpty(conf.crlHost);
    Preconditions.checkArgument(hasCrlHost || !StringUtils.isEmpty(conf.streamUrl),
        "crlHost or streamUrl can not be empty");
    if (hasCrlHost) {
      Preconditions.checkArgument(conf.crlPort > 0, "crlPort must be greater than 0");
    }
    Preconditions.checkArgument(!StringUtils.isEmpty(conf.schemaName), "schemaName can not be empty");
    Preconditions.checkArgument(!StringUtils.isEmpty(conf.tableName), "tableName can not be empty");
    Preconditions.checkArgument(!StringUtils.isEmpty(conf.operator), "operator can not be empty");
    Preconditions.checkArgument(!CollectionUtils.isEmpty(conf.fieldList), "fieldList can not be empty");
    Preconditions.checkArgument(!StringUtils.isEmpty(conf.dataGenMode), "dataGenMode can not be empty");
    Preconditions.checkArgument((conf.dataGenTimeMs != null || conf.dataGenCount != null),
        "dataGenTimeMs & dataGenCount can not be both empty");
  }

  /**
   * Creates the handler that logs the outcome of each batch and never asks for termination
   * on failure.
   */
  private static ErrorTypeHandler loggingErrorHandler() {
    return new ErrorTypeHandler() {
      @Override
      public void onSuccess(Message message) {
        LOG.info("mutate data with batch id {} cost {} ms successfully.", message.getBatchId(),
            System.currentTimeMillis() - message.getTimestamp());
      }

      @Override
      public void onFailure(Message message, Throwable e) {
        LOG.error(String.format("mutate data with batch id %s failed.", message.getBatchId()), e);
      }

      @Override
      public boolean getTerminateIfFailure() {
        return false;
      }
    };
  }

  /**
   * Entry point: parses and validates the configuration, then feeds generated rows into the
   * target stream until the configured count or time limit is reached.
   *
   * @param args command-line arguments, see {@link #parameterParser(String[])}
   * @throws Exception if parsing, validation, stream creation or data feeding fails
   */
  public static void main(String[] args) throws Exception {
    LonghaulConf conf = parameterParser(args);
    validateConf(conf);

    ClientBuilder builder = Client.getBuilder();
    if (!StringUtils.isEmpty(conf.crlHost) && conf.crlPort != 0) {
      builder.crlAddr(conf.crlHost, conf.crlPort);
    }
    if (conf.instanceId != null) {
      builder.instanceId(conf.getInstanceId());
    }
    if (conf.workspace != null) {
      builder.workspace(conf.getWorkspace());
    }
    if (!StringUtils.isEmpty(conf.getStreamUrl())) {
      builder.streamUrl(conf.getStreamUrl());
    }
    if (conf.getProperties() != null) {
      Properties properties = new Properties();
      properties.putAll(conf.getProperties());
      builder.properties(properties);
    }
    Client client = builder.build();

    long dataGenSize = 0;
    long startTime = 0L;
    long endTime = 0L;
    // The finally blocks make sure the stream and client are closed even when feeding fails.
    try {
      conf.options.setErrorTypeHandler(loggingErrorHandler());
      com.clickzetta.platform.client.api.Options options = conf.options.toCZSessionOptions();

      Stream stream = client.createStream(conf.schemaName, conf.tableName, conf.getTabletNum(), options);
      try {
        Map<String, Type> fieldMap = fieldMapAndSchemaCheck(stream.getTable(), conf.fieldList);
        DataGen dataGen = DataGen.build(conf.dataGenMode);

        long totalTimestamp = System.currentTimeMillis() + (conf.dataGenTimeMs != null ? conf.dataGenTimeMs : 0);
        long totalWriteNum = conf.dataGenCount != null ? conf.dataGenCount : 0;
        // NOTE(review): presumably lets the clock move past totalTimestamp when no duration is
        // configured, so the time clause of the loop condition starts out false - confirm.
        Thread.sleep(1);

        // gen data with target size or time.
        startTime = System.nanoTime();
        LOG.info("start to feed data with target totalCnt {} until totalTime {}.", totalWriteNum, new Timestamp(totalTimestamp));
        while (dataGenSize < totalWriteNum || System.currentTimeMillis() < totalTimestamp) {
          Row op;
          switch (conf.operator) {
            case "insert":
              op = stream.createInsertRow();
              break;
            case "update":
              op = stream.createUpdateRow();
              break;
            case "upsert":
              op = stream.createUpsertRow();
              break;
            case "delete":
              op = stream.createDeleteRow();
              break;
            default:
              throw new UnsupportedOperationException(String.format("not support operator with [%s] . " +
                  "use insert|update|upsert|delete instead.", conf.operator));
          }
          for (Map.Entry<String, Type> entry : fieldMap.entrySet()) {
            try {
              Object value = genDateWithType(dataGen, entry.getValue());
              op.setValue(entry.getKey(), value);
            } catch (Exception e) {
              throw new RuntimeException("failed to set value for field " + entry.getKey(), e);
            }
          }
          stream.apply(op);
          dataGenSize++;
          if (conf.dataGenInternalMs != null && conf.dataGenInternalMs > 0) {
            Thread.sleep(conf.dataGenInternalMs);
          }
          // Stop as soon as either configured limit is reached.
          if ((conf.dataGenCount != null && dataGenSize >= totalWriteNum)
              || (conf.dataGenTimeMs != null && System.currentTimeMillis() >= totalTimestamp)) {
            break;
          }
        }
        // Taken after the loop so it is set no matter how the loop ended.
        endTime = System.nanoTime();
        LOG.info("end to feed data with actual Cnt {} until time {}.", dataGenSize, new Timestamp(System.currentTimeMillis()));
      } finally {
        stream.close();
      }
      if (conf.isReleaseResource()) {
        LOG.info("client call release resource with target stream");
        client.releaseResource(stream);
      }
    } finally {
      client.close();
    }

    long totalNanoTime = endTime - startTime;
    LOG.info("end to feed data size: {} cost: {} ms", dataGenSize, totalNanoTime / 1000 / 1000);
    LOG.info("data tps: {} r/s", dataGenSize / (totalNanoTime / 1000.0 / 1000.0 / 1000.0));
    Thread.sleep(1 * 1000);
    LOG.info("finish success");
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy