All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.kafka.connect.tools.SchemaSourceTask Maven / Gradle / Ivy

There is a newer version: 3.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.connect.tools;

import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.source.SourceRecord;
import org.apache.kafka.connect.source.SourceTask;
import org.apache.kafka.tools.ThroughputThrottler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * A {@link SourceTask} that produces a bounded number of records with fixed field values,
 * tagged with the task id and a monotonically increasing sequence number.
 *
 * <p>Configuration keys read in {@link #start(Map)}:
 * <ul>
 *   <li>{@value #NAME_CONFIG} - connector name (used for logging only)</li>
 *   <li>{@value #ID_CONFIG} - integer task id</li>
 *   <li>{@value #TOPIC_CONFIG} - topic the records are addressed to</li>
 *   <li>{@value #NUM_MSGS_CONFIG} - number of records to produce per task start</li>
 *   <li>{@value #THROUGHPUT_CONFIG} - target throughput handed to the {@link ThroughputThrottler}</li>
 *   <li>{@value #MULTIPLE_SCHEMA_CONFIG} - when true, records alternate between two value schemas</li>
 *   <li>{@value #PARTITION_COUNT_CONFIG} - optional, defaults to 1; modulus used for the
 *       "partitioning" field</li>
 * </ul>
 *
 * <p>The sequence number is stored as the source offset, so a restarted task resumes from the
 * sequence number following the last stored one.
 */
public class SchemaSourceTask extends SourceTask {

    private static final Logger log = LoggerFactory.getLogger(SchemaSourceTask.class);

    public static final String NAME_CONFIG = "name";
    public static final String ID_CONFIG = "id";
    public static final String TOPIC_CONFIG = "topic";
    public static final String NUM_MSGS_CONFIG = "num.messages";
    public static final String THROUGHPUT_CONFIG = "throughput";
    public static final String MULTIPLE_SCHEMA_CONFIG = "multiple.schema";
    public static final String PARTITION_COUNT_CONFIG = "partition.count";

    // Keys of the source-partition and source-offset maps respectively.
    private static final String ID_FIELD = "id";
    private static final String SEQNO_FIELD = "seqno";
    private ThroughputThrottler throttler;

    private String name; // Connector name
    private int id; // Task ID
    private String topic;
    private Map<String, Integer> partition; // Source partition: {ID_FIELD -> task id}
    private long startingSeqno; // seqno at the time start() completed
    private long seqno; // Next sequence number to emit
    private long count; // Records emitted since start()
    private long maxNumMsgs;
    private boolean multipleSchema;
    private int partitionCount;

    // Version 1 of the value schema.
    private static final Schema valueSchema = SchemaBuilder.struct().version(1).name("record")
        .field("boolean", Schema.BOOLEAN_SCHEMA)
        .field("int", Schema.INT32_SCHEMA)
        .field("long", Schema.INT64_SCHEMA)
        .field("float", Schema.FLOAT32_SCHEMA)
        .field("double", Schema.FLOAT64_SCHEMA)
        .field("partitioning", Schema.INT32_SCHEMA)
        .field("id", Schema.INT32_SCHEMA)
        .field("seqno", Schema.INT64_SCHEMA)
        .build();

    // Version 2 of the value schema: version 1 plus a "string" field with a default value.
    private static final Schema valueSchema2 = SchemaBuilder.struct().version(2).name("record")
        .field("boolean", Schema.BOOLEAN_SCHEMA)
        .field("int", Schema.INT32_SCHEMA)
        .field("long", Schema.INT64_SCHEMA)
        .field("float", Schema.FLOAT32_SCHEMA)
        .field("double", Schema.FLOAT64_SCHEMA)
        .field("partitioning", Schema.INT32_SCHEMA)
        .field("string", SchemaBuilder.string().defaultValue("abc").build())
        .field("id", Schema.INT32_SCHEMA)
        .field("seqno", Schema.INT64_SCHEMA)
        .build();

    /**
     * Returns the version reported by {@link SchemaSourceConnector}.
     */
    @Override
    public String version() {
        return new SchemaSourceConnector().version();
    }

    /**
     * Parses the task configuration, creates the throttler and restores the sequence number
     * from the previously stored offset, if there is one.
     *
     * @param props task configuration; see the class documentation for the keys that are read
     * @throws ConnectException if a numeric setting cannot be parsed, if the partition count is
     *         not positive, or if a stored offset exists but carries no numeric sequence number
     */
    @Override
    public void start(Map<String, String> props) {
        final long throughput;
        try {
            name = props.get(NAME_CONFIG);
            id = Integer.parseInt(props.get(ID_CONFIG));
            topic = props.get(TOPIC_CONFIG);
            maxNumMsgs = Long.parseLong(props.get(NUM_MSGS_CONFIG));
            multipleSchema = Boolean.parseBoolean(props.get(MULTIPLE_SCHEMA_CONFIG));
            partitionCount = Integer.parseInt(props.containsKey(PARTITION_COUNT_CONFIG) ? props.get(PARTITION_COUNT_CONFIG) : "1");
            throughput = Long.parseLong(props.get(THROUGHPUT_CONFIG));
        } catch (NumberFormatException e) {
            throw new ConnectException("Invalid SchemaSourceTask configuration", e);
        }

        // poll() computes seqno % partitionCount; fail here with a clear message rather than
        // with an ArithmeticException on the first poll.
        if (partitionCount <= 0) {
            throw new ConnectException("Invalid SchemaSourceTask configuration: "
                + PARTITION_COUNT_CONFIG + " must be positive but was " + partitionCount);
        }

        throttler = new ThroughputThrottler(throughput, System.currentTimeMillis());
        partition = Collections.singletonMap(ID_FIELD, id);
        Map<String, Object> previousOffset = this.context.offsetStorageReader().offset(partition);
        if (previousOffset != null) {
            Object lastSeqno = previousOffset.get(SEQNO_FIELD);
            // Accept any integral wrapper type instead of casting straight to Long, and report
            // a missing or malformed entry explicitly.
            if (!(lastSeqno instanceof Number)) {
                throw new ConnectException("Stored offset for SchemaSourceTask " + id
                    + " has no numeric '" + SEQNO_FIELD + "' entry: " + previousOffset);
            }
            seqno = ((Number) lastSeqno).longValue() + 1;
        } else {
            seqno = 0;
        }
        startingSeqno = seqno;
        count = 0;
        log.info("Started SchemaSourceTask {}-{} producing to topic {} resuming from seqno {}", name, id, topic, startingSeqno);
    }

    /**
     * Produces at most one record per call until {@code num.messages} records have been emitted
     * since {@link #start(Map)}; after that, each call waits on the throttler and returns an
     * empty list.
     *
     * @return a single-element list holding the next record, or an empty list once the
     *         configured number of records has been produced
     * @throws InterruptedException declared by the {@link SourceTask} contract
     */
    @Override
    public List<SourceRecord> poll() throws InterruptedException {
        if (count >= maxNumMsgs) {
            // Nothing left to produce; wait on the throttler before returning.
            throttler.throttle();
            return new ArrayList<>();
        }

        long sendStartMs = System.currentTimeMillis();
        // The throttler is given the number of records emitted since this start().
        if (throttler.shouldThrottle(seqno - startingSeqno, sendStartMs)) {
            throttler.throttle();
        }

        Map<String, Long> ccOffset = Collections.singletonMap(SEQNO_FIELD, seqno);
        int partitionVal = (int) (seqno % partitionCount);

        // With multiple schemas enabled, even counts use schema v1 and odd counts use schema v2.
        final boolean useFirstSchema = !multipleSchema || count % 2 == 0;
        final Schema schema = useFirstSchema ? valueSchema : valueSchema2;
        final Struct data = buildCommonValue(schema, partitionVal);
        if (!useFirstSchema) {
            data.put("string", "def");
        }

        // Note: the task id is passed as the record's Kafka partition argument; partitionVal is
        // only carried inside the value's "partitioning" field.
        final SourceRecord srcRecord =
            new SourceRecord(partition, ccOffset, topic, id, Schema.STRING_SCHEMA, "key", schema, data);

        // NOTE(review): this JSON line on stdout is presumably parsed by an external test
        // harness - keep its format unchanged.
        System.out.println("{\"task\": " + id + ", \"seqno\": " + seqno + "}");
        List<SourceRecord> result = Collections.singletonList(srcRecord);
        seqno++;
        count++;
        return result;
    }

    /**
     * Wakes up the throttler. Does nothing if {@link #start(Map)} never assigned one, for
     * example because configuration parsing failed.
     */
    @Override
    public void stop() {
        if (throttler != null) {
            throttler.wakeup();
        }
    }

    // Builds a value struct for the given schema, populating the fields shared by both schemas.
    private Struct buildCommonValue(Schema schema, int partitionVal) {
        return new Struct(schema)
            .put("boolean", true)
            .put("int", 12)
            .put("long", 12L)
            .put("float", 12.2f)
            .put("double", 12.2)
            .put("partitioning", partitionVal)
            .put("id", id)
            .put("seqno", seqno);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy