All downloads are free. The search and download functionality uses the official Maven repository.

org.talend.sdk.component.singer.kitap.Kitap Maven / Gradle / Ivy

/**
 * Copyright (C) 2006-2023 Talend Inc. - www.talend.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.talend.sdk.component.singer.kitap;

import static java.util.Collections.emptyMap;
import static java.util.Objects.requireNonNull;
import static java.util.Optional.ofNullable;
import static java.util.Spliterators.spliteratorUnknownSize;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;

import java.io.StringReader;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Spliterator;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonArrayBuilder;
import javax.json.JsonBuilderFactory;
import javax.json.JsonNumber;
import javax.json.JsonObject;
import javax.json.JsonReader;
import javax.json.JsonString;
import javax.json.JsonValue;

import org.talend.sdk.component.api.record.Record;
import org.talend.sdk.component.api.record.Schema;
import org.talend.sdk.component.runtime.input.Input;
import org.talend.sdk.component.runtime.input.Mapper;
import org.talend.sdk.component.runtime.manager.ComponentManager;
import org.talend.sdk.component.singer.java.Singer;
import org.talend.sdk.component.singer.java.SingerArgs;

import lombok.RequiredArgsConstructor;

/**
 * Runs a component {@link Mapper} and forwards what it produces to a {@link Singer} instance.
 *
 * <p>
 * Depending on {@link SingerArgs#isDiscover()}, a run either prints a catalog describing a single
 * {@code default} stream (see {@link #discover(Mapper)}) or reads every record of the mapper and writes
 * schema and record messages.
 * </p>
 */
public final class Kitap implements Runnable {

    private final SingerArgs args;

    private final Singer singer;

    private final JsonBuilderFactory jsonBuilderFactory = Json.createBuilderFactory(emptyMap());

    private final RecordJsonMapper recordJsonMapper;

    /**
     * Creates an instance from raw command line arguments with a default {@link Singer}.
     *
     * @param args the command line arguments, parsed by {@link SingerArgs}.
     */
    public Kitap(final String... args) {
        this(new SingerArgs(args), new Singer());
    }

    /**
     * Creates an instance from already parsed arguments.
     *
     * @param args the parsed arguments (configuration, optional catalog, discover flag, optional family).
     * @param singer the output used to emit the catalog, schemas and records.
     */
    public Kitap(final SingerArgs args, final Singer singer) {
        this.args = args;
        this.singer = singer;
        this.recordJsonMapper = new RecordJsonMapper(jsonBuilderFactory, singer);
    }

    /**
     * Resolves the component configuration, looks the mapper up and either discovers or reads it.
     *
     * @throws IllegalArgumentException if the component configuration is missing or incomplete, or if no
     * matching component is found.
     */
    @Override
    public void run() {
        final JsonObject componentConfig = readComponentConfig();
        final JsonObject stream = args
                .getCatalog()
                .flatMap(this::extractSelectedStream)
                .orElseGet(() -> jsonBuilderFactory.createObjectBuilder().add("stream", "default").build());
        final String streamName = stream.getString("stream");

        final ComponentManager mgr = ComponentManager.instance();
        if (mgr.find(Stream::of).count() == 0) { // note: normally TALEND-INF/plugins.properties is enough
            mgr.autoDiscoverPlugins(false, true);
        }

        // "family" is only mandatory in the configuration when it was not provided through the arguments,
        // since the arguments take precedence when resolving the family below
        final List<String> missingConfigs = Stream
                .concat(args.getComponentFamily().isPresent() ? Stream.<String> empty() : Stream.of("family"),
                        Stream.of("name"))
                .filter(key -> !componentConfig.containsKey(key))
                .collect(toList());
        if (!missingConfigs.isEmpty()) {
            throw new IllegalArgumentException("Missing component configuration entries: " + missingConfigs);
        }

        final String family = args.getComponentFamily().orElseGet(() -> componentConfig.getString("family"));
        final String name = componentConfig.getString("name");
        final int version = componentConfig.getInt("version", 0);
        final Map<String, String> configuration = ofNullable(componentConfig.getJsonObject("configuration"))
                .map(this::toConfig)
                .orElseGet(Collections::emptyMap);

        final Mapper mapper = mgr
                .findMapper(family, name, version, configuration)
                .orElseThrow(
                        () -> new IllegalArgumentException("Didn't find the component: '" + family + '#' + name + "'"));

        if (args.isDiscover()) {
            discover(mapper);
        } else {
            readAll(args.getConfig().getJsonObject("schemaCustomization"), stream, streamName, mapper);
        }
    }

    /**
     * Reads the component definition from the configuration.
     *
     * <p>
     * The {@code component} object is used when present. Otherwise the {@code component_config} string is
     * parsed as JSON and either its own {@code component} entry or the parsed object itself is returned.
     * </p>
     *
     * @return the component definition.
     * @throws IllegalArgumentException if neither entry exists.
     */
    private JsonObject readComponentConfig() {
        final JsonObject inline = args.getConfig().getJsonObject("component");
        if (inline != null) {
            return inline;
        }

        final JsonString serialized = args.getConfig().getJsonString("component_config");
        if (serialized == null) {
            throw new IllegalArgumentException("No 'component' or 'component_config' entry in config.json");
        }

        final JsonObject parsed;
        try (final JsonReader reader = Json.createReader(new StringReader(serialized.getString()))) {
            parsed = reader.readObject();
        }
        return parsed.containsKey("component") ? parsed.getJsonObject("component") : parsed;
    }

    /**
     * Prints a catalog made of a single {@code default} stream on the singer standard output.
     *
     * <p>
     * The schema is generated from the entries of the first record the mapper produces and every property of
     * that schema gets an {@code automatic}/{@code selected-by-default} metadata entry.
     * </p>
     *
     * @param mapper the mapper to sample.
     * @throws IllegalArgumentException if the mapper does not produce any record.
     */
    private void discover(final Mapper mapper) {
        final JsonObject schema = records(mapper)
                .findFirst()
                .map(record -> new JsonSchemaGenerator(record.getSchema().getEntries(), jsonBuilderFactory).get())
                .orElseThrow(() -> new IllegalArgumentException(
                        "No record found for " + mapper.rootName() + '#' + mapper.name()));

        final JsonArrayBuilder metadata = jsonBuilderFactory.createArrayBuilder();
        schema
                .getJsonObject("properties")
                .keySet()
                .forEach(property -> metadata
                        .add(jsonBuilderFactory
                                .createObjectBuilder()
                                .add("metadata",
                                        jsonBuilderFactory
                                                .createObjectBuilder()
                                                .add("inclusion", "automatic")
                                                .add("selected-by-default", true))
                                .add("breadcrumb",
                                        jsonBuilderFactory.createArrayBuilder().add("properties").add(property))));

        final JsonObject catalog = jsonBuilderFactory
                .createObjectBuilder()
                .add("streams",
                        jsonBuilderFactory
                                .createArrayBuilder()
                                .add(jsonBuilderFactory
                                        .createObjectBuilder()
                                        .add("tap_stream_id", "default")
                                        .add("stream", "default")
                                        .add("schema", schema)
                                        .add("metadata", metadata.build())))
                .build();
        singer.stdout(catalog.toString());
    }

    /**
     * Writes every record of the mapper, preceded by a schema message each time the record schema differs
     * from the previous one.
     *
     * @param config the optional {@code schemaCustomization} object, may be {@code null}; its {@code keys} and
     * {@code bookmarks} arrays are forwarded with the schema when present.
     * @param stream the selected stream; its {@code schema} entry, when present, replaces the generated one.
     * @param streamName the name used for schema and record messages.
     * @param mapper the mapper to read.
     */
    private void readAll(final JsonObject config, final JsonObject stream, final String streamName,
            final Mapper mapper) {
        final AtomicReference<Schema> lastSchema = new AtomicReference<>();
        records(mapper).forEach(record -> {
            final Schema newSchema = record.getSchema();
            if (!newSchema.equals(lastSchema.get())) {
                final JsonObject schema = ofNullable(stream.getJsonObject("schema"))
                        .orElseGet(() -> new JsonSchemaGenerator(newSchema.getEntries(), jsonBuilderFactory).get());
                singer.writeSchema(streamName, schema, arrayOrEmpty(config, "keys"),
                        arrayOrEmpty(config, "bookmarks"));
                lastSchema.set(newSchema);
            }
            singer.writeRecord(streamName, recordJsonMapper.apply(record));
        });
    }

    /**
     * @param config the object to read from, may be {@code null}.
     * @param key the entry to look up.
     * @return the array stored under {@code key} or an empty array if {@code config} or the entry is missing.
     */
    private JsonArray arrayOrEmpty(final JsonObject config, final String key) {
        if (config != null && config.containsKey(key)) {
            return config.getJsonArray(key);
        }
        return jsonBuilderFactory.createArrayBuilder().build();
    }

    /**
     * Streams the records of all the partitions of the mapper, one partition after the other.
     *
     * <p>
     * Each partition input is started when its turn comes and stopped either when it is exhausted or when its
     * stream is closed by {@code flatMap}, which also covers short-circuiting consumers and read failures.
     * </p>
     *
     * @param mapper the mapper to split and read.
     * @return a sequential stream of records.
     */
    private Stream<Record> records(final Mapper mapper) {
        return createPartitions(mapper)
                .stream() // note: can be parallelized at some point if needed but NOT parallelStream() please
                .flatMap(partition -> {
                    final Input input = partition.create();
                    input.start();
                    final InputIterator iterator = new InputIterator(input);
                    return StreamSupport
                            .stream(spliteratorUnknownSize(iterator, Spliterator.IMMUTABLE), false)
                            .onClose(iterator::close);
                });
    }

    /**
     * Flattens a JSON configuration object into string values, skipping {@code null} entries.
     *
     * <p>
     * Numbers are rendered through their {@code double} value, so an integer such as {@code 5} becomes
     * {@code "5.0"}. NOTE(review): presumably the consumer of the configuration tolerates this - confirm.
     * </p>
     *
     * @param object the {@code configuration} object of the component.
     * @return the configuration as a string map.
     * @throws IllegalArgumentException if an entry is neither a string, a number nor a boolean.
     */
    private Map<String, String> toConfig(final JsonObject object) {
        return object
                .entrySet()
                .stream()
                .filter(e -> !JsonValue.NULL.equals(e.getValue()))
                .collect(toMap(Map.Entry::getKey, e -> {
                    switch (e.getValue().getValueType()) {
                    case STRING:
                        return JsonString.class.cast(e.getValue()).getString();
                    case NUMBER:
                        return String.valueOf(JsonNumber.class.cast(e.getValue()).doubleValue());
                    case TRUE:
                    case FALSE:
                        return String.valueOf(JsonValue.TRUE.equals(e.getValue()));
                    default:
                        throw new IllegalArgumentException("Unsupported json entry: " + e);
                    }
                }));
    }

    /**
     * @param jsonObject the catalog.
     * @return the first object of the {@code streams} array having at least one selected property, if any.
     */
    private Optional<JsonObject> extractSelectedStream(final JsonObject jsonObject) {
        return ofNullable(jsonObject.getJsonArray("streams"))
                .flatMap(streams -> streams
                        .stream()
                        .filter(stream -> stream.getValueType() == JsonValue.ValueType.OBJECT)
                        .map(JsonValue::asJsonObject)
                        .filter(this::hasSelectedProperty)
                        .findFirst());
    }

    /**
     * @param stream a stream definition of the catalog.
     * @return {@code true} if {@code schema.properties} contains an object whose {@code selected} flag is
     * {@code true}, {@code false} otherwise (including when the schema or its properties are missing).
     */
    private boolean hasSelectedProperty(final JsonObject stream) {
        return ofNullable(stream.getJsonObject("schema"))
                .map(schema -> schema.getJsonObject("properties"))
                .map(properties -> properties
                        .values()
                        .stream()
                        .filter(prop -> prop.getValueType() == JsonValue.ValueType.OBJECT)
                        .anyMatch(prop -> prop.asJsonObject().getBoolean("selected", false)))
                .orElse(false);
    }

    /**
     * Splits the mapper using its own size assessment; the mapper is started before and stopped after the
     * split, whatever its outcome.
     *
     * @param mapper the mapper to split.
     * @return the mappers to read.
     * @throws NullPointerException if the split returns {@code null}.
     */
    private List<Mapper> createPartitions(final Mapper mapper) {
        mapper.start();
        try {
            return requireNonNull(mapper.split(mapper.assess()), "No mapper created after splitting the source");
        } finally {
            mapper.stop();
        }
    }

    /**
     * Initializes the environment and runs a {@link Kitap} with the given arguments.
     *
     * @param args the command line arguments.
     */
    public static void main(final String... args) {
        EnvironmentSetup.init();
        new Kitap(args).run();
    }

    /**
     * Adapts an already started {@link Input} to an {@link Iterator}; a {@code null} returned by the input
     * marks its end. The input is stopped at most once, on exhaustion or through {@link #close()}.
     */
    @RequiredArgsConstructor
    private static final class InputIterator implements Iterator<Record> {

        private final Input input;

        // element read ahead by hasNext() and not yet handed out by next()
        private Object next;

        private boolean stopped;

        @Override
        public boolean hasNext() {
            // only pull from the input when nothing is pending so repeated calls do not drop records
            if (next == null && !stopped) {
                next = input.next();
                if (next == null) {
                    close();
                }
            }
            return next != null;
        }

        @Override
        public Record next() {
            if (!hasNext()) {
                throw new NoSuchElementException("No more record to read");
            }
            final Object current = next;
            next = null;
            return Record.class.cast(current);
        }

        /**
         * Stops the underlying input unless it was already stopped.
         */
        private void close() {
            if (!stopped) {
                stopped = true; // flagged first to never stop twice, even if stop() fails
                input.stop();
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy