// All downloads are free. The search and download features use the official Maven repository.
//
// org.talend.sdk.component.runtime.beam.TalendIO Maven / Gradle / Ivy
//
// There is a newer version: 10.57.0
// Show newest version
/**
 * Copyright (C) 2006-2018 Talend Inc. - www.talend.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.talend.sdk.component.runtime.beam;

import static java.util.stream.Collectors.toList;
import static org.apache.beam.sdk.annotations.Experimental.Kind.SOURCE_SINK;

import java.io.IOException;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.function.Consumer;

import javax.json.JsonBuilderFactory;
import javax.json.JsonObject;
import javax.json.bind.Jsonb;

import org.apache.beam.sdk.annotations.Experimental;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.UnboundedSource;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PDone;
import org.apache.beam.sdk.values.PInput;
import org.apache.beam.sdk.values.POutput;
import org.joda.time.Instant;
import org.talend.sdk.component.runtime.base.Lifecycle;
import org.talend.sdk.component.runtime.beam.coder.JsonpJsonObjectCoder;
import org.talend.sdk.component.runtime.beam.coder.NoCheckpointCoder;
import org.talend.sdk.component.runtime.input.Input;
import org.talend.sdk.component.runtime.input.Mapper;
import org.talend.sdk.component.runtime.output.Processor;
import org.talend.sdk.component.runtime.serialization.ContainerFinder;
import org.talend.sdk.component.runtime.serialization.LightContainer;

import lombok.AllArgsConstructor;
import lombok.NoArgsConstructor;

@Experimental(SOURCE_SINK)
public final class TalendIO {

    public static Base, Mapper> read(final Mapper mapper) {
        return mapper.isStream() ? new InfiniteRead(mapper) : new Read(mapper);
    }

    /**
     * Creates the Beam write transform wrapping the given processor.
     *
     * @param output the component processor receiving the records.
     * @return a new {@link Write} transform delegating to {@code output}.
     */
    public static Write write(final Processor output) {
        final Write transform = new Write(output);
        return transform;
    }

    /**
     * Shared parent of the Talend transforms: holds the wrapped component and derives the
     * transform name and default coder from it.
     *
     * @param <A> input type of the transform.
     * @param <B> output type of the transform.
     * @param <D> type of the wrapped component.
     */
    private abstract static class Base<A extends PInput, B extends POutput, D extends Lifecycle>
            extends PTransform<A, B> {

        /** The wrapped component; stays {@code null} when the no-arg constructor is used. */
        protected D delegate;

        /**
         * @param delegate the component this transform delegates to.
         */
        protected Base(final D delegate) {
            this.delegate = delegate;
        }

        /** Leaves {@link #delegate} unset. */
        protected Base() {
            // no-op
        }

        /** Performs no validation. */
        @Override
        public void validate(final PipelineOptions options) {
            // no-op
        }

        /**
         * @return {@code "Talend[" + getName() + "]"}.
         */
        @Override
        protected String getKindString() {
            return "Talend[" + getName() + "]";
        }

        /**
         * @return the delegate root name and name joined with a slash.
         */
        @Override
        public String getName() {
            return delegate.rootName() + "/" + delegate.name();
        }

        /**
         * @return the JSON object coder bound to the delegate plugin.
         */
        @Override
        protected Coder<?> getDefaultOutputCoder() {
            return JsonpJsonObjectCoder.of(delegate.plugin());
        }
    }

    private static class Read extends Base, Mapper> {

        private Read(final Mapper delegate) {
            super(delegate);
        }

        @Override
        public PCollection expand(final PBegin incoming) {
            return incoming.apply(org.apache.beam.sdk.io.Read.from(new BoundedSourceImpl(delegate)));
        }
    }

    private static class InfiniteRead extends Base, Mapper> {

        private InfiniteRead(final Mapper delegate) {
            super(delegate);
        }

        @Override
        public PCollection expand(final PBegin incoming) {
            return incoming.apply(org.apache.beam.sdk.io.Read.from(new UnBoundedSourceImpl(delegate)));
        }
    }

    public static class Write extends Base, PDone, Processor> {

        private Write(final Processor delegate) {
            super(delegate);
        }

        @Override
        public PDone expand(final PCollection incoming) {
            incoming.apply(ParDo.of(new WriteFn(delegate)));
            return PDone.in(incoming.getPipeline());
        }
    }

    /**
     * Function forwarding each element, and each end of bundle, to the wrapped processor.
     * Whatever the processor emits is dropped by {@link #NOOP_CONSUMER}.
     */
    // NOTE(review): the type arguments of BaseProcessorFn were lost in the source this file was
    // recovered from; <JsonObject, Void> is a reconstruction - confirm against BaseProcessorFn.
    @NoArgsConstructor
    private static class WriteFn extends BaseProcessorFn<JsonObject, Void> {

        /** Consumer ignoring the records it receives. */
        private static final Consumer<JsonObject> NOOP_CONSUMER = record -> {
        };

        /** Lazily resolved; also the guard telling whether {@link #ensureInit()} already ran. */
        private volatile JsonBuilderFactory factory;

        /** Lazily resolved together with {@link #factory}. */
        private volatile Jsonb jsonb;

        WriteFn(final Processor processor) {
            super(processor);
        }

        /**
         * Passes the current element to {@code processor.onNext}.
         *
         * @param context the Beam context giving access to the element.
         * @throws Exception if the processor fails.
         */
        @ProcessElement
        public void processElement(final ProcessContext context) throws Exception {
            ensureInit();
            processor.onNext(new BeamInputFactory(context), new BeamOutputFactory(NOOP_CONSUMER, factory, jsonb));
        }

        /**
         * Notifies the processor that the current group is done through {@code processor.afterGroup}.
         *
         * @param context the Beam finish-bundle context (unused).
         * @throws Exception if the processor fails.
         */
        @FinishBundle
        public void finishBundle(final FinishBundleContext context) throws Exception {
            ensureInit();
            processor.afterGroup(new BeamOutputFactory(NOOP_CONSUMER, factory, jsonb));
        }

        /**
         * Looks up the JSON services in the plugin container on first use.
         */
        private void ensureInit() {
            if (factory == null) {
                synchronized (this) {
                    if (factory == null) {
                        final LightContainer container = ContainerFinder.Instance.get().find(processor.plugin());
                        final JsonBuilderFactory resolvedFactory = container.findService(JsonBuilderFactory.class);
                        jsonb = container.findService(Jsonb.class);
                        // factory is the guard read outside the lock: assign it last so that a thread
                        // seeing a non-null factory is guaranteed to also see jsonb
                        factory = resolvedFactory;
                    }
                }
            }
        }
    }

    @NoArgsConstructor
    @AllArgsConstructor
    private static class BoundedSourceImpl extends BoundedSource {

        private Mapper mapper;

        @Override
        public List> split(final long desiredBundleSizeBytes,
                final PipelineOptions options) throws Exception {
            mapper.start();
            try {
                return mapper.split(desiredBundleSizeBytes).stream().map(i -> new BoundedSourceImpl(i)).collect(
                        toList());
            } finally {
                mapper.stop();
            }
        }

        @Override
        public long getEstimatedSizeBytes(final PipelineOptions options) throws Exception {
            mapper.start();
            try {
                return mapper.assess();
            } finally {
                mapper.stop();
            }
        }

        @Override
        public BoundedReader createReader(final PipelineOptions options) throws IOException {
            mapper.start();
            try {
                return new BoundedReaderImpl<>(this, mapper.create());
            } finally {
                mapper.stop();
            }
        }

        @Override
        public void validate() {
            // no-op
        }

        @Override
        public Coder getOutputCoder() {
            return JsonpJsonObjectCoder.of(mapper.plugin());
        }
    }

    @NoArgsConstructor
    @AllArgsConstructor
    private static class UnBoundedSourceImpl extends UnboundedSource {

        private Mapper mapper;

        @Override
        public List>
                split(final int desiredNumSplits, final PipelineOptions options) {
            mapper.start();
            try {
                return mapper.split(desiredNumSplits).stream().map(i -> new UnBoundedSourceImpl(i)).collect(toList());
            } finally {
                mapper.stop();
            }
        }

        @Override
        public UnboundedReader createReader(final PipelineOptions options,
                final UnboundedSource.CheckpointMark checkpointMark) throws IOException {
            return new UnBoundedReaderImpl<>(this, mapper.create());
        }

        @Override
        public Coder getOutputCoder() {
            return JsonpJsonObjectCoder.of(mapper.plugin());
        }

        @Override
        public Coder getCheckpointMarkCoder() {
            return new NoCheckpointCoder();
        }
    }

    /**
     * Bounded reader pulling records from an {@link Input}. Records which are not already
     * {@link JsonObject} instances are converted through a JSON-B round trip.
     *
     * @param <T> the type exposed by {@link #getCurrent()}.
     */
    private static class BoundedReaderImpl<T> extends BoundedSource.BoundedReader<T> {

        private BoundedSource<T> source;

        private Input input;

        /** Last record read, {@code null} when nothing was read or the input is exhausted. */
        private Object current;

        /** Resolved from the plugin container the first time a conversion is needed. */
        private volatile Jsonb jsonb;

        BoundedReaderImpl(final BoundedSource<T> source, final Input input) {
            this.source = source;
            this.input = input;
        }

        /**
         * Starts the input then reads the first record.
         *
         * @return true if a record is available.
         * @throws IOException declared by the Beam contract.
         */
        @Override
        public boolean start() throws IOException {
            input.start();
            return advance();
        }

        /**
         * Reads the next record from the input.
         *
         * @return false when the input returned {@code null}, true otherwise.
         * @throws IOException declared by the Beam contract.
         */
        @Override
        public boolean advance() throws IOException {
            final Object next = input.next();
            if (next != null && !(next instanceof JsonObject)) {
                if (jsonb == null) {
                    synchronized (this) {
                        if (jsonb == null) {
                            jsonb = ContainerFinder.Instance.get().find(input.plugin()).findService(Jsonb.class);
                        }
                    }
                }
                // serialize then parse again to get a JsonObject view of the record
                current = jsonb.fromJson(jsonb.toJson(next), JsonObject.class);
            } else {
                current = next;
            }
            return current != null;
        }

        /**
         * @return the record read by the last successful {@link #start()} or {@link #advance()}.
         * @throws NoSuchElementException if there is no current record.
         */
        @Override
        @SuppressWarnings("unchecked") // current is a JsonObject here, T is the element type of the source
        public T getCurrent() throws NoSuchElementException {
            if (current == null) {
                // honor the declared contract instead of silently handing out null
                throw new NoSuchElementException("No current record");
            }
            return (T) current;
        }

        /**
         * Stops the underlying input.
         *
         * @throws IOException declared by the Beam contract.
         */
        @Override
        public void close() throws IOException {
            input.stop();
        }

        /**
         * @return the source this reader was created with.
         */
        @Override
        public BoundedSource<T> getCurrentSource() {
            return source;
        }
    }

    /**
     * Unbounded reader pulling records from an {@link Input}. Records which are not already
     * {@link JsonObject} instances are converted through a JSON-B round trip. Timestamps and
     * watermark are the current time and no checkpoint is supported.
     *
     * @param <T> the type exposed by {@link #getCurrent()}.
     */
    private static class UnBoundedReaderImpl<T> extends UnboundedSource.UnboundedReader<T> {

        private UnboundedSource<T, ?> source;

        private Input input;

        /** Last record read, {@code null} when nothing was read or nothing is available. */
        private Object current;

        /** Resolved from the plugin container the first time a conversion is needed. */
        private volatile Jsonb jsonb;

        UnBoundedReaderImpl(final UnboundedSource<T, ?> source, final Input input) {
            this.source = source;
            this.input = input;
        }

        /**
         * Starts the input then reads the first record.
         *
         * @return true if a record is available.
         * @throws IOException declared by the Beam contract.
         */
        @Override
        public boolean start() throws IOException {
            input.start();
            return advance();
        }

        /**
         * Reads the next record from the input.
         *
         * @return false when the input returned {@code null}, true otherwise.
         */
        @Override
        public boolean advance() {
            final Object next = input.next();
            if (next != null && !(next instanceof JsonObject)) {
                if (jsonb == null) {
                    synchronized (this) {
                        if (jsonb == null) {
                            jsonb = ContainerFinder.Instance.get().find(input.plugin()).findService(Jsonb.class);
                        }
                    }
                }
                // serialize then parse again to get a JsonObject view of the record
                current = jsonb.fromJson(jsonb.toJson(next), JsonObject.class);
            } else {
                current = next;
            }
            return current != null;
        }

        /**
         * @return the record read by the last successful {@link #start()} or {@link #advance()}.
         * @throws NoSuchElementException if there is no current record.
         */
        @Override
        @SuppressWarnings("unchecked") // current is a JsonObject here, T is the element type of the source
        public T getCurrent() throws NoSuchElementException {
            if (current == null) {
                // honor the declared contract instead of silently handing out null
                throw new NoSuchElementException("No current record");
            }
            return (T) current;
        }

        /**
         * Stops the underlying input.
         *
         * @throws IOException declared by the Beam contract.
         */
        @Override
        public void close() throws IOException {
            input.stop();
        }

        /**
         * @return the current time, records carry no timestamp of their own.
         */
        @Override // we can add @Timestamp later on current model if needed, let's start without
        public Instant getCurrentTimestamp() throws NoSuchElementException {
            return Instant.now();
        }

        /**
         * @return the current time.
         */
        @Override
        public Instant getWatermark() {
            return Instant.now();
        }

        /**
         * @return the no-op checkpoint mark.
         */
        @Override // we can add a @Checkpoint method on the emitter if needed, let's start without
        public UnboundedSource.CheckpointMark getCheckpointMark() {
            return UnboundedSource.CheckpointMark.NOOP_CHECKPOINT_MARK;
        }

        /**
         * @return the source this reader was created with.
         */
        @Override
        public UnboundedSource<T, ?> getCurrentSource() {
            return source;
        }
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy