All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.jet.avro.AvroSources Maven / Gradle / Ivy

There is a newer version: 5.5.0
Show newest version
/*
 * Copyright 2021 Hazelcast Inc.
 *
 * Licensed under the Hazelcast Community License (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://hazelcast.com/hazelcast-community-license
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.avro;

import com.hazelcast.function.BiFunctionEx;
import com.hazelcast.function.SupplierEx;
import com.hazelcast.jet.pipeline.BatchSource;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecord;

import javax.annotation.Nonnull;

/**
 * Contains factory methods for Apache Avro sources.
 *
 * @since Jet 3.0
 */
public final class AvroSources {

    private AvroSources() {
    }

    /**
     * Returns a builder object that offers a step-by-step fluent API to build
     * a custom Avro file source for the Pipeline API. The source reads records
     * from Apache Avro files in a directory (but not its subdirectories).
     *
     * @param directory           parent directory of the files
     * @param recordClass         the class to read
     * @param                  the type of the datum
     */
    @Nonnull
    public static  AvroSourceBuilder filesBuilder(
            @Nonnull String directory,
            @Nonnull Class recordClass
    ) {
        return filesBuilder(directory, () -> SpecificRecord.class.isAssignableFrom(recordClass) ?
                new SpecificDatumReader<>(recordClass) : new ReflectDatumReader<>(recordClass));
    }

    /**
     * Returns a builder object that offers a step-by-step fluent API to build
     * a custom Avro file source for the Pipeline API. The source reads records
     * from Apache Avro files in a directory (but not its subdirectories).
     *
     * @param directory           parent directory of the files
     * @param datumReaderSupplier the supplier of datum reader which reads
     *                            records from the files
     * @param                  the type of the datum
     */
    @Nonnull
    public static  AvroSourceBuilder filesBuilder(
            @Nonnull String directory,
            @Nonnull SupplierEx> datumReaderSupplier
    ) {
        return new AvroSourceBuilder<>(directory, datumReaderSupplier);
    }

    /**
     * Convenience for {@link #filesBuilder(String, Class)} which
     * reads all the files in the supplied directory as specific records using
     * supplied {@code datumClass}. If {@code datumClass} implements {@link
     * SpecificRecord}, {@link SpecificDatumReader} is used to read the records,
     * {@link ReflectDatumReader} is used otherwise.
     */
    @Nonnull
    public static  BatchSource files(@Nonnull String directory, @Nonnull Class datumClass) {
        return filesBuilder(directory, datumClass).build();
    }

    /**
     * Convenience for {@link #filesBuilder(String, SupplierEx)} which
     * reads all the files in the supplied directory as generic records and
     * emits the results of transforming each generic record with the supplied
     * mapping function.
     */
    @Nonnull
    public static  BatchSource files(
            @Nonnull String directory,
            @Nonnull BiFunctionEx mapOutputFn
    ) {
        return filesBuilder(directory, GenericDatumReader::new)
                .build(mapOutputFn);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy