All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.jet.pipeline.file.impl.LocalFileSourceFactory Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.pipeline.file.impl;

import com.hazelcast.function.FunctionEx;
import com.hazelcast.jet.JetException;
import com.hazelcast.jet.core.ProcessorMetaSupplier;
import com.hazelcast.jet.core.processor.SourceProcessors;
import com.hazelcast.jet.json.JsonUtil;
import com.hazelcast.jet.pipeline.file.FileFormat;
import com.hazelcast.jet.pipeline.file.JsonFileFormat;
import com.hazelcast.jet.pipeline.file.LinesTextFileFormat;
import com.hazelcast.jet.pipeline.file.ParquetFileFormat;
import com.hazelcast.jet.pipeline.file.RawBytesFileFormat;
import com.hazelcast.jet.pipeline.file.TextFileFormat;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

import javax.annotation.Nonnull;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.ServiceLoader;
import java.util.stream.Stream;

import static com.hazelcast.jet.impl.util.Util.uncheckRun;
import static java.util.Objects.requireNonNull;

/**
 * Implementation of FileSourceFactory for the local filesystem.
 */
public class LocalFileSourceFactory implements FileSourceFactory {

    private static Map readFileFnProviders;

    static {
        Map mapFns = new HashMap<>();

        addMapFnProvider(mapFns, new JsonReadFileFnProvider());
        addMapFnProvider(mapFns, new LinesReadFileFnProvider());
        addMapFnProvider(mapFns, new ParquetReadFileFnProvider());
        addMapFnProvider(mapFns, new RawBytesReadFileFnProvider());
        addMapFnProvider(mapFns, new TextReadFileFnProvider());

        ServiceLoader loader = ServiceLoader.load(ReadFileFnProvider.class);
        for (ReadFileFnProvider readFileFnProvider : loader) {
            addMapFnProvider(mapFns, readFileFnProvider);
        }

        LocalFileSourceFactory.readFileFnProviders = Collections.unmodifiableMap(mapFns);
    }

    private static void addMapFnProvider(Map mapFns, ReadFileFnProvider provider) {
        mapFns.put(provider.format(), provider);
    }

    @Nonnull @Override
    public  ProcessorMetaSupplier create(@Nonnull FileSourceConfiguration fsc) {
        FileFormat format = requireNonNull(fsc.getFormat());
        ReadFileFnProvider readFileFnProvider = readFileFnProviders.get(format.format());
        if (readFileFnProvider == null) {
            throw new JetException("Could not find ReadFileFnProvider for FileFormat: " + format.format() + ". " +
                    "Did you provide correct modules on classpath?");
        }
        FunctionEx> mapFn = readFileFnProvider.createReadFileFn(format);
        return SourceProcessors.readFilesP(fsc.getPath(), fsc.getGlob(), fsc.isSharedFileSystem(),
                fsc.isIgnoreFileNotFound(), mapFn);
    }

    @SuppressFBWarnings("OBL_UNSATISFIED_OBLIGATION")
    private abstract static class AbstractReadFileFnProvider implements ReadFileFnProvider {

        @Nonnull @Override
        public  FunctionEx> createReadFileFn(@Nonnull FileFormat format) {
            FunctionEx> mapInputStreamFn = mapInputStreamFn(format);
            return path -> {
                FileInputStream fis = new FileInputStream(path.toFile());
                return mapInputStreamFn.apply(fis).onClose(() -> uncheckRun(fis::close));
            };
        }

        @Nonnull
        abstract  FunctionEx> mapInputStreamFn(FileFormat format);
    }

    private static class JsonReadFileFnProvider implements ReadFileFnProvider {

        @Nonnull
        @Override
        @SuppressWarnings("unchecked")
        public  FunctionEx> createReadFileFn(@Nonnull FileFormat format) {
            JsonFileFormat jsonFileFormat = (JsonFileFormat) format;
            Class formatClazz = jsonFileFormat.clazz();

            return path -> {
                // Jackson doesn't handle empty files
                if (path.toFile().length() == 0) {
                    return Stream.empty();
                }

                if (formatClazz == null) {
                    return (Stream) JsonUtil.mapSequenceFrom(path);
                } else {
                    return JsonUtil.beanSequenceFrom(path, formatClazz);
                }
            };
        }

        @Nonnull
        @Override
        public String format() {
            return JsonFileFormat.FORMAT_JSON;
        }
    }

    private static class LinesReadFileFnProvider extends AbstractReadFileFnProvider {

        @Nonnull @Override
        @SuppressWarnings("unchecked")
         FunctionEx> mapInputStreamFn(FileFormat format) {
            LinesTextFileFormat linesTextFileFormat = (LinesTextFileFormat) format;
            String thisCharset = linesTextFileFormat.charset().name();
            return is -> {
                BufferedReader reader = new BufferedReader(new InputStreamReader(is, thisCharset));
                return (Stream) reader.lines().onClose(() -> uncheckRun(reader::close));
            };
        }

        @Nonnull @Override
        public String format() {
            return LinesTextFileFormat.FORMAT_LINES;
        }
    }

    private static class ParquetReadFileFnProvider implements ReadFileFnProvider {

        @Nonnull @Override
        public  FunctionEx> createReadFileFn(@Nonnull FileFormat format) {
            throw new UnsupportedOperationException("Reading Parquet files is not supported in local filesystem mode." +
                    " " +
                    "Use Jet Hadoop module with FileSourceBuilder.useHadoopForLocalFiles option instead.");
        }

        @Nonnull @Override
        public String format() {
            return ParquetFileFormat.FORMAT_PARQUET;
        }
    }

    private static class RawBytesReadFileFnProvider extends AbstractReadFileFnProvider {

        @Nonnull @Override
        @SuppressWarnings("unchecked")
         FunctionEx> mapInputStreamFn(FileFormat format) {
            return is -> (Stream) Stream.of(is.readAllBytes());
        }

        @Nonnull @Override
        public String format() {
            return RawBytesFileFormat.FORMAT_BIN;
        }
    }

    private static class TextReadFileFnProvider extends AbstractReadFileFnProvider {

        @Nonnull @Override
        @SuppressWarnings("unchecked")
         FunctionEx> mapInputStreamFn(FileFormat format) {
            TextFileFormat textFileFormat = (TextFileFormat) format;
            String thisCharset = textFileFormat.charset().name();
            return is -> (Stream) Stream.of(new String(is.readAllBytes(), Charset.forName(thisCharset)));
        }

        @Nonnull @Override
        public String format() {
            return TextFileFormat.FORMAT_TXT;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy