
// com.hazelcast.jet.pipeline.file.FileSources (Maven / Gradle / Ivy)
/*
* Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.jet.pipeline.file;
/**
 * Contains factory methods for the Unified File Connector.
 *
 * @since Jet 4.4
 */
public final class FileSources {

    /** Static factory holder; never instantiated. */
    private FileSources() {
    }

    /**
     * The main entry point to the Unified File Connector.
     * <p>
     * Returns a {@link FileSourceBuilder} configured with default values
     * (the format is preset to {@link LinesTextFileFormat}), see its
     * documentation for more options.
     * <p>
     * The path specifies the filesystem type (for example {@code s3a://},
     * {@code hdfs://}) and the path to the files. If it doesn't specify a file
     * system, a local file system is used - in this case the path must be
     * absolute. By "local" we mean local to each Jet cluster member, not to
     * the client submitting the job.
     * <p>
     * The following file systems are supported:
     * <ul>
     *     <li>{@code s3a://} (Amazon S3)</li>
     *     <li>{@code hdfs://} (HDFS)</li>
     *     <li>{@code wasbs://} (Azure Cloud Storage)</li>
     *     <li>{@code adl://} (Azure Data Lake Gen 1)</li>
     *     <li>{@code abfs://} (Azure Data Lake Gen 2)</li>
     *     <li>{@code gs://} (Google Cloud Storage)</li>
     * </ul>
     * <p>
     * The path must point to a directory. All files in the directory are
     * processed. Subdirectories are not processed recursively.
     * The path must not contain any wildcard characters.
     * <p>
     * Example usage:
     * <pre>{@code
     * Pipeline p = Pipeline.create();
     * p.readFrom(FileSources.files("/path/to/directory").build())
     *  .map(line -> LogParser.parse(line))
     *  .filter(log -> log.level().equals("ERROR"))
     *  .writeTo(Sinks.logger());
     * }</pre>
     *
     * @param path the path to the directory
     * @return the builder object with fluent API
     */
    public static FileSourceBuilder<String> files(String path) {
        // Parameterized (not raw) return type so the item type survives into
        // the fluent chain shown in the example above.
        return new FileSourceBuilder<>(path)
                .format(new LinesTextFileFormat());
    }
}