All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.jet.pipeline.file.FileFormat Maven / Gradle / Ivy

There is a newer version: 5.5.0
Show newest version
/*
 * Copyright (c) 2008-2023, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.pipeline.file;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;

/**
 * Describes the data format of a file to be used as a Jet data source.
 * This is a data object that holds the configuration; actual
 * implementation code is looked up elsewhere, by using this object as a
 * key.
 *
 * @param <T> the type of items a source using this file format will emit
 * @since Jet 4.4
 */
public interface FileFormat<T> extends Serializable {

    /**
     * Returns the name of the file format. The convention is to use the
     * well-known filename suffix or, if there is none, a short-form name of
     * the format.
     *
     * @return the name of the file format
     */
    @Nonnull
    String format();

    // Factory methods for supported file formats are here for easy discoverability.

    /**
     * Returns a file format for Avro files. Equivalent to calling
     * {@link #avro(Class)} with {@code null}.
     *
     * @param <T> the type of items a source using this format will emit
     * @return a new Avro file format
     */
    @Nonnull
    static <T> AvroFileFormat<T> avro() {
        return avro(null);
    }

    /**
     * Returns a file format for Avro files that specifies to use reflection
     * to deserialize the data into instances of the provided Java class.
     * Jet will use the {@code ReflectDatumReader} to read Avro data. The
     * parameter may be {@code null}, disabling the option to deserialize
     * using reflection, but for that case you may prefer the no-argument
     * {@link #avro()} call.
     *
     * @param clazz the class to deserialize the data into, or {@code null}
     *              to disable deserialization using reflection
     * @param <T>   the type of items a source using this format will emit
     * @return a new Avro file format configured with the given class
     */
    @Nonnull
    static <T> AvroFileFormat<T> avro(@Nullable Class<T> clazz) {
        // Explicit type argument: a diamond here would infer Object for the chained call.
        return new AvroFileFormat<T>().withReflect(clazz);
    }

    /**
     * Returns a file format for CSV files which specifies to deserialize each
     * line into {@code String[]}. It assumes the CSV has a header line and
     * specifies to use it as the column names that map to the object's fields.
     * <p>
     * {@code fieldNames} specify which column should be at which index in the
     * resulting string array. It is useful if the files have different field
     * order or don't have the same set of columns.
     * <p>
     * For example, if the argument is {@code [surname, name]}, then the format
     * will always return items of type String[2] where at index 0 is the
     * {@code surname} column and at index 1 is the {@code name} column,
     * regardless of the actual columns found in a particular file. If some
     * file doesn't have some field, the value at its index will always be 0.
     * <p>
     * If the given list is {@code null}, the length and order of the string
     * array will match the order found in each file. It can be different for
     * each file. If it's an empty array, a zero-length array will be returned.
     *
     * @param fieldNames the column names, in the order they should appear in
     *                   the resulting array, or {@code null} to use the order
     *                   found in each file
     * @return a new CSV file format emitting {@code String[]} items
     */
    @Nonnull
    static CsvFileFormat<String[]> csv(@Nullable List<String> fieldNames) {
        return new CsvFileFormat<>(fieldNames);
    }

    /**
     * Returns a file format for CSV files which specifies to deserialize each
     * line into an instance of the given class. It assumes the CSV has a
     * header line and specifies to use it as the column names that map to the
     * object's fields.
     *
     * @param clazz the class to deserialize each line into
     * @param <T>   the type of items a source using this format will emit
     * @return a new CSV file format configured with the given class
     */
    @Nonnull
    static <T> CsvFileFormat<T> csv(@Nonnull Class<T> clazz) {
        return new CsvFileFormat<>(clazz);
    }

    /**
     * Returns a file format for JSON Lines files. Equivalent to calling
     * {@link #json(Class)} with {@code null}.
     *
     * @return a new JSON Lines file format
     */
    @Nonnull
    static JsonFileFormat<Map<String, Object>> json() {
        return json(null);
    }

    /**
     * Returns a file format for JSON Lines files, where each line of text
     * is one JSON object. It specifies to deserialize the JSON data into
     * instances of the provided class. It uses Jackson jr, which
     * supports the basic data types such as strings, numbers, lists and maps,
     * objects with JavaBeans-style getters/setters, as well as public fields.
     * If parameter is {@code null}, data is deserialized into {@code
     * Map<String, Object>} but for that case you may prefer the no-argument
     * {@link #json()} call.
     *
     * @param clazz the class to deserialize the JSON data into, or
     *              {@code null}
     * @param <T>   the type of items a source using this format will emit
     * @return a new JSON Lines file format configured with the given class
     */
    @Nonnull
    static <T> JsonFileFormat<T> json(@Nullable Class<T> clazz) {
        // Explicit type argument: a diamond here would infer Object for the chained call.
        return new JsonFileFormat<T>().withClass(clazz);
    }

    /**
     * Returns a file format for text files where each line is a {@code String}
     * data item. It uses the UTF-8 character encoding.
     *
     * @return a new line-oriented text file format
     */
    @Nonnull
    static LinesTextFileFormat lines() {
        return new LinesTextFileFormat();
    }

    /**
     * Returns a file format for text files where each line is a {@code String}
     * data item. This variant allows you to choose the character encoding.
     * Note that the Hadoop-based file connector only accepts UTF-8.
     *
     * @param charset character encoding of the file
     * @return a new line-oriented text file format using the given charset
     */
    @Nonnull
    static LinesTextFileFormat lines(@Nonnull Charset charset) {
        return new LinesTextFileFormat(charset);
    }

    /**
     * Returns a file format for Parquet files.
     * <p>
     * <strong>NOTE:</strong> this format is supported only through the Hadoop
     * connector.
     *
     * @param <T> the type of items a source using this format will emit
     * @return a new Parquet file format
     */
    @Nonnull
    static <T> ParquetFileFormat<T> parquet() {
        return new ParquetFileFormat<>();
    }

    /**
     * Returns a file format for binary files.
     *
     * @return a new raw-bytes file format
     */
    @Nonnull
    static RawBytesFileFormat bytes() {
        return new RawBytesFileFormat();
    }

    /**
     * Returns a file format for text files where the whole file is a single
     * string item. It uses the UTF-8 character encoding.
     *
     * @return a new whole-file text format
     */
    @Nonnull
    static TextFileFormat text() {
        return new TextFileFormat();
    }

    /**
     * Returns a file format for text files where the whole file is a single
     * string item. This variant allows you to choose the character encoding.
     * <p>
     * <strong>NOTE:</strong> the Hadoop connector only supports UTF-8. This
     * option is supported for local files only.
     *
     * @param charset character encoding of the file
     * @return a new whole-file text format using the given charset
     */
    @Nonnull
    static TextFileFormat text(@Nonnull Charset charset) {
        return new TextFileFormat(charset);
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy