All downloads are free. Search and download functionality uses the official Maven repository.

com.hazelcast.jet.pipeline.file.JsonFileFormat Maven / Gradle / Ivy

There is a newer version: 5.5.0
Show newest version
/*
 * Copyright (c) 2008-2021, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.pipeline.file;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.Objects;

/**
 * {@code FileFormat} for the JSON Lines files. See {@link FileFormat#json}
 * for more details.
 *
 * @param <T> type of items a source using this file format will emit
 * @since Jet 4.4
 */
public class JsonFileFormat<T> implements FileFormat<T> {

    /**
     * Format ID for JSON.
     */
    public static final String FORMAT_JSON = "json";

    private static final long serialVersionUID = 1L;

    // Target type for deserialization; null until withClass() is called.
    private Class<T> clazz;
    // Multi-line records are accepted unless explicitly disabled.
    private boolean multiline = true;

    /**
     * Creates {@link JsonFileFormat}. See {@link FileFormat#json} for more
     * details.
     */
    JsonFileFormat() {
    }

    /**
     * Specifies class that data will be deserialized into.
     * If parameter is {@code null} data is deserialized into
     * {@code Map}.
     *
     * @param clazz type of the object to deserialize JSON into
     * @return this instance, for fluent chaining
     */
    @Nonnull
    public JsonFileFormat<T> withClass(@Nullable Class<T> clazz) {
        this.clazz = clazz;
        return this;
    }

    /**
     * Specifies if the Json parser should accept json records spanning
     * multiple lines.
     * <p>
     * The parser handles JSON records spanning multiple lines by default,
     * but it prevents reading the file in parallel when using the Hadoop
     * based connector, because the file is split at arbitrary positions.
     * <p>
     * Set this to false when reading large JSON files using Hadoop
     * connector. Each line in the file must contain exactly one JSON record.
     * <p>
     * This setting has no effect when Hadoop is not used.
     *
     * @param multiline true, if the JSON parser should accept records
     *                  spanning multiple lines, defaults to true
     * @return this instance, for fluent chaining
     */
    @Nonnull
    public JsonFileFormat<T> multiline(boolean multiline) {
        this.multiline = multiline;
        return this;
    }

    /**
     * Returns the class Jet will deserialize data into.
     * Null if not set.
     *
     * @return the configured target class, or {@code null} if none was set
     */
    @Nullable
    public Class<T> clazz() {
        return clazz;
    }

    /**
     * Returns whether the Json parser should accept json records spanning
     * multiple lines.
     *
     * @return {@code true} if multi-line records are accepted
     */
    public boolean isMultiline() {
        return multiline;
    }

    /**
     * Returns the identifier of this format, {@link #FORMAT_JSON}.
     */
    @Nonnull
    @Override
    public String format() {
        return FORMAT_JSON;
    }

    /**
     * Two instances are equal when they are of the same runtime class and
     * agree on both the target class and the multiline flag.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        // Wildcard cast: the type argument is erased at runtime, so only
        // the field values can be compared.
        JsonFileFormat<?> that = (JsonFileFormat<?>) o;
        return multiline == that.multiline && Objects.equals(clazz, that.clazz);
    }

    /**
     * Hash code derived from the same fields that {@link #equals} compares.
     */
    @Override
    public int hashCode() {
        return Objects.hash(clazz, multiline);
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy