org.apache.spark.sql.sources.v2.reader.streaming.ContinuousReader
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.sources.v2.reader.streaming;

import org.apache.spark.annotation.InterfaceStability;
import org.apache.spark.sql.execution.streaming.BaseStreamingSource;
import org.apache.spark.sql.sources.v2.reader.DataSourceReader;

import java.util.Optional;

/**
 * A mix-in interface for {@link DataSourceReader}. Data source readers can implement this
 * interface to allow reading data in continuous processing mode.
 *
 * Implementations must ensure each reader factory output is a {@link ContinuousDataReader}.
 *
 * Note: This class currently extends {@link BaseStreamingSource} to maintain compatibility with
 * DataSource V1 APIs. This extension will be removed once we get rid of V1 completely.
 */
@InterfaceStability.Evolving
public interface ContinuousReader extends BaseStreamingSource, DataSourceReader {
    /**
     * Merge the per-partition offsets reported by {@link ContinuousDataReader} instances into a
     * single global offset.
     */
    Offset mergeOffsets(PartitionOffset[] offsets);

    /**
     * Deserialize a JSON string into an Offset of the implementation-defined offset type.
     * @throws IllegalArgumentException if the JSON does not encode a valid offset for this reader
     */
    Offset deserializeOffset(String json);

    /**
     * Set the desired start offset for reader factories created from this reader. The scan will
     * start from the first record after the provided offset, or from an implementation-defined
     * inferred starting point if no offset is provided.
     */
    void setStartOffset(Optional<Offset> start);

    /**
     * Return the specified or inferred start offset for this reader.
     *
     * @throws IllegalStateException if setStartOffset has not been called
     */
    Offset getStartOffset();

    /**
     * The execution engine will call this method in every epoch to determine whether new reader
     * factories need to be generated, which may be required if, for example, the underlying
     * source system has added or removed partitions.
     *
     * If true, the query will be shut down and restarted with a new reader.
     */
    default boolean needsReconfiguration() {
        return false;
    }

    /**
     * Informs the source that Spark has completed processing all data for offsets less than or
     * equal to `end` and will only request offsets greater than `end` in the future.
     */
    void commit(Offset end);
}
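
Below is a minimal illustrative sketch of how these methods fit together for a hypothetical single-partition source whose position is a single long value. The names SimpleOffset, SimplePartitionOffset, and SimpleContinuousReader are invented for this example; the DataSourceReader methods (readSchema and reader factory creation) are source-specific and omitted, so the class is left abstract.

import java.util.Optional;

import org.apache.spark.sql.sources.v2.reader.streaming.ContinuousReader;
import org.apache.spark.sql.sources.v2.reader.streaming.Offset;
import org.apache.spark.sql.sources.v2.reader.streaming.PartitionOffset;

// Hypothetical global offset: a single long position, serialized as its
// decimal string so it round-trips through deserializeOffset.
class SimpleOffset extends Offset {
    final long position;
    SimpleOffset(long position) { this.position = position; }
    @Override
    public String json() { return Long.toString(position); }
}

// Hypothetical per-partition offset reported by each ContinuousDataReader.
class SimplePartitionOffset implements PartitionOffset {
    final long position;
    SimplePartitionOffset(long position) { this.position = position; }
}

// Left abstract: readSchema() and reader factory creation from
// DataSourceReader are omitted from this sketch.
abstract class SimpleContinuousReader implements ContinuousReader {
    private Optional<Offset> start = Optional.empty();

    @Override
    public Offset mergeOffsets(PartitionOffset[] offsets) {
        // With a single partition the global offset is just that partition's
        // offset; a real source would combine every entry in the array.
        return new SimpleOffset(((SimplePartitionOffset) offsets[0]).position);
    }

    @Override
    public Offset deserializeOffset(String json) {
        try {
            return new SimpleOffset(Long.parseLong(json));
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Invalid offset: " + json, e);
        }
    }

    @Override
    public void setStartOffset(Optional<Offset> start) {
        // Fall back to an inferred starting point (position 0) when the
        // engine does not provide an offset, as the contract allows.
        this.start = Optional.of(start.orElse(new SimpleOffset(0L)));
    }

    @Override
    public Offset getStartOffset() {
        return start.orElseThrow(() ->
            new IllegalStateException("setStartOffset has not been called"));
    }

    @Override
    public void commit(Offset end) {
        // Nothing to clean up in this sketch; a real source could discard
        // buffered data at or before `end` here.
    }

    @Override
    public void stop() {
        // Required by BaseStreamingSource; no resources to release here.
    }
}

Note that a concrete Offset should round-trip unchanged through its JSON form (json() followed by deserializeOffset), since the engine persists offsets between epochs and reconstructs them from JSON when the query restarts.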