
org.apache.flink.streaming.connectors.cassandra.CassandraSink Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.connectors.cassandra;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.typeutils.PojoTypeInfo;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.scala.typeutils.CaseClassTypeInfo;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSink;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.transformations.LegacySinkTransformation;
import org.apache.flink.streaming.runtime.operators.CheckpointCommitter;
import org.apache.flink.types.Row;

import com.datastax.driver.core.Cluster;

import java.time.Duration;

import scala.Product;

/**
 * This class wraps different Cassandra sink implementations to provide a common interface for all
 * of them.
 *
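 * <p>Example usage (an illustrative sketch; the stream, keyspace, table, query and host below are
 * placeholder values, not part of this class):
 *
 * <pre>{@code
 * DataStream<Tuple2<String, Long>> input = ...;
 *
 * CassandraSink.addSink(input)
 *     .setQuery("INSERT INTO example.wordcount(word, count) VALUES (?, ?);")
 *     .setHost("127.0.0.1")
 *     .build();
 * }</pre>
 *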
 * @param <IN> input type
 */
public class CassandraSink<IN> {
    private final boolean useDataStreamSink;
    private DataStreamSink<IN> sink1;
    private SingleOutputStreamOperator<IN> sink2;

    private CassandraSink(DataStreamSink<IN> sink) {
        sink1 = sink;
        useDataStreamSink = true;
    }

    private CassandraSink(SingleOutputStreamOperator<IN> sink) {
        sink2 = sink;
        useDataStreamSink = false;
    }

    private LegacySinkTransformation<IN> getSinkTransformation() {
        return sink1.getTransformation();
    }

    private Transformation<IN> getTransformation() {
        return sink2.getTransformation();
    }

    /**
     * Sets the name of this sink. This name is used by the visualization and logging during
     * runtime.
     *
     * @return The named sink.
     */
    public CassandraSink<IN> name(String name) {
        if (useDataStreamSink) {
            getSinkTransformation().setName(name);
        } else {
            getTransformation().setName(name);
        }
        return this;
    }

    /**
     * Sets an ID for this operator.
     *
     * <p>The specified ID is used to assign the same operator ID across job submissions (for
     * example when starting a job from a savepoint).
     *
     * <p>Important: this ID needs to be unique per transformation and job. Otherwise, job
     * submission will fail.
     *
     * @param uid The unique user-specified ID of this transformation.
     * @return The operator with the specified ID.
     */
    @PublicEvolving
    public CassandraSink<IN> uid(String uid) {
        if (useDataStreamSink) {
            getSinkTransformation().setUid(uid);
        } else {
            getTransformation().setUid(uid);
        }
        return this;
    }

    /**
     * Sets a user-provided hash for this operator. This will be used AS IS to create the
     * JobVertexID.
     *
     * <p>The user-provided hash is an alternative to the generated hashes, and is considered when
     * identifying an operator through the default hash mechanics fails (e.g. because of changes
     * between Flink versions).
     *
     * <p>Important: this should be used as a workaround or for troubleshooting. The provided hash
     * needs to be unique per transformation and job. Otherwise, job submission will fail.
     * Furthermore, you cannot assign a user-specified hash to intermediate nodes in an operator
     * chain; trying to do so will make the job fail.
     *
     * <p>A use case for this is in migration between Flink versions or changing the jobs in a way
     * that changes the automatically generated hashes. In this case, providing the previous hashes
     * directly through this method (e.g. obtained from old logs) can help to reestablish a lost
     * mapping from states to their target operator.
     *
     * @param uidHash The user-provided hash for this operator. This will become the JobVertexID,
     *     which is shown in the logs and web ui.
     * @return The operator with the user-provided hash.
     */
    @PublicEvolving
    public CassandraSink<IN> setUidHash(String uidHash) {
        if (useDataStreamSink) {
            getSinkTransformation().setUidHash(uidHash);
        } else {
            getTransformation().setUidHash(uidHash);
        }
        return this;
    }

    /**
     * Sets the parallelism for this sink. The degree must be higher than zero.
     *
     * @param parallelism The parallelism for this sink.
     * @return The sink with set parallelism.
     */
    public CassandraSink<IN> setParallelism(int parallelism) {
        if (useDataStreamSink) {
            sink1.setParallelism(parallelism);
        } else {
            sink2.setParallelism(parallelism);
        }
        return this;
    }

    /**
     * Turns off chaining for this operator so thread co-location will not be used as an
     * optimization.
     *
     * <p>Chaining can be turned off for the whole job by {@link
     * org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#disableOperatorChaining()};
     * however, this is not advised for performance considerations.
     *
     * @return The sink with chaining disabled
     */
    public CassandraSink<IN> disableChaining() {
        if (useDataStreamSink) {
            sink1.disableChaining();
        } else {
            sink2.disableChaining();
        }
        return this;
    }

    /**
     * Sets the slot sharing group of this operation. Parallel instances of operations that are in
     * the same slot sharing group will be co-located in the same TaskManager slot, if possible.
     *
     * <p>Operations inherit the slot sharing group of input operations if all input operations are
     * in the same slot sharing group and no slot sharing group was explicitly specified.
     *
     * <p>Initially an operation is in the default slot sharing group. An operation can be put into
     * the default group explicitly by setting the slot sharing group to {@code "default"}.
     *
     * @param slotSharingGroup The slot sharing group name.
     */
    public CassandraSink<IN> slotSharingGroup(String slotSharingGroup) {
        if (useDataStreamSink) {
            getSinkTransformation().setSlotSharingGroup(slotSharingGroup);
        } else {
            getTransformation().setSlotSharingGroup(slotSharingGroup);
        }
        return this;
    }

    /**
     * Writes a DataStream into a Cassandra database.
     *
     * @param input input DataStream
     * @param <IN> input type
     * @return CassandraSinkBuilder, to further configure the sink
     */
    public static <IN> CassandraSinkBuilder<IN> addSink(
            org.apache.flink.streaming.api.scala.DataStream<IN> input) {
        return addSink(input.javaStream());
    }

    /**
     * Writes a DataStream into a Cassandra database.
     *
     * @param input input DataStream
     * @param <IN> input type
     * @return CassandraSinkBuilder, to further configure the sink
     */
    public static <IN> CassandraSinkBuilder<IN> addSink(DataStream<IN> input) {
        TypeInformation<IN> typeInfo = input.getType();
        if (typeInfo instanceof TupleTypeInfo) {
            DataStream<Tuple> tupleInput = (DataStream<Tuple>) input;
            return (CassandraSinkBuilder<IN>)
                    new CassandraTupleSinkBuilder<>(
                            tupleInput,
                            tupleInput.getType(),
                            tupleInput
                                    .getType()
                                    .createSerializer(
                                            tupleInput.getExecutionEnvironment().getConfig()));
        }
        if (typeInfo instanceof RowTypeInfo) {
            DataStream<Row> rowInput = (DataStream<Row>) input;
            return (CassandraSinkBuilder<IN>)
                    new CassandraRowSinkBuilder(
                            rowInput,
                            rowInput.getType(),
                            rowInput.getType()
                                    .createSerializer(
                                            rowInput.getExecutionEnvironment().getConfig()));
        }
        if (typeInfo instanceof PojoTypeInfo) {
            return new CassandraPojoSinkBuilder<>(
                    input,
                    input.getType(),
                    input.getType().createSerializer(input.getExecutionEnvironment().getConfig()));
        }
        if (typeInfo instanceof CaseClassTypeInfo) {
            DataStream<Product> productInput = (DataStream<Product>) input;
            return (CassandraSinkBuilder<IN>)
                    new CassandraScalaProductSinkBuilder<>(
                            productInput,
                            productInput.getType(),
                            productInput
                                    .getType()
                                    .createSerializer(input.getExecutionEnvironment().getConfig()));
        }
        throw new IllegalArgumentException(
                "No support for the type of the given DataStream: " + input.getType());
    }

    /**
     * Builder for a {@link CassandraSink}.
     *
     * @param <IN>
     */
    public abstract static class CassandraSinkBuilder<IN> {
        protected final DataStream<IN> input;
        protected final TypeSerializer<IN> serializer;
        protected final TypeInformation<IN> typeInfo;
        protected final CassandraSinkBaseConfig.Builder configBuilder;
        protected ClusterBuilder builder;
        protected String keyspace;
        protected MapperOptions mapperOptions;
        protected String query;
        protected CheckpointCommitter committer;
        protected boolean isWriteAheadLogEnabled;
        protected CassandraFailureHandler failureHandler;

        public CassandraSinkBuilder(
                DataStream<IN> input, TypeInformation<IN> typeInfo, TypeSerializer<IN> serializer) {
            this.input = input;
            this.typeInfo = typeInfo;
            this.serializer = serializer;
            this.configBuilder = CassandraSinkBaseConfig.newBuilder();
        }

        /**
         * Sets the query that is to be executed for every record.
         *
         * @param query query to use
         * @return this builder
         */
        public CassandraSinkBuilder<IN> setQuery(String query) {
            this.query = query;
            return this;
        }

        /**
         * Sets the keyspace to be used.
         *
         * @param keyspace keyspace to use
         * @return this builder
         */
        public CassandraSinkBuilder<IN> setDefaultKeyspace(String keyspace) {
            this.keyspace = keyspace;
            return this;
        }

        /**
         * Sets the cassandra host to connect to.
         *
         * @param host host to connect to
         * @return this builder
         */
        public CassandraSinkBuilder<IN> setHost(String host) {
            return setHost(host, 9042);
        }

        /**
         * Sets the cassandra host/port to connect to.
         *
         * @param host host to connect to
         * @param port port to connect to
         * @return this builder
         */
        public CassandraSinkBuilder<IN> setHost(final String host, final int port) {
            if (this.builder != null) {
                throw new IllegalArgumentException(
                        "Builder was already set. You must use either setHost() or setClusterBuilder().");
            }
            this.builder =
                    new ClusterBuilder() {
                        @Override
                        protected Cluster buildCluster(Cluster.Builder builder) {
                            return builder.addContactPoint(host).withPort(port).build();
                        }
                    };
            return this;
        }

        /**
         * Sets the ClusterBuilder for this sink. A ClusterBuilder is used to configure the
         * connection to cassandra.
         *
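         * <p>For example, a custom {@link ClusterBuilder} can add connection options such as
         * credentials (an illustrative sketch; {@code sinkBuilder}, the contact point and the
         * credentials are placeholder values, and {@code withCredentials} is assumed to be
         * available on the DataStax driver's {@code Cluster.Builder}):
         *
         * <pre>{@code
         * sinkBuilder.setClusterBuilder(
         *         new ClusterBuilder() {
         *             @Override
         *             protected Cluster buildCluster(Cluster.Builder builder) {
         *                 return builder
         *                         .addContactPoint("127.0.0.1")
         *                         .withCredentials("cassandra", "cassandra")
         *                         .build();
         *             }
         *         });
         * }</pre>
         *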
         * @param builder ClusterBuilder to configure the connection to cassandra
         * @return this builder
         */
        public CassandraSinkBuilder<IN> setClusterBuilder(ClusterBuilder builder) {
            if (this.builder != null) {
                throw new IllegalArgumentException(
                        "Builder was already set. You must use either setHost() or setClusterBuilder().");
            }
            this.builder = builder;
            return this;
        }

        /**
         * Enables the write-ahead log, which allows exactly-once processing for non-deterministic
         * algorithms that use idempotent updates.
         *
         * @return this builder
         */
        public CassandraSinkBuilder<IN> enableWriteAheadLog() {
            this.isWriteAheadLogEnabled = true;
            return this;
        }

        /**
         * Enables the write-ahead log, which allows exactly-once processing for non-deterministic
         * algorithms that use idempotent updates.
         *
         * @param committer CheckpointCommitter, that stores information about completed
         *     checkpoints in an external resource. By default this information is stored within a
         *     separate table within Cassandra.
         * @return this builder
         */
        public CassandraSinkBuilder<IN> enableWriteAheadLog(CheckpointCommitter committer) {
            this.isWriteAheadLogEnabled = true;
            this.committer = committer;
            return this;
        }

        /**
         * Sets the mapper options for this sink. The mapper options are used to configure the
         * DataStax {@link com.datastax.driver.mapping.Mapper} when writing POJOs.
         *
         * <p>This call has no effect if the input {@link DataStream} for this sink does not
         * contain POJOs.
         *
         * @param options MapperOptions, that return an array of options that are used to configure
         *     the DataStax mapper.
         * @return this builder
         */
        public CassandraSinkBuilder<IN> setMapperOptions(MapperOptions options) {
            this.mapperOptions = options;
            return this;
        }

        /**
         * Sets the failure handler for this sink. The failure handler is used to provide custom
         * error handling.
         *
         * @param failureHandler CassandraFailureHandler, that handles any Throwable error.
         * @return this builder
         */
        public CassandraSinkBuilder<IN> setFailureHandler(CassandraFailureHandler failureHandler) {
            this.failureHandler = failureHandler;
            return this;
        }

        /**
         * Sets the maximum allowed number of concurrent requests for this sink.
         *
         * <p>This call has no effect if {@link CassandraSinkBuilder#enableWriteAheadLog()} is
         * called.
         *
         * @param maxConcurrentRequests maximum number of concurrent requests allowed
         * @param timeout timeout duration when acquiring a permit to execute
         * @return this builder
         */
        public CassandraSinkBuilder<IN> setMaxConcurrentRequests(
                int maxConcurrentRequests, Duration timeout) {
            this.configBuilder.setMaxConcurrentRequests(maxConcurrentRequests);
            this.configBuilder.setMaxConcurrentRequestsTimeout(timeout);
            return this;
        }

        /**
         * Sets the maximum allowed number of concurrent requests for this sink.
         *
         * <p>This call has no effect if {@link CassandraSinkBuilder#enableWriteAheadLog()} is
         * called.
         *
         * @param maxConcurrentRequests maximum number of concurrent requests allowed
         * @return this builder
         */
        public CassandraSinkBuilder<IN> setMaxConcurrentRequests(int maxConcurrentRequests) {
            this.configBuilder.setMaxConcurrentRequests(maxConcurrentRequests);
            return this;
        }

        /**
         * Enables ignoring null values; treats null values as unset and avoids writing null fields
         * and creating tombstones.
         *
         * <p>This call has no effect if {@link CassandraSinkBuilder#enableWriteAheadLog()} is
         * called.
         *
         * @return this builder
         */
        public CassandraSinkBuilder<IN> enableIgnoreNullFields() {
            this.configBuilder.setIgnoreNullFields(true);
            return this;
        }

        /**
         * Finalizes the configuration of this sink.
         *
         * @return finalized sink
         * @throws Exception
         */
        public CassandraSink<IN> build() throws Exception {
            sanityCheck();
            if (failureHandler == null) {
                failureHandler = new NoOpCassandraFailureHandler();
            }
            return isWriteAheadLogEnabled ? createWriteAheadSink() : createSink();
        }

        protected abstract CassandraSink<IN> createSink() throws Exception;

        protected abstract CassandraSink<IN> createWriteAheadSink() throws Exception;

        protected void sanityCheck() {
            if (builder == null) {
                throw new IllegalArgumentException(
                        "Cassandra host information must be supplied using either setHost() or setClusterBuilder().");
            }
        }
    }

    /**
     * Builder for a {@link CassandraTupleSink}.
     *
     * @param <IN>
     */
    public static class CassandraTupleSinkBuilder<IN extends Tuple> extends CassandraSinkBuilder<IN> {
        public CassandraTupleSinkBuilder(
                DataStream<IN> input, TypeInformation<IN> typeInfo, TypeSerializer<IN> serializer) {
            super(input, typeInfo, serializer);
        }

        @Override
        protected void sanityCheck() {
            super.sanityCheck();
            if (query == null || query.length() == 0) {
                throw new IllegalArgumentException("Query must not be null or empty.");
            }
            if (keyspace != null) {
                throw new IllegalArgumentException(
                        "Specifying a default keyspace is only allowed when using a Pojo-Stream as input.");
            }
        }

        @Override
        public CassandraSink<IN> createSink() throws Exception {
            final CassandraTupleSink<IN> sink =
                    new CassandraTupleSink<>(query, builder, configBuilder.build(), failureHandler);
            return new CassandraSink<>(input.addSink(sink).name("Cassandra Sink"));
        }

        @Override
        protected CassandraSink<IN> createWriteAheadSink() throws Exception {
            return committer == null
                    ? new CassandraSink<>(
                            input.transform(
                                    "Cassandra Sink",
                                    null,
                                    new CassandraTupleWriteAheadSink<>(
                                            query,
                                            serializer,
                                            builder,
                                            new CassandraCommitter(builder))))
                    : new CassandraSink<>(
                            input.transform(
                                    "Cassandra Sink",
                                    null,
                                    new CassandraTupleWriteAheadSink<>(
                                            query, serializer, builder, committer)));
        }
    }

    /** Builder for a {@link CassandraRowSink}. */
    public static class CassandraRowSinkBuilder extends CassandraSinkBuilder<Row> {
        public CassandraRowSinkBuilder(
                DataStream<Row> input, TypeInformation<Row> typeInfo, TypeSerializer<Row> serializer) {
            super(input, typeInfo, serializer);
        }

        @Override
        protected void sanityCheck() {
            super.sanityCheck();
            if (query == null || query.length() == 0) {
                throw new IllegalArgumentException("Query must not be null or empty.");
            }
            if (keyspace != null) {
                throw new IllegalArgumentException(
                        "Specifying a default keyspace is only allowed when using a Pojo-Stream as input.");
            }
        }

        @Override
        protected CassandraSink<Row> createSink() throws Exception {
            final CassandraRowSink sink =
                    new CassandraRowSink(
                            typeInfo.getArity(),
                            query,
                            builder,
                            configBuilder.build(),
                            failureHandler);
            return new CassandraSink<>(input.addSink(sink).name("Cassandra Sink"));
        }

        @Override
        protected CassandraSink<Row> createWriteAheadSink() throws Exception {
            return committer == null
                    ? new CassandraSink<>(
                            input.transform(
                                    "Cassandra Sink",
                                    null,
                                    new CassandraRowWriteAheadSink(
                                            query,
                                            serializer,
                                            builder,
                                            new CassandraCommitter(builder))))
                    : new CassandraSink<>(
                            input.transform(
                                    "Cassandra Sink",
                                    null,
                                    new CassandraRowWriteAheadSink(
                                            query, serializer, builder, committer)));
        }
    }

    /**
     * Builder for a {@link CassandraPojoSink}.
     *
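     * <p>Illustrative sketch of writing a POJO stream (the {@code WordCount} class, its DataStax
     * mapping annotations and the mapper option shown are assumptions for the example, not part of
     * this file):
     *
     * <pre>{@code
     * @Table(keyspace = "example", name = "wordcount")
     * public class WordCount {
     *     @Column(name = "word")
     *     private String word;
     *
     *     @Column(name = "count")
     *     private long count;
     *
     *     // getters and setters omitted
     * }
     *
     * DataStream<WordCount> pojos = ...;
     * CassandraSink.addSink(pojos)
     *     .setHost("127.0.0.1")
     *     .setMapperOptions(() -> new Mapper.Option[] {Mapper.Option.saveNullFields(true)})
     *     .build();
     * }</pre>
     *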
     * @param <IN>
     */
    public static class CassandraPojoSinkBuilder<IN> extends CassandraSinkBuilder<IN> {
        public CassandraPojoSinkBuilder(
                DataStream<IN> input, TypeInformation<IN> typeInfo, TypeSerializer<IN> serializer) {
            super(input, typeInfo, serializer);
        }

        @Override
        protected void sanityCheck() {
            super.sanityCheck();
            if (query != null) {
                throw new IllegalArgumentException(
                        "Specifying a query is not allowed when using a Pojo-Stream as input.");
            }
        }

        @Override
        public CassandraSink<IN> createSink() throws Exception {
            final CassandraPojoSink<IN> sink =
                    new CassandraPojoSink<>(
                            typeInfo.getTypeClass(),
                            builder,
                            mapperOptions,
                            keyspace,
                            configBuilder.build(),
                            failureHandler);
            return new CassandraSink<>(input.addSink(sink).name("Cassandra Sink"));
        }

        @Override
        protected CassandraSink<IN> createWriteAheadSink() throws Exception {
            throw new IllegalArgumentException(
                    "Exactly-once guarantees can only be provided for tuple types.");
        }
    }

    /**
     * Builder for a {@link CassandraScalaProductSink}.
     *
     * @param <IN>
     */
    public static class CassandraScalaProductSinkBuilder<IN extends Product>
            extends CassandraSinkBuilder<IN> {

        public CassandraScalaProductSinkBuilder(
                DataStream<IN> input, TypeInformation<IN> typeInfo, TypeSerializer<IN> serializer) {
            super(input, typeInfo, serializer);
        }

        @Override
        protected void sanityCheck() {
            super.sanityCheck();
            if (query == null || query.length() == 0) {
                throw new IllegalArgumentException("Query must not be null or empty.");
            }
            if (keyspace != null) {
                throw new IllegalArgumentException(
                        "Specifying a default keyspace is only allowed when using a Pojo-Stream as input.");
            }
        }

        @Override
        public CassandraSink<IN> createSink() throws Exception {
            final CassandraScalaProductSink<IN> sink =
                    new CassandraScalaProductSink<>(
                            query, builder, configBuilder.build(), failureHandler);
            return new CassandraSink<>(input.addSink(sink).name("Cassandra Sink"));
        }

        @Override
        protected CassandraSink<IN> createWriteAheadSink() throws Exception {
            throw new IllegalArgumentException(
                    "Exactly-once guarantees can only be provided for flink tuple types.");
        }
    }
}




