All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.streaming.connectors.cassandra.CassandraCommitter Maven / Gradle / Ivy

There is a newer version: 3.2.0-1.19
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.connectors.cassandra;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.java.ClosureCleaner;
import org.apache.flink.streaming.runtime.operators.CheckpointCommitter;

import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * CheckpointCommitter that saves information about completed checkpoints within a separate table in
 * a cassandra database.
 *
 * 

Entries are in the form |operator_id | subtask_id | last_completed_checkpoint| */ public class CassandraCommitter extends CheckpointCommitter { private static final long serialVersionUID = 1L; private final ClusterBuilder builder; private transient Cluster cluster; private transient Session session; private String keySpace = "flink_auxiliary"; private String table = "checkpoints_"; /** * A cache of the last committed checkpoint ids per subtask index. This is used to avoid * redundant round-trips to Cassandra (see {@link #isCheckpointCommitted(int, long)}. */ private final Map lastCommittedCheckpoints = new HashMap<>(); public CassandraCommitter(ClusterBuilder builder) { this.builder = builder; ClosureCleaner.clean(builder, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); } public CassandraCommitter(ClusterBuilder builder, String keySpace) { this(builder); this.keySpace = keySpace; } /** Internally used to set the job ID after instantiation. */ public void setJobId(String id) throws Exception { super.setJobId(id); table += id; } /** * Generates the necessary tables to store information. * * @throws Exception */ @Override public void createResource() throws Exception { cluster = builder.getCluster(); session = cluster.connect(); session.execute( String.format( "CREATE KEYSPACE IF NOT EXISTS %s with replication={'class':'SimpleStrategy', 'replication_factor':1};", keySpace)); session.execute( String.format( "CREATE TABLE IF NOT EXISTS %s.%s (sink_id text, sub_id int, checkpoint_id bigint, PRIMARY KEY (sink_id, sub_id));", keySpace, table)); try { session.close(); } catch (Exception e) { LOG.error("Error while closing session.", e); } try { cluster.close(); } catch (Exception e) { LOG.error("Error while closing cluster.", e); } } @Override public void open() throws Exception { if (builder == null) { throw new RuntimeException("No ClusterBuilder was set."); } cluster = builder.getCluster(); session = cluster.connect(); } @Override public void close() throws Exception { this.lastCommittedCheckpoints.clear(); try { session.close(); } catch (Exception e) { LOG.error("Error while closing session.", e); } try { cluster.close(); } catch (Exception e) { LOG.error("Error while closing cluster.", e); } } @Override public void commitCheckpoint(int subtaskIdx, long checkpointId) { String statement = String.format( "UPDATE %s.%s set checkpoint_id=%d where sink_id='%s' and sub_id=%d;", keySpace, table, checkpointId, operatorId, subtaskIdx); session.execute(statement); lastCommittedCheckpoints.put(subtaskIdx, checkpointId); } @Override public boolean isCheckpointCommitted(int subtaskIdx, long checkpointId) { // Pending checkpointed buffers are committed in ascending order of their // checkpoint id. This way we can tell if a checkpointed buffer was committed // just by asking the third-party storage system for the last checkpoint id // committed by the specified subtask. Long lastCommittedCheckpoint = lastCommittedCheckpoints.get(subtaskIdx); if (lastCommittedCheckpoint == null) { String statement = String.format( "SELECT checkpoint_id FROM %s.%s where sink_id='%s' and sub_id=%d;", keySpace, table, operatorId, subtaskIdx); Iterator resultIt = session.execute(statement).iterator(); if (resultIt.hasNext()) { lastCommittedCheckpoint = resultIt.next().getLong("checkpoint_id"); lastCommittedCheckpoints.put(subtaskIdx, lastCommittedCheckpoint); } } return lastCommittedCheckpoint != null && checkpointId <= lastCommittedCheckpoint; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy