io.debezium.connector.cassandra.Cassandra4CommitLogProcessor

/*
 * Copyright Debezium Authors.
 *
 * Licensed under the Apache Software License version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0
 */
package io.debezium.connector.cassandra;

import static java.nio.file.StandardWatchEventKinds.ENTRY_CREATE;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.WatchEvent;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.commitlog.CommitLogPosition;
import org.apache.cassandra.db.commitlog.CommitLogReader;
import org.apache.commons.math3.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.debezium.DebeziumException;
import io.debezium.connector.base.ChangeEventQueue;
import io.debezium.connector.cassandra.exceptions.CassandraConnectorTaskException;

/**
 * The {@link Cassandra4CommitLogProcessor} processes commit logs found in the CDC directory.
 * For each commit log, it processes the entire log as specified in the
 * {@link CassandraConnectorConfig}, converts each row change in the commit log into a
 * {@link Record}, and then emits the records via a {@link KafkaRecordEmitter}.
 */
public class Cassandra4CommitLogProcessor extends AbstractProcessor {
    private static final Logger LOGGER = LoggerFactory.getLogger(Cassandra4CommitLogProcessor.class);

    private static final String NAME = "Commit Log Processor";

    private final CassandraConnectorContext context;
    private final File cdcDir;
    private AbstractDirectoryWatcher watcher;
    private final List<ChangeEventQueue<Event>> queues;
    private final CommitLogProcessorMetrics metrics = new CommitLogProcessorMetrics();
    private boolean initial = true;
    private final boolean errorCommitLogReprocessEnabled;
    private final CommitLogTransfer commitLogTransfer;
    private final ExecutorService executorService;

    public Cassandra4CommitLogProcessor(CassandraConnectorContext context) {
        super(NAME, Duration.ZERO);
        this.context = context;
        executorService = Executors.newSingleThreadExecutor();
        queues = this.context.getQueues();
        commitLogTransfer = this.context.getCassandraConnectorConfig().getCommitLogTransfer();
        errorCommitLogReprocessEnabled = this.context.getCassandraConnectorConfig().errorCommitLogReprocessEnabled();
        cdcDir = new File(DatabaseDescriptor.getCDCLogLocation());
    }

    @Override
    public void initialize() {
        metrics.registerMetrics();
    }

    @Override
    public void destroy() {
        metrics.unregisterMetrics();
    }

    @Override
    public void stop() {
        try {
            executorService.shutdown();
            for (final Pair<CommitLogProcessingCallable, Future<ProcessingResult>> submittedProcessing : submittedProcessings) {
                try {
                    submittedProcessing.getFirst().complete();
                    submittedProcessing.getSecond().get();
                }
                catch (final Exception ex) {
                    LOGGER.warn("Waiting for submitted task to finish has failed.");
                }
            }
        }
        catch (final Exception ex) {
            throw new RuntimeException("Unable to close executor service in CommitLogProcessor in a timely manner");
        }
        super.stop();
    }

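    // Tracks in-flight processing tasks together with their futures so that stop()
    // can ask each task to complete prematurely and then wait for it to finish.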
    static final Set<Pair<CommitLogProcessingCallable, Future<ProcessingResult>>> submittedProcessings = ConcurrentHashMap.newKeySet();

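    // Wraps the given _cdc.idx file in a processing task and hands it to the
    // single-threaded executor; tasks therefore run one at a time, in submission order.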
    private void submit(Path index) {
        CommitLogProcessingCallable callable = new CommitLogProcessingCallable(new LogicalCommitLog(index.toFile()),
                queues,
                metrics,
                Cassandra4CommitLogProcessor.this.context);

        Future<ProcessingResult> future = executorService.submit(callable);
        submittedProcessings.add(new Pair<>(callable, future));
    }

    @Override
    public boolean isRunning() {
        return super.isRunning() && !executorService.isShutdown() && !executorService.isTerminated();
    }

    @Override
    public void process() throws IOException, InterruptedException {
        LOGGER.debug("Processing commitLogFiles while initial is {}", initial);

        if (watcher == null) {
            watcher = new AbstractDirectoryWatcher(cdcDir.toPath(),
                    this.context.getCassandraConnectorConfig().cdcDirPollInterval(),
                    Collections.singleton(ENTRY_CREATE)) {
                @Override
                void handleEvent(WatchEvent<?> event, Path path) {
                    if (isRunning()) {
                        // React only to _cdc.idx files: submit a task which waits until the index
                        // file is marked COMPLETED and then reads the whole commit log at once.
                        // If another commit log is created while the just-submitted one is still
                        // being processed, the single-threaded executor service blocks it until
                        // the previous log has been processed, achieving sequential log processing.
                        if (path.getFileName().toString().endsWith("_cdc.idx")) {
                            submit(path);
                        }
                    }
                }
            };
        }

        if (initial) {
            LOGGER.info("Reading existing commit logs in {}", cdcDir);
            File[] indexes = CommitLogUtil.getIndexes(cdcDir);
            Arrays.sort(indexes, CommitLogUtil::compareCommitLogsIndexes);
            for (File index : indexes) {
                if (isRunning()) {
                    submit(index.toPath());
                }
            }
            // If commit.log.error.reprocessing.enabled is set to true, download all error commitLog files upon starting for re-processing.
            if (errorCommitLogReprocessEnabled) {
                LOGGER.info("CommitLog Error Processing is enabled. Attempting to get all error commitLog files for re-processing.");
                // this places the error commit logs back into the cdc dir so the watcher detects them and they are re-processed
                commitLogTransfer.getErrorCommitLogFiles();
            }
            initial = false;
        }
        watcher.poll();
    }

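    /**
     * Outcome of processing a single logical commit log, carrying the log itself,
     * a {@link Result} status and, for errors, the causing exception.
     */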
    public static class ProcessingResult {
        public enum Result {
            OK,
            ERROR,
            DOES_NOT_EXIST,
            COMPLETED_PREMATURELY;
        }

        public final LogicalCommitLog commitLog;
        public final Result result;
        public final Exception ex;

        public ProcessingResult(LogicalCommitLog commitLog) {
            this(commitLog, Result.OK, null);
        }

        public ProcessingResult(LogicalCommitLog commitLog, Result result) {
            this(commitLog, result, null);
        }

        public ProcessingResult(LogicalCommitLog commitLog, Result result, Exception ex) {
            this.commitLog = commitLog;
            this.result = result;
            this.ex = ex;
        }

        @Override
        public String toString() {
            return "ProcessingResult{" +
                    "commitLog=" + commitLog +
                    ", result=" + result +
                    ", ex=" + (ex != null ? ex.getMessage() : "none") + '}';
        }
    }

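    /**
     * Processes one logical commit log: polls its _cdc.idx file until the segment is
     * marked COMPLETED, then replays the whole segment through the commit log read handler.
     */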
    public static class CommitLogProcessingCallable implements Callable<ProcessingResult> {

        private static final Logger LOGGER = LoggerFactory.getLogger(CommitLogProcessingCallable.class);

        private final LogicalCommitLog commitLog;
        private CommitLogReader commitLogReader;
        private final List<ChangeEventQueue<Event>> queues;
        private final CommitLogProcessorMetrics metrics;
        private final Cassandra4CommitLogReadHandlerImpl commitLogReadHandler;

        private final CommitLogTransfer commitLogTransfer;
        private final Set<String> erroneousCommitLogs;
        private boolean completePrematurely = false;

        public CommitLogProcessingCallable(final LogicalCommitLog commitLog,
                                           final List<ChangeEventQueue<Event>> queues,
                                           final CommitLogProcessorMetrics metrics,
                                           CassandraConnectorContext context) {
            this.commitLog = commitLog;
            this.commitLogReader = new CommitLogReader();
            this.queues = queues;
            this.metrics = metrics;

            this.commitLogReadHandler = new Cassandra4CommitLogReadHandlerImpl(
                    context.getSchemaHolder(),
                    context.getQueues(),
                    context.getOffsetWriter(),
                    new RecordMaker(context.getCassandraConnectorConfig().tombstonesOnDelete(),
                            new Filters(context.getCassandraConnectorConfig().fieldExcludeList()),
                            context.getCassandraConnectorConfig()),
                    metrics);

            commitLogTransfer = context.getCassandraConnectorConfig().getCommitLogTransfer();
            erroneousCommitLogs = context.getErroneousCommitLogs();
        }

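        // Cooperative cancellation: flips a flag that the polling loop in callInternal()
        // checks, making the task return COMPLETED_PREMATURELY instead of waiting further
        // on the index file.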
        public void complete() {
            completePrematurely = true;
        }

        private ProcessingResult callInternal() {
            if (!commitLog.exists()) {
                LOGGER.warn("Commit log " + commitLog + " does not exist!");
                return new ProcessingResult(commitLog, ProcessingResult.Result.DOES_NOT_EXIST);
            }

            LOGGER.info("Processing commit log {}", commitLog.log.toString());

            CommitLogPosition position = new CommitLogPosition(commitLog.commitLogSegmentId, 0);
            metrics.setCommitLogFilename(commitLog.log.toString());
            metrics.setCommitLogPosition(position.position);

            try {
                parseIndexFile();

                while (!commitLog.completed) {
                    if (completePrematurely) {
                        LOGGER.info("{} completed prematurely", commitLog.toString());
                        return new ProcessingResult(commitLog, ProcessingResult.Result.COMPLETED_PREMATURELY);
                    }
                    // TODO make this configurable maybe
                    Thread.sleep(10000);
                    parseIndexFile();
                }

                LOGGER.info(commitLog.toString());
            }
            catch (final Exception ex) {
                LOGGER.error("Processing of {} errored out", commitLog.toString());
                return new ProcessingResult(commitLog, ProcessingResult.Result.ERROR, ex);
            }

            ProcessingResult result;

            // process commit log from start to the end as it is completed at this point
            try {
                processCommitLog(commitLog, new CommitLogPosition(commitLog.commitLogSegmentId, 0));
                result = new ProcessingResult(commitLog);
            }
            catch (final Exception ex) {
                result = new ProcessingResult(commitLog, ProcessingResult.Result.ERROR, ex);
            }

            LOGGER.info("{}", result);

            return result;
        }

        @Override
        public ProcessingResult call() {
            ProcessingResult result = callInternal();
            Cassandra4CommitLogProcessor.submittedProcessings.remove(this);
            return result;
        }

        private void processCommitLog(LogicalCommitLog logicalCommitLog, CommitLogPosition position) {
            try {
                try {
                    commitLogReader.readCommitLogSegment(commitLogReadHandler, logicalCommitLog.log, position, false);
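                    // route the EOF event to the same queue partition that received this
                    // segment's change events, using a stable hash of the log file name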
                    queues.get(Math.abs(logicalCommitLog.log.getName().hashCode() % queues.size())).enqueue(new EOFEvent(logicalCommitLog.log));
                }
                catch (Exception e) {
                    if (commitLogTransfer.getClass().getName().equals(CassandraConnectorConfig.DEFAULT_COMMIT_LOG_TRANSFER_CLASS)) {
                        throw new DebeziumException(String.format("Error occurred while processing commit log %s",
                                logicalCommitLog.log), e);
                    }
                    Cassandra4CommitLogProcessor.LOGGER.error("Error occurred while processing commit log " + logicalCommitLog.log, e);
                    queues.get(Math.abs(logicalCommitLog.log.getName().hashCode() % queues.size())).enqueue(new EOFEvent(logicalCommitLog.log));
                    erroneousCommitLogs.add(logicalCommitLog.log.getName());
                }
            }
            catch (InterruptedException e) {
                throw new CassandraConnectorTaskException(String.format(
                        "Enqueuing has been interrupted while enqueuing EOF Event for file %s", logicalCommitLog.log.getName()), e);
            }
        }

        private void parseIndexFile() throws DebeziumException {
            try {
                commitLog.parseCommitLogIndex();
            }
            catch (final DebeziumException ex) {
                erroneousCommitLogs.add(commitLog.log.getName());
                throw ex;
            }
        }
    }

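    /**
     * Pairs a commit log segment with its _cdc.idx file and tracks how much of the
     * segment has been flushed for CDC and whether the segment is complete.
     */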
    public static class LogicalCommitLog {
        CommitLogPosition commitLogPosition;
        File log;
        File index;
        long commitLogSegmentId;
        int offsetOfEndOfLastWrittenCDCMutation = 0;
        boolean completed = false;

        public LogicalCommitLog(File index) {
            this.index = index;
            this.log = parseCommitLogName(index);
            this.commitLogSegmentId = parseSegmentId(log);
            this.commitLogPosition = new CommitLogPosition(commitLogSegmentId, 0);
        }

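        // Derives the commit log file from its index file by swapping the suffix,
        // e.g. CommitLog-7-1646094930000_cdc.idx -> CommitLog-7-1646094930000.log
        // (the segment id shown is illustrative).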
        public static File parseCommitLogName(File index) {
            String newFileName = index.toPath().getFileName().toString().replace("_cdc.idx", ".log");
            return index.toPath().getParent().resolve(newFileName).toFile();
        }

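        // Extracts the numeric segment id from the log file name,
        // e.g. CommitLog-7-1646094930000.log -> 1646094930000 (illustrative value).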
        public static long parseSegmentId(File logName) {
            return Long.parseLong(logName.getName().split("-")[2].split("\\.")[0]);
        }

        public boolean exists() {
            return log.exists();
        }

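        // As the parsing below implies, the _cdc.idx file contains the offset of the
        // end of the last written CDC mutation on its first line and, once Cassandra
        // has finished the segment, the literal "COMPLETED" on its second line, e.g.:
        //
        //   32768
        //   COMPLETED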
        public void parseCommitLogIndex() throws DebeziumException {
            if (!index.exists()) {
                return;
            }
            try {
                List<String> lines = Files.readAllLines(index.toPath(), StandardCharsets.UTF_8);
                if (lines.isEmpty()) {
                    return;
                }

                offsetOfEndOfLastWrittenCDCMutation = Integer.parseInt(lines.get(0));

                if (lines.size() == 2) {
                    completed = "COMPLETED".equals(lines.get(1));
                }
            }
            catch (final Exception ex) {
                throw new DebeziumException(String.format("Unable to parse commit log index file %s", index.toPath()),
                        ex);
            }
        }

        @Override
        public String toString() {
            return "LogicalCommitLog{" +
                    "commitLogPosition=" + commitLogPosition +
                    ", synced=" + offsetOfEndOfLastWrittenCDCMutation +
                    ", completed=" + completed +
                    ", log=" + log +
                    ", index=" + index +
                    ", commitLogSegmentId=" + commitLogSegmentId +
                    '}';
        }
    }
}
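
// A minimal lifecycle sketch (hypothetical driver code; assumes an already-built
// CassandraConnectorContext named `context`, and a hand-rolled poll loop standing in
// for whatever scheduling AbstractProcessor actually provides):
//
//   Cassandra4CommitLogProcessor processor = new Cassandra4CommitLogProcessor(context);
//   processor.initialize();
//   try {
//       while (processor.isRunning()) {
//           processor.process(); // polls the CDC dir and submits new _cdc.idx files
//       }
//   }
//   finally {
//       processor.stop();
//       processor.destroy();
//   }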