// io.streamthoughts.kafka.connect.filepulse.fs.DefaultFileSystemMonitor Maven / Gradle / Ivy
/*
* Copyright 2019-2021 StreamThoughts.
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.streamthoughts.kafka.connect.filepulse.fs;
import io.streamthoughts.kafka.connect.filepulse.clean.BatchFileCleanupPolicy;
import io.streamthoughts.kafka.connect.filepulse.clean.DelegateBatchFileCleanupPolicy;
import io.streamthoughts.kafka.connect.filepulse.clean.FileCleanupPolicy;
import io.streamthoughts.kafka.connect.filepulse.clean.FileCleanupPolicyResult;
import io.streamthoughts.kafka.connect.filepulse.clean.FileCleanupPolicyResultSet;
import io.streamthoughts.kafka.connect.filepulse.clean.GenericFileCleanupPolicy;
import io.streamthoughts.kafka.connect.filepulse.source.FileObject;
import io.streamthoughts.kafka.connect.filepulse.source.FileObjectKey;
import io.streamthoughts.kafka.connect.filepulse.source.FileObjectMeta;
import io.streamthoughts.kafka.connect.filepulse.source.FileObjectStatus;
import io.streamthoughts.kafka.connect.filepulse.source.SourceOffsetPolicy;
import io.streamthoughts.kafka.connect.filepulse.storage.StateBackingStore;
import io.streamthoughts.kafka.connect.filepulse.storage.StateSnapshot;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.connect.connector.ConnectorContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Predicate;
/**
* A default {@link FileSystemMonitor} that can be used to trigger file
*/
public class DefaultFileSystemMonitor implements FileSystemMonitor {
// TODO: Timeout should be user-configurable
private static final long TASK_CONFIGURATION_DEFAULT_TIMEOUT = 15000L;
private static final Logger LOG = LoggerFactory.getLogger(DefaultFileSystemMonitor.class);
private static final Duration ON_START_READ_END_LOG_TIMEOUT = Duration.ofSeconds(30);
private static final Duration DEFAULT_READ_END_LOG_TIMEOUT = Duration.ofSeconds(5);
private static final int MAX_SCHEDULE_ATTEMPTS = 3;
private final FileSystemListing> fsListing;
private final StateBackingStore store;
// List of files currently being processed by tasks.
private final Map scheduled = new ConcurrentHashMap<>();
// List of files to be scheduled.
private final Map scanned = new ConcurrentHashMap<>();
// List of files that are cleanable.
private final LinkedBlockingQueue cleanable = new LinkedBlockingQueue<>();
private StateSnapshot fileState;
private final SourceOffsetPolicy offsetPolicy;
private final BatchFileCleanupPolicy cleaner;
private final Long allowTasksReconfigurationAfterTimeoutMs;
private Long nextAllowedTasksReconfiguration = -1L;
private final AtomicBoolean taskReconfigurationRequested = new AtomicBoolean(false);
private final AtomicBoolean running = new AtomicBoolean(false);
private final AtomicBoolean changed = new AtomicBoolean(false);
private final AtomicBoolean fileSystemListingEnabled = new AtomicBoolean(true);
private final Predicate cleanablePredicate;
private final TaskFileOrder taskFileOrder;
/**
* Creates a new {@link DefaultFileSystemMonitor} instance.
*
* @param allowTasksReconfigurationAfterTimeoutMs {@code true} to allow tasks reconfiguration after a timeout.
* @param fsListening the {@link FileSystemListing} to be used for listing object files.
* @param cleanPolicy the {@link GenericFileCleanupPolicy} to be used for cleaning object files.
* @param offsetPolicy the {@link SourceOffsetPolicy} to be used computing offset for object fileS.
* @param store the {@link StateBackingStore} used for storing object file cursor.
*/
public DefaultFileSystemMonitor(final Long allowTasksReconfigurationAfterTimeoutMs,
final FileSystemListing> fsListening,
final GenericFileCleanupPolicy cleanPolicy,
final Predicate cleanablePredicate,
final SourceOffsetPolicy offsetPolicy,
final StateBackingStore store,
final TaskFileOrder taskFileOrder) {
Objects.requireNonNull(fsListening, "'fsListening' should not be null");
Objects.requireNonNull(cleanPolicy, "'cleanPolicy' should not be null");
Objects.requireNonNull(offsetPolicy, "'offsetPolicy' should not be null");
Objects.requireNonNull(store, "'store' should not null");
Objects.requireNonNull(cleanablePredicate, "'cleanablePredicate' should not null");
Objects.requireNonNull(taskFileOrder, "'taskFileOrder' should not null");
this.fsListing = fsListening;
this.allowTasksReconfigurationAfterTimeoutMs = allowTasksReconfigurationAfterTimeoutMs;
this.cleanablePredicate = cleanablePredicate;
this.taskFileOrder = taskFileOrder;
if (cleanPolicy instanceof FileCleanupPolicy) {
this.cleaner = new DelegateBatchFileCleanupPolicy((FileCleanupPolicy) cleanPolicy);
} else if (cleanPolicy instanceof BatchFileCleanupPolicy) {
this.cleaner = (BatchFileCleanupPolicy) cleanPolicy;
} else {
throw new IllegalArgumentException("Cleaner must be one of 'FileCleanupPolicy', "
+ "'BatchFileCleanupPolicy'" + " not " + cleanPolicy.getClass().getName());
}
this.cleaner.setStorage(fsListening.storage());
this.offsetPolicy = offsetPolicy;
this.store = store;
LOG.info("Initializing FileSystemMonitor");
// The listener is not call until the store is fully STARTED.
this.store.setUpdateListener(new StateBackingStore.UpdateListener<>() {
@Override
public void onStateRemove(final String key) {
/* ignore */
}
@Override
public void onStateUpdate(final String key, final FileObject object) {
final FileObjectKey objectId = FileObjectKey.of(key);
final FileObjectStatus status = object.status();
LOG.debug("Received status '{} 'for: {}", status, object);
if (cleanablePredicate.test(status)) {
cleanable.add(object.withKey(objectId));
// We should always try to remove the object key from the list
// of last scanned files to avoid scheduling an object file that has just been completed.
if (scanned.remove(objectId) != null) {
changed.set(true);
}
} else if (status.isOneOf(FileObjectStatus.CLEANED, FileObjectStatus.INVALID)) {
final FileObjectMeta removed = scheduled.remove(objectId);
if (removed == null && status.isOneOf(FileObjectStatus.CLEANED)) {
LOG.debug(
"Received cleaned status but no object-file currently scheduled for: '{}'. " +
"This warn should only occurred during recovering step",
key
);
}
}
}
});
if (!this.store.isStarted()) {
this.store.start();
} else {
LOG.warn("The StateBackingStore used to synchronize this connector " +
"with tasks processing files is already started. You can ignore that warning if the connector " +
" is recovering from a crash or resuming after being paused.");
}
readStatesToEnd(ON_START_READ_END_LOG_TIMEOUT);
recoverPreviouslyCompletedSources();
// Trigger a cleanup during initialization to ensure that all cleanable
// object-files are eventually removed before scheduling any tasks.
cleanUpCompletedFiles();
LOG.info("Initialized FileSystemMonitor");
}
private void recoverPreviouslyCompletedSources() {
LOG.info("Recovering completed files from a previous execution");
fileState.states()
.entrySet()
.stream()
.map(it -> it.getValue().withKey(FileObjectKey.of(it.getKey())))
.filter(it -> cleanablePredicate.test(it.status()))
.forEach(cleanable::add);
LOG.info("Finished recovering previously completed files : " + cleanable);
}
private boolean readStatesToEnd(final Duration timeout) {
try {
store.refresh(timeout.toMillis(), TimeUnit.MILLISECONDS);
fileState = store.snapshot();
LOG.debug(
"Finished reading to end of log and updated states snapshot, new states log position: {}",
fileState.offset());
return true;
} catch (TimeoutException e) {
LOG.warn("Failed to reach end of states log quickly enough", e);
return false;
}
}
/**
* {@inheritDoc}
*/
@Override
public void invoke(final ConnectorContext context) {
// It seems to be OK to always run cleanup even if connector is not yet started or is being shut down.
cleanUpCompletedFiles();
if (running.get() && fileSystemListingEnabled.get()) {
if (!taskReconfigurationRequested.get()) {
if (updateFiles()) {
LOG.info("Requesting task reconfiguration");
taskReconfigurationRequested.set(true);
context.requestTaskReconfiguration();
}
} else {
LOG.info("Task reconfiguration requested. Skip filesystem listing.");
}
} else if (fileSystemListingEnabled.get()) {
LOG.info("The connector is not completely started or is being shut down. Skip filesystem listing.");
}
}
/**
* {@inheritDoc}
*/
@Override
public void setFileSystemListingEnabled(final boolean enabled) {
this.fileSystemListingEnabled.set(enabled);
}
private void cleanUpCompletedFiles() {
if (cleanable.isEmpty()) {
LOG.debug("Skipped cleanup. No object file completed.");
return;
}
LOG.info("Cleaning up completed object files '{}'", cleanable.size());
final List cleanable = new ArrayList<>(this.cleanable.size());
this.cleanable.drainTo(cleanable);
FileCleanupPolicyResultSet cleaned = cleaner.apply(cleanable);
cleaned.forEach((fileObject, result) -> {
if (result.equals(FileCleanupPolicyResult.SUCCEED)) {
final String key = fileObject.key().get().original();
store.putAsync(key, fileObject.withStatus(FileObjectStatus.CLEANED));
} else {
LOG.warn("Postpone clean up for object file: '{}'", fileObject.metadata().stringURI());
this.cleanable.add(fileObject);
}
});
LOG.info("Finished cleaning all completed object files");
}
private synchronized boolean updateFiles() {
final boolean noScheduledFiles = scheduled.isEmpty();
if (!noScheduledFiles && allowTasksReconfigurationAfterTimeoutMs == Long.MAX_VALUE) {
LOG.info(
"Scheduled files still being processed: {}. Skip filesystem listing while waiting for tasks completion",
scheduled.size()
);
return false;
}
boolean toEnd = readStatesToEnd(DEFAULT_READ_END_LOG_TIMEOUT);
if (noScheduledFiles && !toEnd) {
LOG.warn("Failed to read state changelog. Skip filesystem listing due to timeout");
return false;
}
LOG.info("Starting to list object files using: {}", fsListing.getClass().getSimpleName());
long started = Time.SYSTEM.milliseconds();
final Collection objects = fsListing.listObjects();
long took = Time.SYSTEM.milliseconds() - started;
LOG.info("Completed object files listing. '{}' object files found in {}ms", objects.size(), took);
final StateSnapshot snapshot = store.snapshot();
final Map toScheduled = FileObjectCandidatesFilter.filter(
offsetPolicy,
fileObjectKey -> {
final FileObject fileObject = snapshot.getForKey(fileObjectKey.original());
if (fileObject == null) return true;
final FileObjectStatus status = fileObject.status();
return !(cleanablePredicate.test(status) || status.isDone());
},
objects
);
// Some scheduled files are still being processed, but new files are detected
if (!noScheduledFiles) {
if (scheduled.keySet().containsAll(toScheduled.keySet())) {
LOG.info(
"Scheduled files still being processed ({}) and no new files found. Skip task reconfiguration",
scheduled.size()
);
return false;
}
if (nextAllowedTasksReconfiguration == -1) {
nextAllowedTasksReconfiguration = Time.SYSTEM.milliseconds() + allowTasksReconfigurationAfterTimeoutMs;
}
long timeout = Math.max(0, nextAllowedTasksReconfiguration - Time.SYSTEM.milliseconds());
if (timeout > 0) {
LOG.info(
"Scheduled files still being processed ({}) but new files detected. " +
"Waiting for {} ms before allowing task reconfiguration",
scheduled.size(),
timeout
);
return false;
}
}
nextAllowedTasksReconfiguration = -1L;
scanned.putAll(toScheduled);
notifyAll();
LOG.info("Finished lookup for new object files: '{}' files can be scheduled for processing", scanned.size());
// Only return true if the status is started, i.e. if this was not the first filesystem scan
// This is used to not trigger task reconfiguration before the connector is fully started.
return !scanned.isEmpty() && running.get();
}
/**
* {@inheritDoc}
*/
@Override
public List listFilesToSchedule(final int maxFilesToSchedule) {
if (!running.get()) {
// This is the first call of partitionFilesAndGet, hence the connector is starting or restarting after
// a configuration update. An empty list must be returned to ensure that all running tasks is stopped
// before scheduling new object files.
LOG.info("Started FileSystemMonitor");
running.set(true);
return Collections.emptyList();
}
try {
long started = Time.SYSTEM.milliseconds();
long now = started;
while (scanned.isEmpty() && now - started < TASK_CONFIGURATION_DEFAULT_TIMEOUT) {
try {
synchronized (this) {
LOG.info("No file to be scheduled, waiting for next filesystem scan execution");
wait(Math.max(0, TASK_CONFIGURATION_DEFAULT_TIMEOUT - (now - started)));
}
} catch (InterruptedException ignore) {
}
now = Time.SYSTEM.milliseconds();
}
List partitions = new LinkedList<>();
// Re-check if there is still object files that may be scheduled.
if (!scanned.isEmpty()) {
int attempts = 0;
do {
changed.set(false);
LOG.info(
"Preparing next scheduling using the object files found during last iteration (attempt={}/{}).",
attempts + 1,
MAX_SCHEDULE_ATTEMPTS
);
// Try to read states to end to make sure we do not attempt
// to schedule an object file that has been cleanup.
final boolean toEnd = readStatesToEnd(DEFAULT_READ_END_LOG_TIMEOUT);
if (!toEnd) {
LOG.warn("Failed to read state changelog while scheduling object files. Timeout.");
}
// Check if all scanned object-files can be schedule.
if (scanned.size() <= maxFilesToSchedule) {
scheduled.putAll(scanned);
} else {
final Iterator> it = scanned.entrySet().iterator();
while (scheduled.size() < maxFilesToSchedule && it.hasNext()) {
final Map.Entry next = it.next();
scheduled.put(next.getKey(), next.getValue());
}
}
partitions = new ArrayList<>(scheduled.values());
attempts++;
if (changed.get()) {
if (attempts == MAX_SCHEDULE_ATTEMPTS) {
LOG.warn(
"Failed to prepare the object files after attempts: {}.",
MAX_SCHEDULE_ATTEMPTS
);
// Make sure to clear the schedule list before returning.
scheduled.clear();
return Collections.emptyList();
} else {
LOG.warn("State updates was received while preparing the object files to be scheduled");
}
}
} while (changed.get() && attempts < MAX_SCHEDULE_ATTEMPTS);
}
if (partitions.isEmpty()) {
LOG.warn(
"Filesystem could not be scanned quickly enough, " +
"or no object file was detected after starting the connector."
);
}
return taskFileOrder.sort(partitions);
} finally {
scanned.clear();
taskReconfigurationRequested.set(false);
}
}
/**
* {@inheritDoc}
*/
@Override
public void close() {
if (running.compareAndSet(true, false)) {
try {
LOG.info("Closing FileSystemMonitor resources");
readStatesToEnd(DEFAULT_READ_END_LOG_TIMEOUT);
cleanUpCompletedFiles();
LOG.info("Closed FileSystemMonitor resources");
} catch (final Exception e) {
LOG.warn("Unexpected error while closing FileSystemMonitor.", e);
}
}
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy