org.springframework.integration.file.FileReadingMessageSource Maven / Gradle / Ivy
Show all versions of spring-integration-file Show documentation
/*
* Copyright 2002-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.integration.file;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.StandardWatchEventKinds;
import java.nio.file.WatchEvent;
import java.nio.file.WatchKey;
import java.nio.file.WatchService;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Predicate;
import org.springframework.context.Lifecycle;
import org.springframework.integration.endpoint.AbstractMessageSource;
import org.springframework.integration.file.filters.DiscardAwareFileListFilter;
import org.springframework.integration.file.filters.FileListFilter;
import org.springframework.integration.file.filters.ResettableFileListFilter;
import org.springframework.integration.support.AbstractIntegrationMessageBuilder;
import org.springframework.integration.support.management.ManageableLifecycle;
import org.springframework.lang.Nullable;
import org.springframework.messaging.Message;
import org.springframework.util.Assert;
/**
* {@link org.springframework.integration.core.MessageSource} that creates messages
* from a file system directory.
* To prevent messages for certain files, you may supply a {@link FileListFilter}.
* By default, when configuring with XML or the DSL,
* an {@link org.springframework.integration.file.filters.AcceptOnceFileListFilter} is used.
* It ensures files are picked up only once from the directory.
*
* A common problem with reading files is that a file may be detected before it
* is ready. The default
* {@link org.springframework.integration.file.filters.AcceptOnceFileListFilter}
* does not prevent this. In most cases, this can be prevented if the
* file-writing process renames each file as soon as it is ready for reading. A
* pattern-matching filter that accepts only files that are ready (e.g. based on
* a known suffix), composed with the default
* {@link org.springframework.integration.file.filters.AcceptOnceFileListFilter}
* would allow for this.
*
* If an external {@link DirectoryScanner} is used, then the {@link FileLocker}
* and {@link FileListFilter} objects should be set on the external
* {@link DirectoryScanner}, not the instance of FileReadingMessageSource. An
* {@link IllegalStateException} will result otherwise.
*
* A {@link Comparator} can be used to ensure internal ordering of the Files in
* a {@link PriorityBlockingQueue}. This does not provide the same guarantees as
* a {@link org.springframework.integration.aggregator.ResequencingMessageGroupProcessor},
* but in cases where writing files
* and failure downstream are rare it might be sufficient.
*
* FileReadingMessageSource is fully thread-safe under concurrent
* {@code receive()} invocations and message delivery callbacks.
*
* @author Iwein Fuld
* @author Mark Fisher
* @author Oleg Zhurakousky
* @author Gary Russell
* @author Artem Bilan
* @author Steven Pearce
* @author Patryk Ziobron
*/
public class FileReadingMessageSource extends AbstractMessageSource implements ManageableLifecycle {
private static final int DEFAULT_INTERNAL_QUEUE_CAPACITY = 5;
private final AtomicBoolean running = new AtomicBoolean();
/*
* {@link PriorityBlockingQueue#iterator()} throws
* {@link java.util.ConcurrentModificationException} in Java 5.
* There is no locking around the queue, so there is also no iteration.
*/
private final Queue toBeReceived;
private File directory;
private DirectoryScanner scanner = new DefaultDirectoryScanner();
private boolean scannerExplicitlySet;
private boolean autoCreateDirectory = true;
private boolean scanEachPoll = false;
private FileListFilter filter;
private FileLocker locker;
private boolean useWatchService;
private WatchEventType[] watchEvents = {WatchEventType.CREATE};
private int watchMaxDepth = Integer.MAX_VALUE;
private Predicate watchDirPredicate = path -> true;
/**
* Create a FileReadingMessageSource with a naturally ordered queue of unbounded capacity.
*/
public FileReadingMessageSource() {
this(null);
}
/**
* Create a FileReadingMessageSource with a bounded queue of the given
* capacity. This can be used to reduce the memory footprint of this
* component when reading from a large directory.
* @param internalQueueCapacity
* the size of the queue used to cache files to be received
* internally. This queue can be made larger to optimize the
* directory scanning. With scanEachPoll set to false and the
* queue to a large size, it will be filled once and then
* completely emptied before a new directory listing is done.
* This is particularly useful to reduce scans of large numbers
* of files in a directory.
*/
public FileReadingMessageSource(int internalQueueCapacity) {
this(null);
Assert.isTrue(internalQueueCapacity > 0, "Cannot create a queue with non positive capacity");
this.scanner = new HeadDirectoryScanner(internalQueueCapacity);
}
/**
* Create a FileReadingMessageSource with a {@link PriorityBlockingQueue}
* ordered with the passed in {@link Comparator}.
* The size of the queue used should be large enough to hold all the files
* in the input directory in order to sort all of them, so restricting the
* size of the queue is mutually exclusive with ordering. No guarantees
* about file delivery order can be made under concurrent access.
* @param receptionOrderComparator the comparator to be used to order the files in the internal queue
*/
public FileReadingMessageSource(@Nullable Comparator receptionOrderComparator) {
this.toBeReceived = new PriorityBlockingQueue<>(DEFAULT_INTERNAL_QUEUE_CAPACITY, receptionOrderComparator);
}
/**
* Specify the input directory.
* @param directory to monitor
*/
public void setDirectory(File directory) {
Assert.notNull(directory, "directory must not be null");
this.directory = directory;
}
/**
* Optionally specify a custom scanner, for example the
* {@link WatchServiceDirectoryScanner}.
* @param scanner scanner implementation
*/
public void setScanner(DirectoryScanner scanner) {
Assert.notNull(scanner, "'scanner' must not be null.");
this.scanner = scanner;
this.scannerExplicitlySet = true;
}
/**
* The {@link #scanner} property accessor to allow to modify its options
* ({@code filter}, {@code locker} etc.) at runtime using the
* {@link FileReadingMessageSource} bean.
* @return the {@link DirectoryScanner} of this {@link FileReadingMessageSource}.
* @since 4.2
*/
public DirectoryScanner getScanner() {
return this.scanner;
}
/**
* Specify whether to create the source directory automatically if it does
* not yet exist upon initialization. By default, this value is
* true. If set to false and the
* source directory does not exist, an Exception will be thrown upon
* initialization.
* @param autoCreateDirectory
* should the directory to be monitored be created when this
* component starts up?
*/
public void setAutoCreateDirectory(boolean autoCreateDirectory) {
this.autoCreateDirectory = autoCreateDirectory;
}
/**
* Set a {@link FileListFilter}.
* By default a {@link org.springframework.integration.file.filters.AcceptOnceFileListFilter}
* with no bounds is used. In most cases a customized {@link FileListFilter} will
* be needed to deal with modification and duplication concerns.
* If multiple filters are required a
* {@link org.springframework.integration.file.filters.CompositeFileListFilter}
* can be used to group them together.
* The supplied filter must be thread safe..
* @param filter a filter
*/
public void setFilter(FileListFilter filter) {
Assert.notNull(filter, "'filter' must not be null");
this.filter = filter;
}
/**
* Set a {@link FileLocker} to be used to guard files against duplicate processing.
* The supplied FileLocker must be thread safe
* @param locker a locker
*/
public void setLocker(FileLocker locker) {
Assert.notNull(locker, "'fileLocker' must not be null.");
this.locker = locker;
}
/**
* Set this flag if you want to make sure the internal queue is
* refreshed with the latest content of the input directory on each poll.
*
* By default, this implementation will empty its queue before looking at the
* directory again. In cases where order is relevant it is important to
* consider the effects of setting this flag. The internal
* {@link java.util.concurrent.BlockingQueue} that this class is keeping
* will more likely be out of sync with the file system if this flag is set
* to false, but it will change more often (causing expensive reordering) if it is set to true.
* @param scanEachPoll whether the component should re-scan (as opposed to not
* rescanning until the entire backlog has been delivered)
*/
public void setScanEachPoll(boolean scanEachPoll) {
this.scanEachPoll = scanEachPoll;
}
/**
* Switch this {@link FileReadingMessageSource} to use its internal
* {@link FileReadingMessageSource.WatchServiceDirectoryScanner}.
* @param useWatchService the {@code boolean} flag to switch to
* {@link FileReadingMessageSource.WatchServiceDirectoryScanner} on {@code true}.
* @since 4.3
* @see #setWatchEvents
*/
public void setUseWatchService(boolean useWatchService) {
this.useWatchService = useWatchService;
}
public boolean isUseWatchService() {
return this.useWatchService;
}
/**
* The {@link WatchService} event types.
* If {@link #setUseWatchService} isn't {@code true}, this option is ignored.
* @param watchEvents the set of {@link WatchEventType}.
* @since 4.3
* @see #setUseWatchService
*/
public void setWatchEvents(WatchEventType... watchEvents) {
Assert.notEmpty(watchEvents, "'watchEvents' must not be empty.");
Assert.noNullElements(watchEvents, "'watchEvents' must not contain null elements.");
Assert.state(!this.running.get(), "Cannot change watch events while running.");
this.watchEvents = Arrays.copyOf(watchEvents, watchEvents.length);
}
/**
* Set a max depth for the {@link Files#walkFileTree(Path, Set, int, FileVisitor)} API when
* {@link #useWatchService} is enabled.
* Defaults to {@link Integer#MAX_VALUE} - walk the whole tree.
* @param watchMaxDepth the depth for {@link Files#walkFileTree(Path, Set, int, FileVisitor)}.
* @since 6.1
*/
public void setWatchMaxDepth(int watchMaxDepth) {
this.watchMaxDepth = watchMaxDepth;
}
/**
* Set a {@link Predicate} to check a directory in the {@link Files#walkFileTree(Path, Set, int, FileVisitor)} call
* if it is eligible for {@link WatchService}.
* @param watchDirPredicate the {@link Predicate} to check dirs for walking.
* @since 6.1
*/
public void setWatchDirPredicate(Predicate watchDirPredicate) {
Assert.notNull(watchDirPredicate, "'watchDirPredicate' must not be null.");
this.watchDirPredicate = watchDirPredicate;
}
@Override
public String getComponentType() {
return "file:inbound-channel-adapter";
}
@Override
public void start() {
if (!this.running.getAndSet(true)) {
if (!this.directory.exists() && this.autoCreateDirectory && !this.directory.mkdirs()) {
throw new IllegalStateException("Cannot create directory or its parents: " + this.directory);
}
Assert.isTrue(this.directory.exists(),
() -> "Source directory [" + this.directory + "] does not exist.");
Assert.isTrue(this.directory.isDirectory(),
() -> "Source path [" + this.directory + "] does not point to a directory.");
Assert.isTrue(this.directory.canRead(),
() -> "Source directory [" + this.directory + "] is not readable.");
if (this.scanner instanceof Lifecycle lifecycle) {
lifecycle.start();
}
}
}
@Override
public void stop() {
if (this.running.getAndSet(false) && this.scanner instanceof Lifecycle lifecycle) {
lifecycle.stop();
}
}
@Override
public boolean isRunning() {
return this.running.get();
}
@Override
protected void onInit() {
Assert.notNull(this.directory, "'directory' must not be null");
Assert.state(!(this.scannerExplicitlySet && this.useWatchService),
() -> "The 'scanner' and 'useWatchService' options are mutually exclusive: " + this.scanner);
if (this.useWatchService) {
this.scanner = new WatchServiceDirectoryScanner();
}
// Check that the filter and locker options are _NOT_ set if an external scanner has been set.
// The external scanner is responsible for the filter and locker options in that case.
Assert.state(!(this.scannerExplicitlySet && (this.filter != null || this.locker != null)),
() -> "When using an external scanner the 'filter' and 'locker' options should not be used. " +
"Instead, set these options on the external DirectoryScanner: " + this.scanner);
if (this.filter != null) {
this.scanner.setFilter(this.filter);
}
if (this.locker != null) {
this.scanner.setLocker(this.locker);
}
}
@Override
protected AbstractIntegrationMessageBuilder doReceive() {
// rescan only if needed or explicitly configured
if (this.scanEachPoll || this.toBeReceived.isEmpty()) {
scanInputDirectory();
}
File file = this.toBeReceived.poll();
// file == null means the queue was empty
// we can't rely on isEmpty for concurrency reasons
while ((file != null) && !this.scanner.tryClaim(file)) {
file = this.toBeReceived.poll();
}
if (file != null) {
return getMessageBuilderFactory()
.withPayload(file)
.setHeader(FileHeaders.RELATIVE_PATH, this.directory.toPath().relativize(file.toPath()).toString())
.setHeader(FileHeaders.FILENAME, file.getName())
.setHeader(FileHeaders.ORIGINAL_FILE, file);
}
return null;
}
private void scanInputDirectory() {
List filteredFiles = this.scanner.listFiles(this.directory);
Set freshFiles = new LinkedHashSet<>(filteredFiles);
if (!freshFiles.isEmpty()) {
this.toBeReceived.addAll(freshFiles);
logger.debug(() -> "Added to queue: " + freshFiles);
}
}
/**
* Adds the failed message back to the 'toBeReceived' queue if there is room.
* @param failedMessage the {@link Message} that failed
*/
public void onFailure(Message failedMessage) {
logger.warn(() -> "Failed to send: " + failedMessage);
this.toBeReceived.offer(failedMessage.getPayload());
}
public enum WatchEventType {
CREATE(StandardWatchEventKinds.ENTRY_CREATE),
MODIFY(StandardWatchEventKinds.ENTRY_MODIFY),
DELETE(StandardWatchEventKinds.ENTRY_DELETE);
private final WatchEvent.Kind kind;
WatchEventType(WatchEvent.Kind kind) {
this.kind = kind;
}
}
private final class WatchServiceDirectoryScanner extends DefaultDirectoryScanner implements ManageableLifecycle {
private final ConcurrentMap pathKeys = new ConcurrentHashMap<>();
private final Set filesToPoll = ConcurrentHashMap.newKeySet();
private WatchService watcher;
private WatchEvent.Kind>[] kinds;
@Override
public void setFilter(FileListFilter filter) {
if (filter instanceof DiscardAwareFileListFilter discardAwareFileListFilter) {
discardAwareFileListFilter.addDiscardCallback(this.filesToPoll::add);
}
super.setFilter(filter);
}
@Override
public void start() {
try {
this.watcher = FileSystems.getDefault().newWatchService();
}
catch (IOException ex) {
logger.error(ex, () -> "Failed to create watcher for " + FileReadingMessageSource.this.directory);
}
this.kinds = new WatchEvent.Kind>[FileReadingMessageSource.this.watchEvents.length];
for (int i = 0; i < FileReadingMessageSource.this.watchEvents.length; i++) {
this.kinds[i] = FileReadingMessageSource.this.watchEvents[i].kind;
}
Set initialFiles = walkDirectory(FileReadingMessageSource.this.directory.toPath(), null);
initialFiles.addAll(filesFromEvents());
this.filesToPoll.addAll(initialFiles);
}
@Override
public void stop() {
try {
this.watcher.close();
this.watcher = null;
this.pathKeys.clear();
}
catch (IOException ex) {
logger.error(ex, () -> "Failed to close watcher for " + FileReadingMessageSource.this.directory);
}
}
@Override
public boolean isRunning() {
return true;
}
@Override
protected File[] listEligibleFiles(File directory) {
Assert.state(this.watcher != null, "The WatchService hasn't been started");
Set files = new LinkedHashSet<>();
for (Iterator iterator = this.filesToPoll.iterator(); iterator.hasNext(); ) {
files.add(iterator.next());
iterator.remove();
}
files.addAll(filesFromEvents());
return files.toArray(new File[0]);
}
private Set filesFromEvents() {
WatchKey key = this.watcher.poll();
Set files = new LinkedHashSet<>();
while (key != null) {
File parentDir = ((Path) key.watchable()).toAbsolutePath().toFile();
for (WatchEvent> event : key.pollEvents()) {
if (StandardWatchEventKinds.ENTRY_CREATE.equals(event.kind()) ||
StandardWatchEventKinds.ENTRY_MODIFY.equals(event.kind()) ||
StandardWatchEventKinds.ENTRY_DELETE.equals(event.kind())) {
processFilesFromNormalEvent(files, parentDir, event);
}
else if (StandardWatchEventKinds.OVERFLOW.equals(event.kind())) {
processFilesFromOverflowEvent(files, event);
}
}
key.reset();
key = this.watcher.poll();
}
return files;
}
private void processFilesFromNormalEvent(Set files, File parentDir, WatchEvent> event) {
Path item = (Path) event.context();
File file = new File(parentDir, item.toFile().getName());
logger.debug(() -> "Watch event [" + event.kind() + "] for file [" + file + "]");
if (StandardWatchEventKinds.ENTRY_DELETE.equals(event.kind())) {
Path filePath = file.toPath();
if (this.pathKeys.containsKey(filePath)) {
WatchKey watchKey = this.pathKeys.remove(filePath);
watchKey.cancel();
}
if (getFilter() instanceof ResettableFileListFilter resettableFileListFilter) {
resettableFileListFilter.remove(file);
}
boolean fileRemoved = files.remove(file);
if (fileRemoved) {
logger.debug(() -> "The file [" + file +
"] has been removed from the queue because of DELETE event.");
}
}
else {
if (file.exists()) {
if (file.isDirectory()) {
files.addAll(walkDirectory(file.toPath(), event.kind()));
}
else {
files.remove(file);
files.add(file);
}
}
else {
logger.debug(() -> "A file [" + file + "] for the event [" + event.kind() +
"] doesn't exist. Ignored. Maybe DELETE event is not watched ?");
}
}
}
private void processFilesFromOverflowEvent(Set files, WatchEvent> event) {
logger.debug(() -> "Watch event [" + StandardWatchEventKinds.OVERFLOW +
"] with context [" + event.context() + "]");
for (WatchKey watchKey : this.pathKeys.values()) {
watchKey.cancel();
}
this.pathKeys.clear();
if (event.context() != null && event.context() instanceof Path path) {
files.addAll(walkDirectory(path, event.kind()));
}
else {
files.addAll(walkDirectory(FileReadingMessageSource.this.directory.toPath(), event.kind()));
}
}
private Set walkDirectory(Path directory, final WatchEvent.Kind> kind) {
final Set walkedFiles = new LinkedHashSet<>();
try {
registerWatch(directory);
Files.walkFileTree(directory, Collections.emptySet(), FileReadingMessageSource.this.watchMaxDepth,
new SimpleFileVisitor<>() {
@Override
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
throws IOException {
if (FileReadingMessageSource.this.watchDirPredicate.test(dir)) {
registerWatch(dir);
return FileVisitResult.CONTINUE;
}
else {
return FileVisitResult.SKIP_SUBTREE;
}
}
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
FileVisitResult fileVisitResult = super.visitFile(file, attrs);
if (!StandardWatchEventKinds.ENTRY_MODIFY.equals(kind)) {
walkedFiles.add(file.toFile());
}
return fileVisitResult;
}
});
}
catch (IOException ex) {
logger.error(ex, () -> "Failed to walk directory: " + directory.toString());
}
return walkedFiles;
}
private void registerWatch(Path dir) throws IOException {
if (!this.pathKeys.containsKey(dir)) {
logger.debug(() -> "registering: " + dir + " for file events");
WatchKey watchKey = dir.register(this.watcher, this.kinds);
this.pathKeys.putIfAbsent(dir, watchKey);
}
}
}
}