All downloads are free. The search and download features use the official Maven repository.

com.exasol.adapter.document.documentfetcher.files.FilesDocumentFetcher Maven / Gradle / Ivy

There is a newer version: 8.1.3
Show newest version
package com.exasol.adapter.document.documentfetcher.files;

import java.util.logging.Logger;

import com.exasol.adapter.document.connection.ConnectionPropertiesReader;
import com.exasol.adapter.document.documentfetcher.DocumentFetcher;
import com.exasol.adapter.document.documentfetcher.FetchedDocument;
import com.exasol.adapter.document.documentfetcher.files.csv.CsvDocumentFetcher;
import com.exasol.adapter.document.documentfetcher.files.segmentation.*;
import com.exasol.adapter.document.files.FileTypeSpecificDocumentFetcher;
import com.exasol.adapter.document.files.stringfilter.StringFilter;
import com.exasol.adapter.document.iterators.*;

/**
 * This is a basis for {@link DocumentFetcher}s that fetch data/documents from files.
 * <p>
 * The class combines a file pattern, a segment description, a {@link FileFinderFactory} and a
 * {@link FileTypeSpecificDocumentFetcher}: {@link #run(ConnectionPropertiesReader)} finds the files, narrows them down
 * to the segments described by the segment description and delegates the actual reading of each segment to the file
 * type specific fetcher.
 * </p>
 */
public class FilesDocumentFetcher implements DocumentFetcher {
    private static final long serialVersionUID = 3556762980241219690L;
    private static final Logger LOGGER = Logger.getLogger(FilesDocumentFetcher.class.getName());
    /** @serial */
    private final StringFilter filePattern;
    /** @serial */
    private final SegmentDescription segmentDescription;
    /** @serial */
    private final FileFinderFactory fileFinderFactory;
    /** @serial */
    private final FileTypeSpecificDocumentFetcher fileTypeSpecificDocumentFetcher;
    /** @serial */
    private final String additionalConfiguration;

    /**
     * Create a new instance of {@link FilesDocumentFetcher}.
     *
     * @param filePattern                     files to load
     * @param segmentDescription              segmentation for parallel execution
     * @param fileFinderFactory               dependency injection of {@link FileFinderFactory}
     * @param fileTypeSpecificDocumentFetcher file type specific document fetcher part
     * @param additionalConfiguration         additional configuration; only passed on when the file type specific
     *                                        fetcher is a {@link CsvDocumentFetcher}
     */
    public FilesDocumentFetcher(final StringFilter filePattern, final SegmentDescription segmentDescription,
            final FileFinderFactory fileFinderFactory,
            final FileTypeSpecificDocumentFetcher fileTypeSpecificDocumentFetcher,
            final String additionalConfiguration) {
        this.filePattern = filePattern;
        this.segmentDescription = segmentDescription;
        this.fileFinderFactory = fileFinderFactory;
        this.fileTypeSpecificDocumentFetcher = fileTypeSpecificDocumentFetcher;
        this.additionalConfiguration = additionalConfiguration;
    }

    /**
     * Get the file pattern.
     *
     * @return file pattern
     */
    public StringFilter getFilePattern() {
        return this.filePattern;
    }

    /**
     * Get the segment description.
     *
     * @return segment description
     */
    public SegmentDescription getSegmentDescription() {
        return this.segmentDescription;
    }

    /**
     * Build the iterator chain that yields the documents of this fetcher.
     * <p>
     * The chain is: files found for the file pattern, filtered by the segment matcher, wrapped in a
     * {@link RemoteFilePrefetchingIterator}, expanded into the matching segments of each file, and finally expanded
     * into the documents read from each segment.
     * </p>
     *
     * @param connectionInformation connection properties handed to the {@link FileFinderFactory}
     * @return iterator over the fetched documents
     */
    @Override
    public final CloseableIterator<FetchedDocument> run(final ConnectionPropertiesReader connectionInformation) {
        final RemoteFileFinder remoteFileFinder = this.fileFinderFactory.getFinder(this.filePattern,
                connectionInformation);
        final CloseableIterator<RemoteFile> files = remoteFileFinder.loadFiles();
        final SegmentMatcher segmentMatcher = SegmentMatcherFactory.buildSegmentMatcher(this.segmentDescription);
        // Drop files that the segment matcher does not accept before prefetching them.
        final CloseableIterator<RemoteFile> filteredFiles = new FilteringIterator<>(files,
                segmentMatcher::matchesFile);
        final CloseableIterator<RemoteFile> prefetchedFiles = new RemoteFilePrefetchingIterator(filteredFiles);
        // One file can contribute several segments, hence the flat map.
        final CloseableIterator<FileSegment> segments = new FlatMapIterator<>(prefetchedFiles,
                file -> new CloseableIteratorWrapper<>(segmentMatcher.getMatchingSegmentsFor(file).iterator()));
        return new FlatMapIterator<>(segments, this::readLoadedFile);
    }

    /**
     * Read the documents of a single file segment and tag each one with the resource name of its file.
     *
     * @param fileSegment segment to read
     * @return iterator over the documents of the segment
     */
    private CloseableIterator<FetchedDocument> readLoadedFile(final FileSegment fileSegment) {
        final RemoteFile remoteFile = fileSegment.getFile();
        LOGGER.finest(() -> "Reading segment " + fileSegment.getSegmentDescription() + " from file "
                + remoteFile.getResourceName() + " with size " + remoteFile.getSize() + " bytes");
        // Only the CSV fetcher receives the additional configuration; it is set before every segment is read.
        if (this.fileTypeSpecificDocumentFetcher instanceof CsvDocumentFetcher) {
            final CsvDocumentFetcher csvDocumentFetcher = (CsvDocumentFetcher) this.fileTypeSpecificDocumentFetcher;
            csvDocumentFetcher.setAdditionalConfiguration(this.additionalConfiguration);
        }
        return new TransformingIterator<>(this.fileTypeSpecificDocumentFetcher.readDocuments(fileSegment),
                document -> new FetchedDocument(document, remoteFile.getResourceName()));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy