All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.camel.component.hdfs2.HdfsConsumer Maven / Gradle / Ivy

There is a newer version: 3.0.0-M4
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.component.hdfs2;

import java.io.IOException;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import javax.security.auth.login.Configuration;

import org.apache.camel.Exchange;
import org.apache.camel.Message;
import org.apache.camel.Processor;
import org.apache.camel.impl.DefaultMessage;
import org.apache.camel.impl.ScheduledPollConsumer;
import org.apache.camel.util.IOHelper;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class HdfsConsumer extends ScheduledPollConsumer {

    private final HdfsConfiguration config;
    private final StringBuilder hdfsPath;
    private final Processor processor;
    private final ReadWriteLock rwlock = new ReentrantReadWriteLock();
    private volatile HdfsInputStream istream;

    public HdfsConsumer(HdfsEndpoint endpoint, Processor processor, HdfsConfiguration config) {
        super(endpoint, processor);
        this.config = config;
        this.hdfsPath = config.getFileSystemType().getHdfsPath(config);
        this.processor = processor;

        setInitialDelay(config.getInitialDelay());
        setDelay(config.getDelay());
        setUseFixedDelay(true);
    }

    @Override
    public HdfsEndpoint getEndpoint() {
        return (HdfsEndpoint) super.getEndpoint();
    }

    @Override
    protected void doStart() throws Exception {
        super.doStart();

        if (config.isConnectOnStartup()) {
            // setup hdfs if configured to do on startup
            setupHdfs(true);
        }
    }

    private HdfsInfo setupHdfs(boolean onStartup) throws Exception {
        // if we are starting up then log at info level, and if runtime then log at debug level to not flood the log
        if (onStartup) {
            log.info("Connecting to hdfs file-system {}:{}/{} (may take a while if connection is not available)", new Object[]{config.getHostName(), config.getPort(), hdfsPath.toString()});
        } else {
            if (log.isDebugEnabled()) {
                log.debug("Connecting to hdfs file-system {}:{}/{} (may take a while if connection is not available)", new Object[]{config.getHostName(), config.getPort(), hdfsPath.toString()});
            }
        }

        // hadoop will cache the connection by default so its faster to get in the poll method
        HdfsInfo answer = HdfsInfoFactory.newHdfsInfo(this.hdfsPath.toString());

        if (onStartup) {
            log.info("Connected to hdfs file-system {}:{}/{}", new Object[]{config.getHostName(), config.getPort(), hdfsPath.toString()});
        } else {
            if (log.isDebugEnabled()) {
                log.debug("Connected to hdfs file-system {}:{}/{}", new Object[]{config.getHostName(), config.getPort(), hdfsPath.toString()});
            }
        }
        return answer;
    }

    @Override
    protected int poll() throws Exception {
        // need to remember auth as Hadoop will override that, which otherwise means the Auth is broken afterwards
        Configuration auth = HdfsComponent.getJAASConfiguration();
        try {
            return doPoll();
        } finally {
            HdfsComponent.setJAASConfiguration(auth);
        }
    }

    protected int doPoll() throws Exception {
        class ExcludePathFilter implements PathFilter {
            public boolean accept(Path path) {
                return !(path.toString().endsWith(config.getOpenedSuffix()) || path.toString().endsWith(config.getReadSuffix()));
            }
        }

        int numMessages = 0;

        HdfsInfo info = setupHdfs(false);
        FileStatus fileStatuses[];
        if (info.getFileSystem().isFile(info.getPath())) {
            fileStatuses = info.getFileSystem().globStatus(info.getPath());
        } else {
            Path pattern = info.getPath().suffix("/" + this.config.getPattern());
            fileStatuses = info.getFileSystem().globStatus(pattern, new ExcludePathFilter());
        }

        for (FileStatus status : fileStatuses) {

            if (normalFileIsDirectoryNoSuccessFile(status, info)) {
                continue;
            }

            if (config.getOwner() != null) {
                // must match owner
                if (!config.getOwner().equals(status.getOwner())) {
                    if (log.isDebugEnabled()) {
                        log.debug("Skipping file: {} as not matching owner: {}", status.getPath().toString(), config.getOwner());
                    }
                    continue;
                }
            }

            try {
                this.rwlock.writeLock().lock();
                this.istream = HdfsInputStream.createInputStream(status.getPath().toString(), this.config);
                if (!this.istream.isOpened()) {
                    if (log.isDebugEnabled()) {
                        log.debug("Skipping file: {} because it doesn't exist anymore", status.getPath().toString());
                    }
                    continue;
                }
            } finally {
                this.rwlock.writeLock().unlock();
            }

            try {
                Holder key = new Holder();
                Holder value = new Holder();
                while (this.istream.next(key, value) >= 0) {
                    Exchange exchange = this.getEndpoint().createExchange();
                    Message message = new DefaultMessage();
                    String fileName = StringUtils.substringAfterLast(status.getPath().toString(), "/");
                    message.setHeader(Exchange.FILE_NAME, fileName);
                    if (key.value != null) {
                        message.setHeader(HdfsHeader.KEY.name(), key.value);
                    }
                    message.setBody(value.value);
                    exchange.setIn(message);

                    log.debug("Processing file {}", fileName);
                    try {
                        processor.process(exchange);
                    } catch (Exception e) {
                        exchange.setException(e);
                    }

                    // in case of unhandled exceptions then let the exception handler handle them
                    if (exchange.getException() != null) {
                        getExceptionHandler().handleException(exchange.getException());
                    }

                    numMessages++;
                }
            } finally {
                IOHelper.close(istream, "input stream", log);
            }
        }

        return numMessages;
    }

    private boolean normalFileIsDirectoryNoSuccessFile(FileStatus status, HdfsInfo info) throws IOException {
        if (config.getFileType().equals(HdfsFileType.NORMAL_FILE) && status.isDirectory()) {
            Path successPath = new Path(status.getPath().toString() + "/_SUCCESS");
            if (!info.getFileSystem().exists(successPath)) {
                return true;
            }
        }
        return false;
    }

}