All downloads are free. Search and download functionality uses the official Maven repository.

de.saly.elasticsearch.river.imap.IMAPRiver Maven / Gradle / Ivy

/***********************************************************************************************************************
 *
 * Elasticsearch IMAP River - open source IMAP river for Elasticsearch
 * ==========================================
 *
 * Copyright (C) 2014 by Hendrik Saly (http://saly.de) and others.
 * 
 * Contains (partially) copied code from Jörg Prante's Elasticsearch JDBC river (https://github.com/jprante/elasticsearch-river-jdbc)
 *
 ***********************************************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 ***********************************************************************************************************************
 *
 * $Id:$
 *
 **********************************************************************************************************************/
package de.saly.elasticsearch.river.imap;

import static org.quartz.CronScheduleBuilder.cronSchedule;
import static org.quartz.JobBuilder.newJob;
import static org.quartz.SimpleScheduleBuilder.simpleSchedule;
import static org.quartz.TriggerBuilder.newTrigger;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import javax.mail.MessagingException;

import org.apache.commons.lang.StringUtils;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.river.AbstractRiverComponent;
import org.elasticsearch.river.River;
import org.elasticsearch.river.RiverName;
import org.elasticsearch.river.RiverSettings;
import org.quartz.JobDataMap;
import org.quartz.JobDetail;
import org.quartz.Scheduler;
import org.quartz.SchedulerException;
import org.quartz.Trigger;
import org.quartz.impl.StdSchedulerFactory;

import de.saly.elasticsearch.ldap.ILoginSource;
import de.saly.elasticsearch.ldap.LdapLoginSource;
import de.saly.elasticsearch.maildestination.ElasticsearchBulkMailDestination;
import de.saly.elasticsearch.maildestination.MailDestination;
import de.saly.elasticsearch.mailsource.MailSource;
import de.saly.elasticsearch.mailsource.ParallelPollingIMAPMailSource;
import de.saly.elasticsearch.mailsource.ParallelPollingPOPMailSource;
import de.saly.elasticsearch.riverstate.ElasticsearchRiverStateManager;
import de.saly.elasticsearch.riverstate.RiverStateManager;
import de.saly.elasticsearch.support.MailFlowJob;

public class IMAPRiver extends AbstractRiverComponent implements River {

    /** Name constant of this river. */
    public static final String NAME = "river-imap";

    /** Type constant of this river. */
    public static final String TYPE = "imap";

    private static final ESLogger logger = ESLoggerFactory.getLogger(IMAPRiver.class.getName());

    // volatile so that an update made by one thread is visible to others
    // (where the flag is set and read is not visible in this part of the file)
    private volatile boolean closed;

    /** Value of the "folderpattern" setting; null when the setting is absent. */
    private final String folderPattern;

    /** Value of the "mail_index_name" setting; defaults to "imapriverdata". */
    private final String indexName;

    /** Value of the "mail_index_name_strategy" setting; defaults to "all_in_one". */
    private final String indexNameStrategy;

    /** Value of the "interval" setting; defaults to one minute. */
    private final TimeValue interval;

    /** Mail sources managed by this river. */
    private final List<MailSource> mailSources = new ArrayList<>();

    /** Login passwords; filled by getUserLogins() in step with {@link #users}. */
    private final List<String> passwords = new ArrayList<>();

    /** All river settings whose key starts with "mail.", plus a defaulted "mail.store.protocol". */
    private final Properties props = new Properties();

    private final Client client;

    private Scheduler sched;

    /** Value of the "schedule" setting; null when the setting is absent. */
    private final String schedule;

    /** Value of the "mail_type_name" setting; defaults to "mail". */
    private final String typeName;

    /** Index names; exposed read-only through getIndexNames(). */
    private final List<String> indices = new ArrayList<>();

    /** Login user names; filled by getUserLogins() in step with {@link #passwords}. */
    private final List<String> users = new ArrayList<>();

    /** String values of the "headers_to_fields" array setting; empty when the setting is absent or not an array. */
    private final List<String> headersToFields;

    /**
     * Creates the river and reads its configuration from the river settings.
     *
     * The visible part of the constructor collects the user logins, reads the index/type names, the schedule or
     * polling interval, the bulk indexing parameters and the content-handling flags (each with the default shown
     * at its call), copies every "mail.*" setting into {@code props} and defaults "mail.store.protocol" to "imaps".
     *
     * NOTE(review): this copy of the file is damaged. The constructor breaks off inside the loop header near its
     * end and runs straight into the body of getIndexNames(); the missing text must be restored from the
     * upstream source before this class can compile.
     *
     * @param riverName     passed through to the superclass constructor
     * @param riverSettings passed through to the superclass constructor
     * @param client        Elasticsearch client kept in the {@code client} field
     */
    @Inject
    public IMAPRiver(final RiverName riverName, final RiverSettings riverSettings, final Client client) {
        super(riverName, riverSettings);
        
        this.client = client;

        // `settings` is not declared in this class; presumably inherited from AbstractRiverComponent
        final Map imapSettings = settings.settings();

        // fills the users/passwords lists
        getUserLogins(imapSettings);

        // no default: folderPattern stays null when "folderpattern" is not configured
        folderPattern = XContentMapValues.nodeStringValue(imapSettings.get("folderpattern"), null);

        indexName = XContentMapValues.nodeStringValue(imapSettings.get("mail_index_name"), "imapriverdata");
        
        indexNameStrategy = XContentMapValues.nodeStringValue(imapSettings.get("mail_index_name_strategy"), "all_in_one");
        
        typeName = XContentMapValues.nodeStringValue(imapSettings.get("mail_type_name"), "mail");

        // null when no "schedule" key is present
        schedule = imapSettings.containsKey("schedule") ? XContentMapValues.nodeStringValue(imapSettings.get("schedule"), null) : null;

        // defaults to one minute
        interval = XContentMapValues.nodeTimeValue(imapSettings.get("interval"), TimeValue.timeValueMinutes(1));

        headersToFields = arrayNodeToList(imapSettings.get("headers_to_fields"));

        // bulk indexing parameters
        final int bulkSize = XContentMapValues.nodeIntegerValue(imapSettings.get("bulk_size"), 100);
        final int maxBulkRequests = XContentMapValues.nodeIntegerValue(imapSettings.get("max_bulk_requests"), 30);
        // flush interval for bulk indexer
        final TimeValue flushInterval = XContentMapValues.nodeTimeValue(imapSettings.get("bulk_flush_interval"),
                TimeValue.timeValueSeconds(5));

        final int threads = XContentMapValues.nodeIntegerValue(imapSettings.get("threads"), 5);

        // content-handling flags; the second argument of each call is the default
        final boolean withTextContent = XContentMapValues.nodeBooleanValue(imapSettings.get("with_text_content"), true);

        final boolean withHtmlContent = XContentMapValues.nodeBooleanValue(imapSettings.get("with_html_content"), false);

        final boolean preferHtmlContent = XContentMapValues.nodeBooleanValue(imapSettings.get("prefer_html_content"), false);

        final boolean withFlagSync = XContentMapValues.nodeBooleanValue(imapSettings.get("with_flag_sync"), true);

        final boolean withAttachments = XContentMapValues.nodeBooleanValue(imapSettings.get("with_attachments"), false);

        final boolean stripTagsFromTextContent = XContentMapValues.nodeBooleanValue(imapSettings.get("with_striptags_from_textcontent"),
                true);
        
        final boolean keepExpungedMessages = XContentMapValues.nodeBooleanValue(imapSettings.get("keep_expunged_messages"), false);

        // get two maps from the river settings to improve index creation
        final Map indexSettings = imapSettings.get("index_settings") != null ? XContentMapValues.nodeMapValue(
                imapSettings.get("index_settings"), null) : null;

        final Map typeMapping = imapSettings.get("type_mapping") != null ? XContentMapValues.nodeMapValue(
                imapSettings.get("type_mapping"), null) : null;

        // copy every setting whose key starts with "mail." into props, stringifying the value
        for (final Map.Entry entry : imapSettings.entrySet()) {

            if (entry != null && entry.getKey().startsWith("mail.")) {
                props.setProperty(entry.getKey(), String.valueOf(entry.getValue()));
            }
        }

        // fall back to "imaps" when no store protocol was configured
        if (StringUtils.isEmpty(props.getProperty("mail.store.protocol"))) {
            logger.warn("mail.store.protocol not set, assume its 'imaps'");
            props.setProperty("mail.store.protocol", "imaps");
        }

        logger.debug("river settings " + imapSettings);
        logger.debug("mail settings " + props);

        // NOTE(review): the next line is damaged in this copy of the file. Everything between "i" and
        // "getIndexNames()" has been lost: the rest of this loop header, the remainder of the constructor, any
        // members that followed it, and the modifiers/return type of getIndexNames(). The two lines after it are
        // the surviving body of getIndexNames(), which returns an unmodifiable view of the indices list.
        for(int i=0; i getIndexNames() {
        return Collections.unmodifiableList(indices);
    }

    /**
     * Returns the index name strategy taken from the {@code mail_index_name_strategy} river setting.
     *
     * @return the configured strategy name; {@code "all_in_one"} when the setting was not given
     */
    public String getIndexNameStrategy() {
        return this.indexNameStrategy;
    }

    /**
     * Returns the type name taken from the {@code mail_type_name} river setting.
     *
     * @return the configured type name; {@code "mail"} when the setting was not given
     */
    public String getTypeName() {
        return this.typeName;
    }

    // NOTE(review): this region is damaged in this copy of the file. The body of once() breaks off inside its
    // first loop header, and everything up to and including the modifiers and return type of
    // arrayNodeToList(...) has been lost. Restore both from the upstream source before compiling.
    public void once() throws MessagingException, IOException {

        for(int i=0;i arrayNodeToList(Object arrayNode) {
        // Surviving body of arrayNodeToList: collects the string values of an array-valued settings node.
        // Returns an empty list (never null) when arrayNode is not an array.
        ArrayList list = new ArrayList<>();
        if(XContentMapValues.isArray(arrayNode)) {
            for(Object node : (List) arrayNode) {
                String value = XContentMapValues.nodeStringValue(node, null);
                // entries that yield no string value are skipped, so the result can be shorter than the input
                if(value != null) {
                    list.add(value);
                }
            }
        }
        return list;
    }
    
    /**
     * Collects the login credentials from the river settings into the {@code users} and {@code passwords} lists.
     *
     * When the {@code user_source} setting is {@code "ldap"}, the logins are obtained from an
     * {@link LdapLoginSource} built from the settings and the {@code master_user} / {@code master_password}
     * values. Otherwise they are read directly from the settings: first the single {@code user} /
     * {@code password} pair (skipped when the user is missing or empty), then the {@code users} /
     * {@code passwords} arrays.
     *
     * The two lists are filled in step, so an entry in one is expected to pair with the entry at the same
     * position in the other. Because null array entries are dropped when the arrays are read, the lists can end
     * up with different lengths; this is logged as a warning rather than rejected.
     *
     * @param imapSettings the river settings map
     */
    private void getUserLogins(final Map<String, Object> imapSettings) {
        final String userSource = XContentMapValues.nodeStringValue(imapSettings.get("user_source"), null);

        if ("ldap".equals(userSource)) {
            // master user credentials for Dovecot
            final String masterUser = XContentMapValues.nodeStringValue(imapSettings.get("master_user"), null);
            final String masterPassword = XContentMapValues.nodeStringValue(imapSettings.get("master_password"), null);
            final ILoginSource source = new LdapLoginSource(imapSettings, masterUser, masterPassword);

            users.addAll(source.getUserNames());
            passwords.addAll(source.getUserPasswords());
        } else {
            // read logins directly: the single user/password pair first ...
            final String singleUser = XContentMapValues.nodeStringValue(imapSettings.get("user"), null);
            final String singlePassword = XContentMapValues.nodeStringValue(imapSettings.get("password"), null);

            if (singleUser != null && !singleUser.isEmpty()) {
                users.add(singleUser);
                passwords.add(singlePassword);
            }

            // ... then the array-valued settings (arrayNodeToList never returns null)
            final List<String> userList = arrayNodeToList(imapSettings.get("users"));
            final List<String> passwordList = arrayNodeToList(imapSettings.get("passwords"));

            //TODO: inject master user credentials?
            if (!userList.isEmpty()) {
                users.addAll(userList);
                passwords.addAll(passwordList);
            }
        }

        // only the counts are logged, never the credentials themselves
        if (users.size() != passwords.size()) {
            logger.warn("number of users ({}) does not match number of passwords ({}), logins may be paired incorrectly",
                    users.size(), passwords.size());
        }
    }
    
    /**
     * Requests the cluster health, waiting for status {@code YELLOW} with a 30 second timeout.
     *
     * @param client the client used to issue the cluster health request
     * @throws IOException if the health request itself fails (the failure is attached as the cause), or if the
     *                     request timed out before the awaited status was reached
     */
    public static void waitForYellowCluster(Client client) throws IOException {

        final ClusterHealthStatus status = ClusterHealthStatus.YELLOW;
        final ClusterHealthResponse healthResponse;

        try {
            logger.debug("waiting for cluster state {}", status.name());
            healthResponse = client.admin().cluster().prepareHealth().setWaitForStatus(status)
                    .setTimeout(TimeValue.timeValueSeconds(30)).execute().actionGet();
        } catch (final Exception e) {
            logger.error("Exception while waiting for cluster state: {} due to {}", e, status.name(), e.toString());
            throw new IOException("timeout, cluster does not respond to health request, cowardly refusing to continue with operations", e);
        }

        // Evaluated outside the try block so that the timeout exception below is not caught by the handler
        // above, logged a second time and re-wrapped as a "does not respond" failure.
        if (healthResponse.isTimedOut()) {
            logger.error("Timeout while waiting for cluster state: {}, current cluster state is: {}", status.name(), healthResponse.getStatus().name());
            throw new IOException("cluster state is " + healthResponse.getStatus().name() + " and not " + status.name()
                   + ", cowardly refusing to continue with operations");
        }

        logger.debug("... cluster state ok");
    }
}