de.saly.elasticsearch.river.imap.IMAPRiver Maven / Gradle / Ivy
/***********************************************************************************************************************
*
* Elasticsearch IMAP River - open source IMAP river for Elasticsearch
* ==========================================
*
* Copyright (C) 2014 by Hendrik Saly (http://saly.de) and others.
*
* Contains (partially) copied code from Jörg Prante's Elasticsearch JDBC river (https://github.com/jprante/elasticsearch-river-jdbc)
*
***********************************************************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
***********************************************************************************************************************
*
* $Id:$
*
**********************************************************************************************************************/
package de.saly.elasticsearch.river.imap;
import static org.quartz.CronScheduleBuilder.cronSchedule;
import static org.quartz.JobBuilder.newJob;
import static org.quartz.SimpleScheduleBuilder.simpleSchedule;
import static org.quartz.TriggerBuilder.newTrigger;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.mail.MessagingException;
import org.apache.commons.lang.StringUtils;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.river.AbstractRiverComponent;
import org.elasticsearch.river.River;
import org.elasticsearch.river.RiverName;
import org.elasticsearch.river.RiverSettings;
import org.quartz.JobDataMap;
import org.quartz.JobDetail;
import org.quartz.Scheduler;
import org.quartz.SchedulerException;
import org.quartz.Trigger;
import org.quartz.impl.StdSchedulerFactory;
import de.saly.elasticsearch.ldap.ILoginSource;
import de.saly.elasticsearch.ldap.LdapLoginSource;
import de.saly.elasticsearch.maildestination.ElasticsearchBulkMailDestination;
import de.saly.elasticsearch.maildestination.MailDestination;
import de.saly.elasticsearch.mailsource.MailSource;
import de.saly.elasticsearch.mailsource.ParallelPollingIMAPMailSource;
import de.saly.elasticsearch.mailsource.ParallelPollingPOPMailSource;
import de.saly.elasticsearch.riverstate.ElasticsearchRiverStateManager;
import de.saly.elasticsearch.riverstate.RiverStateManager;
import de.saly.elasticsearch.support.MailFlowJob;
public class IMAPRiver extends AbstractRiverComponent implements River {
// Identifier constants for this river. Where they are consumed is not visible in this file
// (presumably plugin/river-type registration — confirm against the plugin class).
public final static String NAME = "river-imap";
public final static String TYPE = "imap";
// Volatile flag; its readers/writers are not visible here (presumably set when the river is closed — TODO confirm).
private volatile boolean closed;
// Value of the "folderpattern" river setting; null when the setting is absent.
private final String folderPattern;
// Value of the "mail_index_name" river setting; defaults to "imapriverdata".
private final String indexName;
// Value of the "mail_index_name_strategy" river setting; defaults to "all_in_one".
private final String indexNameStrategy;
// Value of the "interval" river setting; defaults to one minute.
private final TimeValue interval;
// Class-wide logger.
private static final ESLogger logger = ESLoggerFactory.getLogger(IMAPRiver.class.getName());
// NOTE(review): the List fields below are raw types; the element types are not visible here — confirm before adding generics.
// Populated outside the visible code (presumably one mail source per configured user — TODO confirm).
private final List mailSources = new ArrayList();
// Populated outside the visible code (presumably by getUserLogins — TODO confirm).
private final List passwords = new ArrayList();
// Holds every river setting whose key starts with "mail." as a string property;
// "mail.store.protocol" is forced to "imaps" by the constructor when it is empty.
private final Properties props = new Properties();
// Elasticsearch client handed to the constructor.
private final Client client;
// Quartz scheduler; not assigned in the visible part of the constructor.
private Scheduler sched;
// Value of the "schedule" river setting; null when the setting is absent.
private final String schedule;
// Value of the "mail_type_name" river setting; defaults to "mail".
private final String typeName;
// Index names; exposed to callers as an unmodifiable view by getIndexNames().
private final List indices = new ArrayList();
// Populated outside the visible code (presumably by getUserLogins — TODO confirm).
private final List users = new ArrayList();
// Result of arrayNodeToList applied to the "headers_to_fields" river setting.
private final List headersToFields;
@Inject
public IMAPRiver(final RiverName riverName, final RiverSettings riverSettings, final Client client) {
super(riverName, riverSettings);
this.client = client;
final Map imapSettings = settings.settings();
getUserLogins(imapSettings);
folderPattern = XContentMapValues.nodeStringValue(imapSettings.get("folderpattern"), null);
indexName = XContentMapValues.nodeStringValue(imapSettings.get("mail_index_name"), "imapriverdata");
indexNameStrategy = XContentMapValues.nodeStringValue(imapSettings.get("mail_index_name_strategy"), "all_in_one");
typeName = XContentMapValues.nodeStringValue(imapSettings.get("mail_type_name"), "mail");
schedule = imapSettings.containsKey("schedule") ? XContentMapValues.nodeStringValue(imapSettings.get("schedule"), null) : null;
interval = XContentMapValues.nodeTimeValue(imapSettings.get("interval"), TimeValue.timeValueMinutes(1));
headersToFields = arrayNodeToList(imapSettings.get("headers_to_fields"));
final int bulkSize = XContentMapValues.nodeIntegerValue(imapSettings.get("bulk_size"), 100);
final int maxBulkRequests = XContentMapValues.nodeIntegerValue(imapSettings.get("max_bulk_requests"), 30);
// flush interval for bulk indexer
final TimeValue flushInterval = XContentMapValues.nodeTimeValue(imapSettings.get("bulk_flush_interval"),
TimeValue.timeValueSeconds(5));
final int threads = XContentMapValues.nodeIntegerValue(imapSettings.get("threads"), 5);
final boolean withTextContent = XContentMapValues.nodeBooleanValue(imapSettings.get("with_text_content"), true);
final boolean withHtmlContent = XContentMapValues.nodeBooleanValue(imapSettings.get("with_html_content"), false);
final boolean preferHtmlContent = XContentMapValues.nodeBooleanValue(imapSettings.get("prefer_html_content"), false);
final boolean withFlagSync = XContentMapValues.nodeBooleanValue(imapSettings.get("with_flag_sync"), true);
final boolean withAttachments = XContentMapValues.nodeBooleanValue(imapSettings.get("with_attachments"), false);
final boolean stripTagsFromTextContent = XContentMapValues.nodeBooleanValue(imapSettings.get("with_striptags_from_textcontent"),
true);
final boolean keepExpungedMessages = XContentMapValues.nodeBooleanValue(imapSettings.get("keep_expunged_messages"), false);
// get two maps from the river settings to improve index creation
final Map indexSettings = imapSettings.get("index_settings") != null ? XContentMapValues.nodeMapValue(
imapSettings.get("index_settings"), null) : null;
final Map typeMapping = imapSettings.get("type_mapping") != null ? XContentMapValues.nodeMapValue(
imapSettings.get("type_mapping"), null) : null;
for (final Map.Entry entry : imapSettings.entrySet()) {
if (entry != null && entry.getKey().startsWith("mail.")) {
props.setProperty(entry.getKey(), String.valueOf(entry.getValue()));
}
}
if (StringUtils.isEmpty(props.getProperty("mail.store.protocol"))) {
logger.warn("mail.store.protocol not set, assume its 'imaps'");
props.setProperty("mail.store.protocol", "imaps");
}
logger.debug("river settings " + imapSettings);
logger.debug("mail settings " + props);
for(int i=0; i getIndexNames() {
return Collections.unmodifiableList(indices);
}
/**
 * Returns the index name strategy this river was configured with.
 *
 * @return the value of the {@code mail_index_name_strategy} river setting as read by the
 *         constructor ({@code "all_in_one"} when the setting was not supplied)
 */
public String getIndexNameStrategy() {
    return this.indexNameStrategy;
}
/**
 * Returns the document type name this river was configured with.
 *
 * @return the value of the {@code mail_type_name} river setting as read by the constructor
 *         ({@code "mail"} when the setting was not supplied)
 */
public String getTypeName() {
    return this.typeName;
}
public void once() throws MessagingException, IOException {
for(int i=0;i arrayNodeToList(Object arrayNode) {
ArrayList list = new ArrayList<>();
if(XContentMapValues.isArray(arrayNode)) {
for(Object node : (List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy