// de.saly.elasticsearch.mailsource.ParallelPollingIMAPMailSource Maven / Gradle / Ivy
/***********************************************************************************************************************
*
* Elasticsearch IMAP River - open source IMAP river for Elasticsearch
* ==========================================
*
* Copyright (C) 2014 by Hendrik Saly (http://saly.de) and others.
*
* Contains (partially) copied code from Jörg Prante's Elasticsearch JDBC river (https://github.com/jprante/elasticsearch-river-jdbc)
*
***********************************************************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
***********************************************************************************************************************
*
* $Id:$
*
**********************************************************************************************************************/
package de.saly.elasticsearch.mailsource;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.regex.Pattern;
import javax.mail.Folder;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.Session;
import javax.mail.Store;
import javax.mail.UIDFolder;
import javax.mail.internet.MimeMessage;
import org.apache.commons.lang.StringUtils;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.indices.IndexMissingException;
import de.saly.elasticsearch.maildestination.MailDestination;
import de.saly.elasticsearch.riverstate.RiverState;
import de.saly.elasticsearch.riverstate.RiverStateManager;
import de.saly.elasticsearch.support.IMAPUtils;
public class ParallelPollingIMAPMailSource implements MailSource {
private final ExecutorService es;
private MailDestination mailDestination;
private final String password;
private final Properties props;
private RiverStateManager stateManager;
private final int threadCount;
private final String user;
private boolean withFlagSync = true;
private boolean deleteExpungedMessages = true;
protected final ESLogger logger = ESLoggerFactory.getLogger(this.getClass().getName());
public ParallelPollingIMAPMailSource(final Properties props, final int threadCount, final String user, final String password) {
super();
this.props = props;
this.threadCount = threadCount < 1 ? 1 : threadCount;
this.es = Executors.newFixedThreadPool(threadCount);
this.user = user;
this.password = password;
}
@Override
public void close() {
if (es != null) {
logger.info("Initiate shutdown");
es.shutdown();
/*try {
if (es.awaitTermination(2, TimeUnit.SECONDS)) {
logger.info("Shutdown completed gracefully");
} else {
logger.warn("Shutdown completed not gracefully, timeout elapsed");
}
} catch (final InterruptedException e) {
logger.warn("Shutdown completed not gracefully, thread interrupted");
}*/
}
}
@Override
public void fetch(final Pattern pattern) throws MessagingException, IOException {
fetch(pattern, null);
}
@Override
public void fetch(final String folderName) throws MessagingException, IOException {
fetch(null, folderName);
}
@Override
public void fetchAll() throws MessagingException, IOException {
fetch(null, null);
}
public MailDestination getMailDestination() {
return mailDestination;
}
public Properties getProps() {
return props;
}
public RiverStateManager getStateManager() {
return stateManager;
}
public int getThreadCount() {
return threadCount;
}
@Override
public void setMailDestination(final MailDestination mailDestination) {
this.mailDestination = mailDestination;
}
@Override
public void setStateManager(final RiverStateManager stateManager) {
this.stateManager = stateManager;
}
public ParallelPollingIMAPMailSource setWithFlagSync(final boolean withFlagSync) {
this.withFlagSync = withFlagSync;
return this;
}
public void setDeleteExpungedMessages(final boolean deleteExpungedMessages) {
this.deleteExpungedMessages = deleteExpungedMessages;
}
private ProcessResult process(final int messageCount, final int start, final String folderName) {
final long startTime = System.currentTimeMillis();
//final int netCount = messageCount - (start == 1 ? 0 : start);
final int netCount = messageCount - (start == 1 ? 0 : start) + 1; //fixed #15
logger.debug("netCount: {}", netCount);
if (netCount == 0) {
return new ProcessResult(0, 0, 0);
}
final int block = netCount / threadCount;
final List> fl = new ArrayList>();
logger.debug(netCount + "/" + threadCount + "=" + block);
int lastStart = 0;
for (int i = 1; i <= threadCount; i++) {
final int _start = i == 1 ? start : lastStart + block + 1;
lastStart = _start;
final int _end = Math.min(_start + block, messageCount);
if (_end < _start) {
continue;
}
logger.debug("Schedule: " + _start + " - " + _end);
final Future f = es.submit(new Callable() {
@Override
public ProcessResult call() throws Exception {
return processMessageSlice(_start, _end, folderName);
}
});
fl.add(f);
}
long highestUid = 0;
int processedCount = 0;
for (final Future fu : fl) {
try {
highestUid = Math.max(highestUid, fu.get().highestUid);
processedCount += fu.get().processedCount;
logger.debug("Finished with " + fu.get());
} catch (final Exception e) {
logger.error("Unable to process some mails due to {}", e, e.toString());
}
}
final long endTime = System.currentTimeMillis() + 1;
return new ProcessResult(highestUid, processedCount, endTime - startTime);
}
private ProcessResult processMessageSlice(final int start, final int end, final String folderName) throws Exception {
logger.debug("processMessageSlice() started with " + start + "/" + end + "/" + folderName);
final long startTime = System.currentTimeMillis();
final Store store = Session.getInstance(props).getStore();
store.connect(user, password);
final Folder folder = store.getFolder(folderName);
final UIDFolder uidfolder = (UIDFolder) folder;
IMAPUtils.open(folder);
try {
final Message[] msgs = folder.getMessages(start, end);
folder.fetch(msgs, IMAPUtils.FETCH_PROFILE_HEAD);
logger.debug("folder fetch done");
long highestUid = 0;
int processedCount = 0;
for (final Message m : msgs) {
try {
IMAPUtils.open(folder);
final long uid = uidfolder.getUID(m);
mailDestination.onMessage(m);
highestUid = Math.max(highestUid, uid);
processedCount++;
if (Thread.currentThread().isInterrupted()) {
break;
}
} catch (final Exception e) {
stateManager.onError("Unable to make indexable message", m, e);
logger.error("Unable to make indexable message due to {}", e, e.toString());
IMAPUtils.open(folder);
}
}
final long endTime = System.currentTimeMillis() + 1;
final ProcessResult pr = new ProcessResult(highestUid, processedCount, endTime - startTime);
logger.debug("processMessageSlice() ended with " + pr);
return pr;
} finally {
IMAPUtils.close(folder);
IMAPUtils.close(store);
}
}
@SuppressWarnings({ "rawtypes", "unchecked" })
protected void fetch(final Folder folder) throws MessagingException, IOException {
if ((folder.getType() & Folder.HOLDS_MESSAGES) == 0) {
logger.warn("Folder {} cannot hold messages", folder.getFullName());
return;
}
final int messageCount = folder.getMessageCount();
final UIDFolder uidfolder = (UIDFolder) folder;
final long servervalidity = uidfolder.getUIDValidity();
final RiverState riverState = stateManager.getRiverState(folder);
final Long localvalidity = riverState.getUidValidity();
logger.info("Fetch mails from folder {} ({})", folder.getURLName().toString(), messageCount);
logger.debug("Server uid validity: {}, Local uid validity: {}", servervalidity, localvalidity);
if (localvalidity == null || localvalidity.longValue() != servervalidity) {
logger.debug("UIDValidity fail, full resync " + localvalidity + "!=" + servervalidity);
if (localvalidity != null) {
mailDestination.clearDataForFolder(folder.getFullName());
}
final ProcessResult result = process(messageCount, 1, folder.getFullName());
riverState.setLastCount(result.getProcessedCount());
if (result.getProcessedCount() > 0) {
riverState.setLastIndexed(new Date());
}
if (result.getProcessedCount() > 0) {
riverState.setLastTook(result.getTook());
}
riverState.setLastSchedule(new Date());
if (result.getProcessedCount() > 0 && result.getHighestUid() > 0) {
riverState.setLastUid(result.getHighestUid());
}
riverState.setUidValidity(servervalidity);
stateManager.setRiverState(riverState);
logger.info("Initiailly processed {} mails for folder {}", result.getProcessedCount(), folder.getFullName());
logger.debug("Processed result {}", result.toString());
} else {
if (messageCount == 0) {
logger.debug("Folder {} is empty", folder.getFullName());
} else {
if (withFlagSync) {
// detect flag change
final Message[] flagMessages = folder.getMessages();
folder.fetch(flagMessages, IMAPUtils.FETCH_PROFILE_FLAGS_UID);
for (final Message message : flagMessages) {
try {
final long uid = ((UIDFolder) message.getFolder()).getUID(message);
final String id = uid + "::" + message.getFolder().getURLName();
final int storedHashcode = mailDestination.getFlaghashcode(id);
if (storedHashcode == -1) {
// New mail which is not indexed yet
continue;
}
final int flagHashcode = message.getFlags().hashCode();
if (flagHashcode != storedHashcode) {
// flags change for this message, must update
mailDestination.onMessage(message);
if (logger.isDebugEnabled()) {
logger.debug("Update " + id + " because of flag change");
}
}
} catch (final Exception e) {
logger.error("Error detecting flagchanges for message " + ((MimeMessage) message).getMessageID(), e);
stateManager.onError("Error detecting flagchanges", message, e);
}
}
}
final long highestUID = riverState.getLastUid(); // this uid is
// already
// processed
logger.debug("highestUID: {}", highestUID);
final Message[] msgsnew = uidfolder.getMessagesByUID(highestUID, UIDFolder.LASTUID);
// msgnew.size is always >= 1
if (highestUID > 0 && uidfolder.getUID(msgsnew[0]) <= highestUID) {
// msgsnew = (Message[]) ArrayUtils.remove(msgsnew, 0);
}
if (msgsnew.length > 0) {
logger.info("{} new messages in folder {}", msgsnew.length, folder.getFullName());
final int start = msgsnew[0].getMessageNumber();
final ProcessResult result = process(messageCount, start, folder.getFullName());
riverState.setLastCount(result.getProcessedCount());
if (result.getProcessedCount() > 0) {
riverState.setLastIndexed(new Date());
}
if (result.getProcessedCount() > 0) {
riverState.setLastTook(result.getTook());
}
riverState.setLastSchedule(new Date());
if (result.getProcessedCount() > 0 && result.getHighestUid() > 0) {
riverState.setLastUid(result.getHighestUid());
}
riverState.setUidValidity(servervalidity);
stateManager.setRiverState(riverState);
logger.info("Not initiailly processed {} mails for folder {}", result.getProcessedCount(), folder.getFullName());
logger.debug("Processed result {}", result.toString());
} else {
logger.debug("no new messages");
}
}
// check for expunged/deleted messages
final Set serverMailSet = new HashSet();
final long oldmailUid = riverState.getLastUid();
logger.debug("oldmailuid {}", oldmailUid);
final Message[] msgsold = uidfolder.getMessagesByUID(1, oldmailUid);
folder.fetch(msgsold, IMAPUtils.FETCH_PROFILE_UID);
for (final Message m : msgsold) {
try {
final long uid = uidfolder.getUID(m);
serverMailSet.add(uid);
} catch (final Exception e) {
stateManager.onError("Unable to handle old message ", m, e);
logger.error("Unable to handle old message due to {}", e, e.toString());
IMAPUtils.open(folder);
}
}
if(deleteExpungedMessages) {
final Set localMailSet = new HashSet(mailDestination.getCurrentlyStoredMessageUids(folder.getFullName(), false));
logger.debug("Check now " + localMailSet.size() + " server mails for expunge");
localMailSet.removeAll(serverMailSet);
// localMailSet has now the ones that are not on server
logger.info(localMailSet.size() + " messages were locally deleted, because they are expunged on server.");
mailDestination.onMessageDeletes(localMailSet, folder.getFullName(), false);
}
}
}
protected void fetch(final Pattern pattern, final String folderName) throws MessagingException, IOException {
logger.debug("fetch() - pattern: {}, folderName: {}", pattern, folderName);
final Store store = Session.getInstance(props).getStore();
store.connect(user, password);
try {
for (final String fol : mailDestination.getFolderNames()) {
if (store.getFolder(fol).exists()) {
logger.debug("{} exists", fol);
} else {
logger.info("Folder {} does not exist on the server, will remove it (and its content) also locally", fol);
final RiverState riverState = stateManager.getRiverState(store.getFolder(fol));
riverState.setExists(false);
stateManager.setRiverState(riverState);
try {
mailDestination.clearDataForFolder(fol);
} catch (final Exception e) {
stateManager.onError("Unable to clean data for stale folder", store.getFolder(fol), e);
}
}
}
} catch (final IndexMissingException ime) {
logger.debug(ime.toString());
} catch (final Exception e) {
logger.error("Error checking for stale folders", e);
}
final boolean isRoot = StringUtils.isEmpty(folderName);
final Folder folder = isRoot ? store.getDefaultFolder() : store.getFolder(folderName);
try {
if (!folder.exists()) {
logger.error("Folder {} does not exist on the server", folder.getFullName());
return;
}
logger.debug("folderName: {} is root: {}", folderName, isRoot);
/*if (logger.isDebugEnabled() && store instanceof IMAPStore) {
IMAPStore imapStore = (IMAPStore) store;
logger.debug("IMAP server capability info");
logger.debug("IMAP4rev1: "
+ imapStore.hasCapability("IMAP4rev1"));
logger.debug("IDLE: " + imapStore.hasCapability("IDLE"));
logger.debug("ID: " + imapStore.hasCapability("ID"));
logger.debug("UIDPLUS: " + imapStore.hasCapability("UIDPLUS"));
logger.debug("CHILDREN: " + imapStore.hasCapability("CHILDREN"));
logger.debug("NAMESPACE: "
+ imapStore.hasCapability("NAMESPACE"));
logger.debug("NOTIFY: " + imapStore.hasCapability("NOTIFY"));
logger.debug("CONDSTORE: "
+ imapStore.hasCapability("CONDSTORE"));
logger.debug("QRESYNC: " + imapStore.hasCapability("QRESYNC"));
logger.debug("STATUS: " + imapStore.hasCapability("STATUS"));
}*/
if (pattern != null && !isRoot && !pattern.matcher(folder.getFullName()).matches()) {
logger.info(folder.getFullName() + " does not match pattern " + pattern.toString());
return;
}
IMAPUtils.open(folder);
recurseFolders(folder, pattern);
} finally {
IMAPUtils.close(folder);
IMAPUtils.close(store);
}
}
protected void recurseFolders(final Folder folder, final Pattern pattern) throws MessagingException, IOException {
if (folder != null) {
if (es == null || es.isShutdown() || es.isTerminated() || Thread.currentThread().isInterrupted()) {
logger.warn("Stop processing of mails due to mail source is closed");
return;
}
if ((folder.getType() & Folder.HOLDS_MESSAGES) != 0) {
if (pattern != null && !pattern.matcher(folder.getFullName()).matches()) {
logger.info("Pattern {} does not match {}", pattern.pattern(), folder.getFullName());
return;
}
IMAPUtils.open(folder);
try {
fetch(folder);
} finally {
IMAPUtils.close(folder);
logger.debug("fetch {} done", folder.getFullName());
}
}
if ((folder.getType() & Folder.HOLDS_FOLDERS) != 0) {
for (final Folder subfolder : folder.list()) {
recurseFolders(subfolder, pattern);
}
}
}
}
private static class ProcessResult {
private long highestUid = 1;
private final int processedCount;
private final long took;
public ProcessResult(final long highestUid, final int processedCount, final long took) {
super();
this.highestUid = highestUid < 1 ? 1L : highestUid;
this.processedCount = processedCount;
this.took = took;
}
public long getHighestUid() {
return highestUid;
}
public int getProcessedCount() {
return processedCount;
}
public long getTook() {
return took;
}
@Override
public String toString() {
return "ProcessResult [highestUid=" + highestUid + ", processedCount=" + processedCount + ", took=" + took + "]";
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy