// Source listing: com.google.sitebricks.mail.imap.MessageStatusExtractor (Maven / Gradle / Ivy)
package com.google.sitebricks.mail.imap;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.mail.internet.MailDateFormat;
import java.text.ParseException;
import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts a MessageStatus from a partial IMAP fetch. Specifically, the response to
 * a "fetch all" command, which comes back with subject, sender, uid,
 * internaldate and rfc822.size (length).
 *
 * A more robust form of fetch exists for message body parts, which
 * would handle email body, html mail, attachments, etc.
 *
 * @author [email protected] (Dhanji R. Prasanna)
 */
class MessageStatusExtractor implements Extractor> {
private static final Logger log = LoggerFactory.getLogger(MessageStatusExtractor.class);
// 10 Sep 2011 14:19:55 -0700
static final Pattern ALTERNATE_RECEIVED_DATE_PATTERN = Pattern.compile(
"\\d?\\d \\w\\w\\w [0-9]{4} \\d?\\d:\\d?\\d:\\d?\\d [-+]?[0-9]{4}");
static final DateTimeFormatter INTERNAL_DATE = DateTimeFormat.forPattern(
"dd-MMM-yyyy HH:mm:ss Z");
static final DateTimeFormatter ALTERNATE_INTERNAL_DATE = DateTimeFormat.forPattern(
"MMM-yyyy HH:mm:ss Z");
static final Pattern HELPFUL_NOTIFICATION_PATTERN = Pattern.compile("[*] \\d+ (EXISTS|EXPUNGE)\\s*",
Pattern.CASE_INSENSITIVE);
static final Pattern SIZE_MARKER = Pattern.compile("\\{(\\d+)\\}$", Pattern.MULTILINE);
/**
 * Converts the raw lines of a fetch response into message statuses.
 *
 * <p>Null or empty lines, end-of-sequence lines and EXISTS/EXPUNGE notifications are skipped.
 * A line that ends inside a quoted string is joined with the following line(s), and a
 * trailing <code>{n}</code> size marker is replaced by a quoted string assembled from the
 * following line(s), before the result is handed to {@code parseStatus}.
 *
 * @param messages raw response lines; the entry starting a message may be null or empty
 * @return the parsed statuses in input order; lines for which {@code parseStatus} returns
 *     null are omitted
 * @throws IllegalArgumentException if {@code parseStatus} rejects a line
 */
@Override
public List<MessageStatus> extract(List<String> messages) {
  List<MessageStatus> statuses = Lists.newArrayList();
  for (int i = 0, messagesSize = messages.size(); i < messagesSize; i++) {
    String message = messages.get(i);
    if (null == message || message.isEmpty())
      continue;

    // Discard the success token and any EXISTS or EXPUNGE tokens.
    try {
      if (Command.isEndOfSequence(message) || HELPFUL_NOTIFICATION_PATTERN.matcher(message).matches())
        continue;
    } catch (ExtractionException ee) {
      // Log only the line that failed; logging the whole response list for every bad
      // line makes the log grow with the size of the response.
      log.error("Warning: error parsing email message status! {}", message, ee);
      continue;
    }

    // The only newlines allowed are inside strings, so check whether the message
    // might have been split between lines and unfold as appropriate.
    boolean isUnterminatedString = isUnterminatedString(message, false);
    while (isUnterminatedString && (i + 1 < messagesSize)) {
      String next = messages.get(i + 1);
      message = message + '\n' + next;
      isUnterminatedString = isUnterminatedString(next, isUnterminatedString);
      // Skip next.
      i++;
    }

    // Newlines are actually also allowed outside strings if a length marker is specified.
    Matcher matcher = SIZE_MARKER.matcher(message);
    while (matcher.find()) {
      // The number declared inside the {n} marker.
      int size = Integer.parseInt(matcher.group(1));
      // The marker sits at the end of a line, so the collected token starts with a newline.
      StringBuilder stringToken = new StringBuilder("\n");
      // Whatever follows the sized token on the line where it ends.
      String rest = "";
      int newlines = 1;
      boolean done = false;
      while (stringToken.length() <= size + 1 && !done && (i + 1 < messagesSize)) {
        String next = messages.get(i + 1).trim();
        ++i;
        if (next.length() + stringToken.length() <= size) {
          // The whole line still fits within the declared size.
          stringToken.append(next).append('\n');
          newlines++;
        } else {
          // This line overshoots the declared size: take a prefix, assuming each newline
          // seen so far counted as 2 characters.
          int offset = Math.max(0, size - stringToken.length() - newlines * 2);
          stringToken.append(next.substring(0, offset));
          rest = next.substring(offset);

          // We could have over-counted as newlines are not always counted as 2 characters.
          // For sanity, extend the token up to the first "((" (including a space directly
          // before it in the remainder) or " NIL", falling back to the first space.
          int bracketPos = rest.indexOf("((");
          if (bracketPos > 0 && rest.charAt(bracketPos - 1) == ' ')
            bracketPos--;
          int nilPos = rest.indexOf(" NIL");
          int delim = Math.min(bracketPos == -1 ? Integer.MAX_VALUE : bracketPos,
              nilPos == -1 ? Integer.MAX_VALUE : nilPos);
          if (delim == Integer.MAX_VALUE) {
            int spacePos = rest.indexOf(" ");
            delim = spacePos == -1 ? Integer.MAX_VALUE : spacePos;
          }
          if (delim > 0 && delim != Integer.MAX_VALUE) {
            stringToken.append(rest.substring(0, delim));
            rest = rest.substring(delim);
            done = true;
          }
        }
      }

      // Now take the extracted subject and compose it into the message header as though it
      // were quoted.
      message = matcher.replaceAll("");
      // Escape nested quotes:
      String newToken = stringToken.toString().replaceAll("\"", "\\\\\"");
      message += '"' + newToken + '"' + rest;

      // The new message string might have further size markers.
      matcher = SIZE_MARKER.matcher(message);
    }

    // Strip a leading "* " before parsing.
    final MessageStatus messageStatus = parseStatus(message.replaceFirst("^[*] ", ""));
    if (messageStatus != null)
      statuses.add(messageStatus);
  }
  return statuses;
}
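/**
 * Reports whether {@code message} ends inside an open double-quoted string.
 * Quote escaping is honoured, but only while inside a string; an escape outside a string
 * is illegal, so the quote that follows it is treated as a regular quote.
 * A trailing backslash indicates a \CRLF was received (as envisaged in RFC 822 3.4.5).
 *
 * @param message the line to scan
 * @param alreadyInString whether the scan starts inside a string left open by earlier text
 * @return true if a string is still open at the end of {@code message}
 */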
@VisibleForTesting
static boolean isUnterminatedString(String message, boolean alreadyInString) {
  boolean open = alreadyInString;
  // True when the previous character inside the string was an unpaired backslash.
  boolean pendingEscape = false;
  for (char ch : message.toCharArray()) {
    if (!open) {
      // Outside a string only a quote matters: it opens a string. Backslashes are ignored.
      open = (ch == '"');
      continue;
    }
    switch (ch) {
      case '\\':
        // Two backslashes in a row cancel each other out.
        pendingEscape = !pendingEscape;
        break;
      case '"':
        // An unescaped quote closes the string; an escaped one is ordinary content.
        if (!pendingEscape) {
          open = false;
        }
        pendingEscape = false;
        break;
      default:
        pendingEscape = false;
        break;
    }
  }
  return open;
}
/**
 * Parses one unfolded response line into a {@link MessageStatus}.
 *
 * <p>The line must start with an integer token followed by {@code FETCH} and {@code (}.
 * After that, every item parser is tried on each pass, and parsing stops when the tokens run
 * out or when no parser accepted anything in a pass.
 *
 * @param message the response line, as passed by {@code extract} after removing a leading "* "
 * @return the parsed status, or null if the line starts with {@code BAD}
 * @throws IllegalArgumentException rethrown, after logging the line, when raised while parsing
 */
private static MessageStatus parseStatus(String message) {
  if (message.startsWith("BAD")) {
    log.warn("error, got BAD response ignoring: {}", message);
    return null;
  }

  Queue<String> tokens = Parsing.tokenize(message);
  MessageStatus status = new MessageStatus();
  try {
    // Assert that we have an envelope.
    Parsing.match(tokens, int.class);
    Parsing.eat(tokens, "FETCH", "(");

    while (!tokens.isEmpty()) {
      // Non-short-circuit |= on purpose: every parser gets a chance on each pass.
      boolean match = parseUid(tokens, status);
      match |= parseEnvelope(tokens, status);
      match |= parseFlags(tokens, status);
      match |= parseInternalDate(tokens, status);
      match |= parseRfc822Size(tokens, status);
      match |= parseGmailUid(tokens, status);
      match |= parseGmailThreadId(tokens, status);
      match |= parseGmailLabels(tokens, status);
      if (!match) {
        // Nothing recognized the next token; stop rather than loop forever.
        break;
      }
    }
  } catch (IllegalArgumentException e) {
    log.warn("Error parsing status: {}", message);
    throw e;
  }
  // We don't really need to bother closing the last ')'
  return status;
}
/**
 * Consumes an {@code RFC822.SIZE} item, if it is next, and stores its integer value as the
 * status size.
 *
 * @return true if the item was present, false if the tokens were left for another parser
 */
private static boolean parseRfc822Size(Queue<String> tokens, MessageStatus status) {
  if (Parsing.matchAnyOf(tokens, "RFC822.SIZE") == null) {
    return false;
  }
  status.setSize(Parsing.match(tokens, int.class));
  return true;
}
/**
 * Consumes an {@code X-GM-THRID} item, if it is next, and stores its long value as the
 * status thread id.
 *
 * @return true if the item was present, false if the tokens were left for another parser
 */
private static boolean parseGmailThreadId(Queue<String> tokens, MessageStatus status) {
  if (Parsing.matchAnyOf(tokens, "X-GM-THRID") == null) {
    return false;
  }
  status.setThreadId(Parsing.match(tokens, long.class));
  return true;
}
/**
 * Consumes an {@code X-GM-MSGID} item, if it is next, and stores its long value as the
 * Gmail message id of the status. (Despite the method name, the keyword matched is
 * X-GM-MSGID.)
 *
 * @return true if the item was present, false if the tokens were left for another parser
 */
private static boolean parseGmailUid(Queue<String> tokens, MessageStatus status) {
  if (Parsing.matchAnyOf(tokens, "X-GM-MSGID") == null) {
    return false;
  }
  status.setGmailMsgId(Parsing.match(tokens, long.class));
  return true;
}
/**
 * Consumes an {@code INTERNALDATE} item, if it is next, and stores the parsed date on the
 * status.
 *
 * <p>The date token is consumed only when {@code Parsing.isValid} accepts it. It is then
 * normalized and parsed with {@code INTERNAL_DATE}, falling back to
 * {@code ALTERNATE_INTERNAL_DATE}. If both formats reject it, a warning is logged and the
 * internal date is left unset.
 *
 * @return true if the INTERNALDATE keyword was present (even when no date could be set),
 *     false if the tokens were left for another parser
 */
private static boolean parseInternalDate(Queue<String> tokens, MessageStatus status) {
  if (Parsing.matchAnyOf(tokens, "INTERNALDATE") == null)
    return false;

  String internalDate = tokens.peek();
  if (Parsing.isValid(internalDate)) {
    internalDate = Parsing.normalizeDateToken(Parsing.match(tokens, String.class));
    DateTime dateTime;
    try {
      dateTime = INTERNAL_DATE.parseDateTime(internalDate);
    } catch (IllegalArgumentException e) {
      try {
        dateTime = ALTERNATE_INTERNAL_DATE.parseDateTime(internalDate);
      } catch (IllegalArgumentException iae) {
        // The failure of the primary format is the one reported.
        log.warn("Unable to extract internal date--message may be incomplete", e);
        dateTime = null;
      }
    }

    if (dateTime != null)
      status.setInternalDate(dateTime.toDate());
  }
  return true;
}
/**
 * Lets {@code Flag.parseFlagList} read a flag list from the tokens and adds the result to the
 * flags already on the status.
 *
 * @return true if a flag list was parsed, false if {@code Flag.parseFlagList} returned null
 */
public static boolean parseFlags(Queue<String> tokens, MessageStatus status) {
  Set<Flag> flags = Flag.parseFlagList(tokens);
  if (flags == null) {
    return false;
  }
  status.getFlags().addAll(flags);
  return true;
}
/**
 * Consumes a {@code UID} item, if it is next, and stores its integer value as the IMAP uid
 * of the status.
 *
 * @return true if the item was present, false if the tokens were left for another parser
 */
private static boolean parseUid(Queue<String> tokens, MessageStatus status) {
  if (Parsing.matchAnyOf(tokens, "UID") == null) {
    return false;
  }
  status.setImapUid(Parsing.match(tokens, int.class));
  return true;
}
/**
 * Consumes an {@code X-GM-LABELS} item, if it is next, and stores its labels on the status.
 *
 * <p>A fresh label set is installed whenever the keyword is present, even if the
 * parenthesized list turns out to be empty.
 *
 * @return true if the item was present, false if the tokens were left for another parser
 * @throws IllegalArgumentException if the tokens run out before the closing ")"
 */
private static boolean parseGmailLabels(Queue<String> tokens, MessageStatus status) {
  if (Parsing.matchAnyOf(tokens, "X-GM-LABELS") == null)
    return false;
  Parsing.eat(tokens, "(");

  // Start from an empty label set for this message status.
  status.setLabels(Sets.<String>newHashSet());

  // Collect labels until the closing parenthesis.
  while (!")".equals(tokens.peek())) { // \Inbox
    String token = tokens.poll();
    if (token == null) {
      // poll() returns null on an exhausted queue: the list was never closed. Fail with
      // the exception type parseStatus already handles instead of a NullPointerException.
      throw new IllegalArgumentException(
          "Unterminated X-GM-LABELS list: ran out of tokens before ')'");
    }

    // HACK: horrible hack!!!
    // The original Parser incorrectly left escaped backslashes intact. We now
    // emulate this by putting them back in...
    // Every single backslash is doubled (literal replacement, no regex needed).
    token = token.replace("\\", "\\\\");
    status.getLabels().add(token);
  }
  Parsing.eat(tokens, ")");
  return true;
}
/**
 * Consumes an {@code ENVELOPE} item, if it is next, and copies its fields onto the status.
 *
 * <p>Fields are read in this order: date (or NIL), subject, from, sender, reply-to, to, cc,
 * bcc, in-reply-to and message id, followed by the closing ")". A date that cannot be
 * parsed is logged and the received date is left unset.
 *
 * @return true if the item was present, false if the tokens were left for another parser
 */
private static boolean parseEnvelope(Queue<String> tokens, MessageStatus status) {
  if (Parsing.matchAnyOf(tokens, "ENVELOPE") == null)
    return false;
  Parsing.eat(tokens, "(");

  String receivedDate = tokens.peek();
  if (Parsing.isValid(receivedDate)) {
    receivedDate = Parsing.normalizeDateToken(Parsing.match(tokens, String.class));
    try {
      // A new formatter per call; it is never shared between invocations.
      MailDateFormat dateFormat = new MailDateFormat();
      // NOTE(review): the zone is pinned to Australia/Sydney; presumably this only matters
      // for date text that carries no zone of its own -- confirm.
      dateFormat.setTimeZone(TimeZone.getTimeZone("Australia/Sydney"));
      status.setReceivedDate(dateFormat.parse(receivedDate));
    } catch (ParseException e) {
      log.error("Malformed received date format {}. Unable to parse.", receivedDate, e);
    }
  } else if (receivedDate != null) {
    // Not a usable date but a token is present: it must be NIL.
    Parsing.eat(tokens, "NIL");
  }

  status.setSubject(Parsing.decode(Parsing.match(tokens, String.class)));

  status.setFrom(Parsing.readAddresses(tokens));
  status.setSender(Parsing.readAddresses(tokens));
  status.setReplyTo(Parsing.readAddresses(tokens));
  status.setTo(Parsing.readAddresses(tokens));
  status.setCc(Parsing.readAddresses(tokens));
  status.setBcc(Parsing.readAddresses(tokens));

  status.setInReplyTo(Parsing.match(tokens, String.class));
  status.setMessageUid(Parsing.match(tokens, String.class));

  // Close envelope.
  Parsing.eat(tokens, ")");
  return true;
}
}