// javax.mail.internet.MimeUtility Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package javax.mail.internet;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import javax.activation.DataHandler;
import javax.activation.DataSource;
import javax.mail.MessagingException;
import org.apache.geronimo.mail.util.ASCIIUtil;
import org.apache.geronimo.mail.util.Base64;
import org.apache.geronimo.mail.util.Base64DecoderStream;
import org.apache.geronimo.mail.util.Base64Encoder;
import org.apache.geronimo.mail.util.Base64EncoderStream;
import org.apache.geronimo.mail.util.QuotedPrintableDecoderStream;
import org.apache.geronimo.mail.util.QuotedPrintableEncoderStream;
import org.apache.geronimo.mail.util.QuotedPrintableEncoder;
import org.apache.geronimo.mail.util.QuotedPrintable;
import org.apache.geronimo.mail.util.SessionUtil;
import org.apache.geronimo.mail.util.UUDecoderStream;
import org.apache.geronimo.mail.util.UUEncoderStream;
// Supported transfer encodings include "base64", "quoted-printable", "7bit", "8bit" and "binary".
// In addition, "uuencode" is also supported.
/**
* @version $Rev: 627556 $ $Date: 2008-02-13 13:27:22 -0500 (Wed, 13 Feb 2008) $
*/
public class MimeUtility {
// Property consulted by encodeWord(): when true, the segments of a long encoded value are
// separated by a line break instead of a blank (read with a default of false).
private static final String MIME_FOLDENCODEDWORDS = "mail.mime.foldencodedwords";
// Property consulted by decodeText(): when false, the non-strict decoder is used
// (read with a default of true).
private static final String MIME_DECODE_TEXT_STRICT = "mail.mime.decodetext.strict";
// Property consulted by fold(): when false, the text is returned unchanged
// (read with a default of true).
private static final String MIME_FOLDTEXT = "mail.mime.foldtext";
// fold() returns text unchanged when its length plus the already-used column count
// does not exceed this value.
private static final int FOLD_THRESHOLD = 76;
// This class only exposes static members; the private constructor prevents instantiation.
private MimeUtility() {
}
// NOTE(review): not referenced in the visible part of the file; presumably a public
// "check everything" marker used by callers -- confirm against the rest of the class.
public static final int ALL = -1;
// NOTE(review): not referenced in the visible part of the file; presumably a cache for
// the default Java charset -- confirm before relying on it.
private static String defaultJavaCharset;
// characters that quote()/quoteAndEscapeString() prefix with a backslash inside a quoted string.
private static String escapedChars = "\"\\\r\n";
// characters treated as token delimiters (linear white space) by the header text decoders.
private static String linearWhiteSpace = " \t\r\n";
// the "specials" sets handed to the quoted-printable encoder: the first is used when encoding
// a "word", the second when encoding free "text" (see the private encodeWord()).
private static String QP_WORD_SPECIALS = "=_?\"#$%&'(),.:;<>@[\\]^`{|}~";
private static String QP_TEXT_SPECIALS = "=_?";
// the javamail spec includes the ability to map java encoding names to MIME-specified names. Normally,
// these values are loaded from a character mapping file.
// Both tables are keyed by the lower-cased source name (see javaCharset() and mimeCharset()).
private static Map java2mime;
private static Map mime2java;
static {
// we need to load the mapping tables used by javaCharset() and mimeCharset().
loadCharacterSetMappings();
}
/**
 * Wrap a decoder around an input stream that delivers transfer-encoded data.
 *
 * @param in       The stream supplying the encoded data.
 * @param encoding The name of the transfer encoding. The name is compared
 *                 without regard to case.
 *
 * @return The original stream for the pass-through encodings ("binary",
 *         "7bit" and "8bit"); otherwise a stream that decodes the data
 *         as it is read.
 * @exception MessagingException
 *                   If the encoding name is not one of the supported names.
 */
public static InputStream decode(InputStream in, String encoding) throws MessagingException {
    // Encoding names are ASCII tokens, so fold case with a fixed locale. The default-locale
    // toLowerCase() maps 'I' to a dotless i under e.g. a Turkish locale, which would make
    // names such as "BINARY" or "7BIT" unrecognizable.
    encoding = encoding.toLowerCase(java.util.Locale.ENGLISH);
    // some encodings are just pass-throughs, with no real decoding.
    if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
        return in;
    }
    else if (encoding.equals("base64")) {
        return new Base64DecoderStream(in);
    }
    // UUEncode is known by a couple historical extension names too.
    else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
        return new UUDecoderStream(in);
    }
    else if (encoding.equals("quoted-printable")) {
        return new QuotedPrintableDecoderStream(in);
    }
    else {
        throw new MessagingException("Unknown encoding " + encoding);
    }
}
/**
 * Decode header text that may contain RFC 2047 encoded-words.
 * The text is split into white space delimited tokens; every token
 * that parses as an encoded-word is replaced by its decoded value and
 * all other tokens are copied through untouched. White space that
 * only separates two adjacent encoded-words is dropped, as is any
 * trailing white space.
 *
 * @param text   The header text to decode.
 *
 * @return The decoded text.
 * @exception UnsupportedEncodingException
 */
public static String decodeText(String text) throws UnsupportedEncodingException {
    // every encoded-word starts with "=?"; without that marker there is nothing to do.
    if (text.indexOf("=?") < 0) {
        return text;
    }
    // the lenient rules live in their own method.
    if (!SessionUtil.getBooleanProperty(MIME_DECODE_TEXT_STRICT, true)) {
        return decodeTextNonStrict(text);
    }

    final int length = text.length();
    StringBuffer result = new StringBuffer(length);
    // the most recent white space run that has not been written out yet (null if none).
    String pendingGap = null;
    boolean lastWasEncoded = false;
    int pos = 0;

    while (pos < length) {
        final int runStart = pos;

        if (linearWhiteSpace.indexOf(text.charAt(pos)) != -1) {
            // swallow the complete run of white space and remember it for the next token.
            while (pos < length && linearWhiteSpace.indexOf(text.charAt(pos)) != -1) {
                pos++;
            }
            // a run that reaches the end of the text is never written: the loop ends here.
            pendingGap = text.substring(runStart, pos);
            continue;
        }

        // collect the characters of the word up to the next white space (or the end).
        while (pos < length && linearWhiteSpace.indexOf(text.charAt(pos)) == -1) {
            pos++;
        }
        String word = text.substring(runStart, pos);

        // try to decode anything that looks like an encoded-word; a parsing failure
        // simply means the word is handled as ordinary text.
        String decoded = null;
        if (word.startsWith("=?")) {
            try {
                decoded = decodeWord(word);
            } catch (ParseException e) {
                // not a well-formed encoded-word, fall back to the raw token.
            }
        }
        boolean encoded = decoded != null;

        // the gap is significant unless it sits between two encoded-words.
        if (pendingGap != null && !(encoded && lastWasEncoded)) {
            result.append(pendingGap);
            pendingGap = null;
        }
        lastWasEncoded = encoded;
        result.append(encoded ? decoded : word);
    }
    return result.toString();
}
/**
 * Decode a string of text obtained from a mail header into
 * its proper form, using lenient rules. This is for mailers that
 * violate the RFC 2047 restriction that encoded-words must be delimited
 * by linear white space: every white space delimited word is scanned
 * for embedded tokens enclosed in "=?" -- "?=" pairs, and each such
 * token is decoded in place. Pieces that are not (or not valid)
 * encoded-words are copied through unchanged.
 *
 * @param text   The text to decode.
 *
 * @return The decoded text string.
 * @exception UnsupportedEncodingException
 *                   If an embedded encoded-word names an unknown encoding
 *                   or character set (raised by decodeWord()).
 */
private static String decodeTextNonStrict(String text) throws UnsupportedEncodingException {
    int offset = 0;
    int endOffset = text.length();
    // white space seen since the last piece of output; null when nothing is pending.
    String pendingWhiteSpace = null;
    StringBuffer decodedText = new StringBuffer(text.length());
    boolean previousTokenEncoded = false;

    while (offset < endOffset) {
        // is this a whitespace character?
        if (linearWhiteSpace.indexOf(text.charAt(offset)) != -1) {
            int startWhiteSpace = offset;
            // step over the white space characters.
            while (offset < endOffset && linearWhiteSpace.indexOf(text.charAt(offset)) != -1) {
                offset++;
            }
            // NB: a run that reaches the end of the text is never emitted, so trailing
            // white space on these header strings is discarded.
            pendingWhiteSpace = text.substring(startWhiteSpace, offset);
            continue;
        }

        // we're at the start of a word token. We potentially need to break this up into subtokens
        int wordStart = offset;
        while (offset < endOffset && linearWhiteSpace.indexOf(text.charAt(offset)) == -1) {
            offset++;
        }
        String word = text.substring(wordStart, offset);

        int decodeStart = 0;
        // now scan and process each of the bits within here.
        while (decodeStart < word.length()) {
            int tokenStart = word.indexOf("=?", decodeStart);
            int tokenEnd = (tokenStart == -1) ? -1 : findEncodedWordEnd(word, tokenStart);

            // no complete encoded-word left in this word: the remainder is plain text.
            if (tokenEnd == -1) {
                if (pendingWhiteSpace != null) {
                    decodedText.append(pendingWhiteSpace);
                    pendingWhiteSpace = null;
                }
                previousTokenEncoded = false;
                decodedText.append(word.substring(decodeStart));
                break;
            }

            // we might have a normal token preceding the encoded one.
            if (tokenStart != decodeStart) {
                if (pendingWhiteSpace != null) {
                    decodedText.append(pendingWhiteSpace);
                    pendingWhiteSpace = null;
                }
                previousTokenEncoded = false;
                decodedText.append(word.substring(decodeStart, tokenStart));
            }

            // The token must include its closing "?=": decodeWord() rejects a token without
            // the terminator, and leaving it off would also lose those two characters when
            // the token ends up being copied through as plain text.
            String token = word.substring(tokenStart, tokenEnd);
            decodeStart = tokenEnd;

            String decodedWord = null;
            try {
                // if this gives a parsing failure, treat it like a non-encoded word.
                decodedWord = decodeWord(token);
            } catch (ParseException e) {
                // handled below as normal text.
            }

            if (decodedWord != null) {
                // white space is only significant when it does not sit between two encoded-words.
                if (!previousTokenEncoded && pendingWhiteSpace != null) {
                    decodedText.append(pendingWhiteSpace);
                }
                // either way the white space is used up now; keeping it around would
                // wrongly emit it in front of plain text that directly follows this token.
                pendingWhiteSpace = null;
                previousTokenEncoded = true;
                decodedText.append(decodedWord);
            }
            else {
                // this is a normal token, so it doesn't matter what the previous token was.
                if (pendingWhiteSpace != null) {
                    decodedText.append(pendingWhiteSpace);
                    pendingWhiteSpace = null;
                }
                previousTokenEncoded = false;
                decodedText.append(token);
            }
        }
    }
    return decodedText.toString();
}

/**
 * Locate the end of the encoded-word candidate that starts at tokenStart.
 * The "=?" charset "?" encoding "?" prefix is stepped over before the "?="
 * terminator is searched, so that quoted-printable text beginning with '='
 * (e.g. "=?utf-8?Q?=E4?=") is not mistaken for the terminator.
 *
 * @param word       The word being scanned.
 * @param tokenStart The index of the "=?" marker within the word.
 *
 * @return The index just past the closing "?=", or -1 if the word does not
 *         contain a complete candidate starting at tokenStart.
 */
private static int findEncodedWordEnd(String word, int tokenStart) {
    int charsetEnd = word.indexOf('?', tokenStart + 2);
    if (charsetEnd == -1) {
        return -1;
    }
    int encodingEnd = word.indexOf('?', charsetEnd + 1);
    if (encodingEnd == -1) {
        return -1;
    }
    int terminator = word.indexOf("?=", encodingEnd + 1);
    return terminator == -1 ? -1 : terminator + 2;
}
/**
 * Parse a string using the RFC 2047 rules for an "encoded-word"
 * type. This encoding has the syntax:
 *
 * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
 *
 * The encoding is "B" (base64) or "Q" (quoted-printable), in either case.
 * Anything following the first "?=" terminator is ignored.
 *
 * @param word   The possibly encoded word value.
 *
 * @return The decoded word. An encoded-word with empty encoded-text
 *         decodes to the empty string.
 * @exception ParseException
 *                   If the word does not have the encoded-word structure.
 * @exception UnsupportedEncodingException
 *                   If the encoding is neither "B" nor "Q", the character set
 *                   is not supported, or the encoded text cannot be decoded.
 */
public static String decodeWord(String word) throws ParseException, UnsupportedEncodingException {
    // encoded words start with the characters "=?". If this not an encoded word, we throw a
    // ParseException for the caller.
    if (!word.startsWith("=?")) {
        throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
    }
    int charsetPos = word.indexOf('?', 2);
    if (charsetPos == -1) {
        throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
    }
    // pull out the character set information (this is the MIME name at this point).
    // Charset names are ASCII; fold case with a fixed locale so the result does not
    // depend on the default locale (e.g. the Turkish dotless i).
    String charset = word.substring(2, charsetPos).toLowerCase(java.util.Locale.ENGLISH);
    // now pull out the encoding token the same way.
    int encodingPos = word.indexOf('?', charsetPos + 1);
    if (encodingPos == -1) {
        throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
    }
    String encoding = word.substring(charsetPos + 1, encodingPos);
    // and finally the encoded text.
    int encodedTextPos = word.indexOf("?=", encodingPos + 1);
    if (encodedTextPos == -1) {
        throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
    }
    String encodedText = word.substring(encodingPos + 1, encodedTextPos);
    // seems a bit silly to encode a null string, but easy to deal with.
    if (encodedText.length() == 0) {
        return "";
    }

    // RFC 2047 defines the encoding names as case-independent, so "b" and "q" are valid too.
    boolean base64 = encoding.equalsIgnoreCase("B");
    if (!base64 && !encoding.equalsIgnoreCase("Q")) {
        throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
    }

    try {
        // the decoder writes directly to an output stream.
        ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
        byte[] encodedData = encodedText.getBytes("US-ASCII");
        if (base64) {
            Base64.decode(encodedData, out);
        }
        else {
            QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
            dataEncoder.decodeWord(encodedData, out);
        }
        // get the decoded byte data and convert into a string.
        return new String(out.toByteArray(), javaCharset(charset));
    } catch (UnsupportedEncodingException e) {
        // UnsupportedEncodingException is itself an IOException; rethrow it untouched so the
        // message naming the unsupported character set is not replaced by the generic one below.
        throw e;
    } catch (IOException e) {
        UnsupportedEncodingException failure = new UnsupportedEncodingException("Invalid RFC 2047 encoding");
        failure.initCause(e);
        throw failure;
    }
}
/**
 * Wrap an encoder around a given output stream.
 *
 * @param out      The output stream to wrap.
 * @param encoding The name of the encoding, compared without regard to case.
 *                 A null value means "no encoding".
 *
 * @return The original stream when no encoding is given or the encoding is a
 *         pass-through ("binary", "7bit", "8bit"); otherwise a stream that
 *         encodes on the fly for the requested encoding type.
 * @exception MessagingException
 *                   If the encoding name is not one of the supported names.
 */
public static OutputStream encode(OutputStream out, String encoding) throws MessagingException {
    // no encoding specified, so assume it goes out unchanged.
    if (encoding == null) {
        return out;
    }
    // Encoding names are ASCII tokens, so fold case with a fixed locale. The default-locale
    // toLowerCase() maps 'I' to a dotless i under e.g. a Turkish locale, which would make
    // names such as "BINARY" or "7BIT" unrecognizable.
    encoding = encoding.toLowerCase(java.util.Locale.ENGLISH);
    // some encodings are just pass-throughs, with no real encoding.
    if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
        return out;
    }
    else if (encoding.equals("base64")) {
        return new Base64EncoderStream(out);
    }
    // UUEncode is known by a couple historical extension names too.
    else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
        return new UUEncoderStream(out);
    }
    else if (encoding.equals("quoted-printable")) {
        return new QuotedPrintableEncoderStream(out);
    }
    else {
        throw new MessagingException("Unknown encoding " + encoding);
    }
}
/**
 * Wrap an encoder around a given output stream.
 *
 * @param out      The output stream to wrap.
 * @param encoding The name of the encoding, compared without regard to case.
 *                 A null value means "no encoding".
 * @param filename The filename of the data being sent (only used for UUEncode).
 *
 * @return The original stream when no encoding is given or the encoding is a
 *         pass-through ("binary", "7bit", "8bit"); otherwise a stream that
 *         encodes on the fly for the requested encoding type.
 * @exception MessagingException
 *                   If the encoding name is not one of the supported names.
 */
public static OutputStream encode(OutputStream out, String encoding, String filename) throws MessagingException {
    // no encoding specified, so assume it goes out unchanged. This mirrors the
    // two-argument overload instead of failing with a NullPointerException.
    if (encoding == null) {
        return out;
    }
    // Encoding names are ASCII tokens, so fold case with a fixed locale. The default-locale
    // toLowerCase() maps 'I' to a dotless i under e.g. a Turkish locale, which would make
    // names such as "BINARY" or "7BIT" unrecognizable.
    encoding = encoding.toLowerCase(java.util.Locale.ENGLISH);
    // some encodings are just pass-throughs, with no real encoding.
    if (encoding.equals("binary") || encoding.equals("7bit") || encoding.equals("8bit")) {
        return out;
    }
    else if (encoding.equals("base64")) {
        return new Base64EncoderStream(out);
    }
    // UUEncode is known by a couple historical extension names too.
    else if (encoding.equals("uuencode") || encoding.equals("x-uuencode") || encoding.equals("x-uue")) {
        return new UUEncoderStream(out, filename);
    }
    else if (encoding.equals("quoted-printable")) {
        return new QuotedPrintableEncoderStream(out);
    }
    else {
        throw new MessagingException("Unknown encoding " + encoding);
    }
}
/**
 * Encode a string of header text using the default character set and
 * an automatically selected transfer encoding.
 *
 * @param word   The text to encode.
 *
 * @return The encoded text, or the original string if it needs no encoding.
 * @exception UnsupportedEncodingException
 */
public static String encodeText(String word) throws UnsupportedEncodingException {
    // no explicit charset or encoding; "false" selects the text (not word) rules.
    return encodeWord(word, null, null, false);
}

/**
 * Encode a string of header text.
 *
 * @param word     The text to encode.
 * @param charset  The MIME character set name, or null for the default.
 * @param encoding The transfer encoding ("B" or "Q"), or null to have one selected.
 *
 * @return The encoded text, or the original string if it needs no encoding.
 * @exception UnsupportedEncodingException
 */
public static String encodeText(String word, String charset, String encoding) throws UnsupportedEncodingException {
    final boolean asWord = false;
    return encodeWord(word, charset, encoding, asWord);
}
/**
 * Encode a header "word" using the default character set and
 * an automatically selected transfer encoding.
 *
 * @param word   The word to encode.
 *
 * @return The encoded word, or the original string if it needs no encoding.
 * @exception UnsupportedEncodingException
 */
public static String encodeWord(String word) throws UnsupportedEncodingException {
    // no explicit charset or encoding; "true" selects the word (not text) rules.
    return encodeWord(word, null, null, true);
}

/**
 * Encode a header "word".
 *
 * @param word     The word to encode.
 * @param charset  The MIME character set name, or null for the default.
 * @param encoding The transfer encoding ("B" or "Q"), or null to have one selected.
 *
 * @return The encoded word, or the original string if it needs no encoding.
 * @exception UnsupportedEncodingException
 */
public static String encodeWord(String word, String charset, String encoding) throws UnsupportedEncodingException {
    final boolean asWord = true;
    return encodeWord(word, charset, encoding, asWord);
}
/**
 * Common implementation behind the public encodeText() and encodeWord() methods.
 *
 * @param word     The string to encode.
 * @param charset  The MIME character set name, or null to use the default MIME charset.
 * @param encoding "B" or "Q" (in either case), or null to use the transfer
 *                 encoding suggested by examining the string.
 * @param encodingWord
 *                 true to apply the quoted-printable "word" specials,
 *                 false to apply the "text" specials.
 *
 * @return The encoded string, or the original string when it is plain 7bit data.
 * @exception UnsupportedEncodingException
 *                   If the encoding name is unknown, the character set is not
 *                   supported, or the encoder fails.
 */
private static String encodeWord(String word, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException {
    // figure out what we need to encode this.
    String encoder = ASCIIUtil.getTextTransferEncoding(word);
    // all ascii? We can return this directly,
    if (encoder.equals("7bit")) {
        return word;
    }
    // if not given a charset, use the default.
    if (charset == null) {
        charset = getDefaultMIMECharset();
    }
    // sort out the encoder. If not explicitly given, use the best guess we've already established.
    if (encoding != null) {
        if (encoding.equalsIgnoreCase("B")) {
            encoder = "base64";
        }
        else if (encoding.equalsIgnoreCase("Q")) {
            encoder = "quoted-printable";
        }
        else {
            throw new UnsupportedEncodingException("Unknown transfer encoding: " + encoding);
        }
    }
    try {
        // we'll format this directly into the string buffer
        StringBuffer result = new StringBuffer();
        // this is the maximum size of a segment of encoded data, which is based off
        // of a 75 character size limit and all of the encoding overhead elements.
        int sizeLimit = 75 - 7 - charset.length();
        boolean foldSegments = SessionUtil.getBooleanProperty(MIME_FOLDENCODEDWORDS, false);
        // now do the appropriate encoding work
        if (encoder.equals("base64")) {
            Base64Encoder dataEncoder = new Base64Encoder();
            // this may recurse on the encoding if the string is too long. The left-most will not
            // get a segment delimiter
            encodeBase64(word, result, sizeLimit, charset, dataEncoder, true, foldSegments);
        }
        else {
            QuotedPrintableEncoder dataEncoder = new QuotedPrintableEncoder();
            encodeQuotedPrintable(word, result, sizeLimit, charset, dataEncoder, true,
                    foldSegments, encodingWord ? QP_WORD_SPECIALS : QP_TEXT_SPECIALS);
        }
        return result.toString();
    } catch (UnsupportedEncodingException e) {
        // UnsupportedEncodingException is itself an IOException. Rethrow it as is, so that the
        // message identifying an unsupported character set is not replaced by the generic one.
        throw e;
    } catch (IOException e) {
        UnsupportedEncodingException failure = new UnsupportedEncodingException("Invalid encoding");
        failure.initCause(e);
        throw failure;
    }
}
/**
 * Encode a string into base64 encoding, taking into
 * account the maximum segment length. A string whose estimated
 * encoded size exceeds the limit is split in two and each half is
 * encoded as its own segment (recursively).
 *
 * @param data      The string data to encode.
 * @param out       The output buffer used for the result.
 * @param sizeLimit The maximum amount of encoded data we're allowed
 *                  to have in a single encoded segment.
 * @param charset   The character set marker that needs to be added to the
 *                  encoding header.
 * @param encoder   The encoder instance we're using.
 * @param firstSegment
 *                  If true, this is the first (left-most) segment in the
 *                  data.  Used to determine if segment delimiters need to
 *                  be added between sections.
 * @param foldSegments
 *                  Indicates the type of delimiter to use (blank or newline sequence).
 * @exception IOException
 *                  If the character set is not supported or the encoder fails.
 */
static private void encodeBase64(String data, StringBuffer out, int sizeLimit, String charset, Base64Encoder encoder, boolean firstSegment, boolean foldSegments) throws IOException
{
    // this needs to be converted into the appropriate transfer encoding.
    byte [] bytes = data.getBytes(javaCharset(charset));
    int estimatedSize = encoder.estimateEncodedLength(bytes);
    int length = data.length();

    // if the estimated encoding size is over our segment limit, split the string in half and
    // recurse.  Eventually we'll reach a point where things are small enough.  A string of a
    // single character cannot be split any further and is encoded as is, which also guarantees
    // that the recursion terminates.
    if (estimatedSize > sizeLimit && length > 1) {
        int split = length / 2;
        char before = data.charAt(split - 1);
        char after = data.charAt(split);
        // never cut a surrogate pair in two: each half would be converted to bytes
        // on its own and the character would be lost.
        if (before >= 0xD800 && before <= 0xDBFF && after >= 0xDC00 && after <= 0xDFFF) {
            split = (split + 1 < length) ? split + 1 : split - 1;
        }
        // split == 0 means the data is one single surrogate pair, which stays together.
        if (split > 0) {
            // the first segment indicator travels with the left half.
            encodeBase64(data.substring(0, split), out, sizeLimit, charset, encoder, firstSegment, foldSegments);
            // the second half can never be the first segment
            encodeBase64(data.substring(split), out, sizeLimit, charset, encoder, false, foldSegments);
            return;
        }
    }

    // if this is not the first segment of the encoding, we need to add either a blank or
    // a folding sequence to the data
    if (!firstSegment) {
        if (foldSegments) {
            // RFC 2047 separates encoded-words by CRLF SPACE; a bare CRLF would end the header.
            out.append("\r\n ");
        }
        else {
            out.append(' ');
        }
    }
    // do the encoding of the segment.
    encoder.encodeWord(bytes, out, charset);
}
/**
 * Encode a string into quoted printable encoding, taking into
 * account the maximum segment length. A string whose estimated
 * encoded size exceeds the limit is split in two and each half is
 * encoded as its own segment (recursively).
 *
 * @param data      The string data to encode.
 * @param out       The output buffer used for the result.
 * @param sizeLimit The maximum amount of encoded data we're allowed
 *                  to have in a single encoded segment.
 * @param charset   The character set marker that needs to be added to the
 *                  encoding header.
 * @param encoder   The encoder instance we're using.
 * @param firstSegment
 *                  If true, this is the first (left-most) segment in the
 *                  data.  Used to determine if segment delimiters need to
 *                  be added between sections.
 * @param foldSegments
 *                  Indicates the type of delimiter to use (blank or newline sequence).
 * @param specials  The set of characters handed to the encoder as requiring encoding.
 * @exception IOException
 *                  If the character set is not supported or the encoder fails.
 */
static private void encodeQuotedPrintable(String data, StringBuffer out, int sizeLimit, String charset, QuotedPrintableEncoder encoder,
    boolean firstSegment, boolean foldSegments, String specials)  throws IOException
{
    // this needs to be converted into the appropriate transfer encoding.
    byte [] bytes = data.getBytes(javaCharset(charset));
    int estimatedSize = encoder.estimateEncodedLength(bytes, specials);
    int length = data.length();

    // if the estimated encoding size is over our segment limit, split the string in half and
    // recurse.  Eventually we'll reach a point where things are small enough.  A string of a
    // single character cannot be split any further and is encoded as is, which also guarantees
    // that the recursion terminates.
    if (estimatedSize > sizeLimit && length > 1) {
        int split = length / 2;
        char before = data.charAt(split - 1);
        char after = data.charAt(split);
        // never cut a surrogate pair in two: each half would be converted to bytes
        // on its own and the character would be lost.
        if (before >= 0xD800 && before <= 0xDBFF && after >= 0xDC00 && after <= 0xDFFF) {
            split = (split + 1 < length) ? split + 1 : split - 1;
        }
        // split == 0 means the data is one single surrogate pair, which stays together.
        if (split > 0) {
            // the first segment indicator travels with the left half.
            encodeQuotedPrintable(data.substring(0, split), out, sizeLimit, charset, encoder, firstSegment, foldSegments, specials);
            // the second half can never be the first segment
            encodeQuotedPrintable(data.substring(split), out, sizeLimit, charset, encoder, false, foldSegments, specials);
            return;
        }
    }

    // if this is not the first segment of the encoding, we need to add either a blank or
    // a folding sequence to the data
    if (!firstSegment) {
        if (foldSegments) {
            // RFC 2047 separates encoded-words by CRLF SPACE; a bare CRLF would end the header.
            out.append("\r\n ");
        }
        else {
            out.append(' ');
        }
    }
    // do the encoding of the segment.
    encoder.encodeWord(bytes, out, charset, specials);
}
/**
 * Examine the content of a data handler and decide what type
 * of transfer encoding should be used.  If the handler has a
 * DataSource, the decision is delegated to getEncoding(DataSource).
 * Otherwise the handler is asked to write its content through a
 * checking stream, and the text or binary verdict of that stream is
 * used depending on whether the content type matches "text/*".
 *
 * @param handler The DataHandler associated with the content.
 *
 * @return The string name of an encoding used to transfer the content.
 *         "base64" is returned whenever the content cannot be examined.
 */
public static String getEncoding(DataHandler handler) {
    // if this handler has an associated data source, we can read directly from the
    // data source to make this judgment.  This is generally MUCH faster than asking the
    // DataHandler to write out the data for us.
    DataSource ds = handler.getDataSource();
    if (ds != null) {
        return getEncoding(ds);
    }
    try {
        // get a parser that allows us to make comparisons.  There is no data source on
        // this path, so the content type has to come from the handler itself.
        ContentType content = new ContentType(handler.getContentType());
        // The only access to the content bytes at this point is by asking the handler to write
        // the information out to a stream.  We're going to pipe this through a special stream
        // that examines the bytes as they go by.
        ContentCheckingOutputStream checker = new ContentCheckingOutputStream();
        handler.writeTo(checker);
        // figure this out based on whether we believe this to be a text type or not.
        if (content.match("text/*")) {
            return checker.getTextTransferEncoding();
        }
        else {
            return checker.getBinaryTransferEncoding();
        }
    } catch (Exception e) {
        // any unexpected I/O exceptions we'll force to a "safe" fallback position.
        return "base64";
    }
}
/**
 * Decide which transfer encoding should be used for the data
 * supplied by a DataSource.  The data is scanned with the text rules
 * when the content type matches "text/*" and with the binary rules
 * otherwise.
 *
 * @param source The DataSource for the transmitted data.
 *
 * @return The string name of the encoding form that should be used for
 *         the data.  "base64" is returned if anything goes wrong while
 *         examining the source.
 */
public static String getEncoding(DataSource source) {
    InputStream content = null;
    String encoding;
    try {
        // parse the declared type first, then open the data for scanning.
        ContentType declaredType = new ContentType(source.getContentType());
        content = source.getInputStream();
        encoding = declaredType.match("text/*")
                ? ASCIIUtil.getTextTransferEncoding(content)
                : ASCIIUtil.getBinaryTransferEncoding(content);
    } catch (Exception e) {
        // no way to judge the data, so treat it as binary that needs base64.
        encoding = "base64";
    } finally {
        // the stream is ours, so it must not be left open.
        if (content != null) {
            try {
                content.close();
            } catch (IOException e) {
                // nothing useful can be done about a failing close.
            }
        }
    }
    return encoding;
}
/**
 * Quote a "word" value.  If the word is empty, or contains a control
 * character, a non-ASCII character, a character requiring escaping or
 * any character from the specified "specials" list, this value is
 * returned as a quoted string.  Otherwise, it is returned unchanged
 * (an "atom").
 *
 * @param word     The word requiring quoting.
 * @param specials The set of special characters that can't appear in an unquoted
 *                 string.
 *
 * @return The quoted value.  This will be unchanged if the word is a valid atom
 *         with respect to the designated special characters.
 */
public static String quote(String word, String specials) {
    int wordLength = word.length();
    // an atom needs at least one character, so an empty value can only be
    // represented as an empty quoted string.
    if (wordLength == 0) {
        return "\"\"";
    }
    // scan the string looking for problem characters
    for (int i = 0; i < wordLength; i++) {
        char ch = word.charAt(i);
        // characters needing an escape, control characters, non-ASCII characters and the
        // designated specials all force quoting; quoteAndEscapeString() takes care of any
        // escaping while it rescans the string.
        if (escapedChars.indexOf(ch) >= 0 || ch < 32 || ch >= 127 || specials.indexOf(ch) >= 0) {
            return quoteAndEscapeString(word);
        }
    }
    return word;
}
/**
 * Wrap a string in double quotes, placing a backslash in front of
 * every character that must be escaped inside a quoted string.
 *
 * @param word   The string to quote.
 *
 * @return The quoted string.
 */
private static String quoteAndEscapeString(String word) {
    char[] chars = word.toCharArray();
    // room for the content, the two quotes and a handful of escape markers.
    StringBuffer quoted = new StringBuffer(chars.length + 10);
    quoted.append('"');
    for (int index = 0; index < chars.length; index++) {
        char current = chars[index];
        boolean needsEscape = escapedChars.indexOf(current) >= 0;
        if (needsEscape) {
            quoted.append('\\');
        }
        quoted.append(current);
    }
    // finish with the closing quote.
    return quoted.append('"').toString();
}
/**
 * Map a MIME character set name to the name Java uses for it.
 * The lookup is done on the lower-cased name; a name without a
 * table entry is returned as given.
 *
 * @param charset The MIME standard name (may be null).
 *
 * @return The Java equivalent for this name, or null if the name was null.
 */
public static String javaCharset(String charset) {
    if (charset != null) {
        // NOTE(review): the key is lower-cased with the default locale, matching how
        // readMappings() builds the table keys.
        Object mapped = mime2java.get(charset.toLowerCase());
        if (mapped != null) {
            return (String) mapped;
        }
    }
    // no name at all, or no mapping: many MIME names are usable by Java directly.
    return charset;
}
/**
 * Map a Java character set name to its MIME equivalent.
 * The lookup is done on the lower-cased name; a name without a
 * table entry is returned as given.
 *
 * @param charset The java character set name (may be null).
 *
 * @return The MIME standard equivalent for this character set name, or
 *         null if the name was null.
 */
public static String mimeCharset(String charset) {
    if (charset != null) {
        // NOTE(review): the key is lower-cased with the default locale, matching how
        // readMappings() builds the table keys.
        Object mapped = java2mime.get(charset.toLowerCase());
        if (mapped != null) {
            return (String) mapped;
        }
    }
    // no name at all, or no mapping: fall back to the name we were handed.
    return charset;
}
/**
 * Get the default character set to use, in Java name format.
 * The mail.mime.charset property wins if it is set (after
 * translation to the Java name); otherwise the file.encoding
 * property is used, with "8859_1" as the last resort.
 *
 * @return The character string value of the default character set.
 */
public static String getDefaultJavaCharset() {
    String configured = SessionUtil.getProperty("mail.mime.charset");
    // the property holds a MIME name, so it needs translating; file.encoding is a Java name already.
    return configured != null
            ? javaCharset(configured)
            : SessionUtil.getProperty("file.encoding", "8859_1");
}
/**
 * Get the default character set to use, in MIME name format.
 * The mail.mime.charset property is returned as is when it is
 * set; otherwise the file.encoding property (falling back to
 * "8859_1") is translated to its MIME name.
 *
 * @return The character string value of the default character set.
 */
static String getDefaultMIMECharset() {
    String configured = SessionUtil.getProperty("mail.mime.charset");
    if (configured == null) {
        // nothing configured: take the Java-defined default and map it back to a MIME name.
        String platformDefault = SessionUtil.getProperty("file.encoding", "8859_1");
        return mimeCharset(platformDefault);
    }
    // an explicitly configured value is already a MIME name.
    return configured;
}
/**
 * Load the default mapping tables used by the javaCharset()
 * and mimeCharset() methods.  By default, these tables are
 * loaded from the /META-INF/javamail.charset.map file: the first
 * table in the file fills the Java-to-MIME map, the second one the
 * MIME-to-Java map.  If a table is still empty afterwards (no file,
 * or an error while reading it), it is filled with a built-in set of
 * default mappings.
 */
static private void loadCharacterSetMappings() {
    java2mime = new HashMap();
    mime2java = new HashMap();

    // normally, these come from a character map file contained in the jar file.
    InputStream map = null;
    try {
        map = javax.mail.internet.MimeUtility.class.getResourceAsStream("/META-INF/javamail.charset.map");
        if (map != null) {
            // Read with a fixed single-byte charset rather than the platform default, so the
            // result does not depend on the platform.  NOTE(review): assumes the map file holds
            // ASCII names only -- confirm against the shipped file.
            BufferedReader reader = new BufferedReader(new InputStreamReader(map, "ISO-8859-1"));
            readMappings(reader, java2mime);
            readMappings(reader, mime2java);
        }
    } catch (Exception e) {
        // best effort only: whatever could not be loaded is covered by the defaults below.
    } finally {
        // the resource stream is ours, so don't leave it open.
        if (map != null) {
            try {
                map.close();
            } catch (IOException e) {
                // nothing useful can be done about a failing close.
            }
        }
    }

    // if any sort of error occurred reading the preferred file version, we could end up with empty
    // mapping tables.  This could cause all sorts of difficulty, so ensure they are populated with at
    // least a reasonable set of defaults.
    if (java2mime.isEmpty()) {
        // ISO-8859-1 through ISO-8859-9, each known to Java under three spellings.
        for (int part = 1; part <= 9; part++) {
            String mimeName = "ISO-8859-" + part;
            java2mime.put("8859_" + part, mimeName);
            java2mime.put("iso8859_" + part, mimeName);
            java2mime.put("iso8859-" + part, mimeName);
        }
        java2mime.put("sjis", "Shift_JIS");
        java2mime.put("jis", "ISO-2022-JP");
        java2mime.put("iso2022jp", "ISO-2022-JP");
        java2mime.put("euc_jp", "euc-jp");
        java2mime.put("koi8_r", "koi8-r");
        java2mime.put("euc_cn", "euc-cn");
        java2mime.put("euc_tw", "euc-tw");
        java2mime.put("euc_kr", "euc-kr");
    }
    if (mime2java.isEmpty()) {
        mime2java.put("iso-2022-cn", "ISO2022CN");
        mime2java.put("iso-2022-kr", "ISO2022KR");
        mime2java.put("utf-8", "UTF8");
        mime2java.put("utf8", "UTF8");
        mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
        mime2java.put("ja_jp.eucjp", "EUCJIS");
        mime2java.put("euc-kr", "KSC5601");
        mime2java.put("euckr", "KSC5601");
        mime2java.put("us-ascii", "ISO-8859-1");
        mime2java.put("x-us-ascii", "ISO-8859-1");
    }
}
/**
 * Read one table of a character map file into a mapping table.
 * Reading stops at the end of the file or at the end-of-table marker,
 * which is a line that both starts and ends with "--".  Blank lines and
 * comment lines (beginning with '#') are skipped, as are lines with
 * fewer than two tokens.  The first token of a line, lower-cased, is
 * the key; the second token is the value.
 *
 * @param reader The source of the file information.
 * @param table  The mapping table used to store the information.
 * @exception IOException
 *                   If reading from the source fails.
 */
static private void readMappings(BufferedReader reader, Map table) throws IOException {
    String rawLine;
    // a null line signals the end of the file.
    while ((rawLine = reader.readLine()) != null) {
        // surrounding blanks must not confuse the checks below.
        String entry = rawLine.trim();
        boolean skippable = entry.length() == 0 || entry.startsWith("#");
        if (skippable) {
            continue;
        }
        // the end-of-table marker finishes this table.
        if (entry.startsWith("--") && entry.endsWith("--")) {
            return;
        }
        // blanks and tabs both separate the two names.
        StringTokenizer fields = new StringTokenizer(entry, " \t");
        if (fields.countTokens() < 2) {
            // an incomplete line is simply ignored.
            continue;
        }
        String from = fields.nextToken().toLowerCase();
        String to = fields.nextToken();
        table.put(from, to);
    }
}
/**
 * Perform RFC 2047 text folding on a string of text.
 * <p>
 * Trailing blanks, tabs and line break characters are removed first.  If
 * the remaining text plus <code>used</code> does not exceed FOLD_THRESHOLD
 * it is returned as is.  Otherwise the text is broken at runs of
 * blanks/tabs: a CRLF is inserted in front of the first whitespace
 * character of the chosen run, and that character starts the next line.
 * An embedded line break that is met while looking for a fold point is
 * prefixed, in place, with a backslash so that {@link #unfold(String)}
 * treats it as escaped and keeps it.
 * <p>
 * When the boolean property named by MIME_FOLDTEXT (default true) is
 * false the input is returned untouched.
 *
 * @param used   The amount of text already "used up" on this line.  This is
 *               typically the length of a message header that this text
 *               is getting added to.
 * @param s      The text to fold.
 *
 * @return The input text, with linebreaks inserted at appropriate fold points.
 */
public static String fold(int used, String s) {
    // if folding is disabled, unfolding is also.  Return the string unchanged.
    if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
        return s;
    }

    // strip off any trailing "whitespace", where whitespace is blanks, tabs,
    // and line break characters.
    int end = s.length() - 1;
    while (end >= 0) {
        char ch = s.charAt(end);
        if (ch != ' ' && ch != '\t' && ch != '\r' && ch != '\n') {
            break;
        }
        end--;
    }
    // did we actually find something to remove?  Shorten the string to the trimmed length.
    if (end != s.length() - 1) {
        s = s.substring(0, end + 1);
    }

    // does the string as it exists now not require folding?  Hand it back right away.
    if (s.length() + used <= FOLD_THRESHOLD) {
        return s;
    }

    // room for the text plus a few inserted line breaks; the buffer grows if more are needed.
    StringBuffer newString = new StringBuffer(s.length() + 8);

    // keep chopping pieces off the front of s until the remainder fits on a line.
    while (used + s.length() > FOLD_THRESHOLD) {
        int length = s.length();
        int breakPoint = -1;      // start of the whitespace run we can fold at
        char breakChar = 0;       // the whitespace character found at breakPoint
        int lineBreak = -1;       // position of an embedded line break, if we ran into one

        int i = 0;
        while (i < length) {
            // past the fold limit with a fold point in hand?  Use it.  Without
            // one we keep going until we hit the first possible fold point.
            if (breakPoint != -1 && used + i > FOLD_THRESHOLD) {
                break;
            }
            char ch = s.charAt(i);
            if (ch == ' ' || ch == '\t') {
                // remember the start of the run and skip over the rest of it.
                breakPoint = i;
                breakChar = ch;
                i++;
                while (i < length && (s.charAt(i) == ' ' || s.charAt(i) == '\t')) {
                    i++;
                }
            }
            else if (ch == '\n' || ch == '\r') {
                // embedded line break; it gets escaped below, at its own position.
                lineBreak = i;
                break;
            }
            else {
                i++;
            }
        }

        if (lineBreak != -1) {
            // copy everything in front of the line break, then the escape, then
            // the line break itself (CR, LF or a CRLF pair) exactly as it appeared.
            int next = lineBreak + 1;
            if (s.charAt(lineBreak) == '\r' && next < length && s.charAt(next) == '\n') {
                next++;
            }
            newString.append(s.substring(0, lineBreak));
            newString.append('\\');
            newString.append(s.substring(lineBreak, next));
            s = s.substring(next);
            // the text after the line break begins a fresh line.
            used = 0;
            continue;
        }

        // no fold point found, we punt, append the remainder and leave.
        if (breakPoint == -1) {
            newString.append(s);
            return newString.toString();
        }

        // fold: the new line starts with the same whitespace character the run started with.
        newString.append(s.substring(0, breakPoint));
        newString.append("\r\n");
        newString.append(breakChar);
        // chop the string
        s = s.substring(breakPoint + 1);
        // start again; the whitespace char already occupies the first column of the new line.
        used = 1;
    }

    // add on the remainder, and return
    newString.append(s);
    return newString.toString();
}
/**
 * Unfold a folded string.  The unfolding process removes any line break
 * (CR, LF or CRLF) that is not escaped and that is followed by a blank;
 * the blanks following such a line break are reduced to a single blank.
 * A backslash directly in front of a line break is treated as an escape:
 * the backslash is dropped and the line break (including both halves of a
 * CRLF pair) is kept.  Backslashes in any other position are copied as is.
 * <p>
 * When the boolean property named by MIME_FOLDTEXT (default true) is
 * false the input is returned untouched.
 *
 * @param s      The folded string.
 *
 * @return A new string with unfolding rules applied.
 */
public static String unfold(String s) {
    // if folding is disabled, unfolding is also.  Return the string unchanged.
    if (!SessionUtil.getBooleanProperty(MIME_FOLDTEXT, true)) {
        return s;
    }
    // if there are no line break characters in the string, we can just return this.
    if (s.indexOf('\n') < 0 && s.indexOf('\r') < 0) {
        return s;
    }

    // we need to scan and fix things up.
    int length = s.length();
    StringBuffer newString = new StringBuffer(length);

    // scan the entire string
    for (int i = 0; i < length; i++) {
        char ch = s.charAt(i);
        // we have a backslash.  In folded strings, escape characters are only processed as such if
        // they precede line breaks.  Otherwise, we leave it be.
        if (ch == '\\') {
            // escape at the very end?  Just add the character.
            if (i == length - 1) {
                newString.append(ch);
            }
            else {
                char nextChar = s.charAt(i + 1);
                // naked newline?  Add the new line to the buffer, and skip the escape char.
                if (nextChar == '\n') {
                    newString.append('\n');
                    i++;
                }
                else if (nextChar == '\r') {
                    // a CR that is not the first half of a CRLF pair?  Add it, removing the escape.
                    if (i == length - 2 || s.charAt(i + 2) != '\n') {
                        newString.append('\r');
                        i++;
                    }
                    else {
                        // toss the escape, add both parts of the CRLF, and skip over two chars.
                        newString.append('\r');
                        newString.append('\n');
                        i += 2;
                    }
                }
                else {
                    // an escape for another purpose, just copy it over.
                    newString.append(ch);
                }
            }
        }
        // we have an unescaped line break
        else if (ch == '\n' || ch == '\r') {
            boolean CRLF = false;
            if (ch == '\r') {
                // step over the LF of a CRLF pair so the pair is handled as one line break.
                if (i < length - 1 && s.charAt(i + 1) == '\n') {
                    i++;
                    // flag the type so we know what we might need to preserve.
                    CRLF = true;
                }
            }
            // position of the first character after the line break.
            int scan = i + 1;
            // does a blank follow this new line?  We need to scrap the new line and reduce the
            // leading blanks down to a single blank.
            // NOTE(review): only ' ' counts here, while fold() can start a continuation line
            // with a tab - confirm whether a line break followed by a tab should unfold too.
            if (scan < length && s.charAt(scan) == ' ') {
                // add the character
                newString.append(' ');
                // scan over the rest of the blanks
                i = scan + 1;
                while (i < length && s.charAt(i) == ' ') {
                    i++;
                }
                // the loop increments i again, so back up to the last blank as the current char.
                i--;
            }
            else {
                // we must keep this line break.  Append the appropriate style.
                if (CRLF) {
                    newString.append("\r\n");
                }
                else {
                    newString.append(ch);
                }
            }
        }
        else {
            // just a normal, ordinary character
            newString.append(ch);
        }
    }
    return newString.toString();
}
}
/**
 * Output stream used to examine the content written out by a DataHandler
 * object.  Nothing that is written is stored; the stream only keeps
 * counters and flags describing the data so that a suitable transfer
 * encoding can be suggested afterwards.
 */
class ContentCheckingOutputStream extends OutputStream {
    // number of non-linebreak characters accepted by ASCIIUtil.isAscii()
    private int asciiChars = 0;
    // number of non-linebreak characters rejected by ASCIIUtil.isAscii()
    private int nonAsciiChars = 0;
    // set once any line grows beyond 998 characters
    private boolean containsLongLines = false;
    // set once a '\n' arrives that was not directly preceded by '\r'
    private boolean containsMalformedEOL = false;
    // the most recently written value, needed for the linebreak check
    private int previousChar = 0;
    // length of the line currently being written
    private int span = 0;

    ContentCheckingOutputStream() {
    }

    /**
     * Examine a complete byte array.
     *
     * @param data   The bytes to examine.
     */
    public void write(byte[] data) throws IOException {
        write(data, 0, data.length);
    }

    /**
     * Examine a section of a byte array, one byte at a time.
     *
     * @param data   The array holding the bytes.
     * @param offset Index of the first byte to examine.
     * @param length Number of bytes to examine.
     */
    public void write(byte[] data, int offset, int length) throws IOException {
        int limit = offset + length;
        for (int index = offset; index < limit; index++) {
            write(data[index]);
        }
    }

    /**
     * Examine a single character and update the statistics.
     *
     * @param ch     The character being written.
     */
    public void write(int ch) {
        boolean lineFeed = ch == '\n';
        if (lineFeed || ch == '\r') {
            // a linefeed is only well formed when it directly follows a carriage return.
            if (lineFeed && previousChar != '\r') {
                containsMalformedEOL = true;
            }
            // either linebreak character starts a new line for the length check.
            span = 0;
        }
        else {
            span++;
            // lines this long cannot be transferred as unencoded text.
            if (span > 998) {
                containsLongLines = true;
            }
            if (ASCIIUtil.isAscii(ch)) {
                asciiChars++;
            }
            else {
                nonAsciiChars++;
            }
        }
        previousChar = ch;
    }

    /**
     * Suggest a transfer encoding for binary content.
     *
     * @return "7bit" when only ASCII characters, no over-long lines and no
     *         malformed linebreaks were seen, otherwise "base64".
     */
    public String getBinaryTransferEncoding() {
        boolean clean = nonAsciiChars == 0 && !containsLongLines && !containsMalformedEOL;
        return clean ? "7bit" : "base64";
    }

    /**
     * Suggest a transfer encoding for text content.
     *
     * @return "7bit" for pure ASCII text without over-long lines, "base64"
     *         when non-ASCII characters outnumber the ASCII ones, and
     *         "quoted-printable" in every other case.
     */
    public String getTextTransferEncoding() {
        if (nonAsciiChars == 0) {
            // only valid chars; Q-P is needed solely to break up over-long lines.
            return containsLongLines ? "quoted-printable" : "7bit";
        }
        // mostly characters requiring encoding?  Base64 is the more compact choice,
        // otherwise Q-P uses fewer bytes than the full Base64.
        return nonAsciiChars > asciiChars ? "base64" : "quoted-printable";
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy