org.simplejavamail.jakarta.mail.internet.MimeUtility Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of outlook-message-parser Show documentation
A Java parser for Outlook messages (.msg files)
The newest version!
/*
 * Copyright (c) 1997, 2020 Oracle and/or its affiliates. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v. 2.0, which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * This Source Code may also be made available under the following Secondary
 * Licenses when the conditions for such availability set forth in the
 * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
 * version 2 with the GNU Classpath Exception, which is available at
 * https://www.gnu.org/software/classpath/license.html.
 *
 * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
 */

package org.simplejavamail.jakarta.mail.internet;

import java.util.*;

import org.simplejavamail.com.sun.mail.util.BEncoderStream;
import org.simplejavamail.com.sun.mail.util.LineInputStream;
import org.simplejavamail.com.sun.mail.util.PropUtil;
import org.simplejavamail.com.sun.mail.util.QEncoderStream;

import java.io.*;
import java.nio.charset.Charset;

/**
 * This is a utility class that provides various MIME related
 * functionality. 
 *
 * There are a set of methods to encode and decode MIME headers as 
 * per RFC 2047.  Note that, in general, these methods are
 * not needed when using methods such as
 * setSubject and setRecipients; Jakarta Mail
 * will automatically encode and decode data when using these "higher
 * level" methods.  The methods below are only needed when maniuplating
 * raw MIME headers using setHeader and getHeader
 * methods.  A brief description on handling such headers is given below: 

 *
 * RFC 822 mail headers must contain only US-ASCII
 * characters. Headers that contain non US-ASCII characters must be
 * encoded so that they contain only US-ASCII characters. Basically,
 * this process involves using either BASE64 or QP to encode certain
 * characters. RFC 2047 describes this in detail. 

 *
 * In Java, Strings contain (16 bit) Unicode characters. ASCII is a
 * subset of Unicode (and occupies the range 0 - 127). A String
 * that contains only ASCII characters is already mail-safe. If the
 * String contains non US-ASCII characters, it must be encoded. An
 * additional complexity in this step is that since Unicode is not
 * yet a widely used charset, one might want to first charset-encode
 * the String into another charset and then do the transfer-encoding.
 * 

 * Note that to get the actual bytes of a mail-safe String (say,
 * for sending over SMTP), one must do 
 * 
 *
 *	byte[] bytes = string.getBytes("iso-8859-1");	
 *
 * 

 * 
 * The setHeader and addHeader methods
 * on MimeMessage and MimeBodyPart assume that the given header values
 * are Unicode strings that contain only US-ASCII characters. Hence
 * the callers of those methods must insure that the values they pass
 * do not contain non US-ASCII characters. The methods in this class 
 * help do this. 

 *
 * The getHeader family of methods on MimeMessage and
 * MimeBodyPart return the raw header value. These might be encoded
 * as per RFC 2047, and if so, must be decoded into Unicode Strings.
 * The methods in this class help to do this. 

 *
 * Several System properties control strict conformance to the MIME
 * spec.  Note that these are not session properties but must be set
 * globally as System properties. 

 *
 * The mail.mime.decodetext.strict property controls
 * decoding of MIME encoded words.  The MIME spec requires that encoded
 * words start at the beginning of a whitespace separated word.  Some
 * mailers incorrectly include encoded words in the middle of a word.
 * If the mail.mime.decodetext.strict System property is
 * set to "false", an attempt will be made to decode these
 * illegal encoded words. The default is true. 

 *
 * The mail.mime.encodeeol.strict property controls the
 * choice of Content-Transfer-Encoding for MIME parts that are not of
 * type "text".  Often such parts will contain textual data for which
 * an encoding that allows normal end of line conventions is appropriate.
 * In rare cases, such a part will appear to contain entirely textual
 * data, but will require an encoding that preserves CR and LF characters
 * without change.  If the mail.mime.encodeeol.strict
 * System property is set to "true", such an encoding will
 * be used when necessary.  The default is false. 

 *
 * In addition, the mail.mime.charset System property can
 * be used to specify the default MIME charset to use for encoded words
 * and text parts that don't otherwise specify a charset.  Normally, the
 * default MIME charset is derived from the default Java charset, as
 * specified in the file.encoding System property.  Most
 * applications will have no need to explicitly set the default MIME
 * charset.  In cases where the default MIME charset to be used for
 * mail messages is different than the charset used for files stored on
 * the system, this property should be set. 

 *
 * The current implementation also supports the following System property.
 * 

 * The mail.mime.ignoreunknownencoding property controls
 * whether unknown values in the Content-Transfer-Encoding
 * header, as passed to the decode method, cause an exception.
 * If set to "true", unknown values are ignored and 8bit
 * encoding is assumed.  Otherwise, unknown values cause a MessagingException
 * to be thrown.
 *
 * @author  John Mani
 * @author  Bill Shannon
 */

public class MimeUtility {

    // This class cannot be instantiated
    private MimeUtility() { }

    public static final int ALL = -1;

    // cached map of whether a charset is compatible with ASCII
    // Map
    private static final Map nonAsciiCharsetMap
	    = new HashMap<>();

    private static final boolean decodeStrict =
	PropUtil.getBooleanSystemProperty("mail.mime.decodetext.strict", true);
    private static final boolean encodeEolStrict =
	PropUtil.getBooleanSystemProperty("mail.mime.encodeeol.strict", false);
    private static final boolean ignoreUnknownEncoding =
	PropUtil.getBooleanSystemProperty(
	    "mail.mime.ignoreunknownencoding", false);
    private static final boolean allowUtf8 =
	PropUtil.getBooleanSystemProperty("mail.mime.allowutf8", false);
    /*
     * The following two properties allow disabling the fold()
     * and unfold() methods and reverting to the previous behavior.
     * They should never need to be changed and are here only because
     * of my paranoid concern with compatibility.
     */
    private static final boolean foldEncodedWords =
	PropUtil.getBooleanSystemProperty("mail.mime.foldencodedwords", false);
    private static final boolean foldText =
	PropUtil.getBooleanSystemProperty("mail.mime.foldtext", true);


    /**
     * Encode a RFC 822 "word" token into mail-safe form as per
     * RFC 2047. 

     *
     * The given Unicode string is examined for non US-ASCII
     * characters. If the string contains only US-ASCII characters,
     * it is returned as-is.  If the string contains non US-ASCII
     * characters, it is first character-encoded using the platform's
     * default charset, then transfer-encoded using either the B or 
     * Q encoding. The resulting bytes are then returned as a Unicode 
     * string containing only ASCII  characters. 

     * 
     * This method is meant to be used when creating RFC 822 "phrases".
     * The InternetAddress class, for example, uses this to encode
     * it's 'phrase' component.
     *
     * @param	word	Unicode string
     * @return	Array of Unicode strings containing only US-ASCII 
     *		characters.
     * @exception UnsupportedEncodingException if the encoding fails
     */
    public static String encodeWord(String word) 
			throws UnsupportedEncodingException {
	return encodeWord(word, null, null);
    }

    /**
     * Encode a RFC 822 "word" token into mail-safe form as per
     * RFC 2047. 

     *
     * The given Unicode string is examined for non US-ASCII
     * characters. If the string contains only US-ASCII characters,
     * it is returned as-is.  If the string contains non US-ASCII
     * characters, it is first character-encoded using the specified
     * charset, then transfer-encoded using either the B or Q encoding.
     * The resulting bytes are then returned as a Unicode string 
     * containing only ASCII characters. 

     * 
     * @param	word	Unicode string
     * @param	charset	the MIME charset
     * @param	encoding the encoding to be used. Currently supported
     *		values are "B" and "Q". If this parameter is null, then
     *		the "Q" encoding is used if most of characters to be
     *		encoded are in the ASCII charset, otherwise "B" encoding
     *		is used.
     * @return	Unicode string containing only US-ASCII characters
     * @exception UnsupportedEncodingException if the encoding fails
     */
    public static String encodeWord(String word, String charset, 
				    String encoding)
    			throws UnsupportedEncodingException {
	return encodeWord(word, charset, encoding, true);
    }

    /*
     * Encode the given string. The parameter 'encodingWord' should
     * be true if a RFC 822 "word" token is being encoded and false if a
     * RFC 822 "text" token is being encoded. This is because the 
     * "Q" encoding defined in RFC 2047 has more restrictions when
     * encoding "word" tokens. (Sigh)
     */ 
    private static String encodeWord(String string, String charset,
				     String encoding, boolean encodingWord)
			throws UnsupportedEncodingException {

	// If 'string' contains only US-ASCII characters, just
	// return it.
	int ascii = checkAscii(string);
	if (ascii == ALL_ASCII)
	    return string;

	// Else, apply the specified charset conversion.
	String jcharset;
	if (charset == null) { // use default charset
	    jcharset = getDefaultJavaCharset(); // the java charset
	    charset = getDefaultMIMECharset(); // the MIME equivalent
	} else // MIME charset -> java charset
	    jcharset = javaCharset(charset);

	// If no transfer-encoding is specified, figure one out.
	if (encoding == null) {
	    if (ascii != MOSTLY_NONASCII)
		encoding = "Q";
	    else
		encoding = "B";
	}

	boolean b64;
	if (encoding.equalsIgnoreCase("B")) 
	    b64 = true;
	else if (encoding.equalsIgnoreCase("Q"))
	    b64 = false;
	else
	    throw new UnsupportedEncodingException(
			"Unknown transfer encoding: " + encoding);

	StringBuilder outb = new StringBuilder(); // the output buffer
	doEncode(string, b64, jcharset, 
		 // As per RFC 2047, size of an encoded string should not
		 // exceed 75 bytes.
		 // 7 = size of "=?", '?', 'B'/'Q', '?', "?="
		 75 - 7 - charset.length(), // the available space
		 "=?" + charset + "?" + encoding + "?", // prefix
		 true, encodingWord, outb);

	return outb.toString();
    }

    private static void doEncode(String string, boolean b64, 
		String jcharset, int avail, String prefix, 
		boolean first, boolean encodingWord, StringBuilder buf)
			throws UnsupportedEncodingException {

	// First find out what the length of the encoded version of
	// 'string' would be.
	byte[] bytes = string.getBytes(jcharset);
	int len;
	if (b64) // "B" encoding
	    len = BEncoderStream.encodedLength(bytes);
	else // "Q"
	    len = QEncoderStream.encodedLength(bytes, encodingWord);
	
	int size;
	if ((len > avail) && ((size = string.length()) > 1)) { 
	    // If the length is greater than 'avail', split 'string'
	    // into two and recurse.
	    // Have to make sure not to split a Unicode surrogate pair.
	    int split = size / 2;
	    if (Character.isHighSurrogate(string.charAt(split-1)))
		split--;
	    if (split > 0)
		doEncode(string.substring(0, split), b64, jcharset, 
			 avail, prefix, first, encodingWord, buf);
	    doEncode(string.substring(split, size), b64, jcharset,
		     avail, prefix, false, encodingWord, buf);
	} else {
	    // length <= than 'avail'. Encode the given string
	    ByteArrayOutputStream os = new ByteArrayOutputStream();
	    OutputStream eos; // the encoder
	    if (b64) // "B" encoding
		eos = new BEncoderStream(os);
	    else // "Q" encoding
		eos = new QEncoderStream(os, encodingWord);
	    
	    try { // do the encoding
		eos.write(bytes);
		eos.close();
	    } catch (IOException ioex) { }

	    byte[] encodedBytes = os.toByteArray(); // the encoded stuff
	    // Now write out the encoded (all ASCII) bytes into our
	    // StringBuilder
	    if (!first) // not the first line of this sequence
		if (foldEncodedWords)
		    buf.append("\r\n "); // start a continuation line
		else
		    buf.append(" "); // line will be folded later

	    buf.append(prefix);
	    for (int i = 0; i < encodedBytes.length; i++)
		buf.append((char)encodedBytes[i]);
	    buf.append("?="); // terminate the current sequence
	}
    }

    /**
	 * A utility method to quote a word, if the word contains any
	 * characters from the specified 'specials' list.

	 *
	 * The HeaderTokenizer class defines two special
	 * sets of delimiters - MIME and RFC 822. 

	 *
	 * This method is typically used during the generation of 
	 * RFC 822 and MIME header fields.
	 *
	 * @param	word	word to be quoted
	 * @param	specials the set of special characters
	 * @return		the possibly quoted word
	 * @see	org.simplejavamail.jakarta.mail.internet.HeaderTokenizer#MIME
	 * @see	org.simplejavamail.jakarta.mail.internet.HeaderTokenizer#RFC822
	 */
	public static String quote(String word, String specials) {
	int len = word == null ? 0 : word.length();
	if (len == 0)
	    return "\"\"";	// an empty string is handled specially
	
	/*
	 * Look for any "bad" characters, Escape and
	 *  quote the entire string if necessary.
	 */
	boolean needQuoting = false;
	for (int i = 0; i < len; i++) {
	    char c = word.charAt(i);
	    if (c == '"' || c == '\\' || c == '\r' || c == '\n') {
		// need to escape them and then quote the whole string
		StringBuilder sb = new StringBuilder(len + 3);
		sb.append('"');
		sb.append(word.substring(0, i));
		int lastc = 0;
		for (int j = i; j < len; j++) {
		    char cc = word.charAt(j);
		    if ((cc == '"') || (cc == '\\') || 
			(cc == '\r') || (cc == '\n'))
			if (cc == '\n' && lastc == '\r')
			    ;	// do nothing, CR was already escaped
			else
			    sb.append('\\');	// Escape the character
		    sb.append(cc);
		    lastc = cc;
		}
		sb.append('"');
		return sb.toString();
	    } else if (c < 040 || (c >= 0177 && !allowUtf8) ||
		    specials.indexOf(c) >= 0)
		// These characters cause the string to be quoted
		needQuoting = true;
	}
	
	if (needQuoting) {
	    StringBuilder sb = new StringBuilder(len + 2);
	    sb.append('"').append(word).append('"');
	    return sb.toString();
	} else 
	    return word;
	}

	/**
	 * Fold a string at linear whitespace so that each line is no longer
	 * than 76 characters, if possible.  If there are more than 76
	 * non-whitespace characters consecutively, the string is folded at
	 * the first whitespace after that sequence.  The parameter
	 * used indicates how many characters have been used in
	 * the current line; it is usually the length of the header name. 

	 *
	 * Note that line breaks in the string aren't escaped; they probably
	 * should be.
	 *
	 * @param	used	characters used in line so far
	 * @param	s	the string to fold
	 * @return		the folded string
	 * @since		JavaMail 1.4
	 */
	public static String fold(int used, String s) {
	if (!foldText)
	    return s;
	
	int end;
	char c;
	// Strip trailing spaces and newlines
	for (end = s.length() - 1; end >= 0; end--) {
	    c = s.charAt(end);
	    if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
		break;
	}
	if (end != s.length() - 1)
	    s = s.substring(0, end + 1);
	
	// if the string fits now, just return it
	if (used + s.length() <= 76)
	    return makesafe(s);
	
	// have to actually fold the string
	StringBuilder sb = new StringBuilder(s.length() + 4);
	char lastc = 0;
	while (used + s.length() > 76) {
	    int lastspace = -1;
	    for (int i = 0; i < s.length(); i++) {
		if (lastspace != -1 && used + i > 76)
		    break;
		c = s.charAt(i);
		if (c == ' ' || c == '\t')
		    if (!(lastc == ' ' || lastc == '\t'))
			lastspace = i;
		lastc = c;
	    }
	    if (lastspace == -1) {
		// no space, use the whole thing
		sb.append(s);
		s = "";
		used = 0;
		break;
	    }
	    sb.append(s.substring(0, lastspace));
	    sb.append("\r\n");
	    lastc = s.charAt(lastspace);
	    sb.append(lastc);
	    s = s.substring(lastspace + 1);
	    used = 1;
	}
	sb.append(s);
	return makesafe(sb);
	}

	/**
	 * If the String or StringBuilder has any embedded newlines,
	 * make sure they're followed by whitespace, to prevent header
	 * injection errors.
	 */
	private static String makesafe(CharSequence s) {
	int i;
	for (i = 0; i < s.length(); i++) {
	    char c = s.charAt(i);
	    if (c == '\r' || c == '\n')
		break;
	}
	if (i == s.length())	// went through whole string with no CR or LF
	    return s.toString();
	
	// read the lines in the string and reassemble them,
	// eliminating blank lines and inserting whitespace as necessary
	StringBuilder sb = new StringBuilder(s.length() + 1);
	BufferedReader r = new BufferedReader(new StringReader(s.toString()));
	String line;
	try {
	    while ((line = r.readLine()) != null) {
		if (line.trim().length() == 0)
		    continue;	// ignore empty lines
		if (sb.length() > 0) {
		    sb.append("\r\n");
		    assert line.length() > 0; // proven above
		    char c = line.charAt(0);
		    if (c != ' ' && c != '\t')
			sb.append(' ');
		}
		sb.append(line);
	    }
	} catch (IOException ex) {
	    // XXX - should never happen when reading from a string
	    return s.toString();
	}
	return sb.toString();
	}

	/**
     * Unfold a folded header.  Any line breaks that aren't escaped and
     * are followed by whitespace are removed.
     *
     * @param	s	the string to unfold
     * @return		the unfolded string
     * @since		JavaMail 1.4
     */
    public static String unfold(String s) {
	if (!foldText)
	    return s;

	StringBuilder sb = null;
	int i;
	while ((i = indexOfAny(s, "\r\n")) >= 0) {
	    int start = i;
	    int slen = s.length();
	    i++;		// skip CR or NL
	    if (i < slen && s.charAt(i - 1) == '\r' && s.charAt(i) == '\n')
		i++;	// skip LF
	    if (start > 0 && s.charAt(start - 1) == '\\') {
		// there's a backslash before the line break
		// strip it out, but leave in the line break
		if (sb == null)
		    sb = new StringBuilder(s.length());
		sb.append(s.substring(0, start - 1));
		sb.append(s.substring(start, i));
		s = s.substring(i);
	    } else {
		char c;
		// if next line starts with whitespace,
		// or at the end of the string, remove the line break
		// XXX - next line should always start with whitespace
		if (i >= slen || (c = s.charAt(i)) == ' ' || c == '\t') {
		    if (sb == null)
			sb = new StringBuilder(s.length());
		    sb.append(s.substring(0, start));
		    s = s.substring(i);
		} else {
		    // it's not a continuation line, just leave in the newline
		    if (sb == null)
			sb = new StringBuilder(s.length());
		    sb.append(s.substring(0, i));
		    s = s.substring(i);
		}
	    }
	}
	if (sb != null) {
	    sb.append(s);
	    return sb.toString();
	} else
	    return s;
    }

    /**
     * Return the first index of any of the characters in "any" in "s",
     * or -1 if none are found.
     *
     * This should be a method on String.
     */
    private static int indexOfAny(String s, String any) {
	return indexOfAny(s, any, 0);
    }

    private static int indexOfAny(String s, String any, int start) {
	try {
	    int len = s.length();
	    for (int i = start; i < len; i++) {
		if (any.indexOf(s.charAt(i)) >= 0)
		    return i;
	    }
	    return -1;
	} catch (StringIndexOutOfBoundsException e) {
	    return -1;
	}
    }

    /**
     * Convert a MIME charset name into a valid Java charset name. 

     *
     * @param charset	the MIME charset name
     * @return  the Java charset equivalent. If a suitable mapping is
     *		not available, the passed in charset is itself returned.
     */
    public static String javaCharset(String charset) {
	if (mime2java == null || charset == null)
	    // no mapping table, or charset parameter is null
	    return charset;

	String alias = mime2java.get(charset.toLowerCase(Locale.ENGLISH));
	if (alias != null) {
	    // verify that the mapped name is valid before trying to use it
	    try {
		Charset.forName(alias);
	    } catch (Exception ex) {
		alias = null;	// charset alias not valid, use original name
	    }
	}
	return alias == null ? charset : alias;
    }

    /**
     * Convert a java charset into its MIME charset name. 

     *
     * Note that a future version of JDK (post 1.2) might provide
     * this functionality, in which case, we may deprecate this
     * method then.
     *
     * @param   charset    the JDK charset
     * @return      	the MIME/IANA equivalent. If a mapping
     *			is not possible, the passed in charset itself
     *			is returned.
     * @since		JavaMail 1.1
     */
    public static String mimeCharset(String charset) {
	if (java2mime == null || charset == null) 
	    // no mapping table or charset param is null
	    return charset;

	String alias = java2mime.get(charset.toLowerCase(Locale.ENGLISH));
	return alias == null ? charset : alias;
    }

    private static String defaultJavaCharset;
    private static String defaultMIMECharset;

    /**
     * Get the default charset corresponding to the system's current 
     * default locale.  If the System property mail.mime.charset
     * is set, a system charset corresponding to this MIME charset will be
     * returned. 
     * 
     * @return	the default charset of the system's default locale, 
     * 		as a Java charset. (NOT a MIME charset)
     * @since	JavaMail 1.1
     */
    public static String getDefaultJavaCharset() {
	if (defaultJavaCharset == null) {
	    /*
	     * If mail.mime.charset is set, it controls the default
	     * Java charset as well.
	     */
	    String mimecs = null;
	    try {
		mimecs = System.getProperty("mail.mime.charset");
	    } catch (SecurityException ex) { }	// ignore it
	    if (mimecs != null && mimecs.length() > 0) {
		defaultJavaCharset = javaCharset(mimecs);
		return defaultJavaCharset;
	    }

	    try {
		defaultJavaCharset = System.getProperty("file.encoding", 
							"8859_1");
	    } catch (SecurityException sex) {
		
		class NullInputStream extends InputStream {
		    @Override
		    public int read() {
			return 0;
		    }
		}
		InputStreamReader reader = 
			new InputStreamReader(new NullInputStream());
		defaultJavaCharset = reader.getEncoding();
		if (defaultJavaCharset == null)
		    defaultJavaCharset = "8859_1";
	    }
	}

	return defaultJavaCharset;
    }

    /*
     * Get the default MIME charset for this locale.
     */
    static String getDefaultMIMECharset() {
	if (defaultMIMECharset == null) {
	    try {
		defaultMIMECharset = System.getProperty("mail.mime.charset");
	    } catch (SecurityException ex) { }	// ignore it
	}
	if (defaultMIMECharset == null)
	    defaultMIMECharset = mimeCharset(getDefaultJavaCharset());
	return defaultMIMECharset;
    }

    // Tables to map MIME charset names to Java names and vice versa.
    // XXX - Should eventually use J2SE 1.4 java.nio.charset.Charset
    private static Map mime2java;
    private static Map java2mime;

    static {
	java2mime = new HashMap<>(40);
	mime2java = new HashMap<>(14);

	try {
	    // Use this class's classloader to load the mapping file
	    // XXX - we should use SecuritySupport, but it's in another package
	    InputStream is = 
		    org.simplejavamail.jakarta.mail.internet.MimeUtility.class.getResourceAsStream(
		    "/META-INF/javamail.charset.map");

	    if (is != null) {
		try {
		    is = new LineInputStream(is);

		    // Load the JDK-to-MIME charset mapping table
		    loadMappings((LineInputStream)is, java2mime);

		    // Load the MIME-to-JDK charset mapping table
		    loadMappings((LineInputStream)is, mime2java);
		} finally {
		    try {
			is.close();
		    } catch (Exception cex) {
			// ignore
		    }
		}
	    }
	} catch (Exception ex) { }

	// If we didn't load the tables, e.g., because we didn't have
	// permission, load them manually.  The entries here should be
	// the same as the default javamail.charset.map.
	if (java2mime.isEmpty()) {
	    java2mime.put("8859_1", "ISO-8859-1");
	    java2mime.put("iso8859_1", "ISO-8859-1");
	    java2mime.put("iso8859-1", "ISO-8859-1");

	    java2mime.put("8859_2", "ISO-8859-2");
	    java2mime.put("iso8859_2", "ISO-8859-2");
	    java2mime.put("iso8859-2", "ISO-8859-2");

	    java2mime.put("8859_3", "ISO-8859-3");
	    java2mime.put("iso8859_3", "ISO-8859-3");
	    java2mime.put("iso8859-3", "ISO-8859-3");

	    java2mime.put("8859_4", "ISO-8859-4");
	    java2mime.put("iso8859_4", "ISO-8859-4");
	    java2mime.put("iso8859-4", "ISO-8859-4");

	    java2mime.put("8859_5", "ISO-8859-5");
	    java2mime.put("iso8859_5", "ISO-8859-5");
	    java2mime.put("iso8859-5", "ISO-8859-5");

	    java2mime.put("8859_6", "ISO-8859-6");
	    java2mime.put("iso8859_6", "ISO-8859-6");
	    java2mime.put("iso8859-6", "ISO-8859-6");

	    java2mime.put("8859_7", "ISO-8859-7");
	    java2mime.put("iso8859_7", "ISO-8859-7");
	    java2mime.put("iso8859-7", "ISO-8859-7");

	    java2mime.put("8859_8", "ISO-8859-8");
	    java2mime.put("iso8859_8", "ISO-8859-8");
	    java2mime.put("iso8859-8", "ISO-8859-8");

	    java2mime.put("8859_9", "ISO-8859-9");
	    java2mime.put("iso8859_9", "ISO-8859-9");
	    java2mime.put("iso8859-9", "ISO-8859-9");

	    java2mime.put("sjis", "Shift_JIS");
	    java2mime.put("jis", "ISO-2022-JP");
	    java2mime.put("iso2022jp", "ISO-2022-JP");
	    java2mime.put("euc_jp", "euc-jp");
	    java2mime.put("koi8_r", "koi8-r");
	    java2mime.put("euc_cn", "euc-cn");
	    java2mime.put("euc_tw", "euc-tw");
	    java2mime.put("euc_kr", "euc-kr");
	}
	if (mime2java.isEmpty()) {
	    mime2java.put("iso-2022-cn", "ISO2022CN");
	    mime2java.put("iso-2022-kr", "ISO2022KR");
	    mime2java.put("utf-8", "UTF8");
	    mime2java.put("utf8", "UTF8");
	    mime2java.put("ja_jp.iso2022-7", "ISO2022JP");
	    mime2java.put("ja_jp.eucjp", "EUCJIS");
	    mime2java.put("euc-kr", "KSC5601");
	    mime2java.put("euckr", "KSC5601");
	    mime2java.put("us-ascii", "ISO-8859-1");
	    mime2java.put("x-us-ascii", "ISO-8859-1");
	    mime2java.put("gb2312", "GB18030");
	    mime2java.put("cp936", "GB18030");
	    mime2java.put("ms936", "GB18030");
	    mime2java.put("gbk", "GB18030");
	}
    }

    private static void loadMappings(LineInputStream is,
	    Map table) {
	String currLine;

	while (true) {
	    try {
		currLine = is.readLine();
	    } catch (IOException ioex) {
		break; // error in reading, stop
	    }

	    if (currLine == null) // end of file, stop
		break;
	    if (currLine.startsWith("--") && currLine.endsWith("--"))
		// end of this table
		break;	

	    // ignore empty lines and comments
	    if (currLine.trim().length() == 0 || currLine.startsWith("#"))
		continue;
	    
	    // A valid entry is of the form 
	    // where,  := SPACE | HT. Parse this
	    StringTokenizer tk = new StringTokenizer(currLine, " \t");
	    try {
		String key = tk.nextToken();
		String value = tk.nextToken();
		table.put(key.toLowerCase(Locale.ENGLISH), value);
	    } catch (NoSuchElementException nex) { }
	}
    }

    static final int ALL_ASCII 		= 1;
    static final int MOSTLY_ASCII 	= 2;
    static final int MOSTLY_NONASCII 	= 3;

    /** 
     * Check if the given string contains non US-ASCII characters.
     * @param	s	string
     * @return		ALL_ASCII if all characters in the string 
     *			belong to the US-ASCII charset. MOSTLY_ASCII
     *			if more than half of the available characters
     *			are US-ASCII characters. Else MOSTLY_NONASCII.
     */
    static int checkAscii(String s) {
	int ascii = 0, non_ascii = 0;
	int l = s.length();

	for (int i = 0; i < l; i++) {
	    if (nonascii((int)s.charAt(i))) // non-ascii
		non_ascii++;
	    else
		ascii++;
	}

	if (non_ascii == 0)
	    return ALL_ASCII;
	if (ascii > non_ascii)
	    return MOSTLY_ASCII;

	return MOSTLY_NONASCII;
    }

    /** 
     * Check if the given byte array contains non US-ASCII characters.
     * @param	b	byte array
     * @return		ALL_ASCII if all characters in the string 
     *			belong to the US-ASCII charset. MOSTLY_ASCII
     *			if more than half of the available characters
     *			are US-ASCII characters. Else MOSTLY_NONASCII.
     *
     * XXX - this method is no longer used
     */
    static int checkAscii(byte[] b) {
	int ascii = 0, non_ascii = 0;

	for (int i=0; i < b.length; i++) {
	    // The '&' operator automatically causes b[i] to be promoted
	    // to an int, and we mask out the higher bytes in the int 
	    // so that the resulting value is not a negative integer.
	    if (nonascii(b[i] & 0xff)) // non-ascii
		non_ascii++;
	    else
		ascii++;
	}
	
	if (non_ascii == 0)
	    return ALL_ASCII;
	if (ascii > non_ascii)
	    return MOSTLY_ASCII;
	
	return MOSTLY_NONASCII;
    }

    /** 
     * Check if the given input stream contains non US-ASCII characters.
     * Upto max bytes are checked. If max is
     * set to ALL, then all the bytes available in this
     * input stream are checked. If breakOnNonAscii is true
     * the check terminates when the first non-US-ASCII character is
     * found and MOSTLY_NONASCII is returned. Else, the check continues
     * till max bytes or till the end of stream.
     *
     * @param	is	the input stream
     * @param	max	maximum bytes to check for. The special value
     *			ALL indicates that all the bytes in this input
     *			stream must be checked.
     * @param	breakOnNonAscii if true, then terminate the
     *			the check when the first non-US-ASCII character
     *			is found.
     * @return		ALL_ASCII if all characters in the string 
     *			belong to the US-ASCII charset. MOSTLY_ASCII
     *			if more than half of the available characters
     *			are US-ASCII characters. Else MOSTLY_NONASCII.
     */
    static int checkAscii(InputStream is, int max, boolean breakOnNonAscii) {
	int ascii = 0, non_ascii = 0;
	int len;
	int block = 4096;
	int linelen = 0;
	boolean longLine = false, badEOL = false;
	boolean checkEOL = encodeEolStrict && breakOnNonAscii;
	byte buf[] = null;
	if (max != 0) {
	    block = (max == ALL) ? 4096 : Math.min(max, 4096);
	    buf = new byte[block]; 
	}
	while (max != 0) {
	    try {
		if ((len = is.read(buf, 0, block)) == -1)
		    break;
		int lastb = 0;
		for (int i = 0; i < len; i++) {
	    	    // The '&' operator automatically causes b[i] to 
		    // be promoted to an int, and we mask out the higher
		    // bytes in the int so that the resulting value is 
		    // not a negative integer.
		    int b = buf[i] & 0xff;
		    if (checkEOL &&
			    ((lastb == '\r' && b != '\n') ||
			    (lastb != '\r' && b == '\n')))
			badEOL = true;
		    if (b == '\r' || b == '\n')
			linelen = 0;
		    else {
			linelen++;
			if (linelen > 998)	// 1000 - CRLF
			    longLine = true;
		    }
		    if (nonascii(b)) {	// non-ascii
		        if (breakOnNonAscii) // we are done
			    return MOSTLY_NONASCII;
		        else
			    non_ascii++;
		    } else
		        ascii++;
		    lastb = b;
		}
	    } catch (IOException ioex) {
		break;
	    }
	    if (max != ALL)
		max -= len;
	}

	if (max == 0 && breakOnNonAscii)
	    // We have been told to break on the first non-ascii character.
	    // We haven't got any non-ascii character yet, but then we
	    // have not checked all of the available bytes either. So we
	    // cannot say for sure that this input stream is ALL_ASCII,
	    // and hence we must play safe and return MOSTLY_NONASCII

	    return MOSTLY_NONASCII;

	if (non_ascii == 0) { // no non-us-ascii characters so far
	    // If we're looking at non-text data, and we saw CR without LF
	    // or vice versa, consider this mostly non-ASCII so that it
	    // will be base64 encoded (since the quoted-printable encoder
	    // doesn't encode this case properly).
	    if (badEOL)
		return MOSTLY_NONASCII;
	    // if we've seen a long line, we degrade to mostly ascii
	    else if (longLine)
		return MOSTLY_ASCII;
	    else
		return ALL_ASCII;
	}
	if (ascii > non_ascii) // mostly ascii
	    return MOSTLY_ASCII;
	return MOSTLY_NONASCII;
    }

    static final boolean nonascii(int b) {
	return b >= 0177 || (b < 040 && b != '\r' && b != '\n' && b != '\t');
    }
}

/**
 * An OutputStream that determines whether the data written to
 * it is all ASCII, mostly ASCII, or mostly non-ASCII.
 */
class AsciiOutputStream extends OutputStream {
    private boolean breakOnNonAscii;
    private int ascii = 0, non_ascii = 0;
    private int linelen = 0;
    private boolean longLine = false;
    private boolean badEOL = false;
    private boolean checkEOL = false;
    private int lastb = 0;
    private int ret = 0;

    public AsciiOutputStream(boolean breakOnNonAscii, boolean encodeEolStrict) {
	this.breakOnNonAscii = breakOnNonAscii;
	checkEOL = encodeEolStrict && breakOnNonAscii;
    }

    @Override
    public void write(int b) throws IOException {
	check(b);
    }

    @Override
    public void write(byte b[]) throws IOException {
	write(b, 0, b.length);
    }

    @Override
    public void write(byte b[], int off, int len) throws IOException {
	len += off;
	for (int i = off; i < len ; i++)
	    check(b[i]);
    }

    private final void check(int b) throws IOException {
	b &= 0xff;
	if (checkEOL &&
		((lastb == '\r' && b != '\n') || (lastb != '\r' && b == '\n')))
	    badEOL = true;
	if (b == '\r' || b == '\n')
	    linelen = 0;
	else {
	    linelen++;
	    if (linelen > 998)	// 1000 - CRLF
		longLine = true;
	}
	if (MimeUtility.nonascii(b)) { // non-ascii
	    non_ascii++;
	    if (breakOnNonAscii) {	// we are done
		ret = MimeUtility.MOSTLY_NONASCII;
		throw new EOFException();
	    }
	} else
	    ascii++;
	lastb = b;
    }

    /**
     * Return ASCII-ness of data stream.
     */
    public int getAscii() {
	if (ret != 0)
	    return ret;
	// If we're looking at non-text data, and we saw CR without LF
	// or vice versa, consider this mostly non-ASCII so that it
	// will be base64 encoded (since the quoted-printable encoder
	// doesn't encode this case properly).
	if (badEOL)
	    return MimeUtility.MOSTLY_NONASCII;
	else if (non_ascii == 0) { // no non-us-ascii characters so far
	    // if we've seen a long line, we degrade to mostly ascii
	    if (longLine)
		return MimeUtility.MOSTLY_ASCII;
	    else
		return MimeUtility.ALL_ASCII;
	}
	if (ascii > non_ascii) // mostly ascii
	    return MimeUtility.MOSTLY_ASCII;
	return MimeUtility.MOSTLY_NONASCII;
    }
}