org.apache.geronimo.mail.util.ASCIIUtil Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.geronimo.mail.util;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.io.IOException;
/**
* Set of utility classes for handling common encoding-related
* manipulations.
*/
public class ASCIIUtil {
/**
* Test to see if this string contains only US-ASCII (i.e., 7-bit
* ASCII) charactes.
*
* @param s The test string.
*
* @return true if this is a valid 7-bit ASCII encoding, false if it
* contains any non-US ASCII characters.
*/
static public boolean isAscii(String s) {
for (int i = 0; i < s.length(); i++) {
if (!isAscii(s.charAt(i))) {
return false;
}
}
return true;
}
/**
* Test to see if a given character can be considered "valid" ASCII.
* The excluded characters are the control characters less than
* 32, 8-bit characters greater than 127, EXCEPT the CR, LF and
* tab characters ARE considered value (all less than 32).
*
* @param ch The test character.
*
* @return true if this character meets the "ascii-ness" criteria, false
* otherwise.
*/
static public boolean isAscii(int ch) {
// these are explicitly considered valid.
if (ch == '\r' || ch == '\n' || ch == '\t') {
return true;
}
// anything else outside the range is just plain wrong.
if (ch >= 127 || ch < 32) {
return false;
}
return true;
}
/**
* Examine a stream of text and make a judgement on what encoding
* type should be used for the text. Ideally, we want to use 7bit
* encoding to determine this, but we may need to use either quoted-printable
* or base64. The choice is made on the ratio of 7-bit characters to non-7bit.
*
* @param content An input stream for the content we're examining.
*
* @exception IOException
*/
public static String getTextTransferEncoding(InputStream content) throws IOException {
// for efficiency, we'll read in blocks.
BufferedInputStream in = new BufferedInputStream(content, 4096);
int span = 0; // span of characters without a line break.
boolean containsLongLines = false;
int asciiChars = 0;
int nonAsciiChars = 0;
while (true) {
int ch = in.read();
// if we hit an EOF here, go decide what type we've actually found.
if (ch == -1) {
break;
}
// we found a linebreak. Reset the line length counters on either one. We don't
// really need to validate here.
if (ch == '\n' || ch == '\r') {
// hit a line end, reset our line length counter
span = 0;
}
else {
span++;
// the text has long lines, we can't transfer this as unencoded text.
if (span > 998) {
containsLongLines = true;
}
// non-ascii character, we have to transfer this in binary.
if (!isAscii(ch)) {
nonAsciiChars++;
}
else {
asciiChars++;
}
}
}
// looking good so far, only valid chars here.
if (nonAsciiChars == 0) {
// does this contain long text lines? We need to use a Q-P encoding which will
// be only slightly longer, but handles folding the longer lines.
if (containsLongLines) {
return "quoted-printable";
}
else {
// ideal! Easiest one to handle.
return "7bit";
}
}
else {
// mostly characters requiring encoding? Base64 is our best bet.
if (nonAsciiChars > asciiChars) {
return "base64";
}
else {
// Q-P encoding will use fewer bytes than the full Base64.
return "quoted-printable";
}
}
}
/**
* Examine a stream of text and make a judgement on what encoding
* type should be used for the text. Ideally, we want to use 7bit
* encoding to determine this, but we may need to use either quoted-printable
* or base64. The choice is made on the ratio of 7-bit characters to non-7bit.
*
* @param content A string for the content we're examining.
*/
public static String getTextTransferEncoding(String content) {
int asciiChars = 0;
int nonAsciiChars = 0;
for (int i = 0; i < content.length(); i++) {
int ch = content.charAt(i);
// non-ascii character, we have to transfer this in binary.
if (!isAscii(ch)) {
nonAsciiChars++;
}
else {
asciiChars++;
}
}
// looking good so far, only valid chars here.
if (nonAsciiChars == 0) {
// ideal! Easiest one to handle.
return "7bit";
}
else {
// mostly characters requiring encoding? Base64 is our best bet.
if (nonAsciiChars > asciiChars) {
return "base64";
}
else {
// Q-P encoding will use fewer bytes than the full Base64.
return "quoted-printable";
}
}
}
/**
* Determine if the transfer encoding looks like it might be
* valid ascii text, and thus transferable as 7bit code. In
* order for this to be true, all characters must be valid
* 7-bit ASCII code AND all line breaks must be properly formed
* (JUST '\r\n' sequences). 7-bit transfers also
* typically have a line limit of 1000 bytes (998 + the CRLF), so any
* stretch of charactes longer than that will also force Base64 encoding.
*
* @param content An input stream for the content we're examining.
*
* @exception IOException
*/
public static String getBinaryTransferEncoding(InputStream content) throws IOException {
// for efficiency, we'll read in blocks.
BufferedInputStream in = new BufferedInputStream(content, 4096);
int previousChar = 0;
int span = 0; // span of characters without a line break.
while (true) {
int ch = in.read();
// if we hit an EOF here, we've only found valid text so far, so we can transfer this as
// 7-bit ascii.
if (ch == -1) {
return "7bit";
}
// we found a newline, this is only valid if the previous char was the '\r'
if (ch == '\n') {
// malformed linebreak? force this to base64 encoding.
if (previousChar != '\r') {
return "base64";
}
// hit a line end, reset our line length counter
span = 0;
}
else {
span++;
// the text has long lines, we can't transfer this as unencoded text.
if (span > 998) {
return "base64";
}
// non-ascii character, we have to transfer this in binary.
if (!isAscii(ch)) {
return "base64";
}
}
previousChar = ch;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy