com.vaadin.external.apache.commons.fileupload2.util.mime.MimeUtility Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of flow-commons-upload Show documentation
Show all versions of flow-commons-upload Show documentation
Flow Commons Fileupload 2 fork
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.vaadin.external.apache.commons.fileupload2.util.mime;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
/**
 * Utility class to decode MIME texts.
 *
 * <p>
 * The single public entry point, {@link #decodeText(String)}, scans a header
 * value for RFC 2047 "encoded-word" tokens of the form
 * {@code =?charset?encoding?encoded-text?=} and replaces each one it can parse
 * with its decoded form. Everything else is copied through unchanged.
 * </p>
 *
 * @since 1.3
 */
public final class MimeUtility {

    /**
     * The marker to indicate text is encoded with BASE64 algorithm.
     */
    private static final String BASE64_ENCODING_MARKER = "B";

    /**
     * The marker to indicate text is encoded with QuotedPrintable algorithm.
     */
    private static final String QUOTEDPRINTABLE_ENCODING_MARKER = "Q";

    /**
     * If the text contains any encoded tokens, those tokens will be marked
     * with "=?".
     */
    private static final String ENCODED_TOKEN_MARKER = "=?";

    /**
     * If the text contains any encoded tokens, those tokens will terminate
     * with "?=".
     */
    private static final String ENCODED_TOKEN_FINISHER = "?=";

    /**
     * The linear whitespace chars sequence.
     */
    private static final String LINEAR_WHITESPACE = " \t\r\n";

    /**
     * Mappings between MIME and Java charset. Keys are lower-case MIME
     * names; the map is filled once by the static initializer below and is
     * only read afterwards.
     */
    private static final Map<String, String> MIME2JAVA = new HashMap<>();

    static {
        MIME2JAVA.put("iso-2022-cn", "ISO2022CN");
        MIME2JAVA.put("iso-2022-kr", "ISO2022KR");
        MIME2JAVA.put("utf-8", "UTF8");
        MIME2JAVA.put("utf8", "UTF8");
        MIME2JAVA.put("ja_jp.iso2022-7", "ISO2022JP");
        MIME2JAVA.put("ja_jp.eucjp", "EUCJIS");
        MIME2JAVA.put("euc-kr", "KSC5601");
        MIME2JAVA.put("euckr", "KSC5601");
        MIME2JAVA.put("us-ascii", "ISO-8859-1");
        MIME2JAVA.put("x-us-ascii", "ISO-8859-1");
    }

    /**
     * Hidden constructor, this class must not be instantiated.
     */
    private MimeUtility() {
        // do nothing
    }

    /**
     * Decode a string of text obtained from a mail header into its proper
     * form. The text generally will consist of a string of
     * whitespace-separated tokens, some of which may be RFC 2047
     * encoded-words (Base64 or Quoted-Printable).
     *
     * <p>
     * Whitespace handling: whitespace that separates two successfully decoded
     * encoded-words is dropped; whitespace next to a plain token is copied
     * through; a trailing whitespace run at the end of the text is discarded.
     * A token that starts with "=?" but cannot be parsed as an encoded-word
     * is copied through verbatim.
     * </p>
     *
     * @param text
     *            The text to decode; must not be {@code null}.
     *
     * @return The decoded text string. If {@code text} contains no "=?"
     *         sequence it is returned unchanged.
     * @throws UnsupportedEncodingException
     *             if the detected encoding in the input text is not
     *             supported.
     */
    public static String decodeText(final String text)
            throws UnsupportedEncodingException {
        // if the text contains any encoded tokens, those tokens will be
        // marked with "=?". If the source string doesn't contain that
        // sequence, no decoding is required.
        if (!text.contains(ENCODED_TOKEN_MARKER)) {
            return text;
        }

        int offset = 0;
        final int endOffset = text.length();

        // bounds [startWhiteSpace, endWhiteSpace) of the most recent
        // whitespace run that has not been emitted yet; -1 means "none".
        int startWhiteSpace = -1;
        int endWhiteSpace = -1;

        final StringBuilder decodedText = new StringBuilder(text.length());
        boolean previousTokenEncoded = false;

        while (offset < endOffset) {
            char ch = text.charAt(offset);

            // is this a whitespace character?
            if (LINEAR_WHITESPACE.indexOf(ch) != -1) { // whitespace found
                startWhiteSpace = offset;
                while (offset < endOffset) {
                    // step over the white space characters.
                    ch = text.charAt(offset);
                    if (LINEAR_WHITESPACE.indexOf(ch) == -1) {
                        // record the location of the first non lwsp and
                        // drop down to process the token characters.
                        endWhiteSpace = offset;
                        break;
                    }
                    offset++;
                }
                // NB: if the run reaches the end of the text, the outer loop
                // terminates and the trailing whitespace is never appended.
            } else {
                // we have a word token. We need to scan over the word and
                // then try to parse it.
                final int wordStart = offset;
                while (offset < endOffset) {
                    // step over the non white space characters.
                    ch = text.charAt(offset);
                    if (LINEAR_WHITESPACE.indexOf(ch) != -1) {
                        break;
                    }
                    offset++;
                }

                // pull out the word token.
                final String word = text.substring(wordStart, offset);

                // is the token encoded? decode the word
                if (word.startsWith(ENCODED_TOKEN_MARKER)) {
                    try {
                        // if this gives a parsing failure, treat it like a
                        // non-encoded word.
                        final String decodedWord = decodeWord(word);

                        // whitespace is only significant when the previous
                        // token was plain text; between two encoded-words it
                        // is dropped.
                        if (!previousTokenEncoded && startWhiteSpace != -1) {
                            decodedText.append(text, startWhiteSpace,
                                    endWhiteSpace);
                            startWhiteSpace = -1;
                        }

                        // this is definitely a decoded token.
                        previousTokenEncoded = true;
                        // and add this to the text.
                        decodedText.append(decodedWord);
                        // we continue parsing from here...parsing errors
                        // fall through and get handled as normal text.
                        continue;
                    } catch (final ParseException ignored) {
                        // deliberately ignored: a malformed encoded-word is
                        // emitted verbatim by the plain-token path below.
                    }
                }

                // this is a normal token, so it doesn't matter what the
                // previous token was. Add the white space if we have it.
                if (startWhiteSpace != -1) {
                    decodedText.append(text, startWhiteSpace, endWhiteSpace);
                    startWhiteSpace = -1;
                }
                // this is not a decoded token.
                previousTokenEncoded = false;
                decodedText.append(word);
            }
        }

        return decodedText.toString();
    }

    /**
     * Parse a string using the RFC 2047 rules for an "encoded-word" type.
     * This encoding has the syntax:
     *
     * <pre>
     * encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
     * </pre>
     *
     * <p>
     * The encoding marker ("B" or "Q") is matched case-insensitively, as
     * RFC 2047 section 2 declares encoding names case-independent. Any
     * characters following the first "?=" terminator are not examined.
     * </p>
     *
     * @param word
     *            The possibly encoded word value.
     *
     * @return The decoded word; the empty string if the encoded-text part is
     *         empty.
     * @throws ParseException
     *             if the word does not follow the encoded-word syntax
     *             (missing "=?" prefix, charset, encoding or "?="
     *             terminator).
     * @throws UnsupportedEncodingException
     *             if the encoding marker is neither "B" nor "Q", if the
     *             encoded text cannot be decoded, or if the charset is not
     *             supported by the Java runtime.
     */
    private static String decodeWord(final String word)
            throws ParseException, UnsupportedEncodingException {
        // encoded words start with the characters "=?". If this not an
        // encoded word, we throw a ParseException for the caller.
        if (!word.startsWith(ENCODED_TOKEN_MARKER)) {
            throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
        }

        final int charsetPos = word.indexOf('?', 2);
        if (charsetPos == -1) {
            throw new ParseException(
                    "Missing charset in RFC 2047 encoded-word: " + word);
        }

        // pull out the character set information (this is the MIME name at
        // this point).
        final String charset = word.substring(2, charsetPos)
                .toLowerCase(Locale.ENGLISH);

        // now pull out the encoding token the same way.
        final int encodingPos = word.indexOf('?', charsetPos + 1);
        if (encodingPos == -1) {
            throw new ParseException(
                    "Missing encoding in RFC 2047 encoded-word: " + word);
        }

        final String encoding = word.substring(charsetPos + 1, encodingPos);

        // and finally the encoded text.
        final int encodedTextPos = word.indexOf(ENCODED_TOKEN_FINISHER,
                encodingPos + 1);
        if (encodedTextPos == -1) {
            throw new ParseException(
                    "Missing encoded text in RFC 2047 encoded-word: " + word);
        }

        final String encodedText = word.substring(encodingPos + 1,
                encodedTextPos);

        // seems a bit silly to encode a null string, but easy to deal with.
        if (encodedText.isEmpty()) {
            return "";
        }

        // Validate the encoding marker before decoding, and outside the
        // try/catch below, so this specific message reaches the caller
        // instead of being replaced by the generic decoder-failure message.
        final boolean base64 = BASE64_ENCODING_MARKER
                .equalsIgnoreCase(encoding);
        if (!base64
                && !QUOTEDPRINTABLE_ENCODING_MARKER.equalsIgnoreCase(encoding)) {
            throw new UnsupportedEncodingException(
                    "Unknown RFC 2047 encoding: " + encoding);
        }

        // the decoder writes directly to an output stream.
        final ByteArrayOutputStream out = new ByteArrayOutputStream(
                encodedText.length());
        final byte[] encodedData = encodedText
                .getBytes(StandardCharsets.US_ASCII);
        try {
            if (base64) {
                Base64Decoder.decode(encodedData, out);
            } else {
                QuotedPrintableDecoder.decode(encodedData, out);
            }
        } catch (final IOException e) {
            // UnsupportedEncodingException has no cause-taking constructor,
            // so attach the decoder failure explicitly.
            final UnsupportedEncodingException failure = new UnsupportedEncodingException(
                    "Invalid RFC 2047 encoding");
            failure.initCause(e);
            throw failure;
        }

        // get the decoded byte data and convert into a string. An unknown
        // charset surfaces here as an UnsupportedEncodingException raised by
        // the String constructor.
        final byte[] decodedData = out.toByteArray();
        return new String(decodedData, javaCharset(charset));
    }

    /**
     * Translate a MIME standard character set name into the Java equivalent.
     *
     * @param charset
     *            The MIME standard name.
     *
     * @return The Java equivalent for this name; the name itself if no
     *         mapping is registered; {@code null} if {@code charset} is
     *         {@code null}.
     */
    private static String javaCharset(final String charset) {
        // nothing in, nothing out.
        if (charset == null) {
            return null;
        }

        final String mappedCharset = MIME2JAVA
                .get(charset.toLowerCase(Locale.ENGLISH));

        // if there is no mapping, then the original name is used. Many of
        // the MIME character set names map directly back into Java. The
        // reverse isn't necessarily true.
        if (mappedCharset == null) {
            return charset;
        }
        return mappedCharset;
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy