// org.synchronoss.cloud.nio.multipart.NioMultipartParser (Maven / Gradle / Ivy)
/*
* Copyright (C) 2015 Synchronoss Technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.synchronoss.cloud.nio.multipart;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.synchronoss.cloud.nio.multipart.io.FixedSizeByteArrayOutputStream;
import org.synchronoss.cloud.nio.multipart.io.buffer.EndOfLineBuffer;
import org.synchronoss.cloud.nio.multipart.util.HeadersParser;
import org.synchronoss.cloud.nio.stream.storage.Disposable;
import org.synchronoss.cloud.nio.stream.storage.StreamStorage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.synchronoss.cloud.nio.multipart.MultipartUtils.*;
/**
* The main class for parsing a multipart stream in an NIO mode. A new instance can be created and the
* data can be written invoking the {@link #write(byte[], int, int)}, {@link #write(byte[])} or {@link #write(int)} methods.
* As data is written, the parser is identifying the various parts and notifying the client via the {@link NioMultipartParserListener} listener.
* <p>
* For each part the {@link org.synchronoss.cloud.nio.multipart.NioMultipartParser} will ask the
* {@link PartBodyStreamStorageFactory} for a {@code StreamStorage} where the bytes will be written.
* Once the parser has finished writing a part body, it flushes and closes that {@code StreamStorage}.
* <p>
* The class extends {@code OutputStream} and it can be seen as a 'splitter' where the main stream (the multipart body) is saved into different streams (one for each part).
* Each individual stream can be read back by the client when it's notified about the part completion.
* For more information about the events raised by the parser see {@link NioMultipartParserListener}.
*
* @author Silvano Riz.
*/
public class NioMultipartParser extends OutputStream implements Disposable {
// Class-wide SLF4J logger; its debug level also gates the recording of FSM transitions in goToState.
private static final Logger log = LoggerFactory.getLogger(NioMultipartParser.class);
/**
* The default buffer size: 16 KiB (16384 bytes).
* The main constructor adds the length of the delimiter prefix to the requested size, so the
* working buffer is always bigger than the separator (usually no more than 70 characters).
*/
public static final int DEFAULT_BUFFER_SIZE = 16384;
/**
* The default limit, in bytes, of the headers section of a part (16384).
*/
public static final int DEFAULT_HEADERS_SECTION_SIZE = 16384;
/**
* The default value of the nested multipart limit.
*/
public static final int DEFAULT_MAX_LEVEL_OF_NESTED_MULTIPART = 1;
/**
 * Classifies a delimiter by the two bytes that follow the boundary.
 * For example, if the boundary is "XVZ", the sequence
 * DASH,DASH,X,V,Z,CR,LF is an encapsulation boundary, while the
 * sequence DASH,DASH,X,V,Z,DASH,DASH is the close boundary.
 * The two suffix bytes are collected one at a time and inspected once both are present.
 */
private static class DelimiterType {

    enum Type {CLOSE, ENCAPSULATION, UNKNOWN}

    // The two bytes seen right after the boundary, in arrival order.
    final byte[] delimiterSuffix = new byte[2];

    // Number of suffix bytes collected so far (0..2).
    int index = 0;

    /**
     * Stores the next suffix byte.
     *
     * @param delimiterByte the byte following the boundary
     * @throws IllegalStateException if both suffix slots are already filled
     */
    void addDelimiterByte(byte delimiterByte) {
        if (index < delimiterSuffix.length) {
            delimiterSuffix[index] = delimiterByte;
            index++;
            return;
        }
        throw new IllegalStateException("Cannot write the delimiter byte.");
    }

    /**
     * @return ENCAPSULATION for CR,LF; CLOSE for DASH,DASH; UNKNOWN for anything else or
     *         while fewer than two bytes have been collected.
     */
    Type getDelimiterType() {
        if (index != 2) {
            return Type.UNKNOWN;
        }
        final byte first = delimiterSuffix[0];
        final byte second = delimiterSuffix[1];
        if (first == CR && second == LF) {
            return Type.ENCAPSULATION;
        }
        if (first == DASH && second == DASH) {
            return Type.CLOSE;
        }
        return Type.UNKNOWN;
    }

    /** Forgets the collected bytes so the instance can classify the next delimiter. */
    void reset() {
        index = 0;
    }
}
/**
 * Per-write cursor shared by the FSM state handlers.
 * It wraps the slice of data handed to a write call and offers
 * - a byte-by-byte reader over that slice
 * - a flag telling the FSM loop whether it should keep going.
 */
private static class WriteContext {

    private int currentIndex;
    private int indexEnd;
    private byte[] data;
    private boolean finished;

    /**
     * Re-arms the context for a new write.
     *
     * @param currentIndex index of the first byte to read
     * @param indexEnd     index one past the last byte to read
     * @param data         the backing array
     * @param finished     initial value of the finished flag
     */
    void init(final int currentIndex, final int indexEnd, final byte[] data, final boolean finished) {
        this.data = data;
        this.currentIndex = currentIndex;
        this.indexEnd = indexEnd;
        this.finished = finished;
    }

    /**
     * @return the next byte as an unsigned value (0..255), or -1 once the slice is exhausted
     */
    int read() {
        if (currentIndex < indexEnd) {
            final int unsigned = data[currentIndex] & 0xff;
            currentIndex++;
            return unsigned;
        }
        return -1;
    }

    /** Forces the FSM loop to run another iteration. */
    void setNotFinished() {
        finished = false;
    }

    /** Marks the write as finished exactly when every byte of the slice has been read. */
    void setFinishedIfNoMoreData() {
        finished = !(currentIndex < indexEnd);
    }

    /** Stops the FSM loop regardless of any unread data. */
    void setFinished() {
        finished = true;
    }
}
// FSM States. Each state is served by the handler method of the matching name, dispatched from write(byte[], int, int).
private enum State {
// Feed bytes to the end-of-line buffer until it reports the preamble delimiter prefix.
SKIP_PREAMBLE,
// Read the two bytes after the preamble delimiter to tell an encapsulation boundary from a close boundary.
IDENTIFY_PREAMBLE_DELIMITER,
// Reset the headers buffer and recycle the end-of-line buffer for the headers section.
GET_READY_FOR_HEADERS,
// Accumulate header bytes until the header delimiter is reported, then parse them.
READ_HEADERS,
// Ask the factory for the part's StreamStorage and recycle the end-of-line buffer for the body.
GET_READY_FOR_BODY,
// Feed body bytes to the end-of-line buffer until it reports the delimiter prefix.
READ_BODY,
// Read the two bytes after a body delimiter to classify it.
IDENTIFY_BODY_DELIMITER,
// Flush and close the part storage, then notify onPartFinished.
PART_COMPLETE,
// Push the nested delimiter and notify onNestedPartStarted.
GET_READY_FOR_NESTED_MULTIPART,
// Pop the nested delimiter and notify onNestedPartFinished.
NESTED_PART_READ,
// Notify onAllPartsFinished.
ALL_PARTS_READ,
// Ignore whatever data is written after the last part.
SKIP_EPILOGUE,
// Any write that reaches the FSM loop in this state throws IllegalStateException.
ERROR
}
/*
* The multipart context. Content-Type, Content-Length and Char Cncoding
*/
final MultipartContext multipartContext;
/*
* Listener to notify
*/
final NioMultipartParserListener nioMultipartParserListener;
/*
* Factory that will be used to get an OutputStream where to store a multipart body and retrieve its related
* OutputStream
*/
final PartBodyStreamStorageFactory partBodyStreamStorageFactory;
/*
* A reusable buffer to identify when a preamble, part section or headers section is finished.
*/
final EndOfLineBuffer endOfLineBuffer;
/**
* A reusable in memory output stream to process the headers
*/
final ByteArrayOutputStream headersByteArrayOutputStream;
/**
* Controls how many nested multipart request can be processed.
*/
final int maxLevelOfNestedMultipart;
/*
* Allows to identify the delimiter type
*/
final DelimiterType delimiterType = new DelimiterType();
/*
* Stack of delimiters. Using a stack to support nested multipart requests.
*/
final Stack delimiterPrefixes = new Stack();
/*
* If debug mode is enabled it keeps track of the FSM transitions
*/
final List fsmTransitions = new ArrayList();
/*
* A reusable write context passed between the states during the data processing.
* The context will be re-set at each write
*/
final WriteContext wCtx = new WriteContext();
/*
* Current state of the ASF
*/
volatile State currentState = State.SKIP_PREAMBLE;
/*
* Current output stream where to flush the body data.
* It will be instantiated for each part via {@link BodyStreamFactory#getOutputStream(Map, int)} )}
*/
volatile StreamStorage partBodyStreamStorage = null;
/*
* The current headers.
*/
volatile Map> headers = null;
/*
* Keeps track of how many parts we encountered
*/
volatile int partIndex = 1;
/**
* Close/open status of the output stram
*/
volatile AtomicBoolean closed = new AtomicBoolean(false);
// ------------
// Constructors
// ------------
/**
* Constructs a {@code NioMultipartParser} using the default buffer size, headers section size and nested multipart limit.
* No {@link PartBodyStreamStorageFactory} is supplied, so the main constructor falls back to a
* {@link DefaultPartBodyStreamStorageFactory}.
*
* @param multipartContext The multipart context
* @param nioMultipartParserListener The listener that will be notified
*/
public NioMultipartParser(final MultipartContext multipartContext, final NioMultipartParserListener nioMultipartParserListener) {
// The null factory makes the main constructor pick the default implementation.
this(multipartContext, nioMultipartParserListener, null, DEFAULT_BUFFER_SIZE, DEFAULT_HEADERS_SECTION_SIZE, DEFAULT_MAX_LEVEL_OF_NESTED_MULTIPART);
}
/**
* <p>
* Constructs a {@code NioMultipartParser} with default values for the buffer size, headers section size and nested multipart limit, but a
* custom implementation of {@code PartBodyStreamStorageFactory}.
*
* @param multipartContext The multipart context
* @param nioMultipartParserListener The listener that will be notified
* @param partBodyStreamStorageFactory The custom {@code PartBodyStreamStorageFactory}. When {@code null}, the main
*                                     constructor uses a {@link DefaultPartBodyStreamStorageFactory}.
*/
public NioMultipartParser(final MultipartContext multipartContext, final NioMultipartParserListener nioMultipartParserListener, final PartBodyStreamStorageFactory partBodyStreamStorageFactory) {
this(multipartContext, nioMultipartParserListener, partBodyStreamStorageFactory, DEFAULT_BUFFER_SIZE, DEFAULT_HEADERS_SECTION_SIZE, DEFAULT_MAX_LEVEL_OF_NESTED_MULTIPART);
}
/**
* <p>
* Constructs a {@code NioMultipartParser} with default values for the headers section size, the nested multipart limit
* and the {@link PartBodyStreamStorageFactory}. Only the size of the buffer is customised.
*
* @param multipartContext The multipart context
* @param nioMultipartParserListener The listener that will be notified
* @param bufferSize The buffer size, a strictly positive integer.
*                   The actual buffer size used by the main constructor is the length of the delimiter prefix
*                   (CR, LF, DASH, DASH followed by the boundary, i.e. boundary length + 4) plus bufferSize.
*/
public NioMultipartParser(final MultipartContext multipartContext, final NioMultipartParserListener nioMultipartParserListener, final int bufferSize) {
// The null factory makes the main constructor pick the default implementation.
this(multipartContext, nioMultipartParserListener, null, bufferSize, DEFAULT_HEADERS_SECTION_SIZE, DEFAULT_MAX_LEVEL_OF_NESTED_MULTIPART);
}
/**
 * <p>
 * Constructs a fully configured {@code NioMultipartParser}.
 *
 * @param multipartContext The multipart context; its content type supplies the boundary
 * @param nioMultipartParserListener The listener that will be notified
 * @param partBodyStreamStorageFactory The custom {@code PartBodyStreamStorageFactory} to use, or {@code null}
 *                                     to use a {@link DefaultPartBodyStreamStorageFactory}.
 * @param bufferSize The buffer size, a strictly positive integer.
 *                   The actual buffer size used is the length of the delimiter prefix
 *                   (CR, LF, DASH, DASH followed by the boundary, i.e. boundary length + 4) plus bufferSize.
 * @param maxHeadersSectionSize The max size of the headers section; -1 selects an unbounded in-memory buffer
 * @param maxLevelOfNestedMultipart the max number of nested multipart
 * @throws IllegalArgumentException if bufferSize is not strictly positive
 * @throws IllegalStateException if the content type carries no usable boundary
 */
public NioMultipartParser(final MultipartContext multipartContext,
                          final NioMultipartParserListener nioMultipartParserListener,
                          final PartBodyStreamStorageFactory partBodyStreamStorageFactory,
                          final int bufferSize,
                          final int maxHeadersSectionSize,
                          final int maxLevelOfNestedMultipart) {

    if (bufferSize <= 0) {
        throw new IllegalArgumentException("The buffer size must be grater than 0. Size specified: " + bufferSize);
    }

    this.multipartContext = multipartContext;
    this.nioMultipartParserListener = nioMultipartParserListener;

    // The root delimiter sits at the bottom of the stack; nested multiparts push on top of it.
    final byte[] rootDelimiterPrefix = getDelimiterPrefix(multipartContext.getContentType());
    this.delimiterPrefixes.push(rootDelimiterPrefix);
    this.maxLevelOfNestedMultipart = maxLevelOfNestedMultipart;

    // -1 means "no limit on the headers section".
    this.headersByteArrayOutputStream = (maxHeadersSectionSize == -1)
            ? new ByteArrayOutputStream()
            : new FixedSizeByteArrayOutputStream(maxHeadersSectionSize);

    this.partBodyStreamStorageFactory = (partBodyStreamStorageFactory != null)
            ? partBodyStreamStorageFactory
            : new DefaultPartBodyStreamStorageFactory();

    // The parser starts by skipping the preamble, so the buffer looks for the delimiter without its leading CR,LF.
    final int actualBufferSize = rootDelimiterPrefix.length + bufferSize;
    this.endOfLineBuffer = new EndOfLineBuffer(actualBufferSize, getPreambleDelimiterPrefix(rootDelimiterPrefix), null);
}
/**
 * Closes the parser. Only the first call has an effect: it closes the storage of the
 * part currently being written, if there is one.
 *
 * @throws IOException if closing the current part storage fails
 */
@Override
public void close() throws IOException {
    final boolean firstClose = closed.compareAndSet(false, true);
    if (!firstClose) {
        return;
    }
    if (partBodyStreamStorage != null) {
        partBodyStreamStorage.close();
    }
}
/**
 * Closes the parser and disposes the storage of the current part, if any.
 * A failure while closing does not prevent the disposal: it is logged and the
 * storage is disposed anyway.
 *
 * @return the outcome of disposing the current part storage, or {@code true} when there is none
 */
@Override
public boolean dispose() {
    try {
        close();
    } catch (IOException e) {
        // Best effort: disposal must still go ahead, but the failure should not vanish silently.
        log.warn("Unable to close the parser while disposing it", e);
    }
    final StreamStorage storage = partBodyStreamStorage;
    if (storage != null) {
        return storage.dispose();
    }
    return true;
}
/**
 * Flushes the storage of the part currently being written; a no-op when no part is in progress.
 *
 * @throws IOException if the underlying storage fails to flush
 */
@Override
public void flush() throws IOException {
    if (partBodyStreamStorage == null) {
        return;
    }
    partBodyStreamStorage.flush();
}
/**
 * Writes a single byte (the low-order eight bits of {@code data}) to the parser.
 */
@Override
public void write(final int data) throws IOException {
    final byte[] singleByte = new byte[1];
    singleByte[0] = (byte) data;
    write(singleByte, 0, 1);
}
/**
 * Writes the whole array to the parser.
 */
@Override
public void write(byte[] data) throws IOException {
    final int endIndex = data.length;
    write(data, 0, endIndex);
}
/**
 * Feeds a slice of the multipart body to the parser and runs the FSM until the slice is consumed
 * (or a state handler asks to stop).
 * <p>
 * NOTE: unlike the {@code OutputStream} contract, the third argument is treated as an
 * exclusive END INDEX into {@code data}, not as a length. The bytes processed are
 * {@code data[indexStart] .. data[indexEnd - 1]}.
 * <p>
 * Invalid arguments (other than an empty array, which is ignored) move the parser to the ERROR state.
 *
 * @param data       the bytes to parse; an empty array is a no-op
 * @param indexStart index of the first byte to process (inclusive, non negative)
 * @param indexEnd   index one past the last byte to process
 * @throws IllegalStateException    if the parser is closed or in the ERROR state
 * @throws IllegalArgumentException if data is null or the indexes do not describe a valid slice
 */
@Override
public void write(byte[] data, int indexStart, int indexEnd) {
    if (closed.get()) {
        throw new IllegalStateException("Cannot write, the parser is closed.");
    }
    if (data == null) {
        goToState(State.ERROR);
        throw new IllegalArgumentException("Data cannot be null");
    }
    if (data.length == 0) {
        return;
    }
    if (indexStart < 0) {
        // Without this check a negative start surfaces later as an ArrayIndexOutOfBoundsException
        // from the write context, with the parser left in a non-error state.
        goToState(State.ERROR);
        throw new IllegalArgumentException("The start index cannot be negative. Start index: " + indexStart);
    }
    if (indexEnd < indexStart) {
        goToState(State.ERROR);
        throw new IllegalArgumentException("End index cannot be lower that the start index. End index: " + indexEnd + ", Start index: " + indexStart);
    }
    if (indexStart > data.length) {
        goToState(State.ERROR);
        throw new IllegalArgumentException("The start index cannot be greater than the size of the data. Start index: " + indexStart + ", Data length: " + data.length);
    }
    if (indexEnd > data.length) {
        goToState(State.ERROR);
        throw new IllegalArgumentException("The end index cannot be greater than the size of the data. End index: " + indexEnd + ", Data length: " + data.length);
    }

    wCtx.init(indexStart, indexEnd, data, false);

    // Each handler consumes what it needs from the context, may switch state and decides
    // (through the finished flag) whether the loop runs again.
    while (!wCtx.finished) {
        switch (currentState) {
            case SKIP_PREAMBLE:
                skipPreamble(wCtx);
                break;
            case IDENTIFY_PREAMBLE_DELIMITER:
                identifyPreambleDelimiter(wCtx);
                break;
            case GET_READY_FOR_HEADERS:
                getReadyForHeaders(wCtx);
                break;
            case READ_HEADERS:
                readHeaders(wCtx);
                break;
            case GET_READY_FOR_BODY:
                getReadyForBody(wCtx);
                break;
            case READ_BODY:
                readBody(wCtx);
                break;
            case IDENTIFY_BODY_DELIMITER:
                identifyBodyDelimiter(wCtx);
                break;
            case PART_COMPLETE:
                partComplete(wCtx);
                break;
            case GET_READY_FOR_NESTED_MULTIPART:
                getReadyForNestedMultipart(wCtx);
                break;
            case NESTED_PART_READ:
                nestedPartRead(wCtx);
                break;
            case ALL_PARTS_READ:
                allPartsRead(wCtx);
                break;
            case SKIP_EPILOGUE:
                skipEpilogue(wCtx);
                break;
            case ERROR:
                throw new IllegalStateException("Parser is in an error state.");
            default:
                // This should never happen...
                throw new IllegalStateException("Unknown state");
        }
    }
}
// Switches the FSM to the given state. When debug logging is enabled the transition
// (previous state --> next state) is also appended to fsmTransitions.
void goToState(final State nextState) {
    if (log.isDebugEnabled()) {
        final String transition = String.format("%-30s --> %s", currentState.name(), nextState.name());
        fsmTransitions.add(transition);
    }
    currentState = nextState;
}
// SKIP_PREAMBLE handler: pushes bytes into the end-of-line buffer until its write call
// returns true (treated here as "delimiter reached"), then moves on to classify the delimiter.
void skipPreamble(final WriteContext wCtx) {
    for (int next = wCtx.read(); next != -1; next = wCtx.read()) {
        final boolean delimiterReached = endOfLineBuffer.write((byte) next);
        if (delimiterReached) {
            goToState(State.IDENTIFY_PREAMBLE_DELIMITER);
            break;
        }
    }
    // Whether we stopped on the delimiter or ran out of data, the loop continues only if bytes are left.
    wCtx.setFinishedIfNoMoreData();
}
void getReadyForHeaders(final WriteContext wCtx) {
headersByteArrayOutputStream.reset();
endOfLineBuffer.recycle(HEADER_DELIMITER, headersByteArrayOutputStream);
headers = new HashMap>();
goToState(State.READ_HEADERS);
wCtx.setFinishedIfNoMoreData();
}
// READ_HEADERS handler: accumulates the headers section and, once the end-of-line buffer
// reports its end, parses it and routes to the body or to the nested multipart handling
// depending on the part's content type.
void readHeaders(final WriteContext wCtx) {
    int byteOfData;
    while ((byteOfData = wCtx.read()) != -1) {
        if (endOfLineBuffer.write((byte) byteOfData)) {
            parseHeaders();
            if (currentState == State.ERROR) {
                // parseHeaders() failed: it already switched to ERROR and notified the listener.
                // Stop here so the ERROR state is not overwritten by the transitions below.
                wCtx.setFinished();
                return;
            }
            final String contentType = MultipartUtils.getHeader(MultipartUtils.CONTENT_TYPE, headers);
            if (MultipartUtils.isMultipart(contentType)) {
                goToState(State.GET_READY_FOR_NESTED_MULTIPART);
            } else {
                goToState(State.GET_READY_FOR_BODY);
            }
            wCtx.setFinishedIfNoMoreData();
            return;
        }
    }
    wCtx.setFinishedIfNoMoreData();
}
// Parses the bytes collected in the headers buffer into the headers field and clears the buffer.
// Any failure switches the FSM to ERROR and is reported to the listener.
void parseHeaders() {
    try {
        final byte[] rawHeaders = headersByteArrayOutputStream.toByteArray();
        final ByteArrayInputStream headersInput = new ByteArrayInputStream(rawHeaders);
        headers = HeadersParser.parseHeaders(headersInput, multipartContext.getCharEncoding());
        headersByteArrayOutputStream.reset();
    } catch (Exception e) {
        goToState(State.ERROR);
        nioMultipartParserListener.onError("Error parsing the part headers", e);
    }
}
// GET_READY_FOR_BODY handler: obtains the storage for the part body from the factory, points the
// end-of-line buffer at it (looking for the current delimiter prefix) and moves to READ_BODY.
void getReadyForBody(final WriteContext wCtx) {
    final StreamStorage storage = partBodyStreamStorageFactory.newStreamStorageForPartBody(headers, partIndex);
    partBodyStreamStorage = storage;
    endOfLineBuffer.recycle(delimiterPrefixes.peek(), storage);
    // The suffix of the delimiter that will end this body has not been seen yet.
    delimiterType.reset();
    goToState(State.READ_BODY);
    wCtx.setFinishedIfNoMoreData();
}
// GET_READY_FOR_NESTED_MULTIPART handler: the current part is itself a multipart. Its delimiter is
// pushed on the stack and the parser goes back to skipping a preamble, this time the nested one.
// When the nesting limit is exceeded the parser goes to ERROR, notifies the listener and stops
// processing the current write (same convention as identifyDelimiter).
void getReadyForNestedMultipart(final WriteContext wCtx) {
    // NOTE(review): the stack already holds the root delimiter, so this check lets
    // maxLevelOfNestedMultipart + 1 nested levels through - confirm whether that is intended.
    if (delimiterPrefixes.size() > maxLevelOfNestedMultipart + 1) {
        goToState(State.ERROR);
        nioMultipartParserListener.onError("Reached maximum number of nested multiparts: " + maxLevelOfNestedMultipart, null);
        // Stop now: leaving unread data would make the FSM loop throw right after the listener was notified.
        wCtx.setFinished();
        return;
    }

    final byte[] delimiter = getDelimiterPrefix(MultipartUtils.getHeader(MultipartUtils.CONTENT_TYPE, headers));
    delimiterType.reset();
    delimiterPrefixes.push(delimiter);
    endOfLineBuffer.recycle(getPreambleDelimiterPrefix(delimiter), null);
    goToState(State.SKIP_PREAMBLE);
    nioMultipartParserListener.onNestedPartStarted(headers);
    wCtx.setFinishedIfNoMoreData();
}
// READ_BODY handler: streams body bytes through the end-of-line buffer until its write call
// returns true (treated here as "delimiter reached"), then moves on to classify the delimiter.
void readBody(final WriteContext wCtx) {
    int current = wCtx.read();
    while (current != -1) {
        if (endOfLineBuffer.write((byte) current)) {
            goToState(State.IDENTIFY_BODY_DELIMITER);
            break;
        }
        current = wCtx.read();
    }
    wCtx.setFinishedIfNoMoreData();
}
// IDENTIFY_PREAMBLE_DELIMITER handler: an encapsulation boundary always leads to the headers;
// a close boundary ends either the nested multipart (when more than the root delimiter is on
// the stack) or the whole request.
void identifyPreambleDelimiter(final WriteContext wCtx) {
    final boolean insideNestedMultipart = delimiterPrefixes.size() > 1;
    final State onClose = insideNestedMultipart ? State.NESTED_PART_READ : State.ALL_PARTS_READ;
    identifyDelimiter(wCtx, State.GET_READY_FOR_HEADERS, onClose);
}
// IDENTIFY_BODY_DELIMITER handler: whichever delimiter ends the body, the part is complete;
// partComplete later inspects the delimiter type to pick the following state.
void identifyBodyDelimiter(final WriteContext ctx) {
    final State afterDelimiter = State.PART_COMPLETE;
    identifyDelimiter(ctx, afterDelimiter, afterDelimiter);
}
// Reads the two bytes that follow a delimiter and switches state accordingly:
// - encapsulation boundary (CR,LF)  -> onDelimiter
// - close boundary (DASH,DASH)      -> onCloseDelimiter
// - anything else                   -> ERROR, listener notified, write stopped.
// If the data runs out before both bytes are seen, the state is kept and the
// classification resumes at the next write.
void identifyDelimiter(final WriteContext wCtx, final State onDelimiter, final State onCloseDelimiter) {
    for (int suffixByte = wCtx.read(); suffixByte != -1; suffixByte = wCtx.read()) {
        delimiterType.addDelimiterByte((byte) suffixByte);
        if (delimiterType.index < 2) {
            continue;
        }
        switch (delimiterType.getDelimiterType()) {
            case ENCAPSULATION:
                goToState(onDelimiter);
                wCtx.setFinishedIfNoMoreData();
                return;
            case CLOSE:
                goToState(onCloseDelimiter);
                // Keep the FSM loop running even without more data: after a close delimiter
                // nothing else may arrive, but the state switch and notification must still happen.
                wCtx.setNotFinished();
                return;
            default:
                goToState(State.ERROR);
                nioMultipartParserListener.onError("Unexpected characters follow a boundary", null);
                wCtx.setFinished();
                return;
        }
    }
    wCtx.setFinishedIfNoMoreData();
}
// ALL_PARTS_READ handler: the close delimiter of the root multipart was seen. The state is
// switched to SKIP_EPILOGUE first, then the listener is told that every part has been processed.
void allPartsRead(final WriteContext wCtx) {
    goToState(State.SKIP_EPILOGUE);
    final NioMultipartParserListener listener = nioMultipartParserListener;
    listener.onAllPartsFinished();
    wCtx.setFinishedIfNoMoreData();
}
// PART_COMPLETE handler: finalises the storage of the part just read, picks the next state from
// the type of the delimiter that ended the part and notifies the listener.
// If the storage cannot be flushed/closed the parser goes to ERROR, notifies the listener and
// stops processing the current write (same convention as identifyDelimiter).
void partComplete(final WriteContext wCtx) {
    // First flush the output stream and close it...
    try {
        partBodyStreamStorage.flush();
        partBodyStreamStorage.close();
    } catch (Exception e) {
        goToState(State.ERROR);
        nioMultipartParserListener.onError("Unable to read/write the body data", e);
        // Without this the FSM loop would run again in the ERROR state and throw right
        // after the listener has already been told about the failure.
        wCtx.setFinished();
        return;
    }

    // Switch state: a close delimiter ends the nested multipart or the whole request,
    // an encapsulation boundary introduces the headers of the next part.
    if (delimiterType.getDelimiterType() == DelimiterType.Type.CLOSE) {
        if (delimiterPrefixes.size() > 1) {
            goToState(State.NESTED_PART_READ);
        } else {
            goToState(State.ALL_PARTS_READ);
        }
    } else {
        goToState(State.GET_READY_FOR_HEADERS);
    }

    nioMultipartParserListener.onPartFinished(partBodyStreamStorage, headers);
    partIndex++;
    wCtx.setFinishedIfNoMoreData();
}
// NESTED_PART_READ handler: the nested multipart is over. Its delimiter is dropped from the
// stack and the parser resumes looking for the enclosing multipart's delimiter.
void nestedPartRead(final WriteContext wCtx) {
    delimiterPrefixes.pop();
    delimiterType.reset();
    final byte[] enclosingPreamblePrefix = getPreambleDelimiterPrefix(delimiterPrefixes.peek());
    endOfLineBuffer.recycle(enclosingPreamblePrefix, null);
    goToState(State.SKIP_PREAMBLE);
    nioMultipartParserListener.onNestedPartFinished();
    wCtx.setFinishedIfNoMoreData();
}
// SKIP_EPILOGUE handler: whatever is written after the last part is ignored; the write is
// marked as finished without reading any of the remaining bytes.
void skipEpilogue(final WriteContext wCtx){
wCtx.setFinished();
}
// Returns a copy of the delimiter prefix without its first two bytes (the leading CR,LF).
// This allows parsing multipart bodies that start directly with a delimiter: per the specs a
// delimiter is always preceded by CR,LF, but commons file upload accepts bodies without it.
static byte[] getPreambleDelimiterPrefix(final byte[] delimiterPrefix) {
    final int remaining = delimiterPrefix.length - 2;
    final byte[] withoutLeadingCrLf = new byte[remaining];
    System.arraycopy(delimiterPrefix, 2, withoutLeadingCrLf, 0, remaining);
    return withoutLeadingCrLf;
}
// Builds the delimiter prefix for the boundary declared in the given content type:
// CR, LF, DASH, DASH followed by the boundary bytes.
// Throws IllegalStateException when no (or an empty) boundary can be extracted.
static byte[] getDelimiterPrefix(final String contentType) {
    final byte[] boundary = getBoundary(contentType);
    final boolean missingBoundary = boundary == null || boundary.length == 0;
    if (missingBoundary) {
        throw new IllegalStateException("Invalid boundary in the content type " + contentType);
    }
    final byte[] lead = {CR, LF, DASH, DASH};
    final byte[] delimiterPrefix = new byte[boundary.length + 4];
    System.arraycopy(lead, 0, delimiterPrefix, 0, lead.length);
    System.arraycopy(boundary, 0, delimiterPrefix, lead.length, boundary.length);
    return delimiterPrefix;
}
/**
 * Returns the FSM transitions recorded so far, for debugging purposes.
 * (The misspelt method name is kept for backward compatibility.)
 *
 * @return the live list of recorded transitions when debug logging is enabled,
 *         {@code null} otherwise - callers must check for {@code null}.
 */
public List<String> geFsmTransitions() {
    if (log.isDebugEnabled()) {
        return fsmTransitions;
    }
    return null;
}
}