All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.rivulet.BufferedHttpMessageParser Maven / Gradle / Ivy

The newest version!
package io.rivulet;

import edu.columbia.cs.psl.phosphor.struct.SinglyLinkedList;
import org.apache.http.*;
import org.apache.http.client.entity.DeflateDecompressingEntity;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.entity.ContentLengthStrategy;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.entity.StrictContentLengthStrategy;
import org.apache.http.impl.io.*;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;

public class BufferedHttpMessageParser {

    // Standard bytes used for an empty line in http
    private static final byte[] CRLF = new byte[]{'\r', '\n'};

    // The current parsing phase
    private HttpPhase currentPhase;
    // Stores bytes that have been read, but have not yet been parsed into a message or entity
    private ByteArrayOutputStream byteStream;
    // Whether the last byte of the last stream read was a carriage return that was stored for the next read.
    private boolean storedCR;
    // If the entity body is chunked, the number of bytes read for the current chunk. If the entity body is not chunked,
    // the total number of entity body bytes read.
    private int entityBytesRead = 0;
    // Parser that should be used to parse the http messages once the byte stream is full enough
    private final AbstractMessageParser parser;
    //  The buffer that the parser uses
    private final SessionInputBufferImpl sessionBuffer;
    // The last http message parsed
    private T message;
    // The content type of the last message parsed
    private ContentType contentType;
    // The content length of the last message parsed
    private long contentLength;
    // The content encoding of the last message parsed
    private ContentEncoding contentEncoding;
    // The entity of the last http message parsed
    private HttpEntity entity;
    // The trailers of the last http message parsed
    private LinkedHashMap trailers;
    // Builds the hex string of the next chunk's length
    private StringBuilder chunkSizeBuilder;
    // The size of the next chunk
    private long currentChunkSize;
    // Queue of messages parsed by this instance.
    private final SinglyLinkedList> parsedMessageQueue;

    /* Constructs a new buffered parser that use the specified parser to parse the HttpMessage instance when enough bytes
     * have been written. */
    private BufferedHttpMessageParser(AbstractMessageParser parser, SessionInputBufferImpl sessionBuffer) {
        this.parser = parser;
        this.sessionBuffer = sessionBuffer;
        this.parsedMessageQueue = new SinglyLinkedList<>();
        this.byteStream = new ByteArrayOutputStream();
        this.storedCR = false;
        reset();
    }

    /* Resets this instance to start parsing an entirely new message. */
    private void reset() {
        this.currentPhase = HttpPhase.PRE_START_LINE;
        this.entityBytesRead = 0;
        this.sessionBuffer.clear();
        this.message = null;
        this.contentType = null;
        this.contentLength = 0;
        this.contentEncoding = null;
        this.entity = null;
        this.trailers = null;
        this.chunkSizeBuilder = new StringBuilder();
        this.currentChunkSize = 0;
    }

    /* Writes bytes from the specified buffer to this instance's byte stream. */
    public void appendBytes(ByteBuffer buffer) throws IOException, HttpException {
        appendBytes(buffer, 0, buffer.remaining());
    }

    /* Writes bytes from the specified buffer to this instance's byte stream. */
    public void appendBytes(ByteBuffer buffer, int offset, int len) throws IOException, HttpException {
        ByteBuffer shallowCopy = buffer.duplicate();
        byte[] arr = new byte[len];
        shallowCopy.get(arr, offset, len);
        appendBytes(arr);
    }

    /* Writes bytes from the specified array to this instance's byte stream. */
    public void appendBytes(byte[] bytes) throws IOException, HttpException {
        if(bytes.length > 0) {
            boolean trailingCR = bytes[bytes.length - 1] == CRLF[0];
            PushbackInputStream stream = new PushbackInputStream(new ByteArrayInputStream(bytes, 0, trailingCR ? bytes.length - 1 : bytes.length), bytes.length + 1);
            if(storedCR) {
                // If a carriage return is stored from a previous append add it to the front of the input stream
                stream.unread(CRLF[0]);
            }
            storedCR = trailingCR;
            appendBytes(stream);
        }
    }

    /* Writes bytes from the specified stream to this instance's byte stream. */
    private void appendBytes(PushbackInputStream stream) throws IOException, HttpException {
        while(stream.available() > 0) {
            switch(currentPhase) {
                case PRE_START_LINE:
                    if(!consumeEmptyLines(stream, false)) {
                        currentPhase = HttpPhase.START_LINE;
                    }
                    break;
                case START_LINE:
                case HEADER:
                    if(!consumeNonEmpty(stream)) {
                        currentPhase = HttpPhase.PRE_HEADER;
                    }
                    break;
                case PRE_HEADER:
                    if(!consumeEmptyLine(stream, true)) {
                        currentPhase = HttpPhase.HEADER;
                        break;
                    }
                    currentPhase =  HttpPhase.PRE_MESSAGE_BODY;
                case PRE_MESSAGE_BODY:
                    parseMessage();
                    currentPhase = (contentLength == ContentLengthStrategy.CHUNKED) ? HttpPhase.CHUNK_SIZE : HttpPhase.IDENTITY_BODY;
                    entityBytesRead = 0;
                    if(currentPhase != HttpPhase.IDENTITY_BODY) {
                        break;
                    }
                case IDENTITY_BODY:
                    if(entityBytesRead != contentLength && stream.available() > 0) {
                        byteStream.write(stream.read());
                        entityBytesRead++;
                    }
                    if(stream.available() == 0 && storedCR && (entityBytesRead + 1) == contentLength) {
                        // Last byte of body is a carriage return
                        byteStream.write(CRLF[0]);
                        storedCR = false;
                        entityBytesRead++;
                    }
                    if(entityBytesRead == contentLength) {
                        parseEntity();
                        // A request was parsed, store it and reset
                        parsedMessageQueue.enqueue(new ParsedMessage<>(message, entity, trailers));
                        reset();
                    }
                    break;
                case CHUNK_SIZE:
                    if(consumeEmptyLine(stream, true)) {
                        this.currentChunkSize = Long.parseLong(chunkSizeBuilder.toString(), 16);
                        chunkSizeBuilder = new StringBuilder();
                        currentPhase = HttpPhase.CHUNK_DATA;
                    } else if(consumeSemicolon(stream)) {
                        this.currentChunkSize = Long.parseLong(chunkSizeBuilder.toString(), 16);
                        chunkSizeBuilder = new StringBuilder();
                        currentPhase = HttpPhase.CHUNK_EXT;
                    } else {
                        int read = stream.read();
                        chunkSizeBuilder.append((char)read);
                        byteStream.write(read);
                    }
                    break;
                case CHUNK_EXT:
                    if(!consumeNonEmpty(stream)) {
                        currentPhase = HttpPhase.CHUNK_DATA;
                    }
                    break;
                case CHUNK_DATA:
                    if(currentChunkSize == 0) {
                        // Ending chunk
                        entityBytesRead = 0;
                        parseEntity();
                        currentPhase = HttpPhase.PRE_TRAILER;
                    } else if(entityBytesRead == currentChunkSize) {
                        entityBytesRead = 0;
                        consumeEmptyLine(stream, true);
                        currentPhase = HttpPhase.CHUNK_SIZE;
                    } else {
                        byteStream.write(stream.read());
                        entityBytesRead++;
                    }
                    break;
                case PRE_TRAILER:
                    if(consumeEmptyLine(stream, true)) {
                        parseTrailer();
                        // A request was parsed, store it and reset
                        parsedMessageQueue.enqueue(new ParsedMessage<>(message, entity, trailers));
                        reset();
                    } else {
                        currentPhase = HttpPhase.TRAILER;
                    }
                    break;
                case TRAILER:
                    if(!consumeNonEmpty(stream)) {
                        currentPhase = HttpPhase.PRE_TRAILER;
                    }
                    break;
            }
        }
    }

    /* Reads the next thing on the buffer. If it is an empty line add it to this instance's byteStream in write is true
     * and returns true. Otherwise, pushes it back onto the stream and returns false.*/
    private boolean consumeEmptyLine(PushbackInputStream stream, boolean write) throws IOException {
        if(stream.available() > 1) {
            int first = stream.read();
            int second = stream.read();
            if(first == CRLF[0] && second == CRLF[1]) {
                // The pair read was a empty line
                if(write) {
                    byteStream.write(first);
                    byteStream.write(second);
                }
                return true;
            } else {
                // The pair read was not an empty line
                stream.unread(second);
                stream.unread(first);
                return false;
            }
        } else {
            return false;
        }
    }

    /* Reads the next thing on the buffer. If it is a ';' adds it to this instance's byteStream and returns true.
     * Otherwise, pushes it back onto the stream and returns false.*/
    private boolean consumeSemicolon(PushbackInputStream stream) throws IOException {
        if(stream.available() > 0) {
            int read = stream.read();
            if(read == ';') {
                byteStream.write(read);
                return true;
            } else {
                stream.unread(read);
                return false;
            }
        } else {
            return false;
        }
    }

    /* Reads from the specified buffer into this instance's byte stream until the next thing on the buffer is not an empty
     * line if write is true. Otherwise Reads from the specified buffer until the next thing on the buffer is not an empty
     * line but does not write those empty lines to the byte stream. Returns whether the entire stream was consumed. */
    private boolean consumeEmptyLines(PushbackInputStream stream, boolean write) throws IOException {
        while(consumeEmptyLine(stream, write));
        return stream.available() == 0;
    }

    /* Reads from the specified buffer into this instance's byte stream until the next thing on the buffer is an empty
     * line. Returns whether the entire stream was consumed before an empty line was encountered. */
    private boolean consumeNonEmpty(PushbackInputStream stream) throws IOException {
        boolean lastReadCR = false;
        while(stream.available() > 0) {
            int read = stream.read();
            byteStream.write(read);
            if(read == CRLF[0]) {
                lastReadCR = true;
            } else if(read == CRLF[1] && lastReadCR) {
                // An empty line was found and consumed
                return false;
            }
        }
        return true;
    }

    /* Parses the bytes currently in this instance's byte stream into an http message. */
    private void parseMessage() throws IOException, HttpException {
        this.message = null;
        sessionBuffer.clear();
        sessionBuffer.bind(new ByteArrayInputStream(byteStream.toByteArray()));
        this.message = parser.parse();
        // Set the byte stream to contain the remaining bytes in the session buffer
        byteStream.reset();
        byte[] remaining = new byte[sessionBuffer.length()];
        sessionBuffer.read(remaining);
        byteStream.write(remaining);
        // Determine the content type
        try {
            this.contentType = ContentType.parse(message.getFirstHeader(HTTP.CONTENT_TYPE).getValue());
        } catch(Exception e) {
            this.contentType = null;
        }
        // Determine the content length
        this.contentLength = StrictContentLengthStrategy.INSTANCE.determineLength(message);
        if(this.contentLength != ContentLengthStrategy.CHUNKED) {
            try {
                this.contentLength = Long.parseLong(message.getFirstHeader(HTTP.CONTENT_LEN).getValue());
            } catch(Exception e) {
                contentLength = 0;
            }
        }
        // Determine the content encoding
        try {
            this.contentEncoding = ContentEncoding.parse(message.getFirstHeader(HTTP.CONTENT_ENCODING));
        } catch(Exception e) {
            this.contentEncoding = ContentEncoding.IDENTITY;
        }
    }

    /* Parses the bytes currently in this instance's byte stream into a body entity. */
    private void parseEntity() {
        this.entity = null;
        sessionBuffer.clear();
        sessionBuffer.bind(new ByteArrayInputStream(byteStream.toByteArray()));
        if(contentLength == ContentLengthStrategy.CHUNKED) {
            this.entity = new InputStreamEntity(new ChunkedInputStream(sessionBuffer), -1, contentType);
        } else if(contentLength == ContentLengthStrategy.IDENTITY) {
            this.entity = new InputStreamEntity(new IdentityInputStream(sessionBuffer), -1, contentType);
        } else if(contentLength > 0) {
            this.entity = new InputStreamEntity(new ContentLengthInputStream(sessionBuffer, contentLength), contentLength, contentType);
        }
        // Decompress the entity if needed
        if(entity != null) {
            if(contentEncoding == ContentEncoding.GZIP) {
                this.entity = new GzipDecompressingEntity(this.entity);
            } else if(contentEncoding == ContentEncoding.DEFLATE) {
                this.entity = new DeflateDecompressingEntity(this.entity);
            }
        }
        // Reset the byte stream
        byteStream.reset();
    }

    /* Parses the bytes currently in this instance's byte stream into a trailing header String. */
    private void parseTrailer() {
        String trailerString = new String(byteStream.toByteArray()).trim();
        LinkedHashMap map = new LinkedHashMap<>();
        for(String trailer : trailerString.split("\r\n")) {
            int splitIndex = trailer.indexOf(":");
            if(splitIndex != -1) {
                String name = trailer.substring(0, splitIndex).trim();
                String value = trailer.substring(splitIndex+1).trim();
                map.put(name, value);
            }
        }
        this.trailers = map;
        // Reset the byte stream
        byteStream.reset();
    }

    /* Returns whether this instance has at least one parsed message waiting to be dequeued. */
    public boolean hasParsedMessage() {
        return !parsedMessageQueue.isEmpty();
    }

    /* Removes and returns the first parsed message in this instance's queue. */
    public ParsedMessage getParsedMessage() {
        return parsedMessageQueue.dequeue();
    }

    /* Returns a new instance of BufferedHttpMessageParser for parsing HttpResponse instances with the specified session
     * buffer size. */
    public static BufferedHttpMessageParser getResponseParser(int bufferSize) {
        SessionInputBufferImpl sessionBuffer = new SessionInputBufferImpl(new HttpTransportMetricsImpl(), bufferSize);
        DefaultHttpResponseParser responseParser = new DefaultHttpResponseParser(sessionBuffer);
        return new BufferedHttpMessageParser<>(responseParser, sessionBuffer);
    }

    /* Returns a new instance of BufferedHttpMessageParser for parsing HttpRequest instances with the specified session
     * buffer size. */
    public static BufferedHttpMessageParser getRequestParser(int bufferSize) {
        SessionInputBufferImpl sessionBuffer = new SessionInputBufferImpl(new HttpTransportMetricsImpl(), bufferSize);
        DefaultHttpRequestParser requestParser = new DefaultHttpRequestParser(sessionBuffer);
        return new BufferedHttpMessageParser<>(requestParser, sessionBuffer);
    }

    /* Returns a new instance of BufferedHttpMessageParser for parsing HttpResponse instances. */
    public static BufferedHttpMessageParser getResponseParser() {
        return getResponseParser(8192);
    }

    /* Returns a new instance of BufferedHttpMessageParser for parsing HttpRequest instances. */
    public static BufferedHttpMessageParser getRequestParser() {
        return getRequestParser(8192);
    }

    public enum HttpPhase {
        PRE_START_LINE,
        START_LINE,
        PRE_HEADER,
        HEADER,
        PRE_MESSAGE_BODY,
        IDENTITY_BODY,
        CHUNK_SIZE,
        CHUNK_EXT,
        CHUNK_DATA,
        PRE_TRAILER,
        TRAILER
    }

    public enum ContentEncoding {
        GZIP("gzip"),
        DEFLATE("deflate"),
        IDENTITY("identity");

        private final String headerValue;

        ContentEncoding(String headerValue) {
            this.headerValue = headerValue;
        }

        public String getHeaderValue() {
            return headerValue;
        }

        /* Returns a ContentEncoding instance based on the value of the specified header. Defaults to identity encoding
         * if the specified header is null or doesn't match the values for gzip or deflate. */
        public static ContentEncoding parse(Header contentEncodingHeader) {
            if(contentEncodingHeader == null) {
                return IDENTITY;
            }
            return parse(contentEncodingHeader.getValue());
        }

        /* Returns a ContentEncoding instance based on the specified value. Defaults to identity encoding if the specified
         * value is null or doesn't match the values for gzip or deflate. */
        public static ContentEncoding parse(String value) {
            if(value == null) {
                return IDENTITY;
            } else {
                value = value.toLowerCase().trim();
            }
            switch(value) {
                case "gzip":
                case "x-gzip":
                    return GZIP;
                case "deflate":
                    return DEFLATE;
                default:
                    return IDENTITY;
            }
        }
    }

    /* Record type that groups parsed information together. */
    public static class ParsedMessage {

        private final T message;
        private final HttpEntity entity;
        private final LinkedHashMap trailers;

        /* Constructs a new ParsedMessage with the specified information. */
        ParsedMessage(T message, HttpEntity entity, LinkedHashMap trailers) {
            this.message = message;
            this.entity = entity;
            this.trailers = trailers == null ? new LinkedHashMap<>() : trailers;
        }

        /* Getter for message. */
        public T getMessage() {
            return message;
        }

        /* Getter for entity. */
        public HttpEntity getEntity() {
            return entity;
        }

        /* Returns a String representation of this instance's entity. */
        public String getEntityString() throws IOException {
            try {
                return (entity == null) ? null : EntityUtils.toString(entity);
            } finally {
                if(entity != null) {
                    EntityUtils.consume(entity);
                }
            }
        }

        /* Return whether this instance has an entity whose content is text/html or not specified. */
        public Boolean hasHtmlContent() {
            return entity != null && (entity.getContentType() == null
                    || ContentType.get(entity).getMimeType().toLowerCase().equals(ContentType.TEXT_HTML.getMimeType().toLowerCase()));
        }

        /* Getter for trailers */
        public Map getTrailers() {
            return trailers;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy