All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.archive.format.http.HttpRequestMessageParser Maven / Gradle / Ivy

The newest version!
package org.archive.format.http;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Locale;

/**
 * Parses the first line of an HTTP request (the request line:
 * method, path and version separated by spaces and terminated by LF) and
 * reports the result to an {@link HttpRequestMessageObserver}.
 *
 * <p>Two modes are supported, selected by the public {@link #strict} field:
 * strict parsing requires exactly {@code METHOD SP PATH SP VERSION CR LF};
 * lax parsing tolerates a missing CR, repeated spaces between fields, leading
 * spaces, lower-case method names and trailing fields after the version.
 *
 * <p>The byte constants ({@code LF}, {@code CR}, {@code SP}), the
 * {@code METHOD_*}/{@code VERSION_*} codes, {@code UTF8} and the
 * {@code parseVersionStrict}/{@code parseVersionLax} helpers are declared
 * outside this class.
 */
public class HttpRequestMessageParser extends HttpMessageParser {
	/** Initial size of the line buffer; it grows on demand up to {@link #maxBytes}. */
	private static final int INITIAL_BUFFER_SIZE = 256;

	/** Maximum number of bytes read from a stream while looking for the terminating LF. */
	public int maxBytes = 1024 * 1024;
	/** When true, malformed input raises {@link HttpParseException}; when false, parsing is lenient. */
	public boolean strict = false;

	/**
	 * Parses a request line from the stream into a new {@link HttpRequestMessage}.
	 *
	 * @param is stream positioned at the start of the request line
	 * @return the message object that was handed to the parser as observer
	 * @throws HttpParseException if the line cannot be parsed
	 * @throws IOException if reading from the stream fails
	 */
	public HttpRequestMessage parse(InputStream is) throws HttpParseException, IOException {
		HttpRequestMessage message = new HttpRequestMessage();
		parse(is,message);
		return message;
	}

	/**
	 * Reads bytes one at a time up to and including the first LF, then parses
	 * the collected line.
	 *
	 * <p>If EOF or a byte above 127 is seen before the LF, strict mode throws;
	 * lax mode calls {@code obs.messageCorrupt()} and returns the number of
	 * bytes stored so far (the offending byte is not counted).
	 *
	 * @param is stream positioned at the start of the request line
	 * @param obs observer notified of the parsed fields or of corruption
	 * @return the value returned by {@link #parse(byte[], int, HttpRequestMessageObserver)},
	 *         or the number of bytes buffered when the message is corrupt in lax mode
	 * @throws HttpParseException on malformed input, or when no LF is found
	 *         within {@link #maxBytes} bytes
	 * @throws IOException if reading from the stream fails
	 */
	public int parse(InputStream is, HttpRequestMessageObserver obs) throws HttpParseException, IOException {
		// Start small and grow: request lines are short, so allocating
		// maxBytes (1 MiB by default) for every call is wasteful.
		byte[] buf = new byte[Math.max(0, Math.min(INITIAL_BUFFER_SIZE, maxBytes))];
		int bytesRead = 0;

		while(bytesRead < maxBytes) {
			int i = is.read();
			if(i == -1) {
				if(strict) {
					throw new HttpParseException("EOF before CRLF");
				}
				obs.messageCorrupt();
				return bytesRead;
			}
			if(i > 127) {
				if(strict) {
					throw new HttpParseException("Non ASCII byte in message");
				}
				obs.messageCorrupt();
				return bytesRead;
			}
			if(bytesRead == buf.length) {
				// Double the capacity, but never beyond maxBytes.
				int grown = (int) Math.min(2L * buf.length, (long) maxBytes);
				buf = Arrays.copyOf(buf, grown);
			}
			byte b = (byte) i;
			buf[bytesRead] = b;
			bytesRead++;
			if(b == LF) {
				return parse(buf,bytesRead,obs);
			}
		}
		// TODO: under Lax consume till EOL and continue?
		throw new HttpParseException("Request Message too long");
	}

	/**
	 * Parses a complete request line held in {@code buf[0..len)}, using the
	 * strict or lax parser according to {@link #strict}.
	 *
	 * @param buf buffer holding the line, including its terminating LF
	 * @param len number of valid bytes in {@code buf}
	 * @param obs observer notified of the parsed fields
	 * @return {@code len}
	 * @throws HttpParseException if the line is malformed
	 * @throws IOException declared for callers; not thrown by this implementation
	 */
	public int parse(byte buf[], int len, HttpRequestMessageObserver obs) 
	throws HttpParseException, IOException {
		return strict ? parseStrict(buf,len,obs) : parseLax(buf,len,obs);
	}

	/**
	 * Parses a request line of the exact form
	 * {@code METHOD SP PATH SP VERSION CR LF}.
	 *
	 * @param buf buffer holding the line
	 * @param len number of valid bytes in {@code buf}, including CR LF
	 * @param obs receives {@code messageParsed(method, path, version, len)}
	 * @return {@code len}
	 * @throws HttpParseException if the terminator is not CR LF, a field is
	 *         missing or empty, the line starts with a space, the method is
	 *         unknown, or there are more than three fields
	 */
	public int parseStrict(byte buf[], int len, HttpRequestMessageObserver obs) throws HttpParseException {
		int origLen = len;
		// The len guards turn empty or one-byte input into a parse error
		// instead of an ArrayIndexOutOfBoundsException.
		if(len < 1 || buf[len-1] != LF) {
			throw new HttpParseException("Request Message missing LF");
		}
		len--;
		if(len < 1 || buf[len-1] != CR) {
			throw new HttpParseException("Request Message missing CRLF");
		}
		len--;

		int methodEnd = findSpace(buf, 0, len);
		if(methodEnd == 0) {
			throw new HttpParseException("Http Request starts with SP");
		}
		int method = parseMethodStrict(buf, 0, methodEnd);

		int pathStart = methodEnd + 1;
		int pathEnd = findSpace(buf, pathStart, len);
		if(pathEnd == pathStart) {
			throw new HttpParseException("Empty Path");
		}
		String path = new String(buf,pathStart,pathEnd - pathStart,UTF8);

		// Everything after the second space is the version; it must not
		// contain another space.
		int versionStart = pathEnd + 1;
		for(int idx = versionStart; idx < len; idx++) {
			if(buf[idx] == SP) {
				throw new HttpParseException("Too many fields in HTTP Request");
			}
		}
		int version = parseVersionStrict(buf, versionStart, len - versionStart);

		obs.messageParsed(method,path,version, origLen);

		return origLen;
	}

	/**
	 * Leniently parses a request line: the CR before the LF is optional,
	 * leading spaces and runs of spaces between fields are skipped, the method
	 * is matched case-insensitively (unknown methods map to
	 * {@code METHOD_UNK}), and anything after the version token is ignored.
	 * Three space-separated tokens are still required.
	 *
	 * @param buf buffer holding the line
	 * @param len number of valid bytes in {@code buf}, including the terminator
	 * @param obs receives {@code messageParsed(method, path, version, len)}
	 * @return {@code len}, the full line length including the terminator
	 *         (consistent with {@link #parseStrict} and with the length
	 *         reported to the observer)
	 * @throws HttpParseException if the line does not end in LF or has fewer
	 *         than three tokens
	 */
	public int parseLax(byte buf[], int len, HttpRequestMessageObserver obs)
		throws HttpParseException {
		/* TODO: make this a lot more lax:
		 *       * auto trim leading and trailing whitespace
		 *       * first pass looks for 2 spaces, if found, go easy case
		 *       * if less than 2 whitespace, attempt to parse first and last
		 *             tokens as method and version, vary parsing based on that
		 *       * if more than 2 tokens. attempt to find leading method and
		 *             trailing version, and interpret intervening fields as
		 *             path.
		 *       * etc..
		 */
		int origLen = len;
		if(len < 1 || buf[len-1] != LF) {
			throw new HttpParseException("Request Message missing LF");
		}
		len--;
		// Guard against a bare "\n" line, where there is no byte before the LF.
		if(len > 0 && buf[len-1] == CR) {
			len--;
		}

		int methodStart = skipSpaces(buf, 0, len);
		int methodEnd = findSpace(buf, methodStart, len);
		int method = parseMethodLax(buf, methodStart, methodEnd - methodStart);

		int pathStart = skipSpaces(buf, methodEnd, len);
		int pathEnd = findSpace(buf, pathStart, len);
		// pathStart is on a non-space byte, so the path is never empty here.
		String path = new String(buf,pathStart,pathEnd - pathStart,UTF8);

		int versionStart = skipSpaces(buf, pathEnd, len);
		int versionEnd = versionStart;
		// The version token ends at the next space or at end of line;
		// anything following it is ignored.
		while(versionEnd < len && buf[versionEnd] != SP) {
			versionEnd++;
		}
		int version = parseVersionLax(buf, versionStart, versionEnd - versionStart);

		obs.messageParsed(method,path,version, origLen);
		return origLen;
	}

	/**
	 * Maps an exactly-cased method token to its {@code METHOD_*} code.
	 *
	 * @param buf buffer containing the token
	 * @param start offset of the first byte of the token
	 * @param len length of the token in bytes
	 * @return the matching {@code METHOD_*} code
	 * @throws HttpParseException if the token is not one of GET, HEAD, POST,
	 *         PUT, TRACE, DELETE or CONNECT as defined by the
	 *         {@code METHOD_*_STRING} constants
	 */
	protected int parseMethodStrict(byte buf[], int start, int len)
	throws HttpParseException {
		String v = new String(buf,start,len,UTF8);
		if(v.equals(METHOD_GET_STRING)) {
			return METHOD_GET;
		} else if(v.equals(METHOD_HEAD_STRING)) {
			return METHOD_HEAD;
		} else if(v.equals(METHOD_POST_STRING)) {
			return METHOD_POST;
		} else if(v.equals(METHOD_PUT_STRING)) {
			return METHOD_PUT;
		} else if(v.equals(METHOD_TRACE_STRING)) {
			return METHOD_TRACE;
		} else if(v.equals(METHOD_DELETE_STRING)) {
			return METHOD_DELETE;
		} else if(v.equals(METHOD_CONNECT_STRING)) {
			return METHOD_CONNECT;
		} else {
			throw new HttpParseException("Unknown method: " + v);
		}
	}

	/**
	 * Maps a method token to its {@code METHOD_*} code after upper-casing it.
	 *
	 * @param buf buffer containing the token
	 * @param start offset of the first byte of the token
	 * @param len length of the token in bytes
	 * @return the matching {@code METHOD_*} code, or {@code METHOD_UNK} when
	 *         the token is not recognised
	 * @throws HttpParseException declared for subclasses; not thrown here
	 */
	protected int parseMethodLax(byte buf[], int start, int len)
	throws HttpParseException {
		// Locale.ROOT keeps the case mapping independent of the JVM default locale.
		String v = new String(buf,start,len,UTF8).toUpperCase(Locale.ROOT);
		if(v.equals(METHOD_GET_STRING)) {
			return METHOD_GET;
		} else if(v.equals(METHOD_HEAD_STRING)) {
			return METHOD_HEAD;
		} else if(v.equals(METHOD_POST_STRING)) {
			return METHOD_POST;
		} else if(v.equals(METHOD_PUT_STRING)) {
			return METHOD_PUT;
		} else if(v.equals(METHOD_TRACE_STRING)) {
			return METHOD_TRACE;
		} else if(v.equals(METHOD_DELETE_STRING)) {
			return METHOD_DELETE;
		} else if(v.equals(METHOD_CONNECT_STRING)) {
			return METHOD_CONNECT;
		}
		return METHOD_UNK;
	}

	/**
	 * Advances past consecutive SP bytes starting at {@code idx}.
	 *
	 * @return index of the first non-space byte, always less than {@code end}
	 * @throws HttpParseException if {@code end} is reached first
	 */
	private int skipSpaces(byte[] buf, int idx, int end) throws HttpParseException {
		while(idx < end && buf[idx] == SP) {
			idx++;
		}
		if(idx >= end) {
			throw new HttpParseException("No spaces in message");
		}
		return idx;
	}

	/**
	 * Finds the next SP byte at or after {@code idx}.
	 *
	 * @return index of that SP byte, always less than {@code end}
	 * @throws HttpParseException if {@code end} is reached without finding one
	 */
	private int findSpace(byte[] buf, int idx, int end) throws HttpParseException {
		while(idx < end && buf[idx] != SP) {
			idx++;
		}
		if(idx >= end) {
			throw new HttpParseException("No spaces in message");
		}
		return idx;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy