org.archive.format.http.HttpRequestMessageParser Maven / Gradle / Ivy
The newest version!
package org.archive.format.http;
import java.io.IOException;
import java.io.InputStream;
public class HttpRequestMessageParser extends HttpMessageParser {
public int maxBytes = 1024 * 1024;
public boolean strict = false;
public HttpRequestMessage parse(InputStream is) throws HttpParseException, IOException {
HttpRequestMessage message = new HttpRequestMessage();
parse(is,message);
return message;
}
public int parse(InputStream is, HttpRequestMessageObserver obs) throws HttpParseException, IOException {
byte buf[] = new byte[maxBytes];
int bytesRead = 0;
while(bytesRead < maxBytes) {
int i = is.read();
if(i == -1) {
if(strict) {
throw new HttpParseException("EOF before CRLF");
}
obs.messageCorrupt();
return bytesRead;
}
if(i > 127) {
if(strict) {
throw new HttpParseException("Non ASCII byte in message");
}
obs.messageCorrupt();
return bytesRead;
}
byte b = (byte) (i & 0xff);
buf[bytesRead] = b;
bytesRead++;
if(b == LF) {
return parse(buf,bytesRead,obs);
}
}
// TODO: under Lax consume till EOL and continue?
throw new HttpParseException("Response Message too long");
}
public int parse(byte buf[], int len, HttpRequestMessageObserver obs)
throws HttpParseException, IOException {
return strict ? parseStrict(buf,len,obs) : parseLax(buf,len,obs);
}
public int parseStrict(byte buf[], int len, HttpRequestMessageObserver obs) throws HttpParseException {
int origLen = len;
if(buf[len-1] != LF) {
throw new HttpParseException("Response Message missing LF");
}
len--;
if(buf[len-1] != CR) {
throw new HttpParseException("Response Message missing CRLF");
}
len--;
int version = VERSION_0;
int method = 0;
String path = null;
int idx = 0;
int ms = 0;
int ml = 0;
int ps = -1;
int pl = 0;
int vs = -1;
int vl = 0;
while(buf[idx] != SP) {
ml++;
idx++;
if(idx >= len) {
throw new HttpParseException("No spaces in message");
}
}
if(idx == 0) {
throw new HttpParseException("Http Request starts with SP");
}
method = parseMethodStrict(buf, ms, idx);
idx++;
ps = idx;
while(buf[idx] != SP) {
pl++;
idx++;
if(idx >= len) {
throw new HttpParseException("No spaces in message");
}
}
if(pl == 0) {
throw new HttpParseException("Empty Path");
}
path = new String(buf,ps,pl,UTF8);
idx++;
vs = idx;
vl = len - vs;
while(idx < len) {
if(buf[idx] == SP) {
throw new HttpParseException("Too many fields in HTTP Request");
}
idx++;
}
version = parseVersionStrict(buf, vs,vl);
obs.messageParsed(method,path,version, origLen);
return origLen;
}
public int parseLax(byte buf[], int len, HttpRequestMessageObserver obs)
throws HttpParseException {
/* TODO: make this a lot more lax:
* * auto trim leading and trailing whitespace
* * first pass looks for 2 spaces, if found, go easy case
* * if less than 2 whitespace, attempt to parse first and last
* tokens as method and version, vary parsing based on that
* * if more than 2 tokens. attempt to find leading method and
* trailing version, and interpret intervening fields as
* path.
* * etc..
*/
int origLen = len;
if(buf[len-1] != LF) {
throw new HttpParseException("Response Message missing LF");
}
len--;
if(buf[len-1] == CR) {
len--;
}
int version = VERSION_0;
int method = METHOD_UNK;
String path = "";
int idx = 0;
int ms = 0;
int ml = 0;
int ps = -1;
int pl = 0;
int vs = -1;
int vl = 0;
// consume leading spaces:
while(buf[idx] == SP) {
idx++;
if(idx >= len) {
throw new HttpParseException("No spaces in message");
}
}
ms = idx;
while(buf[idx] != SP) {
ml++;
idx++;
if(idx >= len) {
throw new HttpParseException("No spaces in message");
}
}
method = parseMethodLax(buf, ms, ml);
while(buf[idx] == SP) {
idx++;
if(idx >= len) {
throw new HttpParseException("No spaces in message");
}
}
ps = idx;
while(buf[idx] != SP) {
pl++;
idx++;
if(idx >= len) {
throw new HttpParseException("No spaces in message");
}
}
if(pl > 0) {
path = new String(buf,ps,pl,UTF8);
}
while(buf[idx] == SP) {
idx++;
if(idx >= len) {
throw new HttpParseException("No spaces in message");
}
}
vs = idx;
while(idx < len) {
if(buf[idx] == SP) {
break;
}
vl++;
idx++;
}
version = parseVersionLax(buf, vs,vl);
obs.messageParsed(method,path,version, origLen);
return len;
}
protected int parseMethodStrict(byte buf[], int start, int len)
throws HttpParseException {
String v = new String(buf,start,len,UTF8);
if(v.compareTo(METHOD_GET_STRING) == 0) {
return METHOD_GET;
} else if(v.compareTo(METHOD_HEAD_STRING) == 0) {
return METHOD_HEAD;
} else if(v.compareTo(METHOD_POST_STRING) == 0) {
return METHOD_POST;
} else if(v.compareTo(METHOD_PUT_STRING) == 0) {
return METHOD_PUT;
} else if(v.compareTo(METHOD_TRACE_STRING) == 0) {
return METHOD_TRACE;
} else if(v.compareTo(METHOD_DELETE_STRING) == 0) {
return METHOD_DELETE;
} else if(v.compareTo(METHOD_CONNECT_STRING) == 0) {
return METHOD_CONNECT;
} else {
throw new HttpParseException("Unknown version");
}
}
protected int parseMethodLax(byte buf[], int start, int len)
throws HttpParseException {
String v = new String(buf,start,len,UTF8).toUpperCase();
if(v.compareTo(METHOD_GET_STRING) == 0) {
return METHOD_GET;
} else if(v.compareTo(METHOD_HEAD_STRING) == 0) {
return METHOD_HEAD;
} else if(v.compareTo(METHOD_POST_STRING) == 0) {
return METHOD_POST;
} else if(v.compareTo(METHOD_PUT_STRING) == 0) {
return METHOD_PUT;
} else if(v.compareTo(METHOD_TRACE_STRING) == 0) {
return METHOD_TRACE;
} else if(v.compareTo(METHOD_DELETE_STRING) == 0) {
return METHOD_DELETE;
} else if(v.compareTo(METHOD_CONNECT_STRING) == 0) {
return METHOD_CONNECT;
}
return METHOD_UNK;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy