// stream.util.parser.MParser Maven / Gradle / Ivy
/*
* streams library
*
* Copyright (C) 2011-2014 by Christian Bockermann, Hendrik Blom
*
* streams is a library, API and runtime environment for processing high
* volume data streams. It is composed of three submodules "stream-api",
* "stream-core" and "stream-runtime".
*
* The streams library (and its submodules) is free software: you can
* redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* The stream.ai library (and its submodules) is distributed in the hope
* that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package stream.util.parser;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * A small cursor-based tokenizer for splitting a single line of text (for
 * example a web server access log entry) into tokens.
 *
 * <p>
 * An instance only stores the current read offset; the string being parsed is
 * passed to every call. The static {@code readToken} variants are stateless
 * and return the token together with the offset at which reading stopped.
 *
 * <p>
 * Tokens that start with {@code "}, {@code (} or {@code [} are read up to and
 * including the next {@code "}, {@code )} or {@code ]} respectively. Nested
 * delimiters are not balanced: the first closing character ends the token.
 */
public class MParser {

    /** Offset of the next character to be read by the instance methods. */
    int pos = 0;

    public MParser() {
    }

    /** Moves the read offset back to the start of the string. */
    public void reset() {
        pos = 0;
    }

    /**
     * Advances the read offset by {@code i} characters. No bounds check is
     * performed.
     */
    public void skip(int i) {
        pos += i;
    }

    /** Advances the read offset past any whitespace at the current offset. */
    public void skipBlanks(String str) {
        while (pos < str.length() && Character.isWhitespace(str.charAt(pos))) {
            pos++;
        }
    }

    /** @return the current read offset */
    public int position() {
        return pos;
    }

    /**
     * @return the part of {@code str} starting at the current offset, or the
     *         empty string if the offset lies outside of {@code str}
     */
    public String remainder(String str) {
        if (pos >= 0 && pos < str.length()) {
            return str.substring(pos);
        }
        return "";
    }

    /**
     * @return the character at the current offset, or {@code null} if the
     *         offset lies outside of {@code str}
     */
    public Character firstChar(String str) {
        if (pos >= 0 && pos < str.length()) {
            return str.charAt(pos);
        }
        return null;
    }

    /**
     * Returns up to {@code len} characters starting at the current offset
     * without moving the offset.
     *
     * @return at most {@code len} characters; the empty string if the offset
     *         lies outside of {@code str} or {@code len} is not positive
     */
    public String prefix(String str, int len) {
        if (len <= 0 || pos < 0 || pos >= str.length()) {
            return "";
        }
        // Computed via the remaining length so that pos + len cannot overflow.
        int count = Math.min(len, str.length() - pos);
        return str.substring(pos, pos + count);
    }

    /**
     * Reads the next token at the current offset and moves the offset behind
     * it.
     *
     * @see #readToken(String, int)
     */
    public String readToken(String str) {
        Pair<String, Integer> tok = readToken(str, pos);
        pos = tok.value;
        return tok.key;
    }

    /**
     * Reads the next token up to (not including) the next occurrence of
     * {@code prefix} and moves the offset to that occurrence.
     *
     * @see #readToken(String, int, String)
     */
    public String readTokenUntil(String str, String prefix) {
        Pair<String, Integer> tok = readToken(str, pos, prefix);
        pos = tok.value;
        return tok.key;
    }

    /**
     * Reads the next token enclosed in {@code open} and {@code close} and
     * moves the offset behind it.
     *
     * @see #readToken(String, int, char, char)
     */
    public String readToken(String str, char open, char close) {
        Pair<String, Integer> tok = readToken(str, pos, open, close);
        pos = tok.value;
        return tok.key;
    }

    /**
     * Skips leading whitespace and reads a run of decimal digits.
     *
     * @return the parsed value
     * @throws NumberFormatException
     *             if there is no digit at the current offset or the digits do
     *             not fit into an {@code int}
     */
    public Integer readInteger(String str) {
        skipBlanks(str);
        int start = pos;
        while (pos < str.length() && Character.isDigit(str.charAt(pos))) {
            pos++;
        }
        return Integer.valueOf(str.substring(start, pos));
    }

    /** @return {@code true} if {@code str} is {@code null} or only whitespace */
    public static boolean isEmpty(String str) {
        return str == null || str.trim().isEmpty();
    }

    /**
     * Reads one token from {@code str}, starting at {@code offset}.
     *
     * <p>
     * Leading whitespace is skipped. A token starting with {@code "},
     * {@code (} or {@code [} extends to the matching closing character (and
     * includes both delimiters). Any other token ends at the next space
     * character ({@code ' '}) or at the end of the string.
     *
     * @return the token and the offset at which reading stopped; the empty
     *         token and {@code str.length()} if only whitespace is left
     */
    public static Pair<String, Integer> readToken(String str, int offset) {
        int start = skipWhitespace(str, offset);
        // Re-check after skipping: the string may end in whitespace.
        if (start >= str.length()) {
            return new Pair<String, Integer>("", str.length());
        }

        Pair<String, Integer> enclosed = readEnclosed(str, start);
        if (enclosed != null) {
            return enclosed;
        }

        int end = start;
        while (end < str.length() && str.charAt(end) != ' ') {
            end++;
        }
        return new Pair<String, Integer>(str.substring(start, end), end);
    }

    /**
     * Same as {@link #readToken(String, int)}, except that a token which does
     * not start with {@code "}, {@code (} or {@code [} ends at the next
     * occurrence of {@code boundary} instead of the next space.
     *
     * @return the token and the offset at which reading stopped (the offset
     *         of {@code boundary}, or {@code str.length()} if it does not
     *         occur)
     */
    public static Pair<String, Integer> readToken(String str, int offset,
            String boundary) {
        int start = skipWhitespace(str, offset);
        // Re-check after skipping: the string may end in whitespace.
        if (start >= str.length()) {
            return new Pair<String, Integer>("", str.length());
        }

        Pair<String, Integer> enclosed = readEnclosed(str, start);
        if (enclosed != null) {
            return enclosed;
        }

        int end = start;
        while (end < str.length() && !str.startsWith(boundary, end)) {
            end++;
        }
        return new Pair<String, Integer>(str.substring(start, end), end);
    }

    /**
     * Searches for {@code opening} at or after {@code offset} and reads up to
     * and including the next {@code closing} character.
     *
     * @return the token including both delimiters and the offset behind the
     *         closing character. If no closing character follows, the token
     *         extends to the end of the string. If {@code opening} does not
     *         occur, the empty token and the unchanged {@code offset}.
     */
    public static Pair<String, Integer> readToken(String str, int offset,
            char opening, char closing) {
        int start = offset;
        while (start < str.length() && str.charAt(start) != opening) {
            start++;
        }
        if (start >= str.length()) {
            // No opening character found: consume nothing.
            return new Pair<String, Integer>("", offset);
        }

        int end = start + 1;
        while (end < str.length() && str.charAt(end) != closing) {
            end++;
        }
        // Include the closing character whenever one was found, also when it
        // is the very last character of the string.
        if (end < str.length()) {
            end++;
        }
        return new Pair<String, Integer>(str.substring(start, end), end);
    }

    /**
     * Removes one leading and one trailing occurrence of {@code tr} from
     * {@code str} if it is enclosed in them, e.g.
     * {@code trim("\"abc\"", "\"")} yields {@code abc}.
     *
     * @return the stripped string, or {@code str} unchanged if it is not
     *         enclosed in two non-overlapping occurrences of {@code tr}
     */
    public static String trim(String str, String tr) {
        boolean longEnough = str.length() >= 2 * tr.length();
        if (longEnough && str.startsWith(tr) && str.endsWith(tr)) {
            return str.substring(tr.length(), str.length() - tr.length());
        }
        return str;
    }

    /** Returns the first non-whitespace offset at or after {@code from}. */
    private static int skipWhitespace(String str, int from) {
        int i = from;
        while (i < str.length() && Character.isWhitespace(str.charAt(i))) {
            i++;
        }
        return i;
    }

    /**
     * Reads a quoted, parenthesised or bracketed token starting exactly at
     * {@code start}; returns {@code null} if the character there is none of
     * the supported opening delimiters.
     */
    private static Pair<String, Integer> readEnclosed(String str, int start) {
        switch (str.charAt(start)) {
        case '"':
            return readToken(str, start, '"', '"');
        case '(':
            return readToken(str, start, '(', ')');
        case '[':
            return readToken(str, start, '[', ']');
        default:
            return null;
        }
    }

    /** An immutable pair of two values. */
    public static class Pair<K, V> {
        public final K key;
        public final V value;

        public Pair(K key, V val) {
            this.key = key;
            this.value = val;
        }
    }

    /** @return {@code true} if {@code str} is {@code null} or only whitespace */
    public static boolean isBlank(String str) {
        return isEmpty(str);
    }

    /** Sample access log line used by {@link #main(String[])}. */
    static String line = "66.249.65.43 - - [22/Nov/2009:18:39:12 +0100] \"GET /web/policy/editor.jsp HTTP/1.1\" 200 6065 \"-\" \"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\"";

    /**
     * Demo: tokenizes {@link #line} and prints the tokens, first as a plain
     * list and then mapped to field names.
     */
    public static void main(String[] args) throws Exception {
        String str = line;
        MParser p = new MParser();

        List<String> tokens = new LinkedList<String>();
        String tok = p.readToken(str);
        while (tok != null && !tok.equals("")) {
            tokens.add(tok);
            tok = p.readToken(str);
        }

        System.out
                .println("----------------------------------------------------");
        System.out.println();
        int i = 0;
        for (String s : tokens) {
            System.out.println(" token[" + i + "]: " + s);
            i++;
        }
        System.out.println();
        System.out
                .println("----------------------------------------------------");
        System.out.println();
        System.out.println();

        p.reset();
        Map<String, String> map = new LinkedHashMap<String, String>();
        map.put("REMOTE_ADDR", p.readToken(str));
        map.put("REMOTE_USER", p.readToken(str));
        map.put("__UNKNOWN__", p.readToken(str));
        map.put("DATE", p.readToken(str, '[', ']'));
        map.put("REQUEST_LINE", p.readToken(str));
        map.put("RESPONSE_STATUS", p.readToken(str));
        map.put("RESPONSE_SIZE", p.readToken(str));
        map.put("REFERER", p.readToken(str));
        map.put("USER_AGENT", p.readToken(str));
        for (Map.Entry<String, String> entry : map.entrySet()) {
            System.out.println(" " + entry.getKey() + " => " + entry.getValue());
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy