org.python.modules.sre.PatternObject Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jython-standalone Show documentation
Show all versions of jython-standalone Show documentation
Jython is an implementation of the high-level, dynamic, object-oriented
language Python written in 100% Pure Java, and seamlessly integrated with
the Java platform. It thus allows you to run Python on any Java platform.
/*
* Copyright 2000 Finn Bock
*
* This program contains material copyrighted by:
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
*
* This version of the SRE library can be redistributed under CNRI's
* Python 1.6 license. For any other use, please contact Secret Labs
* AB (info@pythonware.com).
*
* Portions of this engine have been developed in cooperation with
* CNRI. Hewlett-Packard provided funding for 1.6 integration and
* other compatibility work.
*/
package org.python.modules.sre;
import java.util.*;
import org.python.core.*;
public class PatternObject extends PyObject {
// Compiled pattern program; passed to SRE_STATE.SRE_MATCH / SRE_SEARCH by every matching method.
int[] code; /* link to the code string object */
// Pattern source text; stays null when the constructor is given null.
public PyString pattern; /* link to the pattern source (or None) */
// Number of groups besides group zero; sizes the mark array of each MatchObject.
public int groups;
// Stored exactly as passed to the constructor; not read inside this class.
// NOTE(review): presumably maps group names to group numbers -- confirm against the caller.
public org.python.core.PyObject groupindex;
// Flags handed to every SRE_STATE this pattern creates.
public int flags;
// Stored exactly as passed to the constructor; not read inside this class.
// NOTE(review): presumably the reverse of groupindex -- confirm against the caller.
org.python.core.PyObject indexgroup;
// Length of the code array, captured once in the constructor.
public int codesize;
/**
 * Creates a pattern object from its already-compiled parts.
 *
 * @param pattern    pattern source text; may be null, in which case the field stays null
 * @param flags      flags forwarded to every SRE_STATE created by this pattern
 * @param code       compiled pattern program; must not be null because its length is read here
 * @param groups     number of groups besides group zero
 * @param groupindex stored as given
 * @param indexgroup stored as given
 */
public PatternObject(PyString pattern, int flags, int[] code,
        int groups, PyObject groupindex, PyObject indexgroup) {
    // Assigning null is the same as leaving the field at its default, so no guard is needed.
    this.pattern = pattern;
    this.flags = flags;
    this.groups = groups;
    this.groupindex = groupindex;
    this.indexgroup = indexgroup;
    this.code = code;
    this.codesize = code.length;
}
/**
 * Runs SRE_MATCH for this pattern against a string, starting at the state's start position.
 *
 * Arguments (positional or keyword): "pattern" - the subject string (str/unicode, or an array
 * that is converted to a string), "pos" - start offset, default 0, "endpos" - end offset,
 * default the length of the string.
 *
 * @param args positional and keyword argument values
 * @param kws  names of the keyword arguments
 * @return a MatchObject when the engine reports success, or null when it reports no match
 */
public MatchObject match(PyObject[] args, String[] kws) {
    // The first ArgParser argument is the function name reported in argument errors; it used
    // to say "search" here, which made errors from match() name the wrong method.
    ArgParser ap = new ArgParser("match", args, kws,
            "pattern", "pos", "endpos");
    PyString string = extractPyString(ap, 0);
    int start = ap.getInt(1, 0);
    int end = ap.getInt(2, string.__len__());
    SRE_STATE state = new SRE_STATE(string, start, end, flags);
    state.ptr = state.start;
    int status = state.SRE_MATCH(code, 0, 1);
    // Negative statuses are turned into a RuntimeError by _pattern_new_match.
    return _pattern_new_match(state, string, status);
}
/**
 * Runs SRE_SEARCH for this pattern over a string.
 *
 * Arguments (positional or keyword): "pattern" - the subject string, "pos" - start offset,
 * default 0, "endpos" - end offset, default the length of the string.
 *
 * @return a MatchObject when the engine reports success, or null when it reports no match
 */
public MatchObject search(PyObject[] args, String[] kws) {
    ArgParser parser = new ArgParser("search", args, kws, "pattern", "pos", "endpos");
    PyString subject = extractPyString(parser, 0);
    int from = parser.getInt(1, 0);
    int to = parser.getInt(2, subject.__len__());
    SRE_STATE state = new SRE_STATE(subject, from, to, flags);
    return _pattern_new_match(state, subject, state.SRE_SEARCH(code, 0));
}
/**
 * Substitutes matches of this pattern in a string and returns the resulting string.
 *
 * Arguments (positional or keyword): "repl" - replacement object, "string" - subject string,
 * "count" - maximum number of replacements, default 0 meaning no limit.
 */
public PyObject sub(PyObject[] args, String[] kws) {
    ArgParser parser = new ArgParser("sub", args, kws, "repl", "string", "count");
    PyObject replacement = parser.getPyObject(0);
    int limit = parser.getInt(2, 0);
    PyString subject = extractPyString(parser, 1);
    return subx(replacement, subject, limit, false);
}
/**
 * Substitutes matches of this pattern in a string and returns a
 * (new string, number of replacements) tuple.
 *
 * Arguments (positional or keyword): "repl" - replacement object, "string" - subject string,
 * "count" - maximum number of replacements, default 0 meaning no limit.
 */
public PyObject subn(PyObject[] args, String[] kws) {
    ArgParser parser = new ArgParser("subn", args, kws, "repl", "string", "count");
    PyObject replacement = parser.getPyObject(0);
    int limit = parser.getInt(2, 0);
    PyString subject = extractPyString(parser, 1);
    return subx(replacement, subject, limit, true);
}
/**
 * Shared implementation of sub() and subn().
 *
 * The replacement for each match comes from one of three sources: the template itself when it
 * is callable, the template used verbatim when it is a PyString without a backslash, or
 * whatever re._subx(this, template) returns otherwise.
 *
 * @param template replacement object supplied by the caller
 * @param instring subject string
 * @param count    maximum number of replacements; 0 means no limit
 * @param subn     when true, return (result, replacement count) instead of just the result
 */
private PyObject subx(PyObject template, PyString instring, int count,
        boolean subn)
{
    PyObject replacer;
    boolean invokeReplacer;
    if (template.isCallable()) {
        replacer = template;
        invokeReplacer = true;
    } else if (template instanceof PyString && template.toString().indexOf('\\') < 0) {
        // No backslash: nothing to expand, use the template as it is.
        replacer = template;
        invokeReplacer = false;
    } else {
        replacer = call("re", "_subx", new PyObject[] {this, template});
        invokeReplacer = replacer.isCallable();
    }

    SRE_STATE state = new SRE_STATE(instring, 0, Integer.MAX_VALUE, flags);
    PyList pieces = new PyList();
    int replaced = 0;   // number of substitutions performed so far
    int copiedUpTo = 0; // end offset of the text already accounted for

    while (count == 0 || replaced < count) {
        state.state_reset();
        state.ptr = state.start;
        int status = state.SRE_SEARCH(code, 0);
        if (status == 0) {
            break;
        }
        if (status < 0) {
            _error(status); // always throws
        }

        int matchStart = state.start;
        int matchEnd = state.ptr;

        if (copiedUpTo < matchStart) {
            /* text between the previous match and this one */
            pieces.append(instring.__getslice__(Py.newInteger(copiedUpTo),
                                                Py.newInteger(matchStart)));
        }

        /* an empty match sitting exactly where the previous match ended is ignored */
        boolean emptyAtPreviousEnd =
                copiedUpTo == matchStart && copiedUpTo == matchEnd && replaced > 0;
        if (!emptyAtPreviousEnd) {
            PyObject item = invokeReplacer
                    ? replacer.__call__(_pattern_new_match(state, instring, 1))
                    : replacer;
            if (item != Py.None) {
                pieces.append(item);
            }
            copiedUpTo = matchEnd;
            replaced++;
        }

        /* advance; step one position past an empty match */
        state.start = (state.ptr == state.start) ? state.ptr + 1 : state.ptr;
    }

    if (copiedUpTo < state.endpos) {
        /* text after the last match */
        pieces.append(instring.__getslice__(Py.newInteger(copiedUpTo),
                                            Py.newInteger(state.endpos)));
    }

    PyObject result = join_list(pieces, instring);
    return subn ? new PyTuple(result, Py.newInteger(replaced)) : result;
}
/**
 * Joins the pieces in list using an empty slice of string as the separator, so the result is
 * produced by that slice's own "join" attribute. An empty list yields the empty slice itself.
 */
private PyObject join_list(PyList list, PyString string) {
    PyObject emptySlice = string.__getslice__(Py.Zero, Py.Zero);
    return list.size() == 0
            ? emptySlice
            : emptySlice.__getattr__("join").__call__(list);
}
/**
 * Splits a string around matches of this pattern.
 *
 * Arguments (positional or keyword): "source" - subject string, "maxsplit" - maximum number of
 * splits, default 0 meaning no limit. After each separator the text of every group is appended
 * (None for a group whose slice is null). Empty matches never split.
 *
 * @return a PyList of the pieces, always ending with the text after the last split
 */
public PyObject split(PyObject[] args, String[] kws) {
    ArgParser parser = new ArgParser("split", args, kws, "source", "maxsplit");
    PyString subject = extractPyString(parser, 0);
    int limit = parser.getInt(1, 0);

    SRE_STATE state = new SRE_STATE(subject, 0, Integer.MAX_VALUE, flags);
    PyList parts = new PyList();
    int splits = 0;
    int tail = state.start; // offset where the piece currently being collected begins

    while (limit == 0 || splits < limit) {
        state.state_reset();
        state.ptr = state.start;
        int status = state.SRE_SEARCH(code, 0);
        if (status == 0) {
            break;
        }
        if (status < 0) {
            _error(status); // always throws
        }

        if (state.start == state.ptr) {
            /* empty match: stop at the end of input, otherwise skip one character */
            if (tail == state.end) {
                break;
            }
            state.start = state.ptr + 1;
            continue;
        }

        /* piece in front of this match */
        parts.append(subject.__getslice__(Py.newInteger(tail), Py.newInteger(state.start)));

        for (int g = 1; g <= groups; g++) {
            String text = state.getslice(g, subject.toString(), false);
            if (text == null) {
                parts.append(Py.None);
            } else {
                parts.append(subject.createInstance(text));
            }
        }

        splits++;
        state.start = state.ptr;
        tail = state.ptr;
    }

    parts.append(subject.__getslice__(Py.newInteger(tail), Py.newInteger(state.endpos)));
    return parts;
}
/** Imports the named module (top-level import) and invokes one of its functions with args. */
private PyObject call(String module, String function, PyObject[] args) {
    return imp.importName(module, true).invoke(function, args);
}
/**
 * Collects every match of this pattern in a string.
 *
 * Arguments (positional or keyword): "source" - subject string, "pos" - start offset,
 * default 0, "endpos" - end offset, default Integer.MAX_VALUE.
 *
 * Each list entry depends on the number of groups: with none it is the matched slice, with
 * one it is that group's text, otherwise it is a tuple holding the text of every group.
 *
 * @return a PyList of the collected items
 */
public PyObject findall(PyObject[] args, String[] kws) {
    ArgParser parser = new ArgParser("findall", args, kws, "source", "pos", "endpos");
    PyString subject = extractPyString(parser, 0);
    int from = parser.getInt(1, 0);
    int to = parser.getInt(2, Integer.MAX_VALUE);

    SRE_STATE state = new SRE_STATE(subject, from, to, flags);
    final List found = new ArrayList();

    while (state.start <= state.end) {
        state.state_reset();
        state.ptr = state.start;
        int status = state.SRE_SEARCH(code, 0);
        if (status == 0) {
            break;
        }
        if (status < 0) {
            _error(status); // always throws
        }

        /* build the item straight from the state; no match object is needed */
        PyObject item;
        if (groups == 0) {
            item = subject.__getslice__(Py.newInteger(state.start), Py.newInteger(state.ptr));
        } else if (groups == 1) {
            item = subject.createInstance(state.getslice(1, subject.toString(), true));
        } else {
            PyObject[] fields = new PyObject[groups];
            for (int g = 1; g <= groups; g++) {
                fields[g - 1] =
                        subject.createInstance(state.getslice(g, subject.toString(), true));
            }
            item = new PyTuple(fields);
        }
        found.add(item);

        /* advance; step one position past an empty match */
        state.start = (state.ptr == state.start) ? state.ptr + 1 : state.ptr;
    }
    return new PyList(found);
}
/**
 * Returns an iterator built from a scanner over the given arguments: a PyCallIter that keeps
 * calling the scanner's "search" attribute, with Py.None as the sentinel.
 */
public PyObject finditer(PyObject[] args, String[] kws) {
    PyObject nextMatch = scanner(args, kws).__findattr__("search");
    return new PyCallIter(nextMatch, Py.None);
}
/**
 * Creates a ScannerObject bound to this pattern and a subject string.
 *
 * Arguments (positional or keyword): "pattern" - the subject string, "pos" - start offset,
 * default 0, "endpos" - end offset, default Integer.MAX_VALUE.
 */
public ScannerObject scanner(PyObject[] args, String[] kws) {
    ArgParser parser = new ArgParser("scanner", args, kws, "pattern", "pos", "endpos");
    PyString subject = extractPyString(parser, 0);

    ScannerObject result = new ScannerObject();
    int from = parser.getInt(1, 0);
    int to = parser.getInt(2, Integer.MAX_VALUE);
    result.state = new SRE_STATE(subject, from, to, flags);
    result.pattern = this;
    result.string = subject;
    return result;
}
/**
 * Converts an engine status code into a Python RuntimeError. This method never returns
 * normally: the recursion-limit status gets its own message, every other status is reported
 * as an internal engine error.
 */
private void _error(int status) {
    String message = (status == SRE_STATE.SRE_ERROR_RECURSION_LIMIT)
            ? "maximum recursion limit exceeded"
            : "internal error in regular expression engine";
    throw Py.RuntimeError(message);
}
/**
 * Builds a MatchObject from the engine state after a match or search.
 *
 * @param state  engine state holding the match boundaries and group marks
 * @param string subject string recorded on the match object
 * @param status engine result: positive for a match, 0 for no match, negative for an error
 * @return the new MatchObject, or null when status is 0
 */
MatchObject _pattern_new_match(SRE_STATE state, PyString string,
        int status)
{
    if (status == 0) {
        return null;
    }
    if (status < 0) {
        _error(status); // always throws
        return null;
    }

    MatchObject result = new MatchObject();
    result.pattern = this;
    result.string = string;
    result.regs = null;
    /* one slot pair per group plus one for group zero */
    result.groups = groups + 1;

    int base = state.beginning;
    result.mark = new int[result.groups * 2];

    /* group zero spans the whole match */
    result.mark[0] = state.start - base;
    result.mark[1] = state.ptr - base;

    /* remaining groups: copy both marks when set, otherwise record -1/-1 */
    for (int g = 0; g < groups; g++) {
        int lo = 2 * g;
        int hi = lo + 1;
        boolean captured = hi <= state.lastmark
                && state.mark[lo] != -1
                && state.mark[hi] != -1;
        if (captured) {
            result.mark[lo + 2] = state.mark[lo] - base;
            result.mark[hi + 2] = state.mark[hi] - base;
        } else {
            result.mark[lo + 2] = -1;
            result.mark[hi + 2] = -1;
        }
    }

    result.pos = state.pos;
    result.endpos = state.endpos;
    result.lastindex = state.lastindex;
    return result;
}
/**
 * Fetches argument number pos from the parser as a PyString.
 *
 * A PyString is returned unchanged, a PyArray is wrapped in a new PyString built from its
 * toString() value, and anything else raises a Python TypeError.
 */
private static PyString extractPyString(ArgParser ap, int pos){
    PyObject arg = ap.getPyObject(pos);
    if (arg instanceof PyString) {
        return (PyString) arg;
    }
    if (arg instanceof PyArray) {
        // NOTE(review): relies on PyArray.toString(); confirm it yields the array's contents
        // rather than its repr.
        return new PyString(arg.toString());
    }
    throw Py.TypeError("expected str or unicode but got " + arg.getType());
}
}