org.python.compiler.Module Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jython-slim Show documentation
Show all versions of jython-slim Show documentation
Jython is an implementation of the high-level, dynamic, object-oriented
language Python written in 100% Pure Java, and seamlessly integrated with
the Java platform. It thus allows you to run Python on any Java platform.
// Copyright (c) Corporation for National Research Initiatives
package org.python.compiler;
import static org.python.core.RegistryKey.PYTHON_CPYTHON;
import static org.python.util.CodegenUtils.ci;
import static org.python.util.CodegenUtils.p;
import static org.python.util.CodegenUtils.sig;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectOutputStream;
import java.io.File;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Stack;
import java.util.List;
import org.objectweb.asm.Label;
import org.objectweb.asm.MethodTooLargeException;
import org.objectweb.asm.Opcodes;
import org.objectweb.asm.Type;
import org.python.antlr.ParseException;
import org.python.antlr.PythonTree;
import org.python.antlr.ast.Num;
import org.python.antlr.ast.Str;
import org.python.antlr.ast.Suite;
import org.python.antlr.base.mod;
import org.python.core.ClasspathPyImporter;
import org.python.core.CodeBootstrap;
import org.python.core.CodeFlag;
import org.python.core.CodeLoader;
import org.python.core.CompilerFlags;
import org.python.core.imp;
import org.python.core.Py;
import org.python.core.PyCode;
import org.python.core.PyBytecode;
import org.python.core.PyComplex;
import org.python.core.PyException;
import org.python.core.PyFile;
import org.python.core.PyFloat;
import org.python.core.PyFrame;
import org.python.core.PyFunctionTable;
import org.python.core.PyInteger;
import org.python.core.PyLong;
import org.python.core.PyObject;
import org.python.core.PyRunnable;
import org.python.core.PyRunnableBootstrap;
import org.python.core.PyString;
import org.python.core.PyUnicode;
import org.python.core.ThreadState;
import org.python.modules._marshal;
/** Pooled constant for a Python integer literal whose value fits a Java {@code int}. */
class PyIntegerConstant extends Constant implements ClassConstants, Opcodes {

    /** The literal's value. */
    final int value;

    PyIntegerConstant(int value) {
        this.value = value;
    }

    /** Emits code that pushes the value and wraps it through {@code Py.newInteger(int)}. */
    @Override
    void get(Code c) throws IOException {
        // it would be nice if we knew we didn't have to box next
        c.iconst(value);
        final String owner = p(Py.class);
        final String descriptor = sig(PyInteger.class, Integer.TYPE);
        c.invokestatic(owner, "newInteger", descriptor);
    }

    /** Nothing to initialise: the object is created on demand by {@link #get(Code)}. */
    @Override
    void put(Code c) throws IOException {}

    @Override
    public int hashCode() {
        return value;
    }

    /** Two integer constants are equal when they carry the same value. */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof PyIntegerConstant)) {
            return false;
        }
        PyIntegerConstant other = (PyIntegerConstant) o;
        return other.value == value;
    }
}
/** Pooled constant for a Python float literal. */
class PyFloatConstant extends Constant implements ClassConstants, Opcodes {

    /** The literal's value. */
    final double value;

    PyFloatConstant(double value) {
        this.value = value;
    }

    /** Emits code that loads the double and wraps it through {@code Py.newFloat(double)}. */
    @Override
    void get(Code c) throws IOException {
        final Double boxed = Double.valueOf(value);
        c.ldc(boxed);
        c.invokestatic(p(Py.class), "newFloat", sig(PyFloat.class, Double.TYPE));
    }

    /** Nothing to initialise: the object is created on demand by {@link #get(Code)}. */
    @Override
    void put(Code c) throws IOException {}

    @Override
    public int hashCode() {
        return (int) value;
    }

    /**
     * Compares by bit pattern rather than with {@code ==}, so that the constant pool treats
     * {@code -0.0} and {@code 0.0} as different and NaN as equal to itself (see
     * {@link Double#equals(Object)}).
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof PyFloatConstant)) {
            return false;
        }
        final long theirs = Double.doubleToLongBits(((PyFloatConstant) o).value);
        final long ours = Double.doubleToLongBits(value);
        return theirs == ours;
    }
}
/** Pooled constant for a Python imaginary literal; only the imaginary part is stored. */
class PyComplexConstant extends Constant implements ClassConstants, Opcodes {

    /** The value handed to {@code Py.newImaginary(double)}. */
    final double value;

    PyComplexConstant(double value) {
        this.value = value;
    }

    /** Emits code that loads the double and wraps it through {@code Py.newImaginary(double)}. */
    @Override
    void get(Code c) throws IOException {
        final Double boxed = Double.valueOf(value);
        c.ldc(boxed);
        c.invokestatic(p(Py.class), "newImaginary", sig(PyComplex.class, Double.TYPE));
    }

    /** Nothing to initialise: the object is created on demand by {@link #get(Code)}. */
    @Override
    void put(Code c) throws IOException {}

    @Override
    public int hashCode() {
        return (int) value;
    }

    /**
     * Compares by bit pattern rather than with {@code ==}, so that the constant pool treats
     * {@code -0.0} and {@code 0.0} as different and NaN as equal to itself (see
     * {@link Double#equals(Object)}).
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof PyComplexConstant)) {
            return false;
        }
        final long theirs = Double.doubleToLongBits(((PyComplexConstant) o).value);
        final long ours = Double.doubleToLongBits(value);
        return theirs == ours;
    }
}
/** Pooled constant for a Python byte-string ({@code str}) literal. */
class PyStringConstant extends Constant implements ClassConstants, Opcodes {

    /** The literal's text. */
    final String value;

    PyStringConstant(String value) {
        this.value = value;
    }

    /** Emits code that loads the text and calls {@code PyString.fromInterned(String)}. */
    @Override
    void get(Code c) throws IOException {
        c.ldc(value);
        final String owner = p(PyString.class);
        final String descriptor = sig(PyString.class, String.class);
        c.invokestatic(owner, "fromInterned", descriptor);
    }

    /** Nothing to initialise: the object is created on demand by {@link #get(Code)}. */
    @Override
    void put(Code c) throws IOException {}

    @Override
    public int hashCode() {
        return value.hashCode();
    }

    /** Two string constants are equal when their text is equal. */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof PyStringConstant)) {
            return false;
        }
        PyStringConstant other = (PyStringConstant) o;
        return other.value.equals(value);
    }
}
/** Pooled constant for a Python {@code unicode} literal. */
class PyUnicodeConstant extends Constant implements ClassConstants, Opcodes {

    /** The literal's text. */
    final String value;

    PyUnicodeConstant(String value) {
        this.value = value;
    }

    /** Emits code that loads the text and calls {@code PyUnicode.fromInterned(String)}. */
    @Override
    void get(Code c) throws IOException {
        c.ldc(value);
        final String owner = p(PyUnicode.class);
        final String descriptor = sig(PyUnicode.class, String.class);
        c.invokestatic(owner, "fromInterned", descriptor);
    }

    /** Nothing to initialise: the object is created on demand by {@link #get(Code)}. */
    @Override
    void put(Code c) throws IOException {}

    @Override
    public int hashCode() {
        return value.hashCode();
    }

    /** Two unicode constants are equal when their text is equal. */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof PyUnicodeConstant)) {
            return false;
        }
        PyUnicodeConstant other = (PyUnicodeConstant) o;
        return other.value.equals(value);
    }
}
/** Pooled constant for a Python {@code long} literal, kept in its textual form. */
class PyLongConstant extends Constant implements ClassConstants, Opcodes {

    /** The text handed to {@code Py.newLong(String)}. */
    final String value;

    PyLongConstant(String value) {
        this.value = value;
    }

    /** Emits code that loads the text and calls {@code Py.newLong(String)}. */
    @Override
    void get(Code c) throws IOException {
        c.ldc(value);
        final String owner = p(Py.class);
        final String descriptor = sig(PyLong.class, String.class);
        c.invokestatic(owner, "newLong", descriptor);
    }

    /** Nothing to initialise: the object is created on demand by {@link #get(Code)}. */
    @Override
    void put(Code c) throws IOException {}

    @Override
    public int hashCode() {
        return value.hashCode();
    }

    /** Two long constants are equal when their textual form is equal. */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof PyLongConstant)) {
            return false;
        }
        PyLongConstant other = (PyLongConstant) o;
        return other.value.equals(value);
    }
}
class PyCodeConstant extends Constant implements ClassConstants, Opcodes {
// Name of the code object as supplied by the caller; passed to Py.newCode in put().
final String co_name;
// Number of argument names (scope.ac.names.size()), or 0 when the scope has no scope.ac.
final int argcount;
// Copy of scope.names, or null when this code object is a class body.
final List names;
// Index of this code object: the size of module.codes at construction time.
final int id;
// First line number as supplied by the caller; passed to Py.newCode in put().
final int co_firstlineno;
// Copied from scope.ac.arglist / scope.ac.keywordlist; both false when scope.ac is null.
final boolean arglist, keywordlist;
// Name used for the generated method and static field: name + "$" + id, or "f$" + id when
// name is not a valid Java identifier.
final String fname;
// for nested scopes
// Copies of scope.cellvars / scope.freevars, or null when the respective list is empty.
final List cellvars;
final List freevars;
// Copied from scope.jy_npurecell.
final int jy_npurecell;
// CodeFlag bits (CO_OPTIMIZED, CO_GENERATOR, CO_GENERATOR_ALLOWED, CO_FUTURE_DIVISION)
// accumulated in the constructor.
final int moreflags;
/**
 * Captures everything needed to later emit a {@code Py.newCode(...)} call for one code object.
 *
 * Note that when the scope carries argument initialisation code, that code is appended to
 * {@code tree} here (a side effect on {@code scope.ac}).
 *
 * @param tree AST of the code object; cast to {@code Suite} if init code has to be appended
 * @param name name of the code object
 * @param fast_locals passed to {@code CodeCompiler.checkOptimizeGlobals}
 * @param className not used by this constructor
 * @param classBody true if this is a class body, in which case no names are recorded
 * @param printResults not used by this constructor
 * @param firstlineno first line number of the code object
 * @param scope scope information for the code object
 * @param cflags compiler flags, may be null
 * @param module the module that will own this constant
 */
PyCodeConstant(mod tree, String name, boolean fast_locals, String className, boolean classBody,
        boolean printResults, int firstlineno, ScopeInfo scope, CompilerFlags cflags,
        Module module) throws Exception {
    this.co_name = name;
    this.co_firstlineno = firstlineno;
    this.module = module;

    if (scope.ac == null) {
        arglist = false;
        keywordlist = false;
        argcount = 0;
    } else {
        arglist = scope.ac.arglist;
        keywordlist = scope.ac.keywordlist;
        argcount = scope.ac.names.size();
        // Do something to add init_code to tree
        // XXX: not sure we should be modifying scope.ac in a PyCodeConstant
        // constructor.
        if (scope.ac.init_code.size() > 0) {
            scope.ac.appendInitCode((Suite) tree);
        }
    }

    id = module.codes.size();

    // fname disambiguates functions and methods that share a name but live in different
    // classes: the generated function-fields and PyCode-fields are not fully qualified.
    // Better names in the future?
    final String stem = isJavaIdentifier(name) ? name : "f";
    fname = stem + "$" + id;
    this.name = fname;

    // Names are recorded for everything except a class definition body.
    names = classBody ? null : toNameAr(scope.names, false);
    cellvars = toNameAr(scope.cellvars, true);
    freevars = toNameAr(scope.freevars, true);
    jy_npurecell = scope.jy_npurecell;

    // Accumulate in a local so that the moreflags field can be final.
    int flags = 0;
    if (CodeCompiler.checkOptimizeGlobals(fast_locals, scope)) {
        flags |= CodeFlag.CO_OPTIMIZED.flag;
    }
    if (scope.generator) {
        flags |= CodeFlag.CO_GENERATOR.flag;
    }
    if (cflags != null) {
        if (cflags.isFlagSet(CodeFlag.CO_GENERATOR_ALLOWED)) {
            flags |= CodeFlag.CO_GENERATOR_ALLOWED.flag;
        }
        if (cflags.isFlagSet(CodeFlag.CO_FUTURE_DIVISION)) {
            flags |= CodeFlag.CO_FUTURE_DIVISION.flag;
        }
    }
    moreflags = flags;
}
// XXX: this can probably go away now that we can probably just copy the list.
/**
 * Returns a fresh copy of {@code names}.
 *
 * @param names the list to copy
 * @param nullok if true, an empty list is represented by {@code null}
 * @return the copy, or {@code null} for an empty list when {@code nullok} is set
 */
private List toNameAr(List names, boolean nullok) {
    if (names.isEmpty() && nullok) {
        return null;
    }
    return new ArrayList(names);
}
/**
 * Tests whether a string is non-empty, starts with a Java identifier start character and
 * continues with Java identifier part characters only. Reserved words are not rejected.
 *
 * @param s the candidate name
 * @return true if {@code s} has the shape of a Java identifier
 */
public static boolean isJavaIdentifier(String s) {
    final int length = s.length();
    if (length == 0 || !Character.isJavaIdentifierStart(s.charAt(0))) {
        return false;
    }
    int pos = 1;
    while (pos < length) {
        if (!Character.isJavaIdentifierPart(s.charAt(pos))) {
            return false;
        }
        pos++;
    }
    return true;
}
/** Emits code that reads this code object from its static field on the module class. */
@Override
void get(Code c) throws IOException {
    final String owner = module.classfile.name;
    final String fieldType = ci(PyCode.class);
    c.getstatic(owner, name, fieldType);
}
/**
 * Declares the static {@code PyCode} field for this code object and emits the code that
 * initialises it by calling {@code Py.newCode(...)}.
 *
 * The arguments are pushed in the order of the {@code newCode} signature used below.
 */
@Override
void put(Code c) throws IOException {
    final String owner = module.classfile.name;
    final String codeType = ci(PyCode.class);

    module.classfile.addField(name, codeType, access);

    c.iconst(argcount);

    // Make all names (a class definition has none)
    final int nameArray = (names == null)
            ? CodeCompiler.makeStrings(c, null)
            : CodeCompiler.makeStrings(c, names);
    c.aload(nameArray);
    c.freeLocal(nameArray);

    // Local variable 1 of the method being generated supplies the first String argument.
    c.aload(1);
    c.ldc(co_name);
    c.iconst(co_firstlineno);
    c.iconst(arglist ? 1 : 0);
    c.iconst(keywordlist ? 1 : 0);

    c.getstatic(owner, "self", "L" + owner + ";");
    c.iconst(id);

    pushStringsOrNull(c, cellvars);
    pushStringsOrNull(c, freevars);

    c.iconst(jy_npurecell);
    c.iconst(moreflags);

    c.invokestatic(
            p(Py.class),
            "newCode",
            sig(PyCode.class, Integer.TYPE, String[].class, String.class, String.class,
                    Integer.TYPE, Boolean.TYPE, Boolean.TYPE, PyFunctionTable.class,
                    Integer.TYPE, String[].class, String[].class, Integer.TYPE, Integer.TYPE));
    c.putstatic(owner, name, codeType);
}

/** Pushes a string array built from {@code vars}, or a null reference if there is none. */
private static void pushStringsOrNull(Code c, List vars) throws IOException {
    if (vars == null) {
        c.aconst_null();
        return;
    }
    int strArray = CodeCompiler.makeStrings(c, vars);
    c.aload(strArray);
    c.freeLocal(strArray);
}
}
/**
 * Placeholder constant for a code object that is represented as CPython bytecode. It only
 * knows how to read the corresponding static {@code PyCode} field; it emits no
 * initialisation code of its own.
 */
class PyBytecodeConstant extends Constant implements ClassConstants, Opcodes {

    /**
     * @param name name of the static field holding the code object
     * @param className not used
     * @param cflags not used
     * @param module the module that owns the field
     */
    PyBytecodeConstant(String name, String className, CompilerFlags cflags,
            Module module) throws Exception {
        this.name = name;
        this.module = module;
    }

    /** Emits code that reads the code object from its static field on the module class. */
    @Override
    void get(Code c) throws IOException {
        final String owner = module.classfile.name;
        c.getstatic(owner, name, ci(PyCode.class));
    }

    /** Intentionally emits nothing. */
    @Override
    void put(Code c) throws IOException {}
}
public class Module implements Opcodes, ClassConstants, CompilationContext {
/** The class file being assembled for this module. */
ClassFile classfile;
/** Pooled string constant for the source file name, or null if no file name was given. */
Constant filename;
/** The source file name as given to the constructor (may be null). */
String sfilename;
/** Code constant of the module body; set by {@code _module_init}. */
Constant mainCode;
boolean linenumbers;
Future futures;
/** Scope information per AST node. */
Hashtable<PythonTree, ScopeInfo> scopes;
/** Code objects compiled to JVM methods; the list index is the function id. */
List<PyCodeConstant> codes;
long mtime;
/** Running number used to name the {@code set$$N} helper methods. */
private int setter_count = 0;
/** Sequences shorter than this are not worth splitting into setter methods. */
private final static int USE_SETTERS_LIMIT = 100;
/** Maximum number of array stores emitted into one setter method. */
private final static int MAX_SETTINGS_PER_SETTER = 4096;
/** The pool of Python Constants */
Hashtable<Constant, Constant> constants;
/**
 * Table of oversized methods represented as CPython bytecode. Maps
 * {@code name + firstlineno} to the name of the field that holds the code object.
 */
protected Hashtable<String, String> oversized_methods = null;
/**
 * Creates a module without a modification time.
 *
 * @param name name of the class to generate
 * @param filename source file name, may be null
 * @param linenumbers stored in {@link #linenumbers}
 */
public Module(String name, String filename, boolean linenumbers) {
    this(name, filename, linenumbers, imp.NO_MTIME);
}

/**
 * Creates a module and its (still empty) class file, constant pool and code list.
 *
 * @param name name of the class to generate
 * @param filename source file name, may be null
 * @param linenumbers stored in {@link #linenumbers}
 * @param mtime modification time handed on to the class file
 */
public Module(String name, String filename, boolean linenumbers, long mtime) {
    this.linenumbers = linenumbers;
    this.mtime = mtime;
    this.classfile =
            new ClassFile(name, p(PyFunctionTable.class), ACC_SYNCHRONIZED | ACC_PUBLIC, mtime);
    // The pool must exist before the file name is interned into it just below.
    this.constants = new Hashtable();
    this.sfilename = filename;
    this.filename = (filename == null) ? null : stringConstant(filename);
    this.codes = new ArrayList();
    this.futures = new Future();
    this.scopes = new Hashtable();
}

/**
 * Creates a module whose source file name is {@code name + ".py"}, with line numbers enabled
 * and no modification time.
 *
 * @param name name of the class to generate
 */
public Module(String name) {
    this(name, name + ".py", true, imp.NO_MTIME);
}
/**
 * Interns a constant: returns the pooled instance equal to {@code c}, or adds {@code c} to
 * the pool (binding it to this module and naming it by pool position) and returns it.
 */
private Constant findConstant(Constant c) {
    Constant pooled = constants.get(c);
    if (pooled == null) {
        c.module = this;
        // More sophisticated name mappings might be nice
        c.name = "_" + constants.size();
        constants.put(c, c);
        pooled = c;
    }
    return pooled;
}
/** Returns the pooled constant for an integer literal. */
Constant integerConstant(int value) {
    Constant candidate = new PyIntegerConstant(value);
    return findConstant(candidate);
}

/** Returns the pooled constant for a float literal. */
Constant floatConstant(double value) {
    Constant candidate = new PyFloatConstant(value);
    return findConstant(candidate);
}

/** Returns the pooled constant for an imaginary literal. */
Constant complexConstant(double value) {
    Constant candidate = new PyComplexConstant(value);
    return findConstant(candidate);
}

/** Returns the pooled constant for a byte-string literal. */
Constant stringConstant(String value) {
    Constant candidate = new PyStringConstant(value);
    return findConstant(candidate);
}

/** Returns the pooled constant for a unicode literal. */
Constant unicodeConstant(String value) {
    Constant candidate = new PyUnicodeConstant(value);
    return findConstant(candidate);
}

/** Returns the pooled constant for a long literal given in textual form. */
Constant longConstant(String value) {
    Constant candidate = new PyLongConstant(value);
    return findConstant(candidate);
}
/**
 * Convenience overload of the full {@code codeConstant} for code objects without a class
 * docstring: delegates with {@code classDoc == null}.
 */
Constant codeConstant(mod tree, String name, boolean fast_locals, String className,
        boolean classBody, boolean printResults, int firstlineno, ScopeInfo scope,
        CompilerFlags cflags) throws Exception {
    final Str noClassDoc = null;
    return codeConstant(tree, name, fast_locals, className, noClassDoc, classBody,
            printResults, firstlineno, scope, cflags);
}
/**
 * Creates the constant for one code object and, unless it is listed as oversized, compiles
 * its body into a method of the class file.
 *
 * A code object found in {@link #oversized_methods} (keyed by {@code name + firstlineno}) is
 * not compiled: only a public static {@code PyCode} field is declared for it and a
 * {@link PyBytecodeConstant} referring to that field is returned.
 *
 * @return the constant through which generated code can reach the code object
 */
Constant codeConstant(mod tree, String name, boolean fast_locals, String className,
        Str classDoc, boolean classBody, boolean printResults, int firstlineno,
        ScopeInfo scope, CompilerFlags cflags) throws Exception {
    final String key = name + firstlineno;
    if (oversized_methods != null && oversized_methods.containsKey(key)) {
        // For now this only declares the field.
        // PyBytecodeConstant is just a dummy to allow the caller to work properly.
        // It is intentionally not added to codes, because it doesn't participate in
        // FunctionTable and doesn't mess up addFunctions and addConstants this way.
        final String fieldName = oversized_methods.get(key);
        PyBytecodeConstant bcode = new PyBytecodeConstant(fieldName, className, cflags, this);
        classfile.addField(bcode.name, ci(PyCode.class), ACC_PUBLIC | ACC_STATIC);
        return bcode;
    }

    // Register the code object first, so that nested code objects get later ids.
    PyCodeConstant code = new PyCodeConstant(tree, name, fast_locals, className, classBody,
            printResults, firstlineno, scope, cflags, this);
    codes.add(code);

    CodeCompiler compiler = new CodeCompiler(this, printResults);
    final String methodSig = sig(PyObject.class, PyFrame.class, ThreadState.class);
    Code c = classfile.addMethod(code.fname, methodSig, ACC_PUBLIC);
    compiler.parse(tree, c, fast_locals, className, classDoc, classBody, scope, cflags);
    return code;
}
/*
 * This block of code writes out the various standard methods
 */

/**
 * Adds the constructor {@code <init>(String)} of the generated class. It calls the no-argument
 * constructor of {@code PyFunctionTable} and then runs the constant initialisation emitted by
 * {@link #addConstants(Code)}, which also emits the final return.
 *
 * The JVM requires instance initialisation methods to carry the special name
 * {@code <init>}, both where the method is declared and where the superclass constructor is
 * invoked.
 */
public void addInit() throws IOException {
    Code c = classfile.addMethod("<init>", sig(Void.TYPE, String.class), ACC_PUBLIC);
    c.aload(0);
    c.invokespecial(p(PyFunctionTable.class), "<init>", sig(Void.TYPE));
    addConstants(c);
}
/** Adds the public method {@code getMain()}, which returns the module's main code object. */
public void addRunnable() throws IOException {
    final String descriptor = sig(PyCode.class);
    Code getMain = classfile.addMethod("getMain", descriptor, ACC_PUBLIC);
    mainCode.get(getMain);
    getMain.areturn();
}
/**
 * Adds {@code public static void main(String[])} to the generated class. The emitted code
 * instantiates the class (passing the class name to its {@code <init>(String)} constructor),
 * obtains the main code object through {@code getMain()}, wraps it in a {@code CodeBootstrap}
 * and hands that, with the command-line arguments, to {@code Py.runMain}.
 *
 * The constructor is invoked under its JVM name {@code <init>}.
 */
public void addMain() throws IOException {
    Code c = classfile.addMethod("main",
            sig(Void.TYPE, String[].class), ACC_PUBLIC | ACC_STATIC);
    c.new_(classfile.name);
    c.dup();
    c.ldc(classfile.name);
    c.invokespecial(classfile.name, "<init>", sig(Void.TYPE, String.class));
    c.invokevirtual(classfile.name, "getMain", sig(PyCode.class));
    c.invokestatic(p(CodeLoader.class), CodeLoader.SIMPLE_FACTORY_METHOD_NAME,
            sig(CodeBootstrap.class, PyCode.class));
    c.aload(0); // the String[] argument of main
    c.invokestatic(p(Py.class), "runMain", sig(Void.TYPE, CodeBootstrap.class, String[].class));
    c.return_();
}
/**
 * Adds the public static bootstrap accessor (named by
 * {@code CodeLoader.GET_BOOTSTRAP_METHOD_NAME}). The emitted code passes the generated class
 * itself to the {@code PyRunnableBootstrap} factory and returns the resulting
 * {@code CodeBootstrap}.
 */
public void addBootstrap() throws IOException {
    final String returnsBootstrap = sig(CodeBootstrap.class);
    Code c = classfile.addMethod(CodeLoader.GET_BOOTSTRAP_METHOD_NAME, returnsBootstrap,
            ACC_PUBLIC | ACC_STATIC);
    final String selfDescriptor = "L" + classfile.name + ";";
    c.ldc(Type.getType(selfDescriptor));
    c.invokestatic(p(PyRunnableBootstrap.class), PyRunnableBootstrap.REFLECTION_METHOD_NAME,
            sig(CodeBootstrap.class, Class.class));
    c.areturn();
}
/**
 * Emits, into the method under construction, the code that stores the instance in the static
 * field {@code self} and then initialises every pooled constant and every code object. Ends
 * the method with a return.
 *
 * @param c the method body to append to; local 0 must hold the module instance
 */
void addConstants(Code c) throws IOException {
    final String selfDescriptor = "L" + classfile.name + ";";
    classfile.addField("self", selfDescriptor, ACC_STATIC);
    c.aload(0);
    c.putstatic(classfile.name, "self", selfDescriptor);
    // The element type is needed so that nextElement() yields a Constant.
    Enumeration<Constant> e = constants.elements();
    while (e.hasMoreElements()) {
        Constant constant = e.nextElement();
        constant.put(c);
    }
    for (PyCodeConstant pyc : codes) {
        pyc.put(c);
    }
    c.return_();
}
/**
 * Adds the dispatcher {@code call_function(int, PyFrame, ThreadState)}: a table switch on the
 * function index that invokes the method generated for the corresponding entry of
 * {@link #codes} and returns its result. An index outside the table (or an empty table)
 * yields {@code null}.
 */
public void addFunctions() throws IOException {
    Code code = classfile.addMethod("call_function",
            sig(PyObject.class, Integer.TYPE, PyFrame.class, ThreadState.class), ACC_PUBLIC);

    final int count = codes.size();
    if (count > 0) {
        // Arguments shared by every target: this, frame, thread state
        code.aload(0);
        code.aload(2);
        code.aload(3);

        Label def = new Label();
        Label[] labels = new Label[count];
        for (int k = 0; k < count; k++) {
            labels[k] = new Label();
        }

        // Get index for function to call
        code.iload(1);
        code.tableswitch(0, count - 1, def, labels);

        final String targetSig = sig(PyObject.class, PyFrame.class, ThreadState.class);
        int index = 0;
        for (Label entry : labels) {
            code.label(entry);
            code.invokevirtual(classfile.name, codes.get(index).fname, targetSig);
            code.areturn();
            index++;
        }
        code.label(def);
    }

    // Should probably throw internal exception here
    code.aconst_null();
    code.areturn();
}
/**
 * Completes the class (standard methods, {@code PyRunnable} interface, source file attribute
 * if a file name is known) and writes it to the stream.
 *
 * @param stream destination of the class file
 */
public void write(OutputStream stream) throws IOException {
    // Standard members, in the order they appear in the class file
    addInit();
    addRunnable();
    addMain();
    addBootstrap();
    addFunctions();

    final String runnable = p(PyRunnable.class);
    classfile.addInterface(runnable);

    final String source = sfilename;
    if (source != null) {
        classfile.setSource(source);
    }

    classfile.write(stream);
}
// Implementation of CompilationContext

/** Returns the {@code Future} object created with this module. */
@Override
public Future getFutures() {
    return this.futures;
}

/** Returns the source file name given at construction, which may be null. */
@Override
public String getFilename() {
    return this.sfilename;
}

/** Returns the scope information recorded for {@code node}, or null if there is none. */
@Override
public ScopeInfo getScopeInfo(PythonTree node) {
    final ScopeInfo info = this.scopes.get(node);
    return info;
}
/**
 * Reports a compile-time problem at {@code node}.
 *
 * When {@code err} is false the problem is first issued as a {@code SyntaxWarning}. If the
 * warning is turned into a {@code SyntaxWarning} exception, or if {@code err} is true, a
 * {@code ParseException} is thrown; any other exception from issuing the warning propagates.
 *
 * @param msg the message
 * @param err true to treat the problem as an error straight away
 * @param node the AST node the problem relates to
 */
@Override
public void error(String msg, boolean err, PythonTree node) throws Exception {
    boolean fatal = err;
    if (!fatal) {
        final String where = (sfilename != null) ? sfilename : "?";
        try {
            Py.warning(Py.SyntaxWarning, msg, where, node.getLineno(), null, Py.None);
        } catch (PyException e) {
            if (!e.match(Py.SyntaxWarning)) {
                throw e;
            }
            // The warning was escalated to an exception: report it as a parse error.
            fatal = true;
        }
    }
    if (fatal) {
        throw new ParseException(msg, node);
    }
}
/**
 * Compiles a module without a modification time; equivalent to the eight-argument
 * {@code compile} with {@code imp.NO_MTIME}.
 */
public static void compile(mod node, OutputStream ostream, String name, String filename,
        boolean linenumbers, boolean printResults, CompilerFlags cflags) throws Exception {
    final long noMtime = imp.NO_MTIME;
    compile(node, ostream, name, filename, linenumbers, printResults, cflags, noMtime);
}
/**
 * Runs the front-end passes for a module (future-statement processing and scope analysis) and
 * compiles the module body, storing the resulting constant in {@code module.mainCode}.
 *
 * The module body is compiled under the code-object name {@code <module>} at line 0, which
 * is also the key ({@code "<module>0"}) under which an oversized module body is looked up.
 *
 * @param node AST of the module
 * @param module the module being built
 * @param printResults handed on to the code compiler
 * @param cflags compiler flags; a fresh {@code CompilerFlags} is used if null
 */
protected static void _module_init(mod node, Module module, boolean printResults,
        CompilerFlags cflags) throws Exception {
    if (cflags == null) {
        cflags = new CompilerFlags();
    }
    module.futures.preprocessFutures(node, cflags);
    new ScopesCompiler(module, module.scopes).parse(node);
    Constant main = module.codeConstant(node, "<module>", false, null, false,
            printResults, 0, module.getScopeInfo(node), cflags);
    module.mainCode = main;
}
// Error message formats required by loadPyBytecode. They are constants, hence final.

/** Arguments: the command that was executed, the reason it failed. */
private static final String TRIED_CREATE_PYC_MSG =
        "\nJython tried to create a pyc-file by executing\n %s\nwhich failed because %s";
/** Argument: the source file name. */
private static final String LARGE_METHOD_MSG = "Module or method too large in `%s`.";
/** Argument: the source file name. */
private static final String PLEASE_PROVIDE_MSG =
        "\n\nPlease provide a CPython 2.7 bytecode file (.pyc), e.g. run"
                + "\n python -m py_compile %s";
/** Hint on configuring a CPython command; takes no format arguments. */
private static final String CPYTHON_CMD_MSG =
        "\n\nAlternatively, specify a CPython 2.7 command via the " //
                + PYTHON_CPYTHON + " property, e.g.:" //
                + "\n jython -D" + PYTHON_CPYTHON + "=python" //
                + "\nor (e.g. for pip) through the environment variable JYTHON_OPTS:" //
                + "\n export JYTHON_OPTS=\"-D" + PYTHON_CPYTHON
                + "=python\"\n";
/**
 * Obtains the CPython 2.7 bytecode for a source file whose JVM compilation failed because a
 * method was too large.
 *
 * The {@code .pyc} file next to the source ({@code filename + "c"}) is used if it exists.
 * Otherwise, if {@code try_cpython} is set and the {@code PYTHON_CPYTHON} property names a
 * command, that command is checked to be Python 2.7 and run with {@code -m py_compile} to
 * create the file, after which loading is retried once.
 *
 * @param filename the source file; a classpath-prefixed name is resolved through the class
 *            loader first
 * @param try_cpython whether an external CPython may be invoked to create the {@code .pyc}
 * @return the unmarshalled code object
 * @throws RuntimeException with an explanatory message if no valid bytecode can be obtained
 */
private static PyBytecode loadPyBytecode(String filename, boolean try_cpython)
        throws RuntimeException {
    if (filename.startsWith(ClasspathPyImporter.PYCLASSPATH_PREFIX)) {
        ClassLoader cld = Py.getSystemState().getClassLoader();
        if (cld == null) {
            cld = imp.getParentClassLoader();
        }
        URL py_url =
                cld.getResource(filename.replace(ClasspathPyImporter.PYCLASSPATH_PREFIX, ""));
        if (py_url != null) {
            filename = py_url.getPath();
        } else {
            // Should never happen, but let's play it safe and treat this case.
            throw new RuntimeException(String.format(LARGE_METHOD_MSG, filename)
                    + "but couldn't resolve that filename within classpath.\n"
                    + "Make sure the source file is at a proper location.");
        }
    }

    String pyc_filename = filename + "c";
    File pyc_file = new File(pyc_filename);
    if (pyc_file.exists()) {
        return readPycFile(filename, pyc_filename);
    }

    String CPython_command = System.getProperty(PYTHON_CPYTHON);
    if (!try_cpython || CPython_command == null) {
        throw new RuntimeException(String.format(LARGE_METHOD_MSG, filename)
                + String.format(PLEASE_PROVIDE_MSG, filename) + CPYTHON_CMD_MSG);
    }

    // check version...
    String command_ver = CPython_command + " --version";
    String command = CPython_command + " -m py_compile " + filename;
    Exception exc = null;
    int result = 0;
    String reason;
    try {
        Process p = Runtime.getRuntime().exec(command_ver);
        // Python 2.7 writes version to error-stream for some reason:
        String cp_version = firstLineOf(p.getErrorStream());
        if (cp_version == null) {
            // Also try input-stream as fallback, just in case...
            cp_version = firstLineOf(p.getInputStream());
        }
        result = p.waitFor();
        // The command may have printed nothing at all, so guard against null.
        if (cp_version == null || !cp_version.startsWith("Python 2.7.")) {
            String provided = (cp_version != null) ? cp_version : "no version information";
            reason = provided + " has been provided, but 2.7.x is required.";
            throw new RuntimeException(String.format(LARGE_METHOD_MSG, filename)
                    + String.format(TRIED_CREATE_PYC_MSG, command, reason)
                    + String.format(PLEASE_PROVIDE_MSG, filename) + CPYTHON_CMD_MSG);
        }
    } catch (InterruptedException | IOException e) {
        if (e instanceof InterruptedException) {
            // Keep the interrupt visible to callers further up the stack.
            Thread.currentThread().interrupt();
        }
        exc = e;
    }

    if (exc == null && result == 0) {
        try {
            Process p = Runtime.getRuntime().exec(command);
            result = p.waitFor();
            if (result == 0) {
                // The .pyc should exist now; do not try CPython a second time.
                return loadPyBytecode(filename, false);
            }
        } catch (InterruptedException | IOException e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            exc = e;
        }
    }

    reason = exc != null ? "of " + exc.toString() : "of a bad return: " + result;
    String exc_msg = String.format(LARGE_METHOD_MSG, filename)
            + String.format(TRIED_CREATE_PYC_MSG, command, reason)
            + String.format(PLEASE_PROVIDE_MSG, filename) + CPYTHON_CMD_MSG;
    throw exc != null ? new RuntimeException(exc_msg, exc) : new RuntimeException(exc_msg);
}

/**
 * Reads and validates a CPython 2.7 {@code .pyc} file. The file is closed on every path,
 * including the failure paths.
 */
private static PyBytecode readPycFile(String filename, String pyc_filename) {
    PyFile f = new PyFile(pyc_filename, "rb", 4096);
    PyObject code;
    try {
        // Header: 4 bytes of magic (the number is in the first two) and 4 bytes of mtime.
        byte[] bts = f.read(8).toBytes();
        int magic = (bts.length < 8) ? -1
                : (bts[1] << 8) & 0x0000FF00 | (bts[0] << 0) & 0x000000FF;
        if (magic != 62211) { // check Python 2.7 bytecode
            throw new RuntimeException(
                    String.format(LARGE_METHOD_MSG, filename) //
                            + "\n'" + pyc_filename + "' is not CPython 2.7 bytecode." //
                            + String.format(PLEASE_PROVIDE_MSG, filename));
        }
        _marshal.Unmarshaller un = new _marshal.Unmarshaller(f);
        code = un.load();
    } finally {
        f.close();
    }
    if (code instanceof PyBytecode) {
        return (PyBytecode) code;
    }
    throw new RuntimeException(String.format(LARGE_METHOD_MSG, filename) //
            + "\n'" + pyc_filename + "' contains invalid bytecode."
            + String.format(PLEASE_PROVIDE_MSG, filename));
}

/** Returns the first line of a stream (or null), consuming the rest and closing it. */
private static String firstLineOf(java.io.InputStream in) throws IOException {
    BufferedReader br = new BufferedReader(new InputStreamReader(in));
    try {
        String first = br.readLine();
        while (br.readLine() != null) {}
        return first;
    } finally {
        br.close();
    }
}
/**
 * Turns a code object into text: Java-serialises it and encodes the bytes with
 * {@link #base64encodeToString(byte[])}.
 *
 * Java serialisation is used because a round trip through {@code _marshal} with a
 * {@code cStringIO} buffer failed with 'ValueError: bad marshal data'. Possibly the problem is
 * in {@code cStringIO}: marshalled bytecode contains bytes that are not directly suitable as
 * String values, and {@code cStringIO} applies no encoding such as Base64 to them.
 *
 * @param btcode the code object to serialise
 * @return the unpadded base64 text of the serialised object
 */
private static String serializePyBytecode(PyBytecode btcode) throws java.io.IOException {
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    ObjectOutputStream objectOut = new ObjectOutputStream(buffer);
    objectOut.writeObject(btcode);
    objectOut.flush();
    // From Java 8 use: String code_str = Base64.getEncoder().encodeToString(bo.toByteArray());
    final byte[] serialized = buffer.toByteArray();
    final String encoded = base64encodeToString(serialized);
    objectOut.close();
    buffer.close();
    return encoded;
}
/**
 * Simplified base64 encoder matching the decoder in BytecodeLoader. The output carries neither
 * '=' padding nor line breaks, i.e. it is equivalent to
 * {@code binascii.b2a_base64(bytes).rstrip('=\n')}.
 *
 * @param data to encode
 * @return the string encoding the data
 */
private static String base64encodeToString(byte[] data) {
    final int length = data.length;
    // Number of bytes that form complete groups of three
    final int whole = length - (length % 3);
    StringBuilder out = new StringBuilder(((length / 3) + 1) * 4);

    // Each complete group of three bytes becomes four characters
    for (int i = 0; i < whole; i += 3) {
        int triple = ((data[i] & 0xff) << 16) + ((data[i + 1] & 0xff) << 8)
                + (data[i + 2] & 0xff);
        out.append(base64enc[triple >> 18]);
        out.append(base64enc[(triple >> 12) & 0x3f]);
        out.append(base64enc[(triple >> 6) & 0x3f]);
        out.append(base64enc[triple & 0x3f]);
    }

    // One or two left-over bytes become two or three characters
    switch (length - whole) {
        case 1: {
            int bits = (data[whole] & 0xff) << 8;
            out.append(base64enc[bits >> 10]);
            out.append(base64enc[(bits >> 4) & 0x3f]);
            break;
        }
        case 2: {
            int bits = ((data[whole] & 0xff) << 8) + (data[whole + 1] & 0xff);
            out.append(base64enc[bits >> 10]);
            out.append(base64enc[(bits >> 4) & 0x3f]);
            out.append(base64enc[(bits << 2) & 0x3f]);
            break;
        }
        default:
            break;
    }
    return out.toString();
}

/** Look-up table for {@link #base64encodeToString(byte[])}. */
private static final char[] base64enc =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();
/** Largest string literal, in characters, stored in a single field of the class file. */
private static final int maxLiteral = 65535;

/**
 * Stores Base64 encoded Python byte code in one or more String literals of the class file.
 *
 * A Java String is limited only by the address range of arrays, but the class file format
 * supports only literals representable in at most 65535 bytes of modified UTF-8. This method
 * is used only with base64 Strings (ASCII without nulls), so every character occupies exactly
 * one byte after encoding.
 *
 * To stay within the limit, {@code code_str} may be split over several literals. The marker
 * interface 'ContainsPyBytecode' indicates that a class contains (static final) literals
 * named as follows:
 *
 *   - a prefix of '___' indicates a bytecode-containing string literal
 *   - a number giving the number of parts follows
 *   - '0_' indicates that no splitting occurred
 *   - otherwise another number follows, the index of the literal
 *   - indexing starts at 0
 *
 * Examples:
 *
 *   - {@code ___0_method1} contains bytecode for method1
 *   - {@code ___2_0_method2} contains the first part of method2's bytecode
 *   - {@code ___2_1_method2} contains the second part of method2's bytecode
 *
 * Note that this approach is provisional. In future, Jython might contain the bytecode
 * directly as bytecode-objects. The current approach was feasible with far less complicated
 * JVM bytecode-manipulation, but needs special treatment after class-loading.
 *
 * @param name of the method or function being generated
 * @param code_str Base64 encoded CPython byte code
 * @param module currently being defined as a class file
 * @throws java.io.IOException
 */
private static void insert_code_str_to_classfile(String name, String code_str, Module module)
        throws java.io.IOException {
    final int total = code_str.length();

    if (total <= maxLiteral) {
        // This can go as a single literal
        module.classfile.addFinalStringLiteral("___0_" + name, code_str);
        return;
    }

    // We need to split the code into several literals: all full-sized except perhaps the last.
    final int splits = (total / maxLiteral) + ((total % maxLiteral > 0) ? 1 : 0);
    final String prefix = "___" + splits + "_";
    for (int part = 0; part < splits; part++) {
        final int from = part * maxLiteral;
        final int to = Math.min(from + maxLiteral, total);
        module.classfile.addFinalStringLiteral(prefix + part + "_" + name,
                code_str.substring(from, to));
    }
}
/**
 * Create and write a Python module as a Java class file.
 *
 * If the JVM rejects the result because a method is too large, the CPython 2.7 bytecode of
 * the source file is loaded and the oversized code objects are embedded as serialised
 * bytecode instead of being compiled to JVM methods. Since it is not known in advance which
 * code objects are oversized, a threshold on the CPython bytecode length is tried and lowered
 * in steps until compilation succeeds or the threshold becomes implausibly small.
 *
 * @param node AST of the module to write
 * @param ostream stream onto which to write it
 * @param name name of the class to generate
 * @param filename name of the source file, also used to locate the {@code .pyc} file
 * @param linenumbers passed to the {@code Module}
 * @param printResults passed to the code compiler
 * @param cflags compiler flags, may be null
 * @param mtime modification time passed to the {@code Module}
 * @throws Exception on any compilation failure
 */
public static void compile(mod node, OutputStream ostream, String name, String filename,
        boolean linenumbers, boolean printResults, CompilerFlags cflags, long mtime)
        throws Exception {
    try {
        Module module = new Module(name, filename, linenumbers, mtime);
        _module_init(node, module, printResults, cflags);
        module.write(ostream);
    } catch (MethodTooLargeException re) {
        PyBytecode btcode = loadPyBytecode(filename, true);
        int thresh = 22000;
        /*
         * No idea, how to determine at this point if a method is oversized, so we just try a
         * threshold regarding Python code-length, while JVM restriction is actually about Java
         * bytecode length. Anyway; given that code-lengths are strongly related, this should
         * work well enough.
         */
        while (true) {
            try {
                List<PyBytecode> largest_m_codes = collectOversizedCodes(btcode, thresh);
                Module module = new Module(name, filename, linenumbers, mtime);
                module.oversized_methods = new Hashtable<>(largest_m_codes.size());
                int ov_id = 0;
                for (PyBytecode largest_m_code : largest_m_codes) {
                    String co_name = largest_m_code.co_name;
                    String stem = PyCodeConstant.isJavaIdentifier(co_name) ? co_name : "f";
                    String name_id = stem + "$_" + ov_id++;
                    if (co_name.equals("<module>")) {
                        /*
                         * In Jython's opinion module begins at line 0 (while CPython reports
                         * line 1)
                         */
                        module.oversized_methods.put(co_name + 0, name_id);
                    } else {
                        module.oversized_methods.put(
                                co_name + largest_m_code.co_firstlineno, name_id);
                    }
                    String code_str = serializePyBytecode(largest_m_code);
                    insert_code_str_to_classfile(name_id, code_str, module);
                }
                module.classfile.addInterface(p(org.python.core.ContainsPyBytecode.class));
                _module_init(node, module, printResults, cflags);
                module.write(ostream);
                break;
            } catch (MethodTooLargeException e) {
                thresh -= 1000;
                if (thresh < 10000) {
                    /*
                     * This value should be well feasible by JVM-bytecode, so something else
                     * must be wrong. Keep the last failure as the cause.
                     */
                    throw new RuntimeException(
                            "For unknown reason, too large method code couldn't be resolved"
                                    + "\nby PyBytecode-approach:\n" + filename, e);
                }
            }
        }
    }
}

/**
 * Collects, depth first, the code objects reachable from {@code root} through
 * {@code co_consts} whose {@code co_code} is longer than {@code thresh}.
 *
 * The constants of a code object that is itself collected are not visited: when a function
 * is represented as CPython bytecode, all its inner code objects (classes, functions,
 * methods) implicitly are too. Cycles are assumed not to occur.
 */
private static List<PyBytecode> collectOversizedCodes(PyBytecode root, int thresh) {
    List<PyBytecode> oversized = new ArrayList<>();
    Stack<PyBytecode> pending = new Stack<>();
    pending.push(root);
    while (!pending.isEmpty()) {
        PyBytecode bcode = pending.pop();
        if (bcode.co_code.length > thresh) {
            oversized.add(bcode);
        } else {
            for (PyObject item : bcode.co_consts) {
                if (item instanceof PyBytecode) {
                    pending.push((PyBytecode) item);
                }
            }
        }
    }
    return oversized;
}
/**
 * Emits code that pushes the Python object for a numeric literal, using the pooled constant
 * that matches the literal's type (int, long, float or complex). Any other type emits
 * nothing.
 *
 * @param node the numeric literal
 * @param code the method body to append to
 */
public void emitNum(Num node, Code code) throws Exception {
    final Object n = node.getInternalN();
    final Constant constant;
    if (n instanceof PyInteger) {
        constant = integerConstant(((PyInteger) n).getValue());
    } else if (n instanceof PyLong) {
        constant = longConstant(((PyObject) n).__str__().toString());
    } else if (n instanceof PyFloat) {
        constant = floatConstant(((PyFloat) n).getValue());
    } else if (n instanceof PyComplex) {
        constant = complexConstant(((PyComplex) n).imag);
    } else {
        return;
    }
    constant.get(code);
}
/**
 * Emits code that pushes the Python object for a string literal, as a unicode constant if the
 * literal is a {@code PyUnicode} and as a byte-string constant otherwise.
 *
 * @param node the string literal
 * @param code the method body to append to
 */
public void emitStr(Str node, Code code) throws Exception {
    final PyString s = (PyString) node.getInternalS();
    final boolean isUnicode = s instanceof PyUnicode;
    final Constant constant =
            isUnicode ? unicodeConstant(s.asString()) : stringConstant(s.asString());
    constant.get(code);
}
public boolean emitPrimitiveArraySetters(java.util.List extends PythonTree> nodes, Code code)
throws Exception {
final int n = nodes.size();
if (n < USE_SETTERS_LIMIT) {
return false; // Too small to matter, so bail
}
// Only attempt if all nodes are either Num or Str, otherwise bail
boolean primitive_literals = true;
for (int i = 0; i < n; i++) {
PythonTree node = nodes.get(i);
if (!(node instanceof Num || node instanceof Str)) {
primitive_literals = false;
}
}
if (!primitive_literals) {
return false;
}
final int num_setters = (n / MAX_SETTINGS_PER_SETTER) + 1;
code.iconst(n);
code.anewarray(p(PyObject.class));
for (int i = 0; i < num_setters; i++) {
Code setter = this.classfile.addMethod("set$$" + setter_count,
sig(Void.TYPE, PyObject[].class), ACC_STATIC | ACC_PRIVATE);
for (int j = 0; (j < MAX_SETTINGS_PER_SETTER)
&& ((i * MAX_SETTINGS_PER_SETTER + j) < n); j++) {
setter.aload(0);
setter.iconst(i * MAX_SETTINGS_PER_SETTER + j);
PythonTree node = nodes.get(i * MAX_SETTINGS_PER_SETTER + j);
if (node instanceof Num) {
emitNum((Num)node, setter);
} else if (node instanceof Str) {
emitStr((Str)node, setter);
}
setter.aastore();
}
setter.return_();
code.dup();
code.invokestatic(this.classfile.name, "set$$" + setter_count,
sig(Void.TYPE, PyObject[].class));
setter_count++;
}
return true;
}
}