org.python.core.ParserFacade Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jython-slim Show documentation
Show all versions of jython-slim Show documentation
Jython is an implementation of the high-level, dynamic, object-oriented
language Python written in 100% Pure Java, and seamlessly integrated with
the Java platform. It thus allows you to run Python on any Java platform.
// Copyright (c) Corporation for National Research Initiatives
package org.python.core;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.UnsupportedCharsetException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
import org.python.antlr.BaseParser;
import org.python.antlr.NoCloseReaderStream;
import org.python.antlr.ParseException;
import org.python.antlr.PythonPartialLexer;
import org.python.antlr.PythonPartialParser;
import org.python.antlr.PythonTokenSource;
import org.python.antlr.PythonTree;
import org.python.antlr.base.mod;
import org.python.core.io.StreamIO;
import org.python.core.io.TextIOInputStream;
import org.python.core.io.UniversalIOWrapper;
import org.python.core.util.StringUtil;
/**
* Facade for the classes in the org.python.antlr package.
*/
public class ParserFacade {
private static int MARK_LIMIT = 100000;
private ParserFacade() {}
private static String getLine(ExpectedEncodingBufferedReader reader, int line) {
if (reader == null) {
return "";
}
String text = null;
try {
for (int i = 0; i < line; i++) {
text = reader.readLine();
}
if (text == null) {
return text;
}
if (reader.encoding != null) {
// The parser used a non-latin encoding: re-encode chars to bytes.
Charset cs = Charset.forName(reader.encoding);
ByteBuffer decoded = cs.encode(text);
text = StringUtil.fromBytes(decoded);
}
return text + "\n";
} catch (IOException ioe) {
}
return text;
}
// if reader != null, reset it
public static PyException fixParseError(ExpectedEncodingBufferedReader reader,
Throwable t,
String filename) {
if (reader != null) {
try {
reader.reset();
} catch (IOException e) {
reader = null;
}
}
if (t instanceof ParseException) {
ParseException e = (ParseException)t;
PythonTree node = (PythonTree)e.node;
int line=e.line;
int col=e.charPositionInLine;
if (node != null) {
line = node.getLineno();
col = node.getCol_offset();
}
String text= getLine(reader, line);
String msg = e.getMessage();
if (e.getType() == Py.IndentationError) {
return new PyIndentationError(msg, line, col, text, filename);
}
PyException pye = new PySyntaxError(msg, line, col, text, filename);
if (e.definite) {
// The error cannot be fixed by reading more input
throw pye;
} else {
// The error might be fixed by reading more input
return pye;
}
} else if (t instanceof CharacterCodingException) {
String msg;
if (reader.encoding == null) {
msg = "Non-ASCII character in file '" + filename + "', but no encoding declared"
+ "; see http://www.python.org/peps/pep-0263.html for details";
} else {
msg = "Illegal character in file '" + filename + "' for encoding '"
+ reader.encoding + "'";
}
throw Py.SyntaxError(msg);
} else {
return Py.JavaError(t);
}
}
/**
* Parse Python source as either an expression (if possible) or module.
*
* Designed for use by a JSR 223 implementation: "the Scripting API does not distinguish
* between scripts which return values and those which do not, nor do they make the
* corresponding distinction between evaluating or executing objects." (SCR.4.2.1)
*/
public static mod parseExpressionOrModule(Reader reader,
String filename,
CompilerFlags cflags) {
ExpectedEncodingBufferedReader bufReader = null;
try {
bufReader = prepBufReader(reader, cflags, filename);
// first, try parsing as an expression
return parse(bufReader, CompileMode.eval, filename, cflags);
} catch (Throwable t) {
if (bufReader == null)
{
throw Py.JavaError(t); // can't do any more
}
try {
// then, try parsing as a module
bufReader.reset();
return parse(bufReader, CompileMode.exec, filename, cflags);
} catch (Throwable tt) {
throw fixParseError(bufReader, tt, filename);
}
}
}
/**
* Internal parser entry point.
*
* Users of this method should call fixParseError on any Throwable thrown
* from it, to translate ParserExceptions into PySyntaxErrors or
* PyIndentationErrors.
*/
private static mod parse(ExpectedEncodingBufferedReader reader,
CompileMode kind,
String filename,
CompilerFlags cflags) throws Throwable {
reader.mark(MARK_LIMIT); // We need the ability to move back on the
// reader, for the benefit of fixParseError and
// validPartialSentence
if (kind != null) {
CharStream cs = new NoCloseReaderStream(reader);
BaseParser parser = new BaseParser(cs, filename, cflags);
return kind.dispatch(parser);
} else {
throw Py.ValueError("parse kind must be eval, exec, or single");
}
}
public static mod parse(Reader reader,
CompileMode kind,
String filename,
CompilerFlags cflags) {
ExpectedEncodingBufferedReader bufReader = null;
try {
bufReader = prepBufReader(reader, cflags, filename);
return parse(bufReader, kind, filename, cflags );
} catch (Throwable t) {
throw fixParseError(bufReader, t, filename);
} finally {
close(bufReader);
}
}
public static mod parse(InputStream stream,
CompileMode kind,
String filename,
CompilerFlags cflags) {
ExpectedEncodingBufferedReader bufReader = null;
try {
// prepBufReader takes care of encoding detection and universal
// newlines:
bufReader = prepBufReader(stream, cflags, filename, false);
return parse(bufReader, kind, filename, cflags );
} catch (Throwable t) {
throw fixParseError(bufReader, t, filename);
} finally {
close(bufReader);
}
}
public static mod parse(String string,
CompileMode kind,
String filename,
CompilerFlags cflags) {
ExpectedEncodingBufferedReader bufReader = null;
try {
bufReader = prepBufReader(string, cflags, filename);
return parse(bufReader, kind, filename, cflags);
} catch (Throwable t) {
throw fixParseError(bufReader, t, filename);
} finally {
close(bufReader);
}
}
public static mod partialParse(String string,
CompileMode kind,
String filename,
CompilerFlags cflags,
boolean stdprompt) {
// XXX: What's the idea of the stdprompt argument?
ExpectedEncodingBufferedReader reader = null;
try {
reader = prepBufReader(string, cflags, filename);
return parse(reader, kind, filename, cflags);
} catch (Throwable t) {
PyException p = fixParseError(reader, t, filename);
if (reader != null && validPartialSentence(reader, kind, filename)) {
return null;
}
throw p;
} finally {
close(reader);
}
}
private static boolean validPartialSentence(BufferedReader bufreader, CompileMode kind, String filename) {
PythonPartialLexer lexer = null;
try {
bufreader.reset();
CharStream cs = new NoCloseReaderStream(bufreader);
lexer = new PythonPartialLexer(cs);
CommonTokenStream tokens = new CommonTokenStream(lexer);
PythonTokenSource indentedSource = new PythonTokenSource(tokens, filename);
tokens = new CommonTokenStream(indentedSource);
PythonPartialParser parser = new PythonPartialParser(tokens);
switch (kind) {
case single:
parser.single_input();
break;
case eval:
parser.eval_input();
break;
default:
return false;
}
} catch (Exception e) {
return lexer.eofWhileNested;
}
return true;
}
private static class ExpectedEncodingBufferedReader extends BufferedReader {
/**
* The encoding from the source file, or null if none was specified and ascii is being used.
*/
public final String encoding;
public ExpectedEncodingBufferedReader(Reader in, String encoding) {
super(in);
this.encoding = encoding;
}
}
private static ExpectedEncodingBufferedReader prepBufReader(Reader reader,
CompilerFlags cflags,
String filename)
throws IOException {
cflags.source_is_utf8 = true;
cflags.encoding = "utf-8";
BufferedReader bufferedReader = new BufferedReader(reader);
bufferedReader.mark(MARK_LIMIT);
if (findEncoding(bufferedReader) != null) {
throw new ParseException("encoding declaration in Unicode string");
}
bufferedReader.reset();
return new ExpectedEncodingBufferedReader(bufferedReader, null);
}
private static ExpectedEncodingBufferedReader prepBufReader(InputStream input,
CompilerFlags cflags,
String filename,
boolean fromString)
throws IOException {
return prepBufReader(input, cflags, filename, fromString, true);
}
private static ExpectedEncodingBufferedReader prepBufReader(InputStream input,
CompilerFlags cflags,
String filename,
boolean fromString,
boolean universalNewlines)
throws IOException {
input = new BufferedInputStream(input);
boolean bom = adjustForBOM(input);
String encoding = readEncoding(input);
if (encoding == null) {
if (bom) {
encoding = "utf-8";
} else if (cflags != null && cflags.encoding != null) {
encoding = cflags.encoding;
}
}
if (cflags.source_is_utf8) {
if (encoding != null) {
throw new ParseException("encoding declaration in Unicode string");
}
encoding = "utf-8";
}
cflags.encoding = encoding;
if (universalNewlines) {
// Enable universal newlines mode on the input
StreamIO rawIO = new StreamIO(input, true);
org.python.core.io.BufferedReader bufferedIO =
new org.python.core.io.BufferedReader(rawIO, 0);
UniversalIOWrapper textIO = new UniversalIOWrapper(bufferedIO);
input = new TextIOInputStream(textIO);
}
Charset cs;
try {
// Use ascii for the raw bytes when no encoding was specified
if (encoding == null) {
if (fromString) {
cs = Charset.forName("ISO-8859-1");
} else {
cs = Charset.forName("ascii");
}
} else {
cs = Charset.forName(encoding);
}
} catch (UnsupportedCharsetException exc) {
throw new PySyntaxError("Unknown encoding: " + encoding, 1, 0, "", filename);
}
CharsetDecoder dec = cs.newDecoder();
dec.onMalformedInput(CodingErrorAction.REPORT);
dec.onUnmappableCharacter(CodingErrorAction.REPORT);
return new ExpectedEncodingBufferedReader(new InputStreamReader(input, dec), encoding);
}
private static ExpectedEncodingBufferedReader prepBufReader(String string,
CompilerFlags cflags,
String filename)
throws IOException {
if (cflags.source_is_utf8) {
return prepBufReader(new StringReader(string), cflags, filename);
}
byte[] stringBytes = StringUtil.toBytes(string);
return prepBufReader(new ByteArrayInputStream(stringBytes), cflags, filename, true, false);
}
/**
* Check for a BOM mark at the beginning of stream. If there is a BOM
* mark, advance the stream passed it. If not, reset() to start at the
* beginning of the stream again.
*
* Only checks for EF BB BF right now, since that is all that CPython 2.5
* Checks.
*
* @return true if a BOM was found and skipped.
* @throws ParseException if only part of a BOM is matched.
*
*/
private static boolean adjustForBOM(InputStream stream) throws IOException {
stream.mark(3);
int ch = stream.read();
if (ch == 0xEF) {
if (stream.read() != 0xBB) {
throw new ParseException("Incomplete BOM at beginning of file");
}
if (stream.read() != 0xBF) {
throw new ParseException("Incomplete BOM at beginning of file");
}
return true;
}
stream.reset();
return false;
}
private static String readEncoding(InputStream stream) throws IOException {
stream.mark(MARK_LIMIT);
String encoding = null;
BufferedReader br = new BufferedReader(new InputStreamReader(stream, "ISO-8859-1"), 512);
encoding = findEncoding(br);
// XXX: reset() can still raise an IOException if a line exceeds our large mark
// limit
stream.reset();
return encodingMap(encoding);
}
/**
* Reads the first two lines of the reader, searching for an encoding
* declaration.
*
* Note that reseting the reader (if needed) is responsibility of the caller.
*
* @return The declared encoding, or null if no encoding declaration is
* found
*/
private static String findEncoding(BufferedReader br)
throws IOException {
String encoding = null;
for (int i = 0; i < 2; i++) {
String strLine = br.readLine();
if (strLine == null) {
break;
}
String result = matchEncoding(strLine);
if (result != null) {
encoding = result;
break;
}
}
return encoding;
}
private static String encodingMap(String encoding) {
if (encoding == null) {
return null;
}
if (encoding.equals("Latin-1") || encoding.equals("latin-1")) {
return "ISO8859_1";
}
return encoding;
}
private static final Pattern pep263EncodingPattern = Pattern.compile("#.*coding[:=]\\s*([-\\w.]+)");
private static String matchEncoding(String inputStr) {
Matcher matcher = pep263EncodingPattern.matcher(inputStr);
boolean matchFound = matcher.find();
if (matchFound && matcher.groupCount() == 1) {
String groupStr = matcher.group(1);
return groupStr;
}
return null;
}
private static void close(BufferedReader reader) {
try {
if (reader != null) {
reader.close();
}
} catch (IOException i) {
// XXX: Log the error?
}
}
}