src.org.python.indexer.Indexer Maven / Gradle / Ivy
/**
* Copyright 2009, Google Inc. All rights reserved.
* Licensed to PSF under a Contributor Agreement.
*/
package org.python.indexer;
import org.python.indexer.ast.NNode;
import org.python.indexer.ast.NModule;
import org.python.indexer.ast.NName;
import org.python.indexer.ast.NUrl;
import org.python.indexer.types.NModuleType;
import org.python.indexer.types.NType;
import org.python.indexer.types.NUnknownType;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* Indexes a set of Python files and builds a code graph.
* This class is not thread-safe.
*/
public class Indexer {
/**
* The global indexer instance. Provides convenient access to global
* resources, as well as easy cleanup of resources after the index is built.
*/
public static Indexer idx;
/**
* A scope containing bindings for all modules currently loaded by the indexer.
*/
public Scope moduleTable = new Scope(null, Scope.Type.GLOBAL);
/**
* The top-level (builtin) scope.
*/
public Scope globaltable = new Scope(null, Scope.Type.GLOBAL);
/**
* A map of all bindings created, keyed on their qnames.
*/
private Map allBindings = new HashMap();
/**
* A map of references to their referenced bindings. Most nodes will refer
* to a single binding, and few ever refer to more than two. One situation
* in which a multiple reference can occur is the an attribute of a union
* type. For instance:
*
*
* class A:
* def foo(self): pass
* class B:
* def foo(self): pass
* if some_condition:
* var = A()
* else
* var = B()
* var.foo() # foo here refers to both A.foo and B.foo
*
*/
private Map> locations = new HashMap>();
/**
* Diagnostics.
*/
public Map> problems = new HashMap>();
public Map> parseErrs = new HashMap>();
public String currentFile = null;
public String projDir = null;
public List path = new ArrayList();
/**
* Manages a store of serialized ASTs. ANTLR parsing is one of the slower and
* more expensive phases of indexing; reusing parse trees can help with resource
* utilization when indexing several projects (or re-indexing one project).
*/
private AstCache astCache;
/**
* When resolving imports we look in various possible locations.
* This set keeps track of modules we attempted but didn't find.
*/
public Set failedModules = new HashSet();
/**
* This set tracks module imports that could not be resolved.
*/
private Map> unresolvedModules = new TreeMap>();
/**
* Manages the built-in modules -- that is, modules from the standard Python
* library that are implemented in C and consequently have no Python source.
*/
public Builtins builtins;
private boolean aggressiveAssertions;
// stats counters
private int nloc = 0;
private int nunbound = 0;
private int nunknown = 0;
private int nprob = 0;
private int nparsing = 0;
private int loadedFiles = 0;
private Logger logger = Logger.getLogger(Indexer.class.getCanonicalName());
public Indexer() {
idx = this;
builtins = new Builtins(globaltable, moduleTable);
builtins.init();
}
public void setLogger(Logger logger) {
if (logger == null) {
throw new IllegalArgumentException("null logger param");
}
logger = logger;
}
public Logger getLogger() {
return logger;
}
public void setProjectDir(String cd) throws IOException {
projDir = Util.canonicalize(cd);
}
/**
* Configures whether the indexer should abort with an exception when it
* encounters an internal error or unexpected program state. Normally the
* indexer attempts to continue indexing, on the assumption that having an
* index with mostly good data is better than having no index at all.
* Enabling aggressive assertions is useful for debugging the indexer.
*/
public void enableAggressiveAssertions(boolean enable) {
aggressiveAssertions = enable;
}
public boolean aggressiveAssertionsEnabled() {
return aggressiveAssertions;
}
/**
* If aggressive assertions are enabled, propages the passed
* {@link Throwable}, wrapped in an {@link IndexingException}.
* @param msg descriptive message; ok to be {@code null}
* @throws IndexingException
*/
public void handleException(String msg, Throwable cause) {
// Stack overflows are still fairly common due to cyclic
// types, and they take up an awful lot of log space, so we
// don't log the whole trace by default.
if (cause instanceof StackOverflowError) {
logger.log(Level.WARNING, msg, cause);
return;
}
if (aggressiveAssertionsEnabled()) {
if (msg != null) {
throw new IndexingException(msg, cause);
}
throw new IndexingException(cause);
}
if (msg == null)
msg = "";
if (cause == null)
cause = new Exception();
logger.log(Level.WARNING, msg, cause);
}
/**
* Signals a failed assertion about the state of the indexer or index.
* If aggressive assertions are enabled, throws an {@code IndexingException}.
* Otherwise the message is logged as a warning, and indexing continues.
* @param msg a descriptive message about the problem
* @see enableAggressiveAssertions
* @throws IndexingException
*/
public void reportFailedAssertion(String msg) {
if (aggressiveAssertionsEnabled()) {
throw new IndexingException(msg, new Exception()); // capture stack
}
// Need more configuration control here.
// Currently getting a hillion jillion of these in large clients.
if (false) {
logger.log(Level.WARNING, msg);
}
}
/**
* Adds the specified absolute paths to the module search path.
*/
public void addPaths(List p) throws IOException {
for (String s : p) {
addPath(s);
}
}
/**
* Adds the specified absolute path to the module search path.
*/
public void addPath(String p) throws IOException {
path.add(Util.canonicalize(p));
}
/**
* Sets the module search path to the specified list of absolute paths.
*/
public void setPath(List path) throws IOException {
this.path = new ArrayList(path.size());
addPaths(path);
}
/**
* Returns the module search path -- the project directory followed by any
* paths that were added by {@link addPath}.
*/
public List getLoadPath() {
List loadPath = new ArrayList();
if (projDir != null) {
loadPath.add(projDir);
}
loadPath.addAll(path);
return loadPath;
}
public boolean isLibFile(String file) {
if (file.startsWith("/")) {
return true;
}
if (path != null) {
for (String p : path) {
if (file.startsWith(p)) {
return true;
}
}
}
return false;
}
/**
* Returns the mutable set of all bindings collected, keyed on their qnames.
*/
public Map getBindings() {
return allBindings;
}
/**
* Return the binding for {@code qname}, or {@code null} if not known.
*/
public NBinding lookupQname(String qname) {
return allBindings.get(qname);
}
/**
* Return the type for {@code qname}, or {@code null} if not known.
* @throws IllegalStateException if {@link #ready} has not been called.
*/
public NType lookupQnameType(String qname) {
NBinding b = lookupQname(qname);
if (b != null) {
return b.followType();
}
return null;
}
NModuleType getCachedModule(String file) {
return (NModuleType)moduleTable.lookupType(file);
}
/**
* Returns (loading/resolving if necessary) the module for a given source path.
* @param file absolute file path
*/
public NModuleType getModuleForFile(String file) throws Exception {
if (failedModules.contains(file)) {
return null;
}
NModuleType m = getCachedModule(file);
if (m != null) {
return m;
}
return loadFile(file);
}
/**
* Returns the list, possibly empty but never {@code null}, of
* errors and warnings generated in the file.
*/
public List getDiagnosticsForFile(String file) {
List errs = problems.get(file);
if (errs != null) {
return errs;
}
return new ArrayList();
}
/**
* Create an outline for a file in the index.
* @param path the file for which to build the outline
* @return a list of entries constituting the file outline.
* Returns an empty list if the indexer hasn't indexed that path.
*/
public List generateOutline(String file) throws Exception {
return new Outliner().generate(this, file);
}
/**
* Add a reference to binding {@code b} at AST node {@code node}.
* @param node a node referring to a name binding. Typically a
* {@link NName}, {@link NStr} or {@link NUrl}.
*/
public void putLocation(NNode node, NBinding b) {
if (node == null) {
return;
}
putLocation(new Ref(node), b);
}
public void putLocation(Ref ref, NBinding b) {
if (ref == null) {
return;
}
List bindings = locations.get(ref);
if (bindings == null) {
// The indexer is heavily memory-constrained, so we need small overhead.
// Empirically using a capacity-1 ArrayList for the binding set
// uses about 1/2 the memory of a LinkedList, and 1/4 the memory
// of a default HashSet.
bindings = new ArrayList(1);
locations.put(ref, bindings);
}
if (!bindings.contains(b)) {
bindings.add(b);
// Having > 1 is often an indicator of an indexer bug:
// if (bindings.size() > 1) {
// info("now have " + bindings.size() + " bindings for " + ref + " in " +
// ref.getFile() + ": " + bindings);
// }
}
b.addRef(ref);
}
/**
* Add {@code node} as a reference to binding {@code b}, removing
* {@code node} from any other binding ref-lists that it may have occupied.
* Currently only used in retargeting attribute references from provisional
* bindings once the actual binding is determined.
*/
public void updateLocation(Ref node, NBinding b) {
if (node == null) {
return;
}
List bindings = locations.get(node);
if (bindings == null) {
bindings = new ArrayList(1);
locations.put(node, bindings);
} else {
for (NBinding oldb : bindings) {
oldb.removeRef(node);
}
bindings.clear();
}
if (!bindings.contains(b)) {
bindings.add(b);
}
b.addRef(node);
}
public void removeBinding(NBinding b) {
allBindings.remove(b);
}
public NBinding putBinding(NBinding b) {
if (b == null) {
throw new IllegalArgumentException("null binding arg");
}
String qname = b.getQname();
if (qname == null || qname.length() == 0) {
throw new IllegalArgumentException("Null/empty qname: " + b);
}
NBinding existing = allBindings.get(qname);
if (existing == b) {
return b;
}
if (existing != null) {
duplicateBindingFailure(b, existing);
// A bad edge case was triggered by an __init__.py that defined a
// "Parser" binding (type unknown), and there was a Parser.py in the
// same directory. Loading Parser.py resulted in infinite recursion.
//
// XXX: need to revisit this logic. It seems that bindings made in
// __init__.py probably (?) ought to have __init__ in their qnames
// to avoid dup-binding conflicts. The Indexer module table also
// probably ought not be a normal scope -- it's different enough that
// overloading it to handle modules is making the logic rather twisty.
if (b.getKind() == NBinding.Kind.MODULE) {
return b;
}
return existing;
}
allBindings.put(qname, b);
return b;
}
private void duplicateBindingFailure(NBinding newb, NBinding oldb) {
// XXX: this seems to happen only (and always) for duplicated
// class or function defs in the same scope. Need to figure out
// what the right thing is for this scenario.
if (true) {
return;
}
StringBuilder sb = new StringBuilder();
sb.append("Error creating binding ");
sb.append(newb);
sb.append(" in file ");
sb.append(newb.getFirstFile());
sb.append(": qname already bound to ");
sb.append(oldb);
sb.append(" in file ");
sb.append(oldb.getFirstFile());
reportFailedAssertion(sb.toString());
}
public void putProblem(NNode loc, String msg) {
String file;
if (loc != null && ((file = loc.getFile()) != null)) {
addFileErr(file, loc.start(), loc.end(), msg);
}
}
public void putProblem(String file, int beg, int end, String msg) {
if (file != null) {
addFileErr(file, beg, end, msg);
}
}
void addFileErr(String file, int beg, int end, String msg) {
List msgs = getFileErrs(file, problems);
msgs.add(new Diagnostic(file, Diagnostic.Type.ERROR, beg, end, msg));
}
List getParseErrs(String file) {
return getFileErrs(file, parseErrs);
}
List getFileErrs(String file, Map> map) {
List msgs = map.get(file);
if (msgs == null) {
msgs = new ArrayList();
map.put(file, msgs);
}
return msgs;
}
/**
* Loads a file and all its ancestor packages.
* @see #loadFile(String,boolean)
*/
public NModuleType loadFile(String path) throws Exception {
return loadFile(path, false);
}
/**
* Loads a module from a string containing the module contents.
* Idempotent: looks in the module cache first. Used for simple unit tests.
* @param path a path for reporting/caching purposes. The filename
* component is used to construct the module qualified name.
*/
public NModuleType loadString(String path, String contents) throws Exception {
NModuleType module = getCachedModule(path);
if (module != null) {
finer("\nusing cached module " + path + " [succeeded]");
return module;
}
return parseAndResolve(path, contents);
}
/**
* Load, parse and analyze a source file given its absolute path.
* By default, loads the entire ancestor package chain.
*
* @param path the absolute path to the file or directory.
* If it is a directory, it is suffixed with "__init__.py", and
* only that file is loaded from the directory.
*
* @param noparents {@code true} to skip loading ancestor packages
*
* @return {@code null} if {@code path} could not be loaded
*/
public NModuleType loadFile(String path, boolean skipChain) throws Exception {
File f = new File(path);
if (f.isDirectory()) {
finer("\n loading init file from directory: " + path);
f = Util.joinPath(path, "__init__.py");
path = f.getAbsolutePath();
}
if (!f.canRead()) {
finer("\nfile not not found or cannot be read: " + path);
return null;
}
NModuleType module = getCachedModule(path);
if (module != null) {
finer("\nusing cached module " + path + " [succeeded]");
return module;
}
if (!skipChain) {
loadParentPackage(path);
}
try {
return parseAndResolve(path);
} catch (StackOverflowError soe) {
handleException("Error loading " + path, soe);
return null;
}
}
/**
* When we load a module, load all its parent packages, top-down.
* This is in part because Python does it anyway, and in part so that you
* can click on all parent package components in import statements.
* We load whole ancestor chain top-down, as does Python.
*/
private void loadParentPackage(String file) throws Exception {
File f = new File(file);
File parent = f.getParentFile();
if (parent == null || isInLoadPath(parent)) {
return;
}
// the parent package of an __init__.py file is the grandparent dir
if (parent != null && f.isFile() && "__init__.py".equals(f.getName())) {
parent = parent.getParentFile();
}
if (parent == null || isInLoadPath(parent)) {
return;
}
File initpy = Util.joinPath(parent, "__init__.py");
if (!(initpy.isFile() && initpy.canRead())) {
return;
}
loadFile(initpy.getPath());
}
private boolean isInLoadPath(File dir) {
for (String s : getLoadPath()) {
if (new File(s).equals(dir)) {
return true;
}
}
return false;
}
private NModuleType parseAndResolve(String file) throws Exception {
return parseAndResolve(file, null);
}
/**
* Parse a file or string and return its module parse tree.
* @param file the filename
* @param contents optional file contents. If {@code null}, loads the
* file contents from disk.
*/
@SuppressWarnings("unchecked")
private NModuleType parseAndResolve(String file, String contents) throws Exception {
// Avoid infinite recursion if any caller forgets this check. (Has happened.)
NModuleType nmt = (NModuleType)moduleTable.lookupType(file);
if (nmt != null) {
return nmt;
}
// Put it in the cache now to prevent circular import from recursing.
NModuleType mod = new NModuleType(Util.moduleNameFor(file), file, globaltable);
moduleTable.put(file, new NUrl("file://" + file), mod, NBinding.Kind.MODULE);
try {
NModule ast = null;
if (contents != null) {
ast = getAstForFile(file, contents);
} else {
ast = getAstForFile(file);
}
if (ast == null) {
return null;
}
finer("resolving: " + file);
ast.resolve(globaltable);
finer("[success]");
loadedFiles++;
return mod;
} catch (OutOfMemoryError e) {
if (astCache != null) {
astCache.clear();
}
System.gc();
return null;
}
}
private AstCache getAstCache() throws Exception {
if (astCache == null) {
astCache = AstCache.get();
}
return astCache;
}
/**
* Returns the syntax tree for {@code file}.
*/
public NModule getAstForFile(String file) throws Exception {
return getAstCache().getAST(file);
}
/**
* Returns the syntax tree for {@code file}.
*/
public NModule getAstForFile(String file, String contents) throws Exception {
return getAstCache().getAST(file, contents);
}
public NModuleType getBuiltinModule(String qname) throws Exception {
return builtins.get(qname);
}
/**
* This method searches the module path for the module {@code modname}.
* If found, it is passed to {@link #loadFile}.
*
*
The mechanisms for importing modules are in general statically
* undecidable. We make a reasonable effort to follow the most common
* lookup rules.
*
* @param modname a module name. Can be a relative path to a directory
* or a file (without the extension) or a possibly-qualified
* module name. If it is a module name, cannot contain leading dots.
*
* @see http://docs.python.org/reference/simple_stmts.html#the-import-statement
*/
public NModuleType loadModule(String modname) throws Exception {
if (failedModules.contains(modname)) {
return null;
}
NModuleType cached = getCachedModule(modname); // builtin file-less modules
if (cached != null) {
finer("\nusing cached module " + modname);
return cached;
}
NModuleType mt = getBuiltinModule(modname);
if (mt != null) {
return mt;
}
finer("looking for module " + modname);
if (modname.endsWith(".py")) {
modname = modname.substring(0, modname.length() - 3);
}
String modpath = modname.replace('.', '/');
// A nasty hack to avoid e.g. python2.5 becoming python2/5.
// Should generalize this for directory components containing '.'.
modpath = modpath.replaceFirst("(/python[23])/([0-9]/)", "$1.$2");
List loadPath = getLoadPath();
for (String p : loadPath) {
String dirname = p + modpath;
String pyname = dirname + ".py";
String initname = Util.joinPath(dirname, "__init__.py").getAbsolutePath();
String name;
// foo/bar has priority over foo/bar.py
// http://www.python.org/doc/essays/packages.html
if (Util.isReadableFile(initname)) {
name = initname;
} else if (Util.isReadableFile(pyname)) {
name = pyname;
} else {
continue;
}
name = Util.canonicalize(name);
NModuleType m = loadFile(name);
if (m != null) {
finer("load of module " + modname + "[succeeded]");
return m;
}
}
finer("failed to find module " + modname + " in load path");
failedModules.add(modname);
return null;
}
/**
* Load all Python source files recursively if the given fullname is a
* directory; otherwise just load a file. Looks at file extension to
* determine whether to load a given file.
*/
public void loadFileRecursive(String fullname) throws Exception {
File file_or_dir = new File(fullname);
if (file_or_dir.isDirectory()) {
for (File file : file_or_dir.listFiles()) {
loadFileRecursive(file.getAbsolutePath());
}
} else {
if (file_or_dir.getAbsolutePath().endsWith(".py")) {
loadFile(file_or_dir.getAbsolutePath());
}
}
}
/**
* Performs final indexing-building passes, including marking references to
* undeclared variables. Caller should invoke this method after loading all
* files.
*/
public void ready() {
fine("Checking for undeclared variables");
for (Entry> ent : locations.entrySet()) {
Ref ref = ent.getKey();
List bindings = ent.getValue();
convertCallToNew(ref, bindings);
if (countDefs(bindings) == 0) {
// XXX: fix me:
// if (ref instanceof NName && ((NName)ref).isAttribute()) {
// nunknown++; // not so serious
// } else {
// nunbound++; // more serious
// putProblem(ref, "variable may not be bound: " + ref);
// }
} else {
nloc++;
}
}
nprob = problems.size();
nparsing = parseErrs.size();
Set removals = new HashSet();
for (Entry e : allBindings.entrySet()) {
NBinding nb = e.getValue();
if (nb.isProvisional() || nb.getNumDefs() == 0) {
removals.add(e.getKey());
}
}
for (String s : removals) {
allBindings.remove(s);
}
locations.clear();
}
private void convertCallToNew(Ref ref, List bindings) {
if (ref.isRef()) {
return;
}
if (bindings.isEmpty()) {
return;
}
NBinding nb = bindings.get(0);
NType t = nb.followType();
if (t.isUnionType()) {
t = t.asUnionType().firstKnownNonNullAlternate();
if (t == null) {
return;
}
}
NType tt = t.follow();
if (!tt.isUnknownType() && !tt.isFuncType()) {
ref.markAsNew();
}
}
/**
* Clears the AST cache (to free up memory). Subsequent calls to
* {@link #getAstForFile} will either fetch the serialized AST from a
* disk cache or re-parse the file from scratch.
*/
public void clearAstCache() {
if (astCache != null) {
astCache.clear();
}
}
/**
* Clears the module table, discarding all resolved ASTs (modules)
* and their scope information.
*/
public void clearModuleTable() {
moduleTable.clear();
moduleTable = new Scope(null, Scope.Type.GLOBAL);
clearAstCache();
}
private int countDefs(List bindings) {
int count = 0;
for (NBinding b : bindings) {
count += b.getNumDefs();
}
return count;
}
private String printBindings() {
StringBuilder sb = new StringBuilder();
Set sorter = new TreeSet();
sorter.addAll(allBindings.keySet());
for (String key : sorter) {
NBinding b = allBindings.get(key);
sb.append(b.toString()).append("\n");
}
return sb.toString();
}
/**
* Reports a failed module or submodule resolution.
* @param qname module qname, e.g. "org.foo.bar"
* @param file the file where the unresolved import occurred
*/
public void recordUnresolvedModule(String qname, String file) {
Set importers = unresolvedModules.get(qname);
if (importers == null) {
importers = new TreeSet();
unresolvedModules.put(qname, importers);
}
importers.add(file);
}
/**
* Report resolution rate and other statistics data.
*/
public String getStatusReport() {
int total = nloc + nunbound + nunknown;
if (total == 0) {
total = 1;
}
StringBuilder sb = new StringBuilder();
sb.append("Summary: \n")
.append("- modules loaded: ")
.append(loadedFiles)
.append("\n- unresolved modules: ")
.append(unresolvedModules.size())
.append("\n");
for (String s : unresolvedModules.keySet()) {
sb.append(s).append(": ");
Set importers = unresolvedModules.get(s);
if (importers.size() > 5) {
sb.append(importers.iterator().next());
sb.append(" and " );
sb.append(importers.size());
sb.append(" more");
} else {
String files = importers.toString();
sb.append(files.substring(1, files.length() - 1));
}
sb.append("\n");
}
// XXX: these are no longer accurate, and need to be fixed.
// .append("\nnames resolved: " .append(percent(nloc, total)
// .append("\nunbound: " .append(percent(nunbound, total)
// .append("\nunknown: " .append(percent(nunknown, total)
sb.append("\nsemantics problems: ").append(nprob);
sb.append("\nparsing problems: ").append(nparsing);
return sb.toString();
}
private String percent(int num, int total) {
double pct = (num * 1.0) / total;
pct = Math.round(pct * 10000) / 100.0;
return num + "/" + total + " = " + pct + "%";
}
public int numFilesLoaded() {
return loadedFiles;
}
public List getLoadedFiles() {
List files = new ArrayList();
for (String file : moduleTable.keySet()) {
if (file.startsWith("/")) {
files.add(file);
}
}
return files;
}
public void log(Level level, String msg) {
if (logger.isLoggable(level)) {
logger.log(level, msg);
}
}
public void severe(String msg) {
log(Level.SEVERE, msg);
}
public void warn(String msg) {
log(Level.WARNING, msg);
}
public void info(String msg) {
log(Level.INFO, msg);
}
public void fine(String msg) {
log(Level.FINE, msg);
}
public void finer(String msg) {
log(Level.FINER, msg);
}
/**
* Releases all resources for the current indexer.
*/
public void release() {
// Null things out to catch anyone who might still be referencing them.
moduleTable = globaltable = null;
clearAstCache();
astCache = null;
locations = null;
problems.clear();
problems = null;
parseErrs.clear();
parseErrs = null;
path.clear();
path = null;
failedModules.clear();
failedModules = null;
unresolvedModules.clear();
unresolvedModules = null;
builtins = null;
allBindings.clear();
allBindings = null;
// Technically this is all that's needed for the garbage collector.
idx = null;
}
@Override
public String toString() {
return "";
}
}