// com.google.javascript.jscomp.DataFlowAnalysis (Maven / Gradle / Ivy)
//
// Go to download
/*
 * Copyright 2008 The Closure Compiler Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.javascript.jscomp;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;

import com.google.common.collect.ImmutableList;
import com.google.javascript.jscomp.ControlFlowGraph.Branch;
import com.google.javascript.jscomp.NodeTraversal.AbstractPostOrderCallback;
import com.google.javascript.jscomp.graph.Annotation;
import com.google.javascript.jscomp.graph.DiGraph.DiGraphNode;
import com.google.javascript.jscomp.graph.LatticeElement;
import com.google.javascript.jscomp.parsing.parser.util.format.SimpleFormat;
import com.google.javascript.rhino.Node;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;

/**
 * A framework to help writing static program analysis. A subclass of
 * this framework should specify how a single node changes the state
 * of a program. This class finds a safe estimate (a fixed-point) for
 * the whole program. The proven facts about the program will be
 * annotated with
 * {@link com.google.javascript.jscomp.graph.GraphNode#setAnnotation} to the
 * given control flow graph's nodes in form of {@link LatticeElement}
 * after calling {@link #analyze()}.
 *
 * As a guideline, the following is a list of behaviors that any analysis
 * can take:
 * 

 * Flow Direction: Is the analysis a forward or backward analysis?
 * 
Lattice Elements: How does the analysis represent the state of the
 * program at any given point?
 * 
JOIN Operation: Given two incoming paths and a lattice state value, what
 * can the compiler conclude at the join point?
 * 
Flow Equations: How does an instruction modify the state of program in
 * terms of lattice values?
 * 
Initial Entry Value: What can the compiler assume at the beginning of the
 * program?
 * 
Initial Estimate: What can the compiler assume at each point of the
 * program? (What is the BOTTOM value of the lattice) By definition this lattice
 * JOIN {@code x} for any {@code x} must also be {@code x}.
 * 
 * To make these behaviors known to the framework, the following steps must be
 * taken.
 * 
 * Flow Direction: Implement {@link #isForward()}.
 * 
Lattice Elements: Implement {@link LatticeElement}.
 * 
JOIN Operation: Implement
 *    {@link JoinOp#apply}.
 * 
Flow Equations: Implement
 * {@link #flowThrough(Object, LatticeElement)}.
 * 
Initial Entry Value: Implement {@link #createEntryLattice()}.
 * 
Initial Estimate: Implement {@link #createInitialEstimateLattice()}.
 * 
 *
 * Upon execution of the {@link #analyze()} method, nodes of the input
 * control flow graph will be annotated with a {@link FlowState} object that
 * represents maximum fixed point solution. Any previous annotations at the
 * nodes of the control flow graph will be lost.
 *
 * @param  The control flow graph's node value type.
 * @param  Lattice element type.
 */
abstract class DataFlowAnalysis {

  private final ControlFlowGraph cfg;
  final JoinOp joinOp;
  protected final Set> orderedWorkSet;

  /*
   * Feel free to increase this to a reasonable number if you are finding that
   * more and more passes need more steps before finding a fixed-point.
   * If you just have a special case, consider calling
   * {@link #analyze(int)} instead.
   */
  public static final int MAX_STEPS = 1000000;

  /**
   * Constructs a data flow analysis.
   *
   * 
Typical usage
   * 
   * DataFlowAnalysis dfa = ...
   * dfa.analyze();
   * 
   *
   * {@link #analyze()} annotates the result to the control flow graph by
   * means of {@link DiGraphNode#setAnnotation} without any
   * modification of the graph itself. Additional calls to {@link #analyze()}
   * recomputes the analysis which can be useful if the control flow graph
   * has been modified.
   *
   * @param targetCfg The control flow graph object that this object performs
   *     on. Modification of the graph requires a separate call to
   *     {@link #analyze()}.
   *
   * @see #analyze()
   */
  DataFlowAnalysis(ControlFlowGraph targetCfg, JoinOp joinOp) {
    this.cfg = targetCfg;
    this.joinOp = joinOp;
    Comparator> nodeComparator = cfg.getOptionalNodeComparator(isForward());
    if (nodeComparator != null) {
      this.orderedWorkSet = new TreeSet<>(nodeComparator);
    } else {
      this.orderedWorkSet = new LinkedHashSet<>();
    }
  }

  /**
   * Returns the control flow graph that this analysis was performed on.
   * Modifications can be done on this graph, however, the only time that the
   * annotations are correct is after {@link #analyze()} is called and before
   * the graph has been modified.
   */
  final ControlFlowGraph getCfg() {
    return cfg;
  }

  protected L join(L latticeA, L latticeB) {
    return joinOp.apply(ImmutableList.of(latticeA, latticeB));
  }

  /**
   * Checks whether the analysis is a forward flow analysis or backward flow
   * analysis.
   *
   * @return {@code true} if it is a forward analysis.
   */
  abstract boolean isForward();

  /**
   * Computes the output state for a given node given its input state.
   *
   * @param node The node.
   * @param input Input lattice that should be read-only.
   * @return Output lattice.
   */
  abstract L flowThrough(N node, L input);

  /**
   * Finds a fixed-point solution using at most {@link #MAX_STEPS}
   * iterations.
   *
   * @see #analyze(int)
   */
  final void analyze() {
    analyze(MAX_STEPS);
  }

  /**
   * Finds a fixed-point solution. The function has the side effect of replacing the existing node
   * annotations with the computed solutions using {@link
   * com.google.javascript.jscomp.graph.GraphNode#setAnnotation(Annotation)}.
   *
   * Initially, each node's input and output flow state contains the value given by {@link
   * #createInitialEstimateLattice()} (with the exception of the entry node of the graph which takes
   * on the {@link #createEntryLattice()} value. Each node will use the output state of its
   * predecessor and compute an output state according to the instruction. At that time, any nodes
   * that depend on the node's newly modified output value will need to recompute their output state
   * again. Each step will perform a computation at one node until no extra computation will modify
   * any existing output state anymore.
   *
   * @param maxSteps Max number of iterations before the method stops and throw a {@link
   *     MaxIterationsExceededException}. This will prevent the analysis from going into a infinite
   *     loop.
   */
  final void analyze(int maxSteps) {
    initialize();
    int step = 0;
    while (!orderedWorkSet.isEmpty()) {
      if (step > maxSteps) {
        throw new MaxIterationsExceededException(
            "Analysis did not terminate after " + maxSteps + " iterations");
      }
      DiGraphNode curNode = orderedWorkSet.iterator().next();
      orderedWorkSet.remove(curNode);
      joinInputs(curNode);
      if (flow(curNode)) {
        // If there is a change in the current node, we want to grab the list
        // of nodes that this node affects.
        List> nextNodes =
            isForward() ? cfg.getDirectedSuccNodes(curNode) : cfg.getDirectedPredNodes(curNode);

        for (DiGraphNode nextNode : nextNodes) {
          if (nextNode != cfg.getImplicitReturn()) {
            orderedWorkSet.add(nextNode);
          }
        }
      }
      step++;
    }
    if (isForward()) {
      joinInputs(getCfg().getImplicitReturn());
    }
  }

  /**
   * Gets the state of the initial estimation at each node.
   *
   * @return Initial state.
   */
  abstract L createInitialEstimateLattice();

  /**
   * Gets the incoming state of the entry node.
   *
   * @return Entry state.
   */
  abstract L createEntryLattice();

  /**
   * Initializes the work list and the control flow graph.
   */
  protected void initialize() {
    // TODO(user): Calling clear doesn't deallocate the memory in a
    // LinkedHashSet. Consider creating a new work set if we plan to repeatedly
    // call analyze.
    orderedWorkSet.clear();
    for (DiGraphNode node : cfg.getNodes()) {
      node.setAnnotation(new FlowState<>(createInitialEstimateLattice(),
          createInitialEstimateLattice()));
      if (node != cfg.getImplicitReturn()) {
        orderedWorkSet.add(node);
      }
    }
  }

  /**
   * Performs a single flow through a node.
   *
   * @return {@code true} if the flow state differs from the previous state.
   */
  protected boolean flow(DiGraphNode node) {
    FlowState state = node.getAnnotation();
    if (isForward()) {
      L outBefore = state.out;
      state.out = flowThrough(node.getValue(), state.in);
      return !outBefore.equals(state.out);
    } else {
      L inBefore = state.in;
      state.in = flowThrough(node.getValue(), state.out);
      return !inBefore.equals(state.in);
    }
  }

  /**
   * Computes the new flow state at a given node's entry by merging the
   * output (input) lattice of the node's predecessor (successor).
   *
   * @param node Node to compute new join.
   */
  protected void joinInputs(DiGraphNode node) {
    FlowState state = node.getAnnotation();
    if (isForward()) {
      if (cfg.getEntry() == node) {
        state.setIn(createEntryLattice());
      } else {
        List> inNodes = cfg.getDirectedPredNodes(node);
        if (inNodes.size() == 1) {
          FlowState inNodeState = inNodes.get(0).getAnnotation();
          state.setIn(inNodeState.getOut());
        } else if (inNodes.size() > 1) {
          List values = new ArrayList<>(inNodes.size());
          for (DiGraphNode currentNode : inNodes) {
            FlowState currentNodeState = currentNode.getAnnotation();
            values.add(currentNodeState.getOut());
          }
          state.setIn(joinOp.apply(values));
        }
      }
    } else {
      List> inNodes = cfg.getDirectedSuccNodes(node);
      if (inNodes.size() == 1) {
        DiGraphNode inNode = inNodes.get(0);
        if (inNode == cfg.getImplicitReturn()) {
          state.setOut(createEntryLattice());
        } else {
          FlowState inNodeState = inNode.getAnnotation();
          state.setOut(inNodeState.getIn());
        }
      } else if (inNodes.size() > 1) {
        List values = new ArrayList<>(inNodes.size());
        for (DiGraphNode currentNode : inNodes) {
          FlowState currentNodeState = currentNode.getAnnotation();
          values.add(currentNodeState.getIn());
        }
        state.setOut(joinOp.apply(values));
      }
    }
  }

  /**
   * The in and out states of a node.
   *
   * @param  Input and output lattice element type.
   */
  static class FlowState implements Annotation {
    private L in;
    private L out;

    /**
     * Private constructor. No other classes should create new states.
     *
     * @param inState Input.
     * @param outState Output.
     */
    private FlowState(L inState, L outState) {
      checkNotNull(inState);
      checkNotNull(outState);
      this.in = inState;
      this.out = outState;
    }

    L getIn() {
      return in;
    }

    void setIn(L in) {
      checkNotNull(in);
      this.in = in;
    }

    L getOut() {
      return out;
    }

    void setOut(L out) {
      checkNotNull(out);
      this.out = out;
    }

    @Override
    public String toString() {
      return SimpleFormat.format("IN: %s OUT: %s", in, out);
    }

    @Override
    public boolean equals(Object o) {
      if (o instanceof FlowState) {
        FlowState that = (FlowState) o;
        return that.in.equals(this.in)
            && that.out.equals(this.out);
      }
      return false;
    }

    @Override
    public int hashCode() {
      return Objects.hash(in, out);
    }
  }

  /**
   * The exception to be thrown if the analysis has been running for a long
   * number of iterations. Chances are the analysis is not monotonic, a
   * fixed-point cannot be found and it is currently stuck in an infinite loop.
   */
  static class MaxIterationsExceededException extends RuntimeException {
    private static final long serialVersionUID = 1L;
    MaxIterationsExceededException(String msg) {
      super(msg);
    }
  }

  abstract static class BranchedForwardDataFlowAnalysis
       extends DataFlowAnalysis {

    @Override
    protected void initialize() {
      orderedWorkSet.clear();
      for (DiGraphNode node : getCfg().getNodes()) {
        int outEdgeCount = getCfg().getOutEdges(node.getValue()).size();
        List outLattices = new ArrayList<>();
        for (int i = 0; i < outEdgeCount; i++) {
          outLattices.add(createInitialEstimateLattice());
        }
        node.setAnnotation(new BranchedFlowState<>(
            createInitialEstimateLattice(), outLattices));
        if (node != getCfg().getImplicitReturn()) {
          orderedWorkSet.add(node);
        }
      }
    }

    BranchedForwardDataFlowAnalysis(ControlFlowGraph targetCfg, JoinOp joinOp) {
      super(targetCfg, joinOp);
    }

    @Override
    final boolean isForward() {
      return true;
    }

    /**
     * The branched flow function maps a single lattice to a list of output
     * lattices.
     *
     * 
Each outgoing edge of a node will have a corresponding output lattice
     * in the ordered returned by
     * {@link com.google.javascript.jscomp.graph.DiGraph#getOutEdges(Object)}
     * in the returned list.
     *
     * @return A list of output values depending on the edge's branch type.
     */
    abstract List branchedFlowThrough(N node, L input);

    @Override
    protected final boolean flow(DiGraphNode node) {
      BranchedFlowState state = node.getAnnotation();
      List outBefore = state.out;
      state.out = branchedFlowThrough(node.getValue(), state.in);
      checkState(outBefore.size() == state.out.size());
      for (int i = 0; i < outBefore.size(); i++) {
        if (!outBefore.get(i).equals(state.out.get(i))) {
          return true;
        }
      }
      return false;
    }

    @Override
    protected void joinInputs(DiGraphNode node) {
      BranchedFlowState state = node.getAnnotation();
      List> predNodes = getCfg().getDirectedPredNodes(node);
      List values = new ArrayList<>(predNodes.size());

      for (DiGraphNode predNode : predNodes) {
        BranchedFlowState predNodeState = predNode.getAnnotation();

        L in = predNodeState.out.get(
            getCfg().getDirectedSuccNodes(predNode).indexOf(node));

        values.add(in);
      }
      if (getCfg().getEntry() == node) {
        state.setIn(createEntryLattice());
      } else if (!values.isEmpty()) {
        state.setIn(joinOp.apply(values));
      }
    }
  }

  /**
   * The in and out states of a node.
   *
   * @param  Input and output lattice element type.
   */
  static class BranchedFlowState
      implements Annotation {
    private L in;
    private List out;

    /**
     * Private constructor. No other classes should create new states.
     *
     * @param inState Input.
     * @param outState Output.
     */
    private BranchedFlowState(L inState, List outState) {
      checkNotNull(inState);
      checkNotNull(outState);
      this.in = inState;
      this.out = outState;
    }

    L getIn() {
      return in;
    }

    void setIn(L in) {
      checkNotNull(in);
      this.in = in;
    }

    @Override
    public String toString() {
      return SimpleFormat.format("IN: %s OUT: %s", in, out);
    }

    @Override
    public boolean equals(Object o) {
      if (o instanceof BranchedFlowState) {
        BranchedFlowState that = (BranchedFlowState) o;
        return that.in.equals(this.in)
            && that.out.equals(this.out);
      }
      return false;
    }

    @Override
    public int hashCode() {
      return Objects.hash(in, out);
    }
  }

  /**
   * Compute set of escaped variables. When a variable is escaped in a dataflow analysis, it can be
   * referenced outside of the code that we are analyzing. A variable is escaped if any of the
   * following is true:
   *
   * 1. Exported variables as they can be needed after the script terminates. 2. Names of named
   * functions because in JavaScript, function foo(){} does not kill foo in the dataflow.
   *
   * @param jsScope Must be a function scope
   */
  static void computeEscaped(
      final Scope jsScope,
      final Set escaped,
      AbstractCompiler compiler,
      SyntacticScopeCreator scopeCreator) {

    checkArgument(jsScope.isFunctionScope());

    AbstractPostOrderCallback finder =
        new AbstractPostOrderCallback() {
          @Override
          public void visit(NodeTraversal t, Node n, Node parent) {

            Node enclosingBlock = NodeUtil.getEnclosingFunction(n);
            if (jsScope.getRootNode() == enclosingBlock || !n.isName() || parent.isFunction()) {
              return;
            }

            String name = n.getString();
            Var var = t.getScope().getVar(name);
            if (var != null) {
              Node enclosingScopeNode = NodeUtil.getEnclosingFunction(var.getNode());

              if (enclosingScopeNode == jsScope.getRootNode()) {
                escaped.add(var);
              }
            }
          }
        };

    Map allVarsInFn = new HashMap<>();
    List orderedVars = new ArrayList<>();
    NodeUtil.getAllVarsDeclaredInFunction(
        allVarsInFn, orderedVars, compiler, scopeCreator, jsScope);
    NodeTraversal t = new NodeTraversal(compiler, finder, scopeCreator);
    t.traverseAtScope(jsScope);

    // TODO (simranarora) catch variables should not be considered escaped in ES6. Getting rid of
    // the catch check is causing breakages however
    for (Var var : allVarsInFn.values()) {
      if (var.getParentNode().isCatch()
          || compiler.getCodingConvention().isExported(var.getName())) {
        escaped.add(var);
      }
    }
  }
}