All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.re2j.Prog Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2020 The Go Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style
 * license that can be found in the LICENSE file.
 */
// Original Go source here:
// http://code.google.com/p/go/source/browse/src/pkg/regexp/syntax/prog.go

package com.google.re2j;

import java.util.Arrays;

/**
 * A Prog is a compiled regular expression program.
 *
 * <p>The program is stored as a growable array of {@link Inst} objects ({@code inst}, of which
 * the first {@code instSize} slots are in use) together with the index of the instruction at
 * which execution starts ({@code start}). Instructions refer to one another by index ("pc") via
 * their {@code out} and {@code arg} fields.
 */
final class Prog {

  // Backing storage for the instructions.  Only slots [0, instSize) are
  // populated; the array is doubled by addInst() when it fills up.
  Inst[] inst = new Inst[10];
  int instSize = 0;
  int start; // index of start instruction
  int numCap = 2; // number of CAPTURE insts in re
  // 2 => implicit ( and ) for whole match $0

  // Width of the left-hand "pc" column produced by toString().
  private static final String PC_COLUMN_PAD = "        ";

  // Constructs an empty program.
  Prog() {}

  // Returns the instruction at the specified pc.
  // Precondition: pc >= 0 && pc < numInst().  No bounds check is performed
  // here; an index outside the backing array fails with the usual
  // ArrayIndexOutOfBoundsException.
  Inst getInst(int pc) {
    return inst[pc];
  }

  // Returns the number of instructions in this program.
  int numInst() {
    return instSize;
  }

  // Adds a new instruction to this program, with operator |op| and |pc| equal
  // to the value of |numInst()| before the call.  The backing array is
  // doubled when it is full.
  void addInst(int op) {
    if (instSize >= inst.length) {
      inst = Arrays.copyOf(inst, inst.length * 2);
    }
    inst[instSize] = new Inst(op);
    instSize++;
  }

  // skipNop() follows any no-op or capturing instructions, starting at |pc|,
  // and returns the first instruction reached that is neither.
  Inst skipNop(int pc) {
    Inst i = inst[pc];
    while (i.op == Inst.NOP || i.op == Inst.CAPTURE) {
      i = inst[i.out];
    }
    return i;
  }

  // prefix() appends to |prefix| the literal string that all matches for the
  // regexp must start with (nothing is appended if there is no such literal),
  // and returns true if that prefix is the entire match, i.e. if the literal
  // run is followed directly by a MATCH instruction.
  boolean prefix(StringBuilder prefix) {
    Inst i = skipNop(start);

    // Avoid allocation of buffer if prefix is empty.
    if (!Inst.isRuneOp(i.op) || i.runes.length != 1) {
      return i.op == Inst.MATCH; // (append "" to prefix)
    }

    // Have prefix; gather characters.  A case-folding rune instruction ends
    // the literal run, since it matches more than one code point.
    while (Inst.isRuneOp(i.op) && i.runes.length == 1 && (i.arg & RE2.FOLD_CASE) == 0) {
      prefix.appendCodePoint(i.runes[0]); // an int, not a byte.
      i = skipNop(i.out);
    }
    return i.op == Inst.MATCH;
  }

  // startCond() returns the leading empty-width conditions that must be true
  // in any match.  It returns -1 (all bits set) if no matches are possible.
  int startCond() {
    int flag = 0; // bitmask of EMPTY_* flags
    int pc = start;
    loop:
    for (; ; ) {
      Inst i = inst[pc];
      switch (i.op) {
        case Inst.EMPTY_WIDTH:
          flag |= i.arg;
          break;
        case Inst.FAIL:
          return -1;
        case Inst.CAPTURE:
        case Inst.NOP:
          break; // skip
        default:
          // First instruction that is not an empty-width condition, capture
          // or no-op: the leading conditions end here.
          break loop;
      }
      pc = i.out;
    }
    return flag;
  }

  // --- Patch list ---

  // A patchlist is a list of instruction pointers that need to be filled in
  // (patched).  Because the pointers haven't been filled in yet, we can reuse
  // their storage to hold the list.  It's kind of sleazy, but works well in
  // practice.  See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.

  // These aren't really pointers: they're integers, so we can reinterpret them
  // this way without using package unsafe.  A value l denotes p.inst[l>>1].out
  // (l&1==0) or .arg (l&1==1).  l == 0 denotes the empty list, okay because we
  // start every program with a fail instruction, so we'll never want to point
  // at its output link.

  // Returns the patch-list entry that follows |l|: the value currently stored
  // in the .out or .arg slot that |l| denotes.  0 means end of list.
  int next(int l) {
    Inst i = inst[l >> 1];
    if ((l & 1) == 0) {
      return i.out;
    }
    return i.arg;
  }

  // Walks the patch list starting at |l| and stores |val| into every slot on
  // it.  Each slot's old content (the link to the next entry) is read before
  // it is overwritten.
  void patch(int l, int val) {
    while (l != 0) {
      Inst i = inst[l >> 1];
      if ((l & 1) == 0) {
        l = i.out;
        i.out = val;
      } else {
        l = i.arg;
        i.arg = val;
      }
    }
  }

  // Concatenates patch lists |l1| and |l2| and returns the head of the
  // combined list.  If either list is empty the other is returned unchanged;
  // otherwise |l2| is linked onto the last entry of |l1|.
  int append(int l1, int l2) {
    if (l1 == 0) {
      return l2;
    }
    if (l2 == 0) {
      return l1;
    }
    // Find the last entry of l1, i.e. the one whose link is 0.
    int last = l1;
    for (; ; ) {
      int following = next(last);
      if (following == 0) {
        break;
      }
      last = following;
    }
    Inst i = inst[last >> 1];
    if ((last & 1) == 0) {
      i.out = l2;
    } else {
      i.arg = l2;
    }
    return l1;
  }

  // ---

  // Returns a listing of the program, one instruction per line: the pc
  // (followed by '*' for the start instruction), padded with spaces to a
  // fixed-width column, then the instruction's own string form.
  @Override
  public String toString() {
    StringBuilder out = new StringBuilder();
    for (int pc = 0; pc < instSize; ++pc) {
      int len = out.length();
      out.append(pc);
      if (pc == start) {
        out.append('*');
      }
      // Use spaces not tabs since they're not always preserved in
      // Google Java source, such as our tests.
      // Clamp the offset so that a label wider than the column yields no
      // padding instead of a StringIndexOutOfBoundsException.
      int labelWidth = Math.min(out.length() - len, PC_COLUMN_PAD.length());
      out.append(PC_COLUMN_PAD.substring(labelWidth)).append(inst[pc]).append('\n');
    }
    return out.toString();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy