org.apache.lucene.sandbox.codecs.idversion.IDVersionSegmentTermsEnum Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-sandbox Show documentation
Lucene Sandbox
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.sandbox.codecs.idversion;

import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.BaseTermsEnum;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.Util;

/**
 * Iterates through terms in this field; this class is public so users can cast it to call {@link
 * #seekExact(BytesRef, long)} for optimistic-concurrency, and also {@link #getVersion} to get the
 * version of the currently seek'd term.
 */
public final class IDVersionSegmentTermsEnum extends BaseTermsEnum {

  // Lazy init:
  IndexInput in;

  // static boolean DEBUG = false;

  private IDVersionSegmentTermsEnumFrame[] stack;
  private final IDVersionSegmentTermsEnumFrame staticFrame;
  IDVersionSegmentTermsEnumFrame currentFrame;
  boolean termExists;
  final VersionFieldReader fr;

  private int targetBeforeCurrentLength;

  private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();

  // What prefix of the current term was present in the index:
  private int validIndexPrefix;

  // assert only:
  private boolean eof;

  final BytesRefBuilder term = new BytesRefBuilder();
  private final FST.BytesReader fstReader;

  @SuppressWarnings({"rawtypes", "unchecked"})
  private FST.Arc>[] arcs = new FST.Arc[1];

  IDVersionSegmentTermsEnum(VersionFieldReader fr) throws IOException {
    this.fr = fr;

    // if (DEBUG) System.out.println("BTTR.init seg=" + segment);
    stack = new IDVersionSegmentTermsEnumFrame[0];

    // Used to hold seek by TermState, or cached seek
    staticFrame = new IDVersionSegmentTermsEnumFrame(this, -1);

    if (fr.index == null) {
      fstReader = null;
    } else {
      fstReader = fr.index.getBytesReader();
    }

    // Init w/ root block; don't use index since it may
    // not (and need not) have been loaded
    for (int arcIdx = 0; arcIdx < arcs.length; arcIdx++) {
      arcs[arcIdx] = new FST.Arc<>();
    }

    currentFrame = staticFrame;
    final FST.Arc> arc;
    if (fr.index != null) {
      arc = fr.index.getFirstArc(arcs[0]);
      // Empty string prefix must have an output in the index!
      assert arc.isFinal();
    } else {
      arc = null;
    }
    currentFrame = staticFrame;
    // currentFrame = pushFrame(arc, rootCode, 0);
    // currentFrame.loadBlock();
    validIndexPrefix = 0;
    // if (DEBUG) {
    //   System.out.println("init frame state " + currentFrame.ord);
    //   printSeekState();
    // }

    // System.out.println();
    // computeBlockStats().print(System.out);
  }

  // Not private to avoid synthetic access$NNN methods
  void initIndexInput() {
    if (this.in == null) {
      this.in = fr.parent.in.clone();
    }
  }

  private IDVersionSegmentTermsEnumFrame getFrame(int ord) throws IOException {
    if (ord >= stack.length) {
      final IDVersionSegmentTermsEnumFrame[] next =
          new IDVersionSegmentTermsEnumFrame
              [ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
      System.arraycopy(stack, 0, next, 0, stack.length);
      for (int stackOrd = stack.length; stackOrd < next.length; stackOrd++) {
        next[stackOrd] = new IDVersionSegmentTermsEnumFrame(this, stackOrd);
      }
      stack = next;
    }
    assert stack[ord].ord == ord;
    return stack[ord];
  }

  private FST.Arc> getArc(int ord) {
    if (ord >= arcs.length) {
      @SuppressWarnings({"rawtypes", "unchecked"})
      final FST.Arc>[] next =
          new FST.Arc[ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
      System.arraycopy(arcs, 0, next, 0, arcs.length);
      for (int arcOrd = arcs.length; arcOrd < next.length; arcOrd++) {
        next[arcOrd] = new FST.Arc<>();
      }
      arcs = next;
    }
    return arcs[ord];
  }

  // Pushes a frame we seek'd to
  IDVersionSegmentTermsEnumFrame pushFrame(
      FST.Arc> arc, Pair frameData, int length)
      throws IOException {
    scratchReader.reset(
        frameData.output1.bytes, frameData.output1.offset, frameData.output1.length);
    final long code = scratchReader.readVLong();
    final long fpSeek = code >>> VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
    final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
    f.maxIDVersion = Long.MAX_VALUE - frameData.output2;
    f.hasTerms = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
    f.hasTermsOrig = f.hasTerms;
    f.isFloor = (code & VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
    if (f.isFloor) {
      f.setFloorData(scratchReader, frameData.output1);
    }
    pushFrame(arc, fpSeek, length);

    return f;
  }

  // Pushes next'd frame or seek'd frame; we later
  // lazy-load the frame only when needed
  IDVersionSegmentTermsEnumFrame pushFrame(FST.Arc> arc, long fp, int length)
      throws IOException {
    final IDVersionSegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
    f.arc = arc;
    if (f.fpOrig == fp && f.nextEnt != -1) {
      // if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp +
      // " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" +
      // f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" +
      // term.length + " vs prefix=" + f.prefix);
      if (f.prefix > targetBeforeCurrentLength) {
        f.rewind();
      } else {
        // if (DEBUG) {
        //   System.out.println("        skip rewind!");
        // }
      }
      assert length == f.prefix;
    } else {
      f.nextEnt = -1;
      f.prefix = length;
      f.state.termBlockOrd = 0;
      f.fpOrig = f.fp = fp;
      f.lastSubFP = -1;
      // if (DEBUG) {
      //   final int sav = term.length;
      //   term.length = length;
      //   System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" +
      // f.hasTerms + " isFloor=" + f.isFloor + " pref=" + ToStringUtils.bytesRefToString(term));
      //   term.length = sav;
      // }
    }

    return f;
  }

  // asserts only
  private boolean clearEOF() {
    eof = false;
    return true;
  }

  // asserts only
  private boolean setEOF() {
    eof = true;
    return true;
  }

  @Override
  public boolean seekExact(final BytesRef target) throws IOException {
    return seekExact(target, 0);
  }

  /** Get the version of the currently seek'd term; only valid if we are positioned. */
  public long getVersion() {
    return ((IDVersionTermState) currentFrame.state).idVersion;
  }

  /**
   * Optimized version of {@link #seekExact(BytesRef)} that can sometimes fail-fast if the version
   * indexed with the requested ID is less than the specified minIDVersion. Applications that index
   * a monotonically increasing global version with each document can use this for fast optimistic
   * concurrency.
   */
  public boolean seekExact(final BytesRef target, long minIDVersion) throws IOException {

    if (fr.index == null) {
      throw new IllegalStateException("terms index was not loaded");
    }

    term.grow(1 + target.length);

    assert clearEOF();

    //  if (DEBUG) {
    //    System.out.println("\nBTTR.seekExact seg=" + fr.parent.segment + " target=" +
    // fr.fieldInfo.name + ":" + ToStringUtils.bytesRefToString(target) + " minIDVersion=" +
    // minIDVersion + " current=" + ToStringUtils.bytesRefToString(term) + " (exists?=" +
    // termExists + ") validIndexPrefix=" + validIndexPrefix);
    //   printSeekState(System.out);
    //  }

    FST.Arc> arc;
    int targetUpto;
    Pair output;

    long startFrameFP = currentFrame.fp;

    targetBeforeCurrentLength = currentFrame.ord;

    boolean changed = false;

    // TODO: we could stop earlier w/ the version check, every time we traverse an index arc we can
    // check?

    if (currentFrame != staticFrame) {

      // We are already seek'd; find the common
      // prefix of new seek term vs current term and
      // re-use the corresponding seek state.  For
      // example, if app first seeks to foobar, then
      // seeks to foobaz, we can re-use the seek state
      // for the first 5 bytes.

      // if (DEBUG) {
      //    System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
      //  }

      arc = arcs[0];
      assert arc.isFinal();
      output = arc.output();
      targetUpto = 0;

      IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
      assert validIndexPrefix <= term.length()
          : "validIndexPrefix="
              + validIndexPrefix
              + " term.length="
              + term.length()
              + " seg="
              + fr.parent;

      final int targetLimit = Math.min(target.length, validIndexPrefix);

      int cmp = 0;

      // TODO: reverse vLong byte order for better FST
      // prefix output sharing

      // First compare up to valid seek frames:
      while (targetUpto < targetLimit) {
        cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
        // if (DEBUG) {
        //    System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit
        // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
        // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output
        // + " output=" + output);
        // }
        if (cmp != 0) {
          break;
        }
        arc = arcs[1 + targetUpto];
        // if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) {
        // System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char)
        // (target.bytes[target.offset + targetUpto] & 0xFF));
        // }
        assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF)
            : "arc.label="
                + (char) arc.label()
                + " targetLabel="
                + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
        if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
          output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
        }
        if (arc.isFinal()) {
          lastFrame = stack[1 + lastFrame.ord];
        }
        targetUpto++;
      }

      if (cmp == 0) {
        final int targetUptoMid = targetUpto;

        // Second compare the rest of the term, but
        // don't save arc/output/frame; we only do this
        // to find out if the target term is before,
        // equal or after the current term
        final int targetLimit2 = Math.min(target.length, term.length());
        while (targetUpto < targetLimit2) {
          cmp =
              (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
          // if (DEBUG) {
          //    System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" +
          // targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset +
          // targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
          // }
          if (cmp != 0) {
            break;
          }
          targetUpto++;
        }

        if (cmp == 0) {
          cmp = term.length() - target.length;
        }
        targetUpto = targetUptoMid;
      }

      if (cmp < 0) {
        // Common case: target term is after current
        // term, ie, app is seeking multiple terms
        // in sorted order
        // if (DEBUG) {
        //    System.out.println("  target is after current (shares prefixLen=" + targetUpto + ");
        // frame.ord=" + lastFrame.ord + "; targetUpto=" + targetUpto);
        //  }
        currentFrame = lastFrame;

      } else if (cmp > 0) {
        // Uncommon case: target term
        // is before current term; this means we can
        // keep the currentFrame but we must rewind it
        // (so we scan from the start)
        targetBeforeCurrentLength = 0;
        changed = true;
        // if (DEBUG) {
        //    System.out.println("  target is before current (shares prefixLen=" + targetUpto + ");
        // rewind frame ord=" + lastFrame.ord);
        //  }
        currentFrame = lastFrame;
        currentFrame.rewind();
      } else {
        // Target is exactly the same as current term
        assert term.length() == target.length;
        if (termExists) {

          if (currentFrame.maxIDVersion < minIDVersion) {
            // The max version for all terms in this block is lower than the minVersion
            // if (DEBUG) {
            //   System.out.println("  target is same as current maxIDVersion=" +
            // currentFrame.maxIDVersion + " is < minIDVersion=" + minIDVersion + "; return false");
            // }
            return false;
          }

          currentFrame.decodeMetaData();
          if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
            // This term's version is lower than the minVersion
            // if (DEBUG) {
            //   System.out.println("  target is same as current but version=" +
            // ((IDVersionTermState) currentFrame.state).idVersion + " is < minIDVersion=" +
            // minIDVersion + "; return false");
            // }
            return false;
          }
          // System.out.println("  term version=" + ((IDVersionTermState)
          // currentFrame.state).idVersion + " frame version=" + currentFrame.maxIDVersion + " frame
          // ord=" + currentFrame.ord);

          // if (DEBUG) {
          //    System.out.println("  target is same as current; return true");
          //  }
          return true;
        } else {
          // if (DEBUG) {
          //    System.out.println("  target is same as current but term doesn't exist");
          //  }
        }
        // validIndexPrefix = currentFrame.depth;
        // term.length = target.length;
        // return termExists;
      }

    } else {

      targetBeforeCurrentLength = -1;
      arc = fr.index.getFirstArc(arcs[0]);
      // System.out.println("first arc=" + arc);

      // Empty string prefix must have an output (block) in the index!
      assert arc.isFinal();
      assert arc.output() != null;

      // if (DEBUG) {
      //    System.out.println("    no seek state; push root frame");
      //  }

      output = arc.output();

      currentFrame = staticFrame;

      // term.length = 0;
      targetUpto = 0;
      currentFrame =
          pushFrame(
              arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
    }

    // if (DEBUG) {
    //   System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output +
    // " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" +
    // targetBeforeCurrentLength + " termExists=" + termExists);
    // }

    // We are done sharing the common prefix with the incoming target and where we are currently
    // seek'd; now continue walking the index:
    while (targetUpto < target.length) {

      final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;

      final FST.Arc> nextArc =
          fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader);

      if (nextArc == null) {

        // Index is exhausted
        // if (DEBUG) {
        //    System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " +
        // Integer.toHexString(targetLabel) + " termExists=" + termExists);
        //  }

        validIndexPrefix = currentFrame.prefix;
        // validIndexPrefix = targetUpto;

        currentFrame.scanToFloorFrame(target);

        if (!currentFrame.hasTerms) {
          termExists = false;
          term.setByteAt(targetUpto, (byte) targetLabel);
          term.setLength(1 + targetUpto);
          // if (DEBUG) {
          //    System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
          //  }
          return false;
        }
        // System.out.println("  check maxVersion=" + currentFrame.maxIDVersion + " vs " +
        // minIDVersion);

        // if (DEBUG) {
        //   System.out.println("  frame.maxIDVersion=" + currentFrame.maxIDVersion +  " vs
        // minIDVersion=" + minIDVersion);
        // }

        if (currentFrame.maxIDVersion < minIDVersion) {
          // The max version for all terms in this block is lower than the minVersion
          if (currentFrame.fp != startFrameFP || changed) {
            // if (targetUpto+1 > term.length) {
            termExists = false;
            term.setByteAt(targetUpto, (byte) targetLabel);
            term.setLength(1 + targetUpto);
            // if (DEBUG) {
            //   System.out.println("    reset current term");
            // }
            validIndexPrefix = Math.min(validIndexPrefix, term.length());
          }
          // if (currentFrame.ord != startFrameOrd) {
          // termExists = false;
          // }
          // if (DEBUG) {
          //   System.out.println("    FAST version NOT_FOUND term=" +
          // ToStringUtils.bytesRefToString(term) + " targetUpto=" + targetUpto +
          // " currentFrame.maxIDVersion=" + currentFrame.maxIDVersion + " validIndexPrefix=" +
          // validIndexPrefix + " startFrameFP=" + startFrameFP + " vs " + currentFrame.fp +
          // " termExists=" + termExists);
          // }
          return false;
        }

        currentFrame.loadBlock();

        // if (DEBUG) {
        //   System.out.println("    scan currentFrame ord=" + currentFrame.ord);
        // }
        final SeekStatus result = currentFrame.scanToTerm(target, true);
        if (result == SeekStatus.FOUND) {
          currentFrame.decodeMetaData();
          if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
            // This term's version is lower than the minVersion
            // if (DEBUG) {
            //   System.out.println("    return NOT_FOUND: idVersion=" + ((IDVersionTermState)
            // currentFrame.state).idVersion + " vs minIDVersion=" + minIDVersion);
            // }
            return false;
          }

          // if (DEBUG) {
          //    System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
          //  }

          return true;
        } else {
          // if (DEBUG) {
          //    System.out.println("  got " + result + "; return NOT_FOUND term=" +
          // ToStringUtils.bytesRefToString(term));
          // }
          return false;
        }
      } else {
        // Follow this arc
        arc = nextArc;
        if (term.byteAt(targetUpto) != (byte) targetLabel) {
          // if (DEBUG) {
          //   System.out.println("  now set termExists=false targetUpto=" + targetUpto + " term=" +
          // term.bytes[targetUpto] + " targetLabel=" + targetLabel);
          // }
          changed = true;
          term.setByteAt(targetUpto, (byte) targetLabel);
          termExists = false;
        }
        // Aggregate output as we go:
        assert arc.output() != null;
        if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
          output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
        }

        // if (DEBUG) {
        //    System.out.println("    index: follow label=" + (char) ((target.bytes[target.offset +
        // targetUpto]&0xff)) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
        //  }
        targetUpto++;

        if (arc.isFinal()) {
          // if (DEBUG) System.out.println("    arc is final!");
          currentFrame =
              pushFrame(
                  arc,
                  VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()),
                  targetUpto);
          // if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" +
          // currentFrame.hasTerms);
        }
      }
    }

    // validIndexPrefix = targetUpto;
    validIndexPrefix = currentFrame.prefix;

    currentFrame.scanToFloorFrame(target);

    // Target term is entirely contained in the index:
    if (!currentFrame.hasTerms) {
      termExists = false;
      term.setLength(targetUpto);
      // if (DEBUG) {
      //    System.out.println("  FAST NOT_FOUND term=" + ToStringUtils.bytesRefToString(term));
      //  }
      return false;
    }

    // if (DEBUG) {
    //   System.out.println("  frame.maxIDVersion=" + currentFrame.maxIDVersion +  " vs
    // minIDVersion=" + minIDVersion);
    // }

    if (currentFrame.maxIDVersion < minIDVersion) {
      // The max version for all terms in this block is lower than the minVersion
      termExists = false;
      term.setLength(targetUpto);
      return false;
    }

    currentFrame.loadBlock();

    final SeekStatus result = currentFrame.scanToTerm(target, true);
    if (result == SeekStatus.FOUND) {
      // if (DEBUG) {
      //    System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
      //  }
      currentFrame.decodeMetaData();
      if (((IDVersionTermState) currentFrame.state).idVersion < minIDVersion) {
        // This term's version is lower than the minVersion
        return false;
      }
      return true;
    } else {
      // if (DEBUG) {
      //    System.out.println("  got result " + result + "; return NOT_FOUND term=" +
      // term.utf8ToString());
      //  }

      return false;
    }
  }

  @Override
  public SeekStatus seekCeil(final BytesRef target) throws IOException {
    if (fr.index == null) {
      throw new IllegalStateException("terms index was not loaded");
    }

    term.grow(1 + target.length);

    assert clearEOF();

    // if (DEBUG) {
    // System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" +
    // target.utf8ToString() + " " + target + " current=" + ToStringUtils.bytesRefToString(term) +
    // " (exists?=" + termExists + ") validIndexPrefix=  " + validIndexPrefix);
    // printSeekState();
    // }

    FST.Arc> arc;
    int targetUpto;
    Pair output;

    targetBeforeCurrentLength = currentFrame.ord;

    if (currentFrame != staticFrame) {

      // We are already seek'd; find the common
      // prefix of new seek term vs current term and
      // re-use the corresponding seek state.  For
      // example, if app first seeks to foobar, then
      // seeks to foobaz, we can re-use the seek state
      // for the first 5 bytes.

      // if (DEBUG) {
      // System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
      // }

      arc = arcs[0];
      assert arc.isFinal();
      output = arc.output();
      targetUpto = 0;

      IDVersionSegmentTermsEnumFrame lastFrame = stack[0];
      assert validIndexPrefix <= term.length();

      final int targetLimit = Math.min(target.length, validIndexPrefix);

      int cmp = 0;

      // TODO: we should write our vLong backwards (MSB
      // first) to get better sharing from the FST

      // First compare up to valid seek frames:
      while (targetUpto < targetLimit) {
        cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
        // if (DEBUG) {
        // System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit +
        // ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) +
        // " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output
        // + " output=" + output);
        // }
        if (cmp != 0) {
          break;
        }
        arc = arcs[1 + targetUpto];
        assert arc.label() == (target.bytes[target.offset + targetUpto] & 0xFF)
            : "arc.label="
                + (char) arc.label()
                + " targetLabel="
                + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
        // TODO: we could save the outputs in local
        // byte[][] instead of making new objs ever
        // seek; but, often the FST doesn't have any
        // shared bytes (but this could change if we
        // reverse vLong byte order)
        if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
          output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
        }
        if (arc.isFinal()) {
          lastFrame = stack[1 + lastFrame.ord];
        }
        targetUpto++;
      }

      if (cmp == 0) {
        final int targetUptoMid = targetUpto;
        // Second compare the rest of the term, but
        // don't save arc/output/frame:
        final int targetLimit2 = Math.min(target.length, term.length());
        while (targetUpto < targetLimit2) {
          cmp =
              (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
          // if (DEBUG) {
          // System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit
          // + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto])
          // + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
          // }
          if (cmp != 0) {
            break;
          }
          targetUpto++;
        }

        if (cmp == 0) {
          cmp = term.length() - target.length;
        }
        targetUpto = targetUptoMid;
      }

      if (cmp < 0) {
        // Common case: target term is after current
        // term, ie, app is seeking multiple terms
        // in sorted order
        // if (DEBUG) {
        // System.out.println("  target is after current (shares prefixLen=" + targetUpto + ");
        // clear frame.scanned ord=" + lastFrame.ord);
        // }
        currentFrame = lastFrame;

      } else if (cmp > 0) {
        // Uncommon case: target term
        // is before current term; this means we can
        // keep the currentFrame but we must rewind it
        // (so we scan from the start)
        targetBeforeCurrentLength = 0;
        // if (DEBUG) {
        // System.out.println("  target is before current (shares prefixLen=" + targetUpto + ");
        // rewind frame ord=" + lastFrame.ord);
        // }
        currentFrame = lastFrame;
        currentFrame.rewind();
      } else {
        // Target is exactly the same as current term
        assert term.length() == target.length;
        if (termExists) {
          // if (DEBUG) {
          // System.out.println("  target is same as current; return FOUND");
          // }
          return SeekStatus.FOUND;
        } else {
          // if (DEBUG) {
          // System.out.println("  target is same as current but term doesn't exist");
          // }
        }
      }

    } else {

      targetBeforeCurrentLength = -1;
      arc = fr.index.getFirstArc(arcs[0]);

      // Empty string prefix must have an output (block) in the index!
      assert arc.isFinal();
      assert arc.output() != null;

      // if (DEBUG) {
      // System.out.println("    no seek state; push root frame");
      // }

      output = arc.output();

      currentFrame = staticFrame;

      // term.length = 0;
      targetUpto = 0;
      currentFrame =
          pushFrame(
              arc, VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
    }

    // if (DEBUG) {
    // System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output +
    // " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" +
    // targetBeforeCurrentLength);
    // }

    // We are done sharing the common prefix with the incoming target and where we are currently
    // seek'd; now continue walking the index:
    while (targetUpto < target.length) {

      final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;

      final FST.Arc> nextArc =
          fr.index.findTargetArc(targetLabel, arc, getArc(1 + targetUpto), fstReader);

      if (nextArc == null) {

        // Index is exhausted
        // if (DEBUG) {
        //   System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " +
        // toHex(targetLabel));
        // }

        validIndexPrefix = currentFrame.prefix;
        // validIndexPrefix = targetUpto;

        currentFrame.scanToFloorFrame(target);

        currentFrame.loadBlock();

        final SeekStatus result = currentFrame.scanToTerm(target, false);
        if (result == SeekStatus.END) {
          term.copyBytes(target);
          termExists = false;

          if (next() != null) {
            // if (DEBUG) {
            // System.out.println("  return NOT_FOUND term=" +
            // ToStringUtils.bytesRefToString(term));
            // }
            return SeekStatus.NOT_FOUND;
          } else {
            // if (DEBUG) {
            // System.out.println("  return END");
            // }
            return SeekStatus.END;
          }
        } else {
          // if (DEBUG) {
          // System.out.println("  return " + result + " term=" +
          // ToStringUtils.bytesRefToString(term));
          // }
          return result;
        }
      } else {
        // Follow this arc
        term.setByteAt(targetUpto, (byte) targetLabel);
        arc = nextArc;
        // Aggregate output as we go:
        assert arc.output() != null;
        if (arc.output() != VersionBlockTreeTermsWriter.NO_OUTPUT) {
          output = VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.output());
        }

        // if (DEBUG) {
        // System.out.println("    index: follow label=" + toHex(target.bytes[target.offset +
        // targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
        // }
        targetUpto++;

        if (arc.isFinal()) {
          // if (DEBUG) System.out.println("    arc is final!");
          currentFrame =
              pushFrame(
                  arc,
                  VersionBlockTreeTermsWriter.FST_OUTPUTS.add(output, arc.nextFinalOutput()),
                  targetUpto);
          // if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" +
          // currentFrame.hasTerms);
        }
      }
    }

    // validIndexPrefix = targetUpto;
    validIndexPrefix = currentFrame.prefix;

    currentFrame.scanToFloorFrame(target);

    currentFrame.loadBlock();

    final SeekStatus result = currentFrame.scanToTerm(target, false);

    if (result == SeekStatus.END) {
      term.copyBytes(target);
      termExists = false;
      if (next() != null) {
        // if (DEBUG) {
        // System.out.println("  return NOT_FOUND term=" + term.utf8ToString() + " " + term);
        // }
        return SeekStatus.NOT_FOUND;
      } else {
        // if (DEBUG) {
        // System.out.println("  return END");
        // }
        return SeekStatus.END;
      }
    } else {
      return result;
    }
  }

  @SuppressWarnings("unused")
  private void printSeekState(PrintStream out) throws IOException {
    if (currentFrame == staticFrame) {
      out.println("  no prior seek");
    } else {
      out.println("  prior seek state:");
      int ord = 0;
      boolean isSeekFrame = true;
      while (true) {
        IDVersionSegmentTermsEnumFrame f = getFrame(ord);
        assert f != null;
        final BytesRef prefix = new BytesRef(term.bytes(), 0, f.prefix);
        if (f.nextEnt == -1) {
          out.println(
              "    frame "
                  + (isSeekFrame ? "(seek)" : "(next)")
                  + " ord="
                  + ord
                  + " fp="
                  + f.fp
                  + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
                  + " prefixLen="
                  + f.prefix
                  + " prefix="
                  + ToStringUtils.bytesRefToString(prefix)
                  + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
                  + " hasTerms="
                  + f.hasTerms
                  + " isFloor="
                  + f.isFloor
                  + " code="
                  + ((f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
                      + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0)
                      + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0))
                  + " isLastInFloor="
                  + f.isLastInFloor
                  + " mdUpto="
                  + f.metaDataUpto
                  + " tbOrd="
                  + f.getTermBlockOrd());
        } else {
          out.println(
              "    frame "
                  + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)")
                  + " ord="
                  + ord
                  + " fp="
                  + f.fp
                  + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "")
                  + " prefixLen="
                  + f.prefix
                  + " prefix="
                  + ToStringUtils.bytesRefToString(prefix)
                  + " nextEnt="
                  + f.nextEnt
                  + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")"))
                  + " hasTerms="
                  + f.hasTerms
                  + " isFloor="
                  + f.isFloor
                  + " code="
                  + ((f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
                      + (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0)
                      + (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0))
                  + " lastSubFP="
                  + f.lastSubFP
                  + " isLastInFloor="
                  + f.isLastInFloor
                  + " mdUpto="
                  + f.metaDataUpto
                  + " tbOrd="
                  + f.getTermBlockOrd());
        }
        if (fr.index != null) {
          assert !isSeekFrame || f.arc != null : "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
          if (f.prefix > 0 && isSeekFrame && f.arc.label() != (term.byteAt(f.prefix - 1) & 0xFF)) {
            out.println(
                "      broken seek state: arc.label="
                    + (char) f.arc.label()
                    + " vs term byte="
                    + (char) (term.byteAt(f.prefix - 1) & 0xFF));
            throw new RuntimeException("seek state is broken");
          }
          Pair output = Util.get(fr.index, prefix);
          if (output == null) {
            out.println("      broken seek state: prefix is not final in index");
            throw new RuntimeException("seek state is broken");
          } else if (isSeekFrame && !f.isFloor) {
            final ByteArrayDataInput reader =
                new ByteArrayDataInput(
                    output.output1.bytes, output.output1.offset, output.output1.length);
            final long codeOrig = reader.readVLong();
            final long code =
                (f.fp << VersionBlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS)
                    | (f.hasTerms ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS : 0)
                    | (f.isFloor ? VersionBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR : 0);
            if (codeOrig != code) {
              out.println(
                  "      broken seek state: output code="
                      + codeOrig
                      + " doesn't match frame code="
                      + code);
              throw new RuntimeException("seek state is broken");
            }
          }
        }
        if (f == currentFrame) {
          break;
        }
        if (f.prefix == validIndexPrefix) {
          isSeekFrame = false;
        }
        ord++;
      }
    }
  }

  /* Decodes only the term bytes of the next term.  If caller then asks for
  metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily)
  decode all metadata up to the current term. */
  @Override
  public BytesRef next() throws IOException {

    if (in == null) {
      // Fresh TermsEnum; seek to first term:
      final FST.Arc> arc;
      if (fr.index != null) {
        arc = fr.index.getFirstArc(arcs[0]);
        // Empty string prefix must have an output in the index!
        assert arc.isFinal();
      } else {
        arc = null;
      }
      currentFrame = pushFrame(arc, fr.rootCode, 0);
      currentFrame.loadBlock();
    }

    targetBeforeCurrentLength = currentFrame.ord;

    assert !eof;
    // if (DEBUG) {
    // System.out.println("\nBTTR.next seg=" + segment + " term=" +
    // ToStringUtils.bytesRefToString(term) + " termExists?=" + termExists +
    // " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd +
    // " validIndexPrefix=" + validIndexPrefix);
    // printSeekState();
    // }

    if (currentFrame == staticFrame) {
      // If seek was previously called and the term was
      // cached, or seek(TermState) was called, usually
      // caller is just going to pull a D/&PEnum or get
      // docFreq, etc.  But, if they then call next(),
      // this method catches up all internal state so next()
      // works properly:
      // if (DEBUG) System.out.println("  re-seek to pending term=" + term.utf8ToString() + " " +
      // term);
      final boolean result = seekExact(term.get());
      assert result;
    }

    // Pop finished blocks
    while (currentFrame.nextEnt == currentFrame.entCount) {
      if (!currentFrame.isLastInFloor) {
        currentFrame.loadNextFloorBlock();
      } else {
        // if (DEBUG) System.out.println("  pop frame");
        if (currentFrame.ord == 0) {
          // if (DEBUG) System.out.println("  return null");
          assert setEOF();
          term.clear();
          validIndexPrefix = 0;
          currentFrame.rewind();
          termExists = false;
          return null;
        }
        final long lastFP = currentFrame.fpOrig;
        currentFrame = stack[currentFrame.ord - 1];

        if (currentFrame.nextEnt == -1 || currentFrame.lastSubFP != lastFP) {
          // We popped into a frame that's not loaded
          // yet or not scan'd to the right entry
          currentFrame.scanToFloorFrame(term.get());
          currentFrame.loadBlock();
          currentFrame.scanToSubBlock(lastFP);
        }

        // Note that the seek state (last seek) has been
        // invalidated beyond this depth
        validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
        // if (DEBUG) {
        // System.out.println("  reset validIndexPrefix=" + validIndexPrefix);
        // }
      }
    }

    while (true) {
      if (currentFrame.next()) {
        // Push to new block:
        // if (DEBUG) System.out.println("  push frame");
        currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length());
        // This is a "next" frame -- even if it's
        // floor'd we must pretend it isn't so we don't
        // try to scan to the right floor frame:
        currentFrame.isFloor = false;
        // currentFrame.hasTerms = true;
        currentFrame.loadBlock();
      } else {
        // if (DEBUG) System.out.println("  return term=" + term.utf8ToString() + " " + term +
        // " currentFrame.ord=" + currentFrame.ord);
        return term.get();
      }
    }
  }

  @Override
  public BytesRef term() {
    assert !eof;
    return term.get();
  }

  @Override
  public int docFreq() throws IOException {
    assert !eof;
    return 1;
  }

  @Override
  public long totalTermFreq() throws IOException {
    assert !eof;
    return 1;
  }

  @Override
  public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
    assert !eof;
    // if (DEBUG) {
    // System.out.println("BTTR.docs seg=" + segment);
    // }
    currentFrame.decodeMetaData();
    // if (DEBUG) {
    // System.out.println("  state=" + currentFrame.state);
    // }
    return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.state, reuse, flags);
  }

  @Override
  public ImpactsEnum impacts(int flags) throws IOException {
    // Only one posting, the slow impl is fine
    // We could make this throw UOE but then CheckIndex is angry
    return new SlowImpactsEnum(postings(null, flags));
  }

  @Override
  public void seekExact(BytesRef target, TermState otherState) {
    // if (DEBUG) {
    //   System.out.println("BTTR.seekExact termState seg=" + segment + " target=" +
    // target.utf8ToString() + " " + target + " state=" + otherState);
    // }
    assert clearEOF();
    if (target.compareTo(term.get()) != 0 || !termExists) {
      assert otherState != null && otherState instanceof BlockTermState;
      currentFrame = staticFrame;
      currentFrame.state.copyFrom(otherState);
      term.copyBytes(target);
      currentFrame.metaDataUpto = currentFrame.getTermBlockOrd();
      assert currentFrame.metaDataUpto > 0;
      validIndexPrefix = 0;
    } else {
      // if (DEBUG) {
      //   System.out.println("  skip seek: already on target state=" + currentFrame.state);
      // }
    }
  }

  @Override
  public TermState termState() throws IOException {
    assert !eof;
    currentFrame.decodeMetaData();
    TermState ts = currentFrame.state.clone();
    // if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts);
    return ts;
  }

  @Override
  public void seekExact(long ord) {
    throw new UnsupportedOperationException();
  }

  @Override
  public long ord() {
    throw new UnsupportedOperationException();
  }

  @Override
  public String toString() {
    return "IDVersionSegmentTermsEnum(seg=" + fr.parent + ")";
  }
}