All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.codecs.simpletext.SimpleTextDocValuesReader Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.simpletext;

import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.DOCCOUNT;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORIGIN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.TYPE;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.function.IntFunction;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.StringHelper;

class SimpleTextDocValuesReader extends DocValuesProducer {

  /**
   * Header metadata for a single doc values field, filled in by the enclosing reader's constructor
   * while it scans the data file. Which members are populated depends on the field's doc values
   * type.
   */
  static class OneField {
    // --- location of the field's data ---

    /** File pointer of the first data byte, i.e. right after the field's header lines. */
    long dataStartFilePointer;

    // --- encoding of the data ---

    /** {@link java.text.DecimalFormat} pattern read from the PATTERN header line. */
    String pattern;

    /** {@link java.text.DecimalFormat} pattern read from the ORDPATTERN header line. */
    String ordPattern;

    /** Value of the MAXLENGTH header line. */
    int maxLength;

    /** Not read or written anywhere in the enclosing reader as shown. */
    boolean fixedLength;

    /** Value of the ORIGIN header line; added to every decoded numeric value. */
    long origin;

    // --- statistics ---

    /** Value of the DOCCOUNT header line. */
    int docCount;

    /** Value of the MINVALUE header line. */
    long minValue;

    /** Value of the MAXVALUE header line. */
    long maxValue;

    /** Value of the NUMVALUES header line. */
    long numValues;
  }

  /** Document count of the segment, taken from the segment info in the constructor. */
  final int maxDoc;

  /** The open doc values file; per-request readers work on clones of this input. */
  final IndexInput data;

  /** Line buffer used while parsing the file header in the constructor. */
  final BytesRefBuilder scratch = new BytesRefBuilder();

  /** Header metadata of every field found in the file, keyed by field name. */
  final Map<String, OneField> fields = new HashMap<>();

  /**
   * Opens the doc values file of a segment and records the header of every field stored in it.
   *
   * <p>The file is scanned once: for each field the header lines are parsed into a {@link
   * OneField}, the position of the field's data is remembered, and the data itself is skipped by
   * seeking past it. Scanning stops at the END marker.
   *
   * <p>If anything goes wrong after the file has been opened, the input is closed again before
   * the failure is propagated, so a failed construction does not leak the file handle.
   *
   * @param state segment read state providing the directory, segment name/suffix and IO context
   * @param ext file extension of the doc values file
   * @throws IOException if the file cannot be opened or read
   */
  public SimpleTextDocValuesReader(SegmentReadState state, String ext) throws IOException {
    data =
        state.directory.openInput(
            IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext),
            state.context);
    maxDoc = state.segmentInfo.maxDoc();
    try {
      while (true) {
        readLine();
        if (scratch.get().equals(END)) {
          break;
        }
        assert startsWith(FIELD) : scratch.get().utf8ToString();
        String fieldName = stripPrefix(FIELD);

        OneField field = new OneField();
        fields.put(fieldName, field);

        readLine();
        assert startsWith(TYPE) : scratch.get().utf8ToString();

        DocValuesType dvType = DocValuesType.valueOf(stripPrefix(TYPE));
        assert dvType != DocValuesType.NONE;

        // Only the numeric types carry min/max statistics in their header.
        if (dvType == DocValuesType.NUMERIC || dvType == DocValuesType.SORTED_NUMERIC) {
          readLine();
          assert startsWith(MINVALUE)
              : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
          field.minValue = Long.parseLong(stripPrefix(MINVALUE));
          readLine();
          assert startsWith(MAXVALUE)
              : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
          field.maxValue = Long.parseLong(stripPrefix(MAXVALUE));
        }

        readLine();
        assert startsWith(DOCCOUNT)
            : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
        field.docCount = Integer.parseInt(stripPrefix(DOCCOUNT));

        if (dvType == DocValuesType.NUMERIC) {
          readLine();
          assert startsWith(ORIGIN)
              : "got " + scratch.get().utf8ToString() + " field=" + fieldName + " ext=" + ext;
          field.origin = Long.parseLong(stripPrefix(ORIGIN));
          readLine();
          assert startsWith(PATTERN);
          field.pattern = stripPrefix(PATTERN);
          field.dataStartFilePointer = data.getFilePointer();
          // Skip the fixed-width per-document entries of this field.
          data.seek(data.getFilePointer() + (1 + field.pattern.length() + 2) * (long) maxDoc);
        } else if (dvType == DocValuesType.BINARY || dvType == DocValuesType.SORTED_NUMERIC) {
          // SORTED_NUMERIC shares the BINARY layout; getSortedNumeric decodes it via getBinary.
          readLine();
          assert startsWith(MAXLENGTH);
          field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
          readLine();
          assert startsWith(PATTERN);
          field.pattern = stripPrefix(PATTERN);
          field.dataStartFilePointer = data.getFilePointer();
          // Skip the fixed-width per-document entries of this field.
          data.seek(
              data.getFilePointer()
                  + (9 + field.pattern.length() + field.maxLength + 2) * (long) maxDoc);
        } else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET) {
          readLine();
          assert startsWith(NUMVALUES);
          field.numValues = Long.parseLong(stripPrefix(NUMVALUES));
          readLine();
          assert startsWith(MAXLENGTH);
          field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
          readLine();
          assert startsWith(PATTERN);
          field.pattern = stripPrefix(PATTERN);
          readLine();
          assert startsWith(ORDPATTERN);
          field.ordPattern = stripPrefix(ORDPATTERN);
          field.dataStartFilePointer = data.getFilePointer();
          // Skip the term entries (one per value) followed by the per-document ord entries.
          data.seek(
              data.getFilePointer()
                  + (9 + field.pattern.length() + field.maxLength) * field.numValues
                  + (1 + field.ordPattern.length()) * (long) maxDoc);
        } else {
          throw new AssertionError();
        }
      }

      // We should only be called from above if at least one
      // field has DVs:
      assert !fields.isEmpty();
    } catch (Throwable t) {
      // Construction failed: release the file handle, keeping the original failure primary.
      try {
        data.close();
      } catch (Throwable closeFailure) {
        t.addSuppressed(closeFailure);
      }
      throw t;
    }
  }

  /**
   * Returns an iterator over the numeric doc values of {@code fieldInfo}.
   *
   * <p>Iteration is delegated to the docs-with-field iterator built by {@code
   * getNumericDocsWithField}; the value of the current document is decoded on demand through the
   * random-access function built by {@code getNumericNonIterator}.
   *
   * @param fieldInfo the field to read
   * @return the numeric doc values, or {@code null} if no value decoder is available
   * @throws IOException if the underlying input cannot be read
   */
  @Override
  public NumericDocValues getNumeric(FieldInfo fieldInfo) throws IOException {
    final IntFunction<Long> values = getNumericNonIterator(fieldInfo);
    if (values == null) {
      return null;
    }
    final DocValuesIterator docsWithField = getNumericDocsWithField(fieldInfo);
    return new NumericDocValues() {

      @Override
      public int nextDoc() throws IOException {
        return docsWithField.nextDoc();
      }

      @Override
      public int docID() {
        return docsWithField.docID();
      }

      @Override
      public long cost() {
        return docsWithField.cost();
      }

      @Override
      public int advance(int target) throws IOException {
        return docsWithField.advance(target);
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        return docsWithField.advanceExact(target);
      }

      @Override
      public long longValue() throws IOException {
        // The typed function lets the boxed Long unbox straight into the long return value.
        return values.apply(docsWithField.docID());
      }
    };
  }

  /**
   * Builds a random-access decoder for the numeric values of {@code fieldInfo}.
   *
   * <p>The returned function seeks to the fixed-width entry of the requested document on a
   * private clone of the data input, parses the stored number with the field's pattern and adds
   * the field's origin to it. It does not check whether the document actually has a value; the
   * line carrying that flag is read and discarded.
   *
   * <p>The function throws {@link IndexOutOfBoundsException} for a doc ID outside {@code 0 ..
   * maxDoc - 1} and wraps any {@link IOException} in a {@link RuntimeException}.
   *
   * @param fieldInfo the field to read
   * @return a function mapping a doc ID to the decoded value
   * @throws IOException declared for callers; the visible body only clones the input
   */
  IntFunction<Long> getNumericNonIterator(FieldInfo fieldInfo) throws IOException {
    final OneField field = fields.get(fieldInfo.name);

    // SegmentCoreReaders already verifies this field is
    // valid:
    assert field != null : "field=" + fieldInfo.name + " fields=" + fields;

    final IndexInput in = data.clone();
    final BytesRefBuilder scratch = new BytesRefBuilder();
    final DecimalFormat decoder =
        new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));

    // Required so that parse() yields a BigDecimal, which is what the cast below expects.
    decoder.setParseBigDecimal(true);

    return new IntFunction<Long>() {
      @Override
      public Long apply(int docID) {
        try {
          if (docID < 0 || docID >= maxDoc) {
            throw new IndexOutOfBoundsException(
                "docID must be 0 .. " + (maxDoc - 1) + "; got " + docID);
          }
          in.seek(field.dataStartFilePointer + (1 + field.pattern.length() + 2) * (long) docID);
          SimpleTextUtil.readLine(in, scratch);
          BigDecimal bd;
          try {
            bd = (BigDecimal) decoder.parse(scratch.get().utf8ToString());
          } catch (ParseException pe) {
            throw new CorruptIndexException("failed to parse BigDecimal value", in, pe);
          }
          SimpleTextUtil.readLine(in, scratch); // read the line telling us if it's real or not
          // Add in BigInteger space, then narrow to long.
          return BigInteger.valueOf(field.origin).add(bd.toBigIntegerExact()).longValue();
        } catch (IOException ioe) {
          throw new RuntimeException(ioe);
        }
      }
    };
  }

  /**
   * A {@link DocIdSetIterator} that can additionally be positioned on an exact document, used by
   * this reader as the "docs with a value" iterator behind numeric and binary doc values.
   */
  private abstract static class DocValuesIterator extends DocIdSetIterator {
    /**
     * Positions the iterator on {@code target}.
     *
     * @return in the implementations in this file, {@code true} when the entry for {@code target}
     *     is flagged 'T'
     */
    abstract boolean advanceExact(int target) throws IOException;
  }

  /**
   * Creates the iterator over the documents of a numeric field whose entry is flagged 'T'.
   *
   * <p>Each probe seeks to the document's fixed-width entry on a private clone of the data input,
   * skips the value line and inspects the first byte of the following flag line.
   *
   * @param fieldInfo the numeric field to iterate
   * @return a fresh iterator positioned before the first document
   */
  private DocValuesIterator getNumericDocsWithField(FieldInfo fieldInfo) throws IOException {
    final OneField field = fields.get(fieldInfo.name);
    final IndexInput in = data.clone();
    final BytesRefBuilder scratch = new BytesRefBuilder();
    return new DocValuesIterator() {

      int doc = -1;

      /** Reads the entry of {@code docID} and tells whether its flag line starts with 'T'. */
      private boolean flaggedPresent(int docID) throws IOException {
        in.seek(field.dataStartFilePointer + (1 + field.pattern.length() + 2) * (long) docID);
        SimpleTextUtil.readLine(in, scratch); // value line, ignored here
        SimpleTextUtil.readLine(in, scratch); // flag line: 'T' or 'F'
        return scratch.byteAt(0) == (byte) 'T';
      }

      @Override
      public int docID() {
        return doc;
      }

      @Override
      public long cost() {
        return maxDoc;
      }

      @Override
      public int nextDoc() throws IOException {
        return advance(docID() + 1);
      }

      @Override
      public int advance(int target) throws IOException {
        int candidate = target;
        while (candidate < maxDoc) {
          if (flaggedPresent(candidate)) {
            doc = candidate;
            return doc;
          }
          ++candidate;
        }
        doc = NO_MORE_DOCS;
        return doc;
      }

      @Override
      boolean advanceExact(int target) throws IOException {
        // The position is updated before the entry is read.
        this.doc = target;
        return flaggedPresent(target);
      }
    };
  }

  /**
   * Returns an iterator over the binary doc values of {@code fieldInfo}.
   *
   * <p>Iteration is delegated to the iterator built by {@code getBinaryDocsWithField}. The value
   * of the current document is decoded on demand: the entry's "length" line is parsed with the
   * field's pattern, that many bytes are read, and the bytes are converted through {@code
   * SimpleTextUtil.fromBytesRefString}. The returned {@link BytesRef} comes from a builder that
   * is reused for every call.
   *
   * <p>Decoding throws {@link IndexOutOfBoundsException} for a doc ID outside {@code 0 .. maxDoc
   * - 1} and wraps any {@link IOException} in a {@link RuntimeException}.
   *
   * @param fieldInfo the field to read
   * @return the binary doc values
   * @throws IOException if the underlying input cannot be read
   */
  @Override
  public synchronized BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException {
    final OneField field = fields.get(fieldInfo.name);

    // SegmentCoreReaders already verifies this field is
    // valid:
    assert field != null;

    final IndexInput in = data.clone();
    final BytesRefBuilder scratch = new BytesRefBuilder();
    final DecimalFormat decoder =
        new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));

    final DocValuesIterator docsWithField = getBinaryDocsWithField(fieldInfo);

    final IntFunction<BytesRef> values =
        new IntFunction<BytesRef>() {
          final BytesRefBuilder term = new BytesRefBuilder();
          final BytesRefBuilder termByteArray = new BytesRefBuilder();

          @Override
          public BytesRef apply(int docID) {
            try {
              if (docID < 0 || docID >= maxDoc) {
                throw new IndexOutOfBoundsException(
                    "docID must be 0 .. " + (maxDoc - 1) + "; got " + docID);
              }
              in.seek(
                  field.dataStartFilePointer
                      + (9 + field.pattern.length() + field.maxLength + 2) * (long) docID);
              SimpleTextUtil.readLine(in, scratch);
              assert StringHelper.startsWith(scratch.get(), LENGTH);
              int len;
              try {
                len =
                    decoder
                        .parse(
                            new String(
                                scratch.bytes(),
                                LENGTH.length,
                                scratch.length() - LENGTH.length,
                                StandardCharsets.UTF_8))
                        .intValue();
              } catch (ParseException pe) {
                throw new CorruptIndexException("failed to parse int length", in, pe);
              }
              // Read the stored text form, then convert it back into the actual bytes.
              termByteArray.growNoCopy(len);
              termByteArray.setLength(len);
              in.readBytes(termByteArray.bytes(), 0, len);
              term.copyBytes(SimpleTextUtil.fromBytesRefString(termByteArray.get().utf8ToString()));
              return term.get();
            } catch (IOException ioe) {
              throw new RuntimeException(ioe);
            }
          }
        };
    return new BinaryDocValues() {

      @Override
      public int nextDoc() throws IOException {
        return docsWithField.nextDoc();
      }

      @Override
      public int docID() {
        return docsWithField.docID();
      }

      @Override
      public long cost() {
        return docsWithField.cost();
      }

      @Override
      public int advance(int target) throws IOException {
        return docsWithField.advance(target);
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        return docsWithField.advanceExact(target);
      }

      @Override
      public BytesRef binaryValue() throws IOException {
        return values.apply(docsWithField.docID());
      }
    };
  }

  /**
   * Creates the iterator over the documents of a binary-encoded field whose entry is flagged 'T'.
   *
   * <p>Each probe seeks to the document's fixed-width entry on a private clone of the data input,
   * parses the "length" line, skips over the value bytes without copying them, consumes the rest
   * of the value line and inspects the first byte of the following flag line.
   *
   * @param fieldInfo the field to iterate
   * @return a fresh iterator positioned before the first document
   */
  private DocValuesIterator getBinaryDocsWithField(FieldInfo fieldInfo) throws IOException {
    final OneField field = fields.get(fieldInfo.name);
    final IndexInput in = data.clone();
    final BytesRefBuilder scratch = new BytesRefBuilder();
    final DecimalFormat decoder =
        new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
    // Width of one per-document entry; computed in long so large widths cannot overflow int.
    final long entryWidth = 9L + field.pattern.length() + field.maxLength + 2;

    return new DocValuesIterator() {

      int doc = -1;

      /** Reads the entry of {@code docID} and tells whether its flag line starts with 'T'. */
      private boolean flaggedPresent(int docID) throws IOException {
        in.seek(field.dataStartFilePointer + entryWidth * docID);
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch.get(), LENGTH);
        int len;
        try {
          len =
              decoder
                  .parse(
                      new String(
                          scratch.bytes(),
                          LENGTH.length,
                          scratch.length() - LENGTH.length,
                          StandardCharsets.UTF_8))
                  .intValue();
        } catch (ParseException pe) {
          throw new CorruptIndexException("failed to parse int length", in, pe);
        }
        // Only the flag matters here, so step over the value bytes instead of buffering them.
        in.skipBytes(len);
        SimpleTextUtil.readLine(in, scratch); // remainder of the value line
        SimpleTextUtil.readLine(in, scratch); // 'T' or 'F'
        return scratch.byteAt(0) == (byte) 'T';
      }

      @Override
      public int nextDoc() throws IOException {
        return advance(docID() + 1);
      }

      @Override
      public int docID() {
        return doc;
      }

      @Override
      public long cost() {
        return maxDoc;
      }

      @Override
      public int advance(int target) throws IOException {
        for (int i = target; i < maxDoc; ++i) {
          if (flaggedPresent(i)) {
            return doc = i;
          }
        }
        return doc = NO_MORE_DOCS;
      }

      @Override
      boolean advanceExact(int target) throws IOException {
        this.doc = target;
        return flaggedPresent(target);
      }
    };
  }

  /**
   * Returns an iterator over the sorted doc values of {@code fieldInfo}.
   *
   * <p>The field's data consists of one fixed-width term entry per distinct value followed by one
   * fixed-width ord entry per document. A stored ord of 0 decodes to -1, which this iterator
   * treats as "document has no value".
   *
   * @param fieldInfo the field to read
   * @return the sorted doc values
   * @throws IOException if the underlying input cannot be read
   */
  @Override
  public SortedDocValues getSorted(FieldInfo fieldInfo) throws IOException {
    final OneField field = fields.get(fieldInfo.name);

    // SegmentCoreReaders already verifies this field is
    // valid:
    assert field != null;

    final IndexInput in = data.clone();
    final BytesRefBuilder scratch = new BytesRefBuilder();
    final DecimalFormatSymbols rootSymbols = new DecimalFormatSymbols(Locale.ROOT);
    final DecimalFormat decoder = new DecimalFormat(field.pattern, rootSymbols);
    final DecimalFormat ordDecoder =
        new DecimalFormat(field.ordPattern, new DecimalFormatSymbols(Locale.ROOT));

    return new SortedDocValues() {

      final BytesRefBuilder term = new BytesRefBuilder();
      int doc = -1;
      int ord;

      /** Reads the ord entry of {@code docID} into {@link #ord} (stored value minus one). */
      private void loadOrd(int docID) throws IOException {
        in.seek(
            field.dataStartFilePointer
                + field.numValues * (9 + field.pattern.length() + field.maxLength)
                + docID * (long) (1 + field.ordPattern.length()));
        SimpleTextUtil.readLine(in, scratch);
        try {
          ord = (int) ordDecoder.parse(scratch.get().utf8ToString()).longValue() - 1;
        } catch (ParseException pe) {
          throw new CorruptIndexException("failed to parse ord", in, pe);
        }
      }

      @Override
      public int docID() {
        return doc;
      }

      @Override
      public long cost() {
        return maxDoc;
      }

      @Override
      public int nextDoc() throws IOException {
        return advance(docID() + 1);
      }

      @Override
      public int advance(int target) throws IOException {
        int candidate = target;
        while (candidate < maxDoc) {
          loadOrd(candidate);
          if (ord >= 0) {
            doc = candidate;
            return doc;
          }
          ++candidate;
        }
        doc = NO_MORE_DOCS;
        return doc;
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        // The position is updated before the entry is read.
        this.doc = target;
        loadOrd(target);
        return ord >= 0;
      }

      @Override
      public int ordValue() {
        return ord;
      }

      @Override
      public BytesRef lookupOrd(int ord) throws IOException {
        final boolean inRange = ord >= 0 && ord < field.numValues;
        if (!inRange) {
          throw new IndexOutOfBoundsException(
              "ord must be 0 .. " + (field.numValues - 1) + "; got " + ord);
        }
        in.seek(
            field.dataStartFilePointer
                + ord * (long) (9 + field.pattern.length() + field.maxLength));
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch.get(), LENGTH)
            : "got " + scratch.get().utf8ToString() + " in=" + in;
        // Everything after the LENGTH prefix is the formatted byte count of the term.
        final String lengthText =
            new String(
                scratch.bytes(),
                LENGTH.length,
                scratch.length() - LENGTH.length,
                StandardCharsets.UTF_8);
        int len;
        try {
          len = decoder.parse(lengthText).intValue();
        } catch (ParseException pe) {
          throw new CorruptIndexException("failed to parse int length", in, pe);
        }
        term.growNoCopy(len);
        term.setLength(len);
        in.readBytes(term.bytes(), 0, len);
        return term.get();
      }

      @Override
      public int getValueCount() {
        return (int) field.numValues;
      }
    };
  }

  /**
   * Returns an iterator over the sorted numeric doc values of {@code field}.
   *
   * <p>The values are layered on top of {@link #getBinary}: each document's binary value is read
   * as a UTF-8 string of comma-separated longs, which is parsed whenever the iterator lands on a
   * document.
   *
   * @param field the field to read
   * @return the sorted numeric doc values
   * @throws IOException if the underlying input cannot be read
   */
  @Override
  public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
    final BinaryDocValues binary = getBinary(field);
    return new SortedNumericDocValues() {

      long[] values;
      int index;

      /** Parses the current document's comma-separated values; no-op once exhausted. */
      private void setCurrentDoc() throws IOException {
        if (docID() != NO_MORE_DOCS) {
          final String csv = binary.binaryValue().utf8ToString();
          if (csv.isEmpty()) {
            values = new long[0];
          } else {
            final String[] parts = csv.split(",");
            values = new long[parts.length];
            int slot = 0;
            while (slot < values.length) {
              values[slot] = Long.parseLong(parts[slot]);
              slot++;
            }
          }
          index = 0;
        }
      }

      @Override
      public int docID() {
        return binary.docID();
      }

      @Override
      public long cost() {
        return binary.cost();
      }

      @Override
      public int nextDoc() throws IOException {
        final int next = binary.nextDoc();
        setCurrentDoc();
        return next;
      }

      @Override
      public int advance(int target) throws IOException {
        final int landed = binary.advance(target);
        setCurrentDoc();
        return landed;
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        if (!binary.advanceExact(target)) {
          return false;
        }
        setCurrentDoc();
        return true;
      }

      @Override
      public long nextValue() throws IOException {
        return values[index++];
      }

      @Override
      public int docValueCount() {
        return values.length;
      }
    };
  }

  /**
   * Returns an iterator over the sorted set doc values of {@code fieldInfo}.
   *
   * <p>The field's data consists of one fixed-width term entry per distinct value followed by one
   * fixed-width line per document holding a comma-separated ord list. A document whose trimmed
   * line is empty is treated as having no value.
   *
   * @param fieldInfo the field to read
   * @return the sorted set doc values
   * @throws IOException if the underlying input cannot be read
   */
  @Override
  public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException {
    final OneField field = fields.get(fieldInfo.name);

    // SegmentCoreReaders already verifies this field is
    // valid:
    assert field != null;

    final IndexInput in = data.clone();
    final BytesRefBuilder scratch = new BytesRefBuilder();
    final DecimalFormatSymbols rootSymbols = new DecimalFormatSymbols(Locale.ROOT);
    final DecimalFormat decoder = new DecimalFormat(field.pattern, rootSymbols);

    return new SortedSetDocValues() {

      final BytesRefBuilder term = new BytesRefBuilder();
      String[] currentOrds = new String[0];
      int currentIndex = 0;
      int doc = -1;

      /** Reads and trims the ord-list line of {@code docID}. */
      private String readOrdList(int docID) throws IOException {
        in.seek(
            field.dataStartFilePointer
                + field.numValues * (long) (9 + field.pattern.length() + field.maxLength)
                + docID * (long) (1 + field.ordPattern.length()));
        SimpleTextUtil.readLine(in, scratch);
        return scratch.get().utf8ToString().trim();
      }

      @Override
      public int docID() {
        return doc;
      }

      @Override
      public long cost() {
        return maxDoc;
      }

      @Override
      public int nextDoc() throws IOException {
        return advance(doc + 1);
      }

      @Override
      public int advance(int target) throws IOException {
        int candidate = target;
        while (candidate < maxDoc) {
          final String ordList = readOrdList(candidate);
          if (!ordList.isEmpty()) {
            currentOrds = ordList.split(",");
            currentIndex = 0;
            doc = candidate;
            return doc;
          }
          ++candidate;
        }
        doc = NO_MORE_DOCS;
        return doc;
      }

      @Override
      public boolean advanceExact(int target) throws IOException {
        final String ordList = readOrdList(target);
        // The position moves only after the line has been read successfully.
        doc = target;
        if (ordList.isEmpty()) {
          return false;
        }
        currentOrds = ordList.split(",");
        currentIndex = 0;
        return true;
      }

      @Override
      public long nextOrd() throws IOException {
        final String next = currentOrds[currentIndex++];
        return Long.parseLong(next);
      }

      @Override
      public int docValueCount() {
        return currentOrds.length;
      }

      @Override
      public BytesRef lookupOrd(long ord) throws IOException {
        final boolean inRange = ord >= 0 && ord < field.numValues;
        if (!inRange) {
          throw new IndexOutOfBoundsException(
              "ord must be 0 .. " + (field.numValues - 1) + "; got " + ord);
        }
        in.seek(field.dataStartFilePointer + ord * (9 + field.pattern.length() + field.maxLength));
        SimpleTextUtil.readLine(in, scratch);
        assert StringHelper.startsWith(scratch.get(), LENGTH)
            : "got " + scratch.get().utf8ToString() + " in=" + in;
        // Everything after the LENGTH prefix is the formatted byte count of the term.
        final String lengthText =
            new String(
                scratch.bytes(),
                LENGTH.length,
                scratch.length() - LENGTH.length,
                StandardCharsets.UTF_8);
        int len;
        try {
          len = decoder.parse(lengthText).intValue();
        } catch (ParseException pe) {
          throw new CorruptIndexException("failed to parse int length", in, pe);
        }
        term.growNoCopy(len);
        term.setLength(len);
        in.readBytes(term.bytes(), 0, len);
        return term.get();
      }

      @Override
      public long getValueCount() {
        return field.numValues;
      }
    };
  }

  /**
   * Closes the data input that was opened by the constructor.
   *
   * @throws IOException if closing the input fails
   */
  @Override
  public void close() throws IOException {
    data.close();
  }

  /**
   * Reads the next line of {@code data} into the shared {@code scratch} buffer, replacing its
   * previous content. Used only in ctor.
   */
  private void readLine() throws IOException {
    SimpleTextUtil.readLine(data, scratch);
    // System.out.println("line: " + scratch.utf8ToString());
  }

  /**
   * Tells whether the line currently held in {@code scratch} begins with {@code prefix}. Used only
   * in ctor.
   */
  private boolean startsWith(BytesRef prefix) {
    return StringHelper.startsWith(scratch.get(), prefix);
  }

  /**
   * Returns the line currently held in {@code scratch} without its first {@code prefix.length}
   * bytes, decoded as UTF-8. The prefix itself is not compared; callers check it with {@code
   * startsWith} first. Used only in ctor.
   */
  private String stripPrefix(BytesRef prefix) {
    final int start = prefix.length;
    final int remaining = scratch.length() - start;
    return new String(scratch.bytes(), start, remaining, StandardCharsets.UTF_8);
  }

  /** Returns the simple class name followed by the number of fields found in the file. */
  @Override
  public String toString() {
    final StringBuilder description = new StringBuilder(getClass().getSimpleName());
    description.append("(fields=").append(fields.size()).append(")");
    return description.toString();
  }

  /**
   * Verifies the checksum footer of the data file.
   *
   * <p>A clone of the input is re-read from the start, line by line, through a checksumming
   * wrapper until the expected start of the footer is reached; the footer is then validated by
   * {@code SimpleTextUtil.checkFooter}.
   *
   * @throws CorruptIndexException if a line boundary does not fall exactly on the footer start
   * @throws IOException if the input cannot be read
   */
  @Override
  public void checkIntegrity() throws IOException {
    final IndexInput clone = data.clone();
    clone.seek(0);
    // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included
    // in SimpleTextUtil.CHECKSUM):
    final long footerStartPos = clone.length() - (SimpleTextUtil.CHECKSUM.length + 21);
    final ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
    final BytesRefBuilder scratch = new BytesRefBuilder();

    // Consume whole lines until the read position reaches (or passes) the footer.
    do {
      SimpleTextUtil.readLine(input, scratch);
    } while (input.getFilePointer() < footerStartPos);

    // Make sure we landed at precisely the right location:
    if (input.getFilePointer() != footerStartPos) {
      throw new CorruptIndexException(
          "SimpleText failure: footer does not start at expected position current="
              + input.getFilePointer()
              + " vs expected="
              + footerStartPos,
          input);
    }
    SimpleTextUtil.checkFooter(input);
  }

  /**
   * Returns a single-level skipper for {@code fieldInfo} that describes the whole segment as one
   * interval.
   *
   * <p>For NUMERIC and SORTED_NUMERIC fields the value bounds are the min/max read from the file
   * header; for all other types they are {@code 0} and {@code numValues - 1}. Every level reports
   * the same global figures.
   *
   * @param fieldInfo the field to build the skipper for
   * @return a new skipper positioned before the first document
   */
  @Override
  public DocValuesSkipper getSkipper(FieldInfo fieldInfo) {
    final DocValuesType type = fieldInfo.getDocValuesType();
    final boolean numeric =
        type == DocValuesType.NUMERIC || type == DocValuesType.SORTED_NUMERIC;
    final OneField field = fields.get(fieldInfo.name);

    // SegmentCoreReaders already verifies this field is
    // valid:
    assert field != null;

    return new DocValuesSkipper() {
      int doc = -1;

      /**
       * Resolves an interval bound: -1 before the first advance, NO_MORE_DOCS once the target is
       * past the segment or the field has no documents, otherwise {@code withinSegment}.
       */
      private int intervalBound(int withinSegment) {
        if (doc == -1) {
          return -1;
        }
        final boolean exhausted = doc >= maxDoc || field.docCount == 0;
        return exhausted ? DocIdSetIterator.NO_MORE_DOCS : withinSegment;
      }

      @Override
      public void advance(int target) {
        doc = target;
      }

      @Override
      public int numLevels() {
        return 1;
      }

      @Override
      public int minDocID(int level) {
        return intervalBound(0);
      }

      @Override
      public int maxDocID(int level) {
        return intervalBound(maxDoc);
      }

      @Override
      public long minValue() {
        if (numeric) {
          return field.minValue;
        }
        return 0;
      }

      @Override
      public long maxValue() {
        if (numeric) {
          return field.maxValue;
        }
        return field.numValues - 1;
      }

      @Override
      public int docCount() {
        return field.docCount;
      }

      @Override
      public long minValue(int level) {
        return minValue();
      }

      @Override
      public long maxValue(int level) {
        return maxValue();
      }

      @Override
      public int docCount(int level) {
        return docCount();
      }
    };
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy