/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsFormat;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.InvertableType;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.document.StoredValue;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.Version;

/** Default general purpose indexing chain, which handles indexing all types of fields. */
final class IndexingChain implements Accountable {
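
  // Illustrative sketch (not part of the API, variable names are hypothetical): the owning
  // DocumentsWriterPerThread drives this chain roughly as follows, calling processDocument(...)
  // once per buffered document and flush(...) once when the in-memory segment is written:
  //
  //   IndexingChain chain =
  //       new IndexingChain(versionMajor, segmentInfo, dir, fieldInfosBuilder, config, onAbort);
  //   for (int docID = 0; docID < numBufferedDocs; docID++) {
  //     chain.processDocument(docID, docs.get(docID)); // Iterable<? extends IndexableField>
  //   }
  //   Sorter.DocMap sortMap = chain.flush(segmentWriteState);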

  final Counter bytesUsed = Counter.newCounter();
  final FieldInfos.Builder fieldInfos;

  // Writes postings and term vectors:
  final TermsHash termsHash;
  // Shared pool for doc-value terms
  final ByteBlockPool docValuesBytePool;
  // Writes stored fields
  final StoredFieldsConsumer storedFieldsConsumer;
  final VectorValuesConsumer vectorValuesConsumer;
  final TermVectorsConsumer termVectorsWriter;

  // NOTE: I tried using a HashMap<String,PerField>
  // but it was ~2% slower on Wiki and Geonames with Java
  // 1.7.0_25:
  private PerField[] fieldHash = new PerField[2];
  private int hashMask = 1;
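  // fieldHash is a small chained hash table sized to a power of two, so
  // (fieldName.hashCode() & hashMask) selects a bucket and collisions are linked through
  // PerField.next; see getOrAddPerField() and rehash() below.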

  private int totalFieldCount;
  private long nextFieldGen;

  // Holds fields seen in each document
  private PerField[] fields = new PerField[1];
  private PerField[] docFields = new PerField[2];
  private final InfoStream infoStream;
  private final ByteBlockPool.Allocator byteBlockAllocator;
  private final LiveIndexWriterConfig indexWriterConfig;
  private final int indexCreatedVersionMajor;
  private final Consumer<Throwable> abortingExceptionConsumer;
  private boolean hasHitAbortingException;

  IndexingChain(
      int indexCreatedVersionMajor,
      SegmentInfo segmentInfo,
      Directory directory,
      FieldInfos.Builder fieldInfos,
      LiveIndexWriterConfig indexWriterConfig,
      Consumer<Throwable> abortingExceptionConsumer) {
    this.indexCreatedVersionMajor = indexCreatedVersionMajor;
    byteBlockAllocator = new ByteBlockPool.DirectTrackingAllocator(bytesUsed);
    IntBlockPool.Allocator intBlockAllocator = new IntBlockAllocator(bytesUsed);
    this.indexWriterConfig = indexWriterConfig;
    assert segmentInfo.getIndexSort() == indexWriterConfig.getIndexSort();
    this.fieldInfos = fieldInfos;
    this.infoStream = indexWriterConfig.getInfoStream();
    this.abortingExceptionConsumer = abortingExceptionConsumer;
    this.vectorValuesConsumer =
        new VectorValuesConsumer(indexWriterConfig.getCodec(), directory, segmentInfo, infoStream);

    if (segmentInfo.getIndexSort() == null) {
      storedFieldsConsumer =
          new StoredFieldsConsumer(indexWriterConfig.getCodec(), directory, segmentInfo);
      termVectorsWriter =
          new TermVectorsConsumer(
              intBlockAllocator,
              byteBlockAllocator,
              directory,
              segmentInfo,
              indexWriterConfig.getCodec());
    } else {
      storedFieldsConsumer =
          new SortingStoredFieldsConsumer(indexWriterConfig.getCodec(), directory, segmentInfo);
      termVectorsWriter =
          new SortingTermVectorsConsumer(
              intBlockAllocator,
              byteBlockAllocator,
              directory,
              segmentInfo,
              indexWriterConfig.getCodec());
    }
    termsHash =
        new FreqProxTermsWriter(
            intBlockAllocator, byteBlockAllocator, bytesUsed, termVectorsWriter);
    docValuesBytePool = new ByteBlockPool(byteBlockAllocator);
  }

  private void onAbortingException(Throwable th) {
    assert th != null;
    this.hasHitAbortingException = true;
    abortingExceptionConsumer.accept(th);
  }

  private LeafReader getDocValuesLeafReader() {
    return new DocValuesLeafReader() {
      @Override
      public NumericDocValues getNumericDocValues(String field) {
        PerField pf = getPerField(field);
        if (pf == null) {
          return null;
        }
        if (pf.fieldInfo.getDocValuesType() == DocValuesType.NUMERIC) {
          return (NumericDocValues) pf.docValuesWriter.getDocValues();
        }
        return null;
      }

      @Override
      public BinaryDocValues getBinaryDocValues(String field) {
        PerField pf = getPerField(field);
        if (pf == null) {
          return null;
        }
        if (pf.fieldInfo.getDocValuesType() == DocValuesType.BINARY) {
          return (BinaryDocValues) pf.docValuesWriter.getDocValues();
        }
        return null;
      }

      @Override
      public SortedDocValues getSortedDocValues(String field) throws IOException {
        PerField pf = getPerField(field);
        if (pf == null) {
          return null;
        }
        if (pf.fieldInfo.getDocValuesType() == DocValuesType.SORTED) {
          return (SortedDocValues) pf.docValuesWriter.getDocValues();
        }
        return null;
      }

      @Override
      public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
        PerField pf = getPerField(field);
        if (pf == null) {
          return null;
        }
        if (pf.fieldInfo.getDocValuesType() == DocValuesType.SORTED_NUMERIC) {
          return (SortedNumericDocValues) pf.docValuesWriter.getDocValues();
        }
        return null;
      }

      @Override
      public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
        PerField pf = getPerField(field);
        if (pf == null) {
          return null;
        }
        if (pf.fieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) {
          return (SortedSetDocValues) pf.docValuesWriter.getDocValues();
        }
        return null;
      }

      @Override
      public FieldInfos getFieldInfos() {
        return fieldInfos.finish();
      }
    };
  }

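  /**
   * Computes the {@link Sorter.DocMap} for this segment if index sorting is configured, reading
   * the sort fields from the in-memory doc-values writers. Returns null if there is no index sort
   * or the buffered documents already happen to be in sorted order.
   */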
  private Sorter.DocMap maybeSortSegment(SegmentWriteState state) throws IOException {
    Sort indexSort = state.segmentInfo.getIndexSort();
    if (indexSort == null) {
      return null;
    }

    LeafReader docValuesReader = getDocValuesLeafReader();
    Function<IndexSorter.DocComparator, IndexSorter.DocComparator> comparatorWrapper =
        Function.identity();

    if (state.segmentInfo.getHasBlocks() && state.fieldInfos.getParentField() != null) {
      final DocIdSetIterator readerValues =
          docValuesReader.getNumericDocValues(state.fieldInfos.getParentField());
      if (readerValues == null) {
        throw new CorruptIndexException(
            "missing doc values for parent field \"" + state.fieldInfos.getParentField() + "\"",
            "IndexingChain");
      }
      BitSet parents = BitSet.of(readerValues, state.segmentInfo.maxDoc());
      comparatorWrapper =
          in ->
              (docID1, docID2) ->
                  in.compare(parents.nextSetBit(docID1), parents.nextSetBit(docID2));
    }
    if (state.segmentInfo.getHasBlocks()
        && state.fieldInfos.getParentField() == null
        && indexCreatedVersionMajor >= Version.LUCENE_10_0_0.major) {
      throw new CorruptIndexException(
          "parent field is not set but the index has blocks and uses index sorting. indexCreatedVersionMajor: "
              + indexCreatedVersionMajor,
          "IndexingChain");
    }
    List<IndexSorter.DocComparator> comparators = new ArrayList<>();
    for (int i = 0; i < indexSort.getSort().length; i++) {
      SortField sortField = indexSort.getSort()[i];
      IndexSorter sorter = sortField.getIndexSorter();
      if (sorter == null) {
        throw new UnsupportedOperationException("Cannot sort index using sort field " + sortField);
      }

      IndexSorter.DocComparator docComparator =
          sorter.getDocComparator(docValuesReader, state.segmentInfo.maxDoc());
      comparators.add(comparatorWrapper.apply(docComparator));
    }
    Sorter sorter = new Sorter(indexSort);
    // returns null if the documents are already sorted
    return sorter.sort(
        state.segmentInfo.maxDoc(), comparators.toArray(IndexSorter.DocComparator[]::new));
  }

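  /**
   * Flushes all buffered state for this segment: norms, doc values, points, vectors, stored
   * fields, then postings and term vectors, and finally the field infos. Returns the sort map
   * computed by {@link #maybeSortSegment}, or null if the segment did not need sorting.
   */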
  Sorter.DocMap flush(SegmentWriteState state) throws IOException {

    // NOTE: caller (DocumentsWriterPerThread) handles
    // aborting on any exception from this method
    Sorter.DocMap sortMap = maybeSortSegment(state);
    int maxDoc = state.segmentInfo.maxDoc();
    long t0 = System.nanoTime();
    writeNorms(state, sortMap);
    if (infoStream.isEnabled("IW")) {
      infoStream.message(
          "IW", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to write norms");
    }
    SegmentReadState readState =
        new SegmentReadState(
            state.directory,
            state.segmentInfo,
            state.fieldInfos,
            IOContext.DEFAULT,
            state.segmentSuffix);

    t0 = System.nanoTime();
    writeDocValues(state, sortMap);
    if (infoStream.isEnabled("IW")) {
      infoStream.message(
          "IW", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to write docValues");
    }

    t0 = System.nanoTime();
    writePoints(state, sortMap);
    if (infoStream.isEnabled("IW")) {
      infoStream.message(
          "IW", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to write points");
    }

    t0 = System.nanoTime();
    vectorValuesConsumer.flush(state, sortMap);
    if (infoStream.isEnabled("IW")) {
      infoStream.message(
          "IW", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to write vectors");
    }

    // It's possible all docs hit non-aborting exceptions, so finish(maxDoc) pads stored fields
    // for any documents that were never written:
    t0 = System.nanoTime();
    storedFieldsConsumer.finish(maxDoc);
    storedFieldsConsumer.flush(state, sortMap);
    if (infoStream.isEnabled("IW")) {
      infoStream.message(
          "IW",
          TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to finish stored fields");
    }

    t0 = System.nanoTime();
    Map<String, TermsHashPerField> fieldsToFlush = new HashMap<>();
    for (int i = 0; i < fieldHash.length; i++) {
      PerField perField = fieldHash[i];
      while (perField != null) {
        if (perField.invertState != null) {
          fieldsToFlush.put(perField.fieldInfo.name, perField.termsHashPerField);
        }
        perField = perField.next;
      }
    }

    try (NormsProducer norms =
        readState.fieldInfos.hasNorms()
            ? state.segmentInfo.getCodec().normsFormat().normsProducer(readState)
            : null) {
      NormsProducer normsMergeInstance = null;
      if (norms != null) {
        // Use the merge instance in order to reuse the same IndexInput for all terms
        normsMergeInstance = norms.getMergeInstance();
      }
      termsHash.flush(fieldsToFlush, state, sortMap, normsMergeInstance);
    }
    if (infoStream.isEnabled("IW")) {
      infoStream.message(
          "IW",
          TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0)
              + " ms to write postings and finish vectors");
    }

    // Important to save after asking consumer to flush so
    // consumer can alter the FieldInfo* if necessary.  EG,
    // FreqProxTermsWriter does this with
    // FieldInfo.storePayload.
    t0 = System.nanoTime();
    indexWriterConfig
        .getCodec()
        .fieldInfosFormat()
        .write(state.directory, state.segmentInfo, "", state.fieldInfos, IOContext.DEFAULT);
    if (infoStream.isEnabled("IW")) {
      infoStream.message(
          "IW", TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0) + " ms to write fieldInfos");
    }

    return sortMap;
  }

  /** Writes all buffered points. */
  private void writePoints(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
    PointsWriter pointsWriter = null;
    boolean success = false;
    try {
      for (int i = 0; i < fieldHash.length; i++) {
        PerField perField = fieldHash[i];
        while (perField != null) {
          if (perField.pointValuesWriter != null) {
            // We could have initialized pointValuesWriter, but failed to write even a single doc
            if (perField.fieldInfo.getPointDimensionCount() > 0) {
              if (pointsWriter == null) {
                // lazy init
                PointsFormat fmt = state.segmentInfo.getCodec().pointsFormat();
                if (fmt == null) {
                  throw new IllegalStateException(
                      "field=\""
                          + perField.fieldInfo.name
                          + "\" was indexed as points but codec does not support points");
                }
                pointsWriter = fmt.fieldsWriter(state);
              }
              perField.pointValuesWriter.flush(state, sortMap, pointsWriter);
            }
            perField.pointValuesWriter = null;
          }
          perField = perField.next;
        }
      }
      if (pointsWriter != null) {
        pointsWriter.finish();
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(pointsWriter);
      } else {
        IOUtils.closeWhileHandlingException(pointsWriter);
      }
    }
  }

  /** Writes all buffered doc values (called from {@link #flush}). */
  private void writeDocValues(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
    DocValuesConsumer dvConsumer = null;
    boolean success = false;
    try {
      for (int i = 0; i < fieldHash.length; i++) {
        PerField perField = fieldHash[i];
        while (perField != null) {
          if (perField.docValuesWriter != null) {
            if (perField.fieldInfo.getDocValuesType() == DocValuesType.NONE) {
              // BUG
              throw new AssertionError(
                  "segment="
                      + state.segmentInfo
                      + ": field=\""
                      + perField.fieldInfo.name
                      + "\" has no docValues but wrote them");
            }
            if (dvConsumer == null) {
              // lazy init
              DocValuesFormat fmt = state.segmentInfo.getCodec().docValuesFormat();
              dvConsumer = fmt.fieldsConsumer(state);
            }
            perField.docValuesWriter.flush(state, sortMap, dvConsumer);
            perField.docValuesWriter = null;
          } else if (perField.fieldInfo != null
              && perField.fieldInfo.getDocValuesType() != DocValuesType.NONE) {
            // BUG
            throw new AssertionError(
                "segment="
                    + state.segmentInfo
                    + ": field=\""
                    + perField.fieldInfo.name
                    + "\" has docValues but did not write them");
          }
          perField = perField.next;
        }
      }

      // TODO: catch missing DV fields here?  else we have
      // null/"" depending on how docs landed in segments?
      // but we can't detect all cases, and we should leave
      // this behavior undefined. dv is not "schemaless": it's column-stride.
      success = true;
    } finally {
      if (success) {
        IOUtils.close(dvConsumer);
      } else {
        IOUtils.closeWhileHandlingException(dvConsumer);
      }
    }

    if (state.fieldInfos.hasDocValues() == false) {
      if (dvConsumer != null) {
        // BUG
        throw new AssertionError(
            "segment=" + state.segmentInfo + ": fieldInfos has no docValues but wrote them");
      }
    } else if (dvConsumer == null) {
      // BUG
      throw new AssertionError(
          "segment=" + state.segmentInfo + ": fieldInfos has docValues but did not wrote them");
    }
  }

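  /** Writes all buffered norms (called from {@link #flush}). */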
  private void writeNorms(SegmentWriteState state, Sorter.DocMap sortMap) throws IOException {
    boolean success = false;
    NormsConsumer normsConsumer = null;
    try {
      if (state.fieldInfos.hasNorms()) {
        NormsFormat normsFormat = state.segmentInfo.getCodec().normsFormat();
        assert normsFormat != null;
        normsConsumer = normsFormat.normsConsumer(state);

        for (FieldInfo fi : state.fieldInfos) {
          PerField perField = getPerField(fi.name);
          assert perField != null;

          // we must check the final value of omitNorms for the fieldinfo: it could have
          // changed for this field since the first time we added it.
          if (fi.omitsNorms() == false && fi.getIndexOptions() != IndexOptions.NONE) {
            assert perField.norms != null : "field=" + fi.name;
            perField.norms.finish(state.segmentInfo.maxDoc());
            perField.norms.flush(state, sortMap, normsConsumer);
          }
        }
      }
      success = true;
    } finally {
      if (success) {
        IOUtils.close(normsConsumer);
      } else {
        IOUtils.closeWhileHandlingException(normsConsumer);
      }
    }
  }

  @SuppressWarnings("try")
  void abort() throws IOException {
    // finalizer will e.g. close any open files in the term vectors writer:
    try (Closeable finalizer = termsHash::abort) {
      storedFieldsConsumer.abort();
      vectorValuesConsumer.abort();
    } finally {
      Arrays.fill(fieldHash, null);
    }
  }

  private void rehash() {
    int newHashSize = (fieldHash.length * 2);
    assert newHashSize > fieldHash.length;

    PerField[] newHashArray = new PerField[newHashSize];

    // Rehash
    int newHashMask = newHashSize - 1;
    for (int j = 0; j < fieldHash.length; j++) {
      PerField fp0 = fieldHash[j];
      while (fp0 != null) {
        final int hashPos2 = fp0.fieldName.hashCode() & newHashMask;
        PerField nextFP0 = fp0.next;
        fp0.next = newHashArray[hashPos2];
        newHashArray[hashPos2] = fp0;
        fp0 = nextFP0;
      }
    }

    fieldHash = newHashArray;
    hashMask = newHashMask;
  }

  /** Calls StoredFieldsWriter.startDocument, aborting the segment if it hits any exception. */
  private void startStoredFields(int docID) throws IOException {
    try {
      storedFieldsConsumer.startDocument(docID);
    } catch (Throwable th) {
      onAbortingException(th);
      throw th;
    }
  }

  /** Calls StoredFieldsWriter.finishDocument, aborting the segment if it hits any exception. */
  private void finishStoredFields() throws IOException {
    try {
      storedFieldsConsumer.finishDocument();
    } catch (Throwable th) {
      onAbortingException(th);
      throw th;
    }
  }

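  /**
   * Indexes a single document: the first pass over the fields builds and validates the per-field
   * schema for this document, the second pass inverts, stores, and writes doc values, points, and
   * vectors for each field.
   */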
  void processDocument(int docID, Iterable<? extends IndexableField> document) throws IOException {
    // number of unique fields by names (collapses multiple field instances by the same name)
    int fieldCount = 0;
    int indexedFieldCount = 0; // number of unique fields indexed with postings
    long fieldGen = nextFieldGen++;
    int docFieldIdx = 0;

    // NOTE: we need two passes here, in case there are
    // multi-valued fields, because we must process all
    // instances of a given field at once, since the
    // analyzer is free to reuse TokenStream across fields
    // (i.e., we cannot have more than one TokenStream
    // running "at once"):
    termsHash.startDocument();
    startStoredFields(docID);
    try {
      // 1st pass over doc fields – verify that doc schema matches the index schema
      // build schema for each unique doc field
      for (IndexableField field : document) {
        IndexableFieldType fieldType = field.fieldType();
        final boolean isReserved = field.getClass() == ReservedField.class;
        PerField pf =
            getOrAddPerField(
                field.name(), false
                /* reserved fields are never added during indexing; they are registered during DWPT setup */);
        if (pf.reserved != isReserved) {
          throw new IllegalArgumentException(
              "\""
                  + field.name()
                  + "\" is a reserved field and should not be added to any document");
        }
        if (pf.fieldGen != fieldGen) { // first time we see this field in this document
          fields[fieldCount++] = pf;
          pf.fieldGen = fieldGen;
          pf.reset(docID);
        }
        if (docFieldIdx >= docFields.length) oversizeDocFields();
        docFields[docFieldIdx++] = pf;
        updateDocFieldSchema(field.name(), pf.schema, fieldType);
      }
      // For each field, if it's the first time we see this field in this segment,
      // initialize its FieldInfo.
      // If we have already seen this field, verify that its schema
      // within the current doc matches its schema in the index.
      for (int i = 0; i < fieldCount; i++) {
        PerField pf = fields[i];
        if (pf.fieldInfo == null) {
          initializeFieldInfo(pf);
        } else {
          pf.schema.assertSameSchema(pf.fieldInfo);
        }
      }

      // 2nd pass over doc fields – index each field
      // also count the number of unique fields indexed with postings
      docFieldIdx = 0;
      for (IndexableField field : document) {
        if (processField(docID, field, docFields[docFieldIdx])) {
          fields[indexedFieldCount] = docFields[docFieldIdx];
          indexedFieldCount++;
        }
        docFieldIdx++;
      }
    } finally {
      if (hasHitAbortingException == false) {
        // Finish each indexed field name seen in the document:
        for (int i = 0; i < indexedFieldCount; i++) {
          fields[i].finish(docID);
        }
        finishStoredFields();
        // TODO: for broken docs, optimize termsHash.finishDocument
        try {
          termsHash.finishDocument(docID);
        } catch (Throwable th) {
          // Must abort, on the possibility that on-disk term
          // vectors are now corrupt:
          abortingExceptionConsumer.accept(th);
          throw th;
        }
      }
    }
  }

  private void oversizeDocFields() {
    PerField[] newDocFields =
        new PerField
            [ArrayUtil.oversize(docFields.length + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
    System.arraycopy(docFields, 0, newDocFields, 0, docFields.length);
    docFields = newDocFields;
  }

  private void initializeFieldInfo(PerField pf) throws IOException {
    // Create and add a new fieldInfo to fieldInfos for this segment.
    // During the creation of FieldInfo there is also verification of the correctness of all its
    // parameters.

    // If the fieldInfo doesn't exist in globalFieldNumbers for the whole index,
    // it will be added there.
    // If the field already exists in globalFieldNumbers (i.e. field present in other segments),
    // we check consistency of its schema with schema for the whole index.
    FieldSchema s = pf.schema;
    if (indexWriterConfig.getIndexSort() != null && s.docValuesType != DocValuesType.NONE) {
      final Sort indexSort = indexWriterConfig.getIndexSort();
      validateIndexSortDVType(indexSort, pf.fieldName, s.docValuesType);
    }
    if (s.vectorDimension != 0) {
      validateMaxVectorDimension(
          pf.fieldName,
          s.vectorDimension,
          indexWriterConfig.getCodec().knnVectorsFormat().getMaxDimensions(pf.fieldName));
    }
    FieldInfo fi =
        fieldInfos.add(
            new FieldInfo(
                pf.fieldName,
                -1,
                s.storeTermVector,
                s.omitNorms,
                // storePayloads is set up during indexing, if payloads were seen
                false,
                s.indexOptions,
                s.docValuesType,
                s.docValuesSkipIndex,
                -1,
                s.attributes,
                s.pointDimensionCount,
                s.pointIndexDimensionCount,
                s.pointNumBytes,
                s.vectorDimension,
                s.vectorEncoding,
                s.vectorSimilarityFunction,
                pf.fieldName.equals(fieldInfos.getSoftDeletesFieldName()),
                pf.fieldName.equals(fieldInfos.getParentFieldName())));
    pf.setFieldInfo(fi);
    if (fi.getIndexOptions() != IndexOptions.NONE) {
      pf.setInvertState();
    }
    DocValuesType dvType = fi.getDocValuesType();
    switch (dvType) {
      case NONE:
        break;
      case NUMERIC:
        pf.docValuesWriter = new NumericDocValuesWriter(fi, bytesUsed);
        break;
      case BINARY:
        pf.docValuesWriter = new BinaryDocValuesWriter(fi, bytesUsed);
        break;
      case SORTED:
        pf.docValuesWriter = new SortedDocValuesWriter(fi, bytesUsed, docValuesBytePool);
        break;
      case SORTED_NUMERIC:
        pf.docValuesWriter = new SortedNumericDocValuesWriter(fi, bytesUsed);
        break;
      case SORTED_SET:
        pf.docValuesWriter = new SortedSetDocValuesWriter(fi, bytesUsed, docValuesBytePool);
        break;
      default:
        throw new AssertionError("unrecognized DocValues.Type: " + dvType);
    }
    if (fi.getPointDimensionCount() != 0) {
      pf.pointValuesWriter = new PointValuesWriter(bytesUsed, fi);
    }
    if (fi.getVectorDimension() != 0) {
      try {
        pf.knnFieldVectorsWriter = vectorValuesConsumer.addField(fi);
      } catch (Throwable th) {
        onAbortingException(th);
        throw th;
      }
    }
  }

  /** Indexes one field. Returns {@code true} if we are indexing a unique field with postings. */
  private boolean processField(int docID, IndexableField field, PerField pf) throws IOException {
    IndexableFieldType fieldType = field.fieldType();
    boolean indexedField = false;

    // Invert indexed fields
    if (fieldType.indexOptions() != IndexOptions.NONE) {
      if (pf.first) { // first time we see this field in this doc
        pf.invert(docID, field, true);
        pf.first = false;
        indexedField = true;
      } else {
        pf.invert(docID, field, false);
      }
    }

    // Add stored fields
    if (fieldType.stored()) {
      StoredValue storedValue = field.storedValue();
      if (storedValue == null) {
        throw new IllegalArgumentException("Cannot store a null value");
      } else if (storedValue.getType() == StoredValue.Type.STRING
          && storedValue.getStringValue().length() > IndexWriter.MAX_STORED_STRING_LENGTH) {
        throw new IllegalArgumentException(
            "stored field \""
                + field.name()
                + "\" is too large ("
                + storedValue.getStringValue().length()
                + " characters) to store");
      }
      try {
        storedFieldsConsumer.writeField(pf.fieldInfo, storedValue);
      } catch (Throwable th) {
        onAbortingException(th);
        throw th;
      }
    }

    DocValuesType dvType = fieldType.docValuesType();
    if (dvType != DocValuesType.NONE) {
      indexDocValue(docID, pf, dvType, field);
    }
    if (fieldType.pointDimensionCount() != 0) {
      pf.pointValuesWriter.addPackedValue(docID, field.binaryValue());
    }
    if (fieldType.vectorDimension() != 0) {
      indexVectorValue(docID, pf, fieldType.vectorEncoding(), field);
    }
    return indexedField;
  }

  /**
   * Returns a previously created {@link PerField}, absorbing the type information from {@link
   * FieldType}, and creates a new {@link PerField} if this field name wasn't seen yet.
   */
  private PerField getOrAddPerField(String fieldName, boolean reserved) {
    final int hashPos = fieldName.hashCode() & hashMask;
    PerField pf = fieldHash[hashPos];
    while (pf != null && pf.fieldName.equals(fieldName) == false) {
      pf = pf.next;
    }
    if (pf == null) {
      // first time we encounter field with this name in this segment
      FieldSchema schema = new FieldSchema(fieldName);
      pf =
          new PerField(
              fieldName,
              indexCreatedVersionMajor,
              schema,
              indexWriterConfig.getSimilarity(),
              indexWriterConfig.getInfoStream(),
              indexWriterConfig.getAnalyzer(),
              reserved);
      pf.next = fieldHash[hashPos];
      fieldHash[hashPos] = pf;
      totalFieldCount++;
      // At most 50% load factor:
      if (totalFieldCount >= fieldHash.length / 2) {
        rehash();
      }
      if (totalFieldCount > fields.length) {
        PerField[] newFields =
            new PerField
                [ArrayUtil.oversize(totalFieldCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
        System.arraycopy(fields, 0, newFields, 0, fields.length);
        fields = newFields;
      }
    }
    return pf;
  }

  // update schema for field as seen in a particular document
  private static void updateDocFieldSchema(
      String fieldName, FieldSchema schema, IndexableFieldType fieldType) {
    if (fieldType.indexOptions() != IndexOptions.NONE) {
      schema.setIndexOptions(
          fieldType.indexOptions(), fieldType.omitNorms(), fieldType.storeTermVectors());
    } else {
      // TODO: should this be checked when a fieldType is created?
      verifyUnIndexedFieldType(fieldName, fieldType);
    }
    if (fieldType.docValuesType() != DocValuesType.NONE) {
      schema.setDocValues(fieldType.docValuesType(), fieldType.docValuesSkipIndexType());
    } else if (fieldType.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
      throw new IllegalArgumentException(
          "field '"
              + schema.name
              + "' cannot have docValuesSkipIndexType="
              + fieldType.docValuesSkipIndexType()
              + " without doc values");
    }
    if (fieldType.pointDimensionCount() != 0) {
      schema.setPoints(
          fieldType.pointDimensionCount(),
          fieldType.pointIndexDimensionCount(),
          fieldType.pointNumBytes());
    }
    if (fieldType.vectorDimension() != 0) {
      schema.setVectors(
          fieldType.vectorEncoding(),
          fieldType.vectorSimilarityFunction(),
          fieldType.vectorDimension());
    }
    if (fieldType.getAttributes() != null && fieldType.getAttributes().isEmpty() == false) {
      schema.updateAttributes(fieldType.getAttributes());
    }
  }

  private static void verifyUnIndexedFieldType(String name, IndexableFieldType ft) {
    if (ft.storeTermVectors()) {
      throw new IllegalArgumentException(
          "cannot store term vectors "
              + "for a field that is not indexed (field=\""
              + name
              + "\")");
    }
    if (ft.storeTermVectorPositions()) {
      throw new IllegalArgumentException(
          "cannot store term vector positions "
              + "for a field that is not indexed (field=\""
              + name
              + "\")");
    }
    if (ft.storeTermVectorOffsets()) {
      throw new IllegalArgumentException(
          "cannot store term vector offsets "
              + "for a field that is not indexed (field=\""
              + name
              + "\")");
    }
    if (ft.storeTermVectorPayloads()) {
      throw new IllegalArgumentException(
          "cannot store term vector payloads "
              + "for a field that is not indexed (field=\""
              + name
              + "\")");
    }
  }

  private static void validateMaxVectorDimension(
      String fieldName, int vectorDim, int maxVectorDim) {
    if (vectorDim > maxVectorDim) {
      throw new IllegalArgumentException(
          "Field ["
              + fieldName
              + "] vector's dimensions must be <= ["
              + maxVectorDim
              + "]; got "
              + vectorDim);
    }
  }

  private void validateIndexSortDVType(Sort indexSort, String fieldToValidate, DocValuesType dvType)
      throws IOException {
    for (SortField sortField : indexSort.getSort()) {
      IndexSorter sorter = sortField.getIndexSorter();
      if (sorter == null) {
        throw new IllegalStateException("Cannot sort index with sort order " + sortField);
      }
      sorter.getDocComparator(
          new DocValuesLeafReader() {
            @Override
            public NumericDocValues getNumericDocValues(String field) {
              if (Objects.equals(field, fieldToValidate) && dvType != DocValuesType.NUMERIC) {
                throw new IllegalArgumentException(
                    "SortField "
                        + sortField
                        + " expected field ["
                        + field
                        + "] to be NUMERIC but it is ["
                        + dvType
                        + "]");
              }
              return DocValues.emptyNumeric();
            }

            @Override
            public BinaryDocValues getBinaryDocValues(String field) {
              if (Objects.equals(field, fieldToValidate) && dvType != DocValuesType.BINARY) {
                throw new IllegalArgumentException(
                    "SortField "
                        + sortField
                        + " expected field ["
                        + field
                        + "] to be BINARY but it is ["
                        + dvType
                        + "]");
              }
              return DocValues.emptyBinary();
            }

            @Override
            public SortedDocValues getSortedDocValues(String field) {
              if (Objects.equals(field, fieldToValidate) && dvType != DocValuesType.SORTED) {
                throw new IllegalArgumentException(
                    "SortField "
                        + sortField
                        + " expected field ["
                        + field
                        + "] to be SORTED but it is ["
                        + dvType
                        + "]");
              }
              return DocValues.emptySorted();
            }

            @Override
            public SortedNumericDocValues getSortedNumericDocValues(String field) {
              if (Objects.equals(field, fieldToValidate)
                  && dvType != DocValuesType.SORTED_NUMERIC) {
                throw new IllegalArgumentException(
                    "SortField "
                        + sortField
                        + " expected field ["
                        + field
                        + "] to be SORTED_NUMERIC but it is ["
                        + dvType
                        + "]");
              }
              return DocValues.emptySortedNumeric();
            }

            @Override
            public SortedSetDocValues getSortedSetDocValues(String field) {
              if (Objects.equals(field, fieldToValidate) && dvType != DocValuesType.SORTED_SET) {
                throw new IllegalArgumentException(
                    "SortField "
                        + sortField
                        + " expected field ["
                        + field
                        + "] to be SORTED_SET but it is ["
                        + dvType
                        + "]");
              }
              return DocValues.emptySortedSet();
            }

            @Override
            public FieldInfos getFieldInfos() {
              throw new UnsupportedOperationException();
            }
          },
          0);
    }
  }

  /** Called from processDocument to index one field's doc value */
  private void indexDocValue(int docID, PerField fp, DocValuesType dvType, IndexableField field) {
    switch (dvType) {
      case NUMERIC:
        if (field.numericValue() == null) {
          throw new IllegalArgumentException(
              "field=\"" + fp.fieldInfo.name + "\": null value not allowed");
        }
        ((NumericDocValuesWriter) fp.docValuesWriter)
            .addValue(docID, field.numericValue().longValue());
        break;

      case BINARY:
        ((BinaryDocValuesWriter) fp.docValuesWriter).addValue(docID, field.binaryValue());
        break;

      case SORTED:
        ((SortedDocValuesWriter) fp.docValuesWriter).addValue(docID, field.binaryValue());
        break;

      case SORTED_NUMERIC:
        ((SortedNumericDocValuesWriter) fp.docValuesWriter)
            .addValue(docID, field.numericValue().longValue());
        break;

      case SORTED_SET:
        ((SortedSetDocValuesWriter) fp.docValuesWriter).addValue(docID, field.binaryValue());
        break;

      case NONE:
      default:
        throw new AssertionError("unrecognized DocValues.Type: " + dvType);
    }
  }

  @SuppressWarnings("unchecked")
  private void indexVectorValue(
      int docID, PerField pf, VectorEncoding vectorEncoding, IndexableField field)
      throws IOException {
    switch (vectorEncoding) {
      case BYTE ->
          ((KnnFieldVectorsWriter<byte[]>) pf.knnFieldVectorsWriter)
              .addValue(docID, ((KnnByteVectorField) field).vectorValue());
      case FLOAT32 ->
          ((KnnFieldVectorsWriter<float[]>) pf.knnFieldVectorsWriter)
              .addValue(docID, ((KnnFloatVectorField) field).vectorValue());
    }
  }

  /** Returns a previously created {@link PerField}, or null if this field name wasn't seen yet. */
  private PerField getPerField(String name) {
    final int hashPos = name.hashCode() & hashMask;
    PerField fp = fieldHash[hashPos];
    while (fp != null && !fp.fieldName.equals(name)) {
      fp = fp.next;
    }
    return fp;
  }

  @Override
  public long ramBytesUsed() {
    return bytesUsed.get()
        + storedFieldsConsumer.accountable.ramBytesUsed()
        + termVectorsWriter.accountable.ramBytesUsed()
        + vectorValuesConsumer.getAccountable().ramBytesUsed();
  }

  @Override
  public Collection<Accountable> getChildResources() {
    return List.of(
        storedFieldsConsumer.accountable,
        termVectorsWriter.accountable,
        vectorValuesConsumer.getAccountable());
  }

  /** NOTE: not static: accesses at least termsHash and bytesUsed. */
  private final class PerField implements Comparable<PerField> {
    final String fieldName;
    final int indexCreatedVersionMajor;
    final FieldSchema schema;
    final boolean reserved;
    FieldInfo fieldInfo;
    final Similarity similarity;

    FieldInvertState invertState;
    TermsHashPerField termsHashPerField;

    // Non-null if this field ever had doc values in this
    // segment:
    DocValuesWriter<?> docValuesWriter;

    // Non-null if this field ever had points in this segment:
    PointValuesWriter pointValuesWriter;

    // Non-null if this field had vectors in this segment
    KnnFieldVectorsWriter<?> knnFieldVectorsWriter;

    /** We use this to know when a PerField is seen for the first time in the current document. */
    long fieldGen = -1;

    // Used by the hash table
    PerField next;

    // Lazy init'd:
    NormValuesWriter norms;

    // reused
    TokenStream tokenStream;
    private final InfoStream infoStream;
    private final Analyzer analyzer;
    private boolean first; // first in a document

    PerField(
        String fieldName,
        int indexCreatedVersionMajor,
        FieldSchema schema,
        Similarity similarity,
        InfoStream infoStream,
        Analyzer analyzer,
        boolean reserved) {
      this.fieldName = fieldName;
      this.indexCreatedVersionMajor = indexCreatedVersionMajor;
      this.schema = schema;
      this.similarity = similarity;
      this.infoStream = infoStream;
      this.analyzer = analyzer;
      this.reserved = reserved;
    }

    void reset(int docId) {
      first = true;
      schema.reset(docId);
    }

    void setFieldInfo(FieldInfo fieldInfo) {
      assert this.fieldInfo == null;
      this.fieldInfo = fieldInfo;
    }

    void setInvertState() {
      invertState =
          new FieldInvertState(
              indexCreatedVersionMajor, fieldInfo.name, fieldInfo.getIndexOptions());
      termsHashPerField = termsHash.addField(invertState, fieldInfo);
      if (fieldInfo.omitsNorms() == false) {
        assert norms == null;
        // Even if no documents actually succeed in setting a norm, we still write norms for this
        // segment
        norms = new NormValuesWriter(fieldInfo, bytesUsed);
      }
      if (fieldInfo.hasTermVectors()) {
        termVectorsWriter.setHasVectors();
      }
    }

    @Override
    public int compareTo(PerField other) {
      return this.fieldName.compareTo(other.fieldName);
    }

    public void finish(int docID) throws IOException {
      if (fieldInfo.omitsNorms() == false) {
        long normValue;
        if (invertState.length == 0) {
          // the field exists in this document, but it did not have
          // any indexed tokens, so we assign a default value of zero
          // to the norm
          normValue = 0;
        } else {
          normValue = similarity.computeNorm(invertState);
          if (normValue == 0) {
            throw new IllegalStateException(
                "Similarity " + similarity + " return 0 for non-empty field");
          }
        }
        norms.addValue(docID, normValue);
      }
      termsHashPerField.finish();
    }

    /**
     * Inverts one field for one document; first is true if this is the first time we are seeing
     * this field name in this document.
     */
    public void invert(int docID, IndexableField field, boolean first) throws IOException {
      assert field.fieldType().indexOptions().compareTo(IndexOptions.DOCS) >= 0;

      if (first) {
        // First time we're seeing this field (indexed) in this document
        invertState.reset();
      }

      switch (field.invertableType()) {
        case BINARY:
          invertTerm(docID, field, first);
          break;
        case TOKEN_STREAM:
          invertTokenStream(docID, field, first);
          break;
        default:
          throw new AssertionError();
      }
    }

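    /**
     * Inverts a field whose terms come from a {@link TokenStream}: consumes the stream, checking
     * position increments and offsets, and feeds each term to the terms hash.
     */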
    private void invertTokenStream(int docID, IndexableField field, boolean first)
        throws IOException {
      final boolean analyzed = field.fieldType().tokenized() && analyzer != null;
      /*
       * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
       * when we fail. We expect some caller to eventually deal with the real exception, so we don't want any 'catch' clauses,
       * but rather a finally that takes note of the problem.
       */
      boolean succeededInProcessingField = false;
      try (TokenStream stream = tokenStream = field.tokenStream(analyzer, tokenStream)) {
        // reset the TokenStream to the first token
        stream.reset();
        invertState.setAttributeSource(stream);
        termsHashPerField.start(field, first);

        while (stream.incrementToken()) {

          // If we hit an exception in stream.next below
          // (which is fairly common, e.g. if analyzer
          // chokes on a given document), then it's
          // non-aborting and (above) this one document
          // will be marked as deleted, but still
          // consume a docID

          int posIncr = invertState.posIncrAttribute.getPositionIncrement();
          invertState.position += posIncr;
          if (invertState.position < invertState.lastPosition) {
            if (posIncr == 0) {
              throw new IllegalArgumentException(
                  "first position increment must be > 0 (got 0) for field '" + field.name() + "'");
            } else if (posIncr < 0) {
              throw new IllegalArgumentException(
                  "position increment must be >= 0 (got "
                      + posIncr
                      + ") for field '"
                      + field.name()
                      + "'");
            } else {
              throw new IllegalArgumentException(
                  "position overflowed Integer.MAX_VALUE (got posIncr="
                      + posIncr
                      + " lastPosition="
                      + invertState.lastPosition
                      + " position="
                      + invertState.position
                      + ") for field '"
                      + field.name()
                      + "'");
            }
          } else if (invertState.position > IndexWriter.MAX_POSITION) {
            throw new IllegalArgumentException(
                "position "
                    + invertState.position
                    + " is too large for field '"
                    + field.name()
                    + "': max allowed position is "
                    + IndexWriter.MAX_POSITION);
          }
          invertState.lastPosition = invertState.position;
          if (posIncr == 0) {
            invertState.numOverlap++;
          }

          int startOffset = invertState.offset + invertState.offsetAttribute.startOffset();
          int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
          if (startOffset < invertState.lastStartOffset || endOffset < startOffset) {
            throw new IllegalArgumentException(
                "startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
                    + "startOffset="
                    + startOffset
                    + ",endOffset="
                    + endOffset
                    + ",lastStartOffset="
                    + invertState.lastStartOffset
                    + " for field '"
                    + field.name()
                    + "'");
          }
          invertState.lastStartOffset = startOffset;

          try {
            invertState.length =
                Math.addExact(invertState.length, invertState.termFreqAttribute.getTermFrequency());
          } catch (ArithmeticException ae) {
            throw new IllegalArgumentException(
                "too many tokens for field \"" + field.name() + "\"", ae);
          }

          // System.out.println("  term=" + invertState.termAttribute);

          // If we hit an exception in here, we abort
          // all buffered documents since the last
          // flush, on the likelihood that the
          // internal state of the terms hash is now
          // corrupt and should not be flushed to a
          // new segment:
          try {
            termsHashPerField.add(invertState.termAttribute.getBytesRef(), docID);
          } catch (MaxBytesLengthExceededException e) {
            byte[] prefix = new byte[30];
            BytesRef bigTerm = invertState.termAttribute.getBytesRef();
            System.arraycopy(bigTerm.bytes, bigTerm.offset, prefix, 0, 30);
            String msg =
                "Document contains at least one immense term in field=\""
                    + fieldInfo.name
                    + "\" (whose UTF8 encoding is longer than the max length "
                    + IndexWriter.MAX_TERM_LENGTH
                    + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '"
                    + Arrays.toString(prefix)
                    + "...', original message: "
                    + e.getMessage();
            if (infoStream.isEnabled("IW")) {
              infoStream.message("IW", "ERROR: " + msg);
            }
            // Document will be deleted above:
            throw new IllegalArgumentException(msg, e);
          } catch (Throwable th) {
            onAbortingException(th);
            throw th;
          }
        }

        // trigger streams to perform end-of-stream operations
        stream.end();

        // TODO: maybe add some safety? then again, it's already checked
        // when we come back around to the field...
        invertState.position += invertState.posIncrAttribute.getPositionIncrement();
        invertState.offset += invertState.offsetAttribute.endOffset();

        /* if there is an exception coming through, we won't set this to true here:*/
        succeededInProcessingField = true;
      } finally {
        if (!succeededInProcessingField && infoStream.isEnabled("DW")) {
          infoStream.message(
              "DW", "An exception was thrown while processing field " + fieldInfo.name);
        }
      }

      if (analyzed) {
        invertState.position += analyzer.getPositionIncrementGap(fieldInfo.name);
        invertState.offset += analyzer.getOffsetGap(fieldInfo.name);
      }
    }

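    /**
     * Inverts a single pre-analyzed binary term (see {@link InvertableType#BINARY}), bypassing the
     * analyzer entirely.
     */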
    private void invertTerm(int docID, IndexableField field, boolean first) throws IOException {
      BytesRef binaryValue = field.binaryValue();
      if (binaryValue == null) {
        throw new IllegalArgumentException(
            "Field "
                + field.name()
                + " returns TERM for invertableType() and null for binaryValue(), which is illegal");
      }
      final IndexableFieldType fieldType = field.fieldType();
      if (fieldType.tokenized()
          || fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) > 0
          || fieldType.storeTermVectorPositions()
          || fieldType.storeTermVectorOffsets()
          || fieldType.storeTermVectorPayloads()) {
        throw new IllegalArgumentException(
            "Fields that are tokenized or index proximity data must produce a non-null TokenStream, but "
                + field.name()
                + " did not");
      }
      invertState.setAttributeSource(null);
      invertState.position++;
      invertState.length++;
      termsHashPerField.start(field, first);
      try {
        termsHashPerField.add(binaryValue, docID);
      } catch (MaxBytesLengthExceededException e) {
        byte[] prefix = new byte[30];
        System.arraycopy(binaryValue.bytes, binaryValue.offset, prefix, 0, 30);
        String msg =
            "Document contains at least one immense term in field=\""
                + fieldInfo.name
                + "\" (whose length is longer than the max length "
                + IndexWriter.MAX_TERM_LENGTH
                + "), all of which were skipped. The prefix of the first immense term is: '"
                + Arrays.toString(prefix)
                + "...'";
        if (infoStream.isEnabled("IW")) {
          infoStream.message("IW", "ERROR: " + msg);
        }
        throw new IllegalArgumentException(msg, e);
      }
    }
  }

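  /**
   * Returns an iterator over the documents that currently have a buffered doc value for the given
   * field, or null if the field is unknown or has no doc values.
   */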
  DocIdSetIterator getHasDocValues(String field) {
    PerField perField = getPerField(field);
    if (perField != null) {
      if (perField.docValuesWriter != null) {
        if (perField.fieldInfo.getDocValuesType() == DocValuesType.NONE) {
          return null;
        }

        return perField.docValuesWriter.getDocValues();
      }
    }
    return null;
  }

  private static class IntBlockAllocator extends IntBlockPool.Allocator {
    private final Counter bytesUsed;

    IntBlockAllocator(Counter bytesUsed) {
      super(IntBlockPool.INT_BLOCK_SIZE);
      this.bytesUsed = bytesUsed;
    }

    /* Allocate another int[] from the shared pool */
    @Override
    public int[] getIntBlock() {
      int[] b = new int[IntBlockPool.INT_BLOCK_SIZE];
      bytesUsed.addAndGet(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES);
      return b;
    }

    @Override
    public void recycleIntBlocks(int[][] blocks, int offset, int length) {
      bytesUsed.addAndGet(-(length * (IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES)));
    }
  }

  /**
   * A schema of the field in the current document. With every new document this schema is reset.
   * As the document fields are processed, we update the schema with options encountered in this
   * document. Once the processing for the document is done, we compare the built schema of the
   * current document with the corresponding FieldInfo (FieldInfo is built on the first document in
   * the segment in which we encounter this field). If there is an inconsistency, we raise an
   * error. This ensures that a field has the same data structures across all documents.
   */
  private static final class FieldSchema {
    private final String name;
    private int docID = 0;
    private final Map<String, String> attributes = new HashMap<>();
    private boolean omitNorms = false;
    private boolean storeTermVector = false;
    private IndexOptions indexOptions = IndexOptions.NONE;
    private DocValuesType docValuesType = DocValuesType.NONE;
    private DocValuesSkipIndexType docValuesSkipIndex = DocValuesSkipIndexType.NONE;
    private int pointDimensionCount = 0;
    private int pointIndexDimensionCount = 0;
    private int pointNumBytes = 0;
    private int vectorDimension = 0;
    private VectorEncoding vectorEncoding = VectorEncoding.FLOAT32;
    private VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN;

    private static final String errMsg =
        "Inconsistency of field data structures across documents for field ";

    FieldSchema(String name) {
      this.name = name;
    }

    private void assertSame(String label, boolean expected, boolean given) {
      if (expected != given) {
        raiseNotSame(label, expected, given);
      }
    }

    private void assertSame(String label, int expected, int given) {
      if (expected != given) {
        raiseNotSame(label, expected, given);
      }
    }

    private <T extends Enum<T>> void assertSame(String label, T expected, T given) {
      if (expected != given) {
        raiseNotSame(label, expected, given);
      }
    }

    private void raiseNotSame(String label, Object expected, Object given) {
      throw new IllegalArgumentException(
          errMsg
              + "["
              + name
              + "] of doc ["
              + docID
              + "]. "
              + label
              + ": expected '"
              + expected
              + "', but it has '"
              + given
              + "'.");
    }

    void updateAttributes(Map<String, String> attrs) {
      attrs.forEach((k, v) -> this.attributes.put(k, v));
    }

    void setIndexOptions(
        IndexOptions newIndexOptions, boolean newOmitNorms, boolean newStoreTermVector) {
      if (indexOptions == IndexOptions.NONE) {
        indexOptions = newIndexOptions;
        omitNorms = newOmitNorms;
        storeTermVector = newStoreTermVector;
      } else {
        assertSame("index options", indexOptions, newIndexOptions);
        assertSame("omit norms", omitNorms, newOmitNorms);
        assertSame("store term vector", storeTermVector, newStoreTermVector);
      }
    }

    void setDocValues(
        DocValuesType newDocValuesType, DocValuesSkipIndexType newDocValuesSkipIndex) {
      if (docValuesType == DocValuesType.NONE) {
        this.docValuesType = newDocValuesType;
        this.docValuesSkipIndex = newDocValuesSkipIndex;
      } else {
        assertSame("doc values type", docValuesType, newDocValuesType);
        assertSame("doc values skip index type", docValuesSkipIndex, newDocValuesSkipIndex);
      }
    }

    void setPoints(int dimensionCount, int indexDimensionCount, int numBytes) {
      if (pointIndexDimensionCount == 0) {
        pointDimensionCount = dimensionCount;
        pointIndexDimensionCount = indexDimensionCount;
        pointNumBytes = numBytes;
      } else {
        assertSame("point dimension", pointDimensionCount, dimensionCount);
        assertSame("point index dimension", pointIndexDimensionCount, indexDimensionCount);
        assertSame("point num bytes", pointNumBytes, numBytes);
      }
    }

    void setVectors(
        VectorEncoding encoding, VectorSimilarityFunction similarityFunction, int dimension) {
      if (vectorDimension == 0) {
        this.vectorEncoding = encoding;
        this.vectorSimilarityFunction = similarityFunction;
        this.vectorDimension = dimension;
      } else {
        assertSame("vector encoding", vectorEncoding, encoding);
        assertSame("vector similarity function", vectorSimilarityFunction, similarityFunction);
        assertSame("vector dimension", vectorDimension, dimension);
      }
    }

    void reset(int doc) {
      docID = doc;
      omitNorms = false;
      storeTermVector = false;
      indexOptions = IndexOptions.NONE;
      docValuesType = DocValuesType.NONE;
      pointDimensionCount = 0;
      pointIndexDimensionCount = 0;
      pointNumBytes = 0;
      vectorDimension = 0;
      vectorEncoding = VectorEncoding.FLOAT32;
      vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN;
    }

    void assertSameSchema(FieldInfo fi) {
      assertSame("index options", fi.getIndexOptions(), indexOptions);
      assertSame("omit norms", fi.omitsNorms(), omitNorms);
      assertSame("store term vector", fi.hasTermVectors(), storeTermVector);
      assertSame("doc values type", fi.getDocValuesType(), docValuesType);
      assertSame("doc values skip index type", fi.docValuesSkipIndexType(), docValuesSkipIndex);
      assertSame(
          "vector similarity function", fi.getVectorSimilarityFunction(), vectorSimilarityFunction);
      assertSame("vector encoding", fi.getVectorEncoding(), vectorEncoding);
      assertSame("vector dimension", fi.getVectorDimension(), vectorDimension);
      assertSame("point dimension", fi.getPointDimensionCount(), pointDimensionCount);
      assertSame(
          "point index dimension", fi.getPointIndexDimensionCount(), pointIndexDimensionCount);
      assertSame("point num bytes", fi.getPointNumBytes(), pointNumBytes);
    }
  }

  /**
   * Wraps the given field in a reserved field and registers it as reserved. Only DWPT should do
   * this to mark fields as private / reserved, preventing the field name from being used outside
   * of the IW / DWPT ecosystem.
   */
  <T extends IndexableField> ReservedField<T> markAsReserved(T field) {
    getOrAddPerField(field.name(), true);
    return new ReservedField<>(field);
  }

  static final class ReservedField<T extends IndexableField> implements IndexableField {

    private final T delegate;

    private ReservedField(T delegate) {
      this.delegate = delegate;
    }

    T getDelegate() {
      return delegate;
    }

    @Override
    public String name() {
      return delegate.name();
    }

    @Override
    public IndexableFieldType fieldType() {
      return delegate.fieldType();
    }

    @Override
    public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
      return delegate.tokenStream(analyzer, reuse);
    }

    @Override
    public BytesRef binaryValue() {
      return delegate.binaryValue();
    }

    @Override
    public String stringValue() {
      return delegate.stringValue();
    }

    @Override
    public CharSequence getCharSequenceValue() {
      return delegate.getCharSequenceValue();
    }

    @Override
    public Reader readerValue() {
      return delegate.readerValue();
    }

    @Override
    public Number numericValue() {
      return delegate.numericValue();
    }

    @Override
    public StoredValue storedValue() {
      return delegate.storedValue();
    }

    @Override
    public InvertableType invertableType() {
      return delegate.invertableType();
    }
  }
}
