
org.apache.lucene.index.ReadersAndUpdates Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.MutableBits;

// Used by IndexWriter to hold open SegmentReaders (for
// searching or merging), plus pending deletes and updates,
// for a given segment
class ReadersAndUpdates {
  // The per-segment commit metadata (delete and doc-values generations and
  // the files each generation wrote); shared with IndexWriter's SegmentInfos:
  public final SegmentCommitInfo info;

  // Tracks how many consumers are using this instance:
  private final AtomicInteger refCount = new AtomicInteger(1);
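  // A sketch of the intended lifecycle (an assumption from the ref-counting
  // methods below, not a verbatim IndexWriter excerpt): each consumer
  // balances incRef() with decRef(), and getReader() separately refs the
  // returned SegmentReader:
  //
  //   rld.incRef();
  //   try {
  //     SegmentReader r = rld.getReader(IOContext.READ); // adds a reader ref
  //     // ... search or merge with r, then give back the reader's ref:
  //     r.decRef();
  //   } finally {
  //     rld.decRef();
  //   }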

  private final IndexWriter writer;

  // Created lazily (null until first needed); once set, it is only swapped
  // for a reopened reader when field updates are written (see
  // writeFieldUpdates below):
  private SegmentReader reader;

  // Holds the current shared (readable and writable)
  // liveDocs.  This is null when there are no deleted
  // docs, and it's copy-on-write (cloned whenever we need
  // to change it after it has been shared with an external
  // NRT reader).
  private Bits liveDocs;

  // How many further deletions we've done against
  // liveDocs vs when we loaded it or last wrote it:
  private int pendingDeleteCount;
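  // Worked example of the bookkeeping (illustrative numbers): with
  // maxDoc=100, info.getDelCount()=10 deletes already committed, and 5 more
  // buffered here, pendingDeleteCount=5 and the reader reports
  // numDeletedDocs()=15; the live count is 100 - 10 - 5 = 85, which is
  // exactly the invariant verifyDocCounts() asserts below.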

  // True if the current liveDocs is referenced by an
  // external NRT reader:
  private boolean liveDocsShared;
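  // A minimal sketch of the copy-on-write step (an assumption; the method
  // that performs it falls in a part of this listing lost during
  // extraction): before mutating liveDocs after it has been handed to an NRT
  // reader, the writer replaces it with a private clone obtained from the
  // codec's LiveDocsFormat:
  //
  //   if (liveDocsShared) {
  //     LiveDocsFormat fmt = info.info.getCodec().liveDocsFormat();
  //     liveDocs = liveDocs == null ? fmt.newLiveDocs(info.info.maxDoc())
  //                                 : fmt.newLiveDocs(liveDocs);
  //     liveDocsShared = false;
  //   }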

  // Indicates whether this segment is currently being merged. While a segment
  // is merging, all field updates are also registered in the
  // mergingDVUpdates map. Also, calls to writeFieldUpdates merge the
  // updates with mergingDVUpdates.
  // That way, when the segment is done merging, IndexWriter can apply the
  // updates on the merged segment too.
  private boolean isMerging = false;
  
  private final Map<String,DocValuesFieldUpdates> mergingDVUpdates = new HashMap<>();
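  // How the fold-in works (see the isMerging branch of writeFieldUpdates
  // below): the first updates flushed for a field while merging are put()
  // directly; later flushes for the same field are combined into the stored
  // instance via DocValuesFieldUpdates.merge(), so this map always holds one
  // cumulative updates object per field for IndexWriter to re-apply to the
  // merged segment.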
  
  public ReadersAndUpdates(IndexWriter writer, SegmentCommitInfo info) {
    this.writer = writer;
    this.info = info;
    liveDocsShared = true;
  }

  /** Init from a previously opened SegmentReader.
   *
   * <p>NOTE: steals incoming ref from reader. */
  public ReadersAndUpdates(IndexWriter writer, SegmentReader reader) {
    this.writer = writer;
    this.reader = reader;
    info = reader.getSegmentInfo();
    liveDocs = reader.getLiveDocs();
    liveDocsShared = true;
    pendingDeleteCount = reader.numDeletedDocs() - info.getDelCount();
    assert pendingDeleteCount >= 0: "got " + pendingDeleteCount + " reader.numDeletedDocs()=" + reader.numDeletedDocs() + " info.getDelCount()=" + info.getDelCount() + " maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs();
  }

  public void incRef() {
    final int rc = refCount.incrementAndGet();
    assert rc > 1;
  }

  public void decRef() {
    final int rc = refCount.decrementAndGet();
    assert rc >= 0;
  }

  public int refCount() {
    final int rc = refCount.get();
    assert rc >= 0;
    return rc;
  }

  public synchronized int getPendingDeleteCount() {
    return pendingDeleteCount;
  }

  // Call only from assert!
  public synchronized boolean verifyDocCounts() {
    int count;
    if (liveDocs != null) {
      count = 0;
      // count the documents still marked live and check them against the
      // committed del count plus our pending (unflushed) deletes
      for (int docID = 0; docID < info.info.maxDoc(); docID++) {
        if (liveDocs.get(docID)) {
          count++;
        }
      }
    } else {
      count = info.info.maxDoc();
    }
    assert info.info.maxDoc() - info.getDelCount() - pendingDeleteCount == count;
    return true;
  }

  // [Extraction lost a span of the original listing here: the tail of
  // verifyDocCounts (rebuilt above from its documented invariant) and the
  // reader/live-docs lifecycle methods that sit between verifyDocCounts and
  // handleNumericDVUpdates, among them getReader, which getReaderForMerge
  // relies on below.]

  @SuppressWarnings("synthetic-access")
  private void handleNumericDVUpdates(FieldInfos infos, Map<String,NumericDocValuesFieldUpdates> updates,
      Directory dir, DocValuesFormat dvFormat, final SegmentReader reader,
      Map<Integer,Set<String>> fieldFiles) throws IOException {
    for (Entry<String,NumericDocValuesFieldUpdates> e : updates.entrySet()) {
      final String field = e.getKey();
      final NumericDocValuesFieldUpdates fieldUpdates = e.getValue();

      final long nextDocValuesGen = info.getNextDocValuesGen();
      final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX);
      final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() * info.info.maxDoc();
      final IOContext updatesContext = new IOContext(new FlushInfo(info.info.maxDoc(), estUpdatesSize));
      final FieldInfo fieldInfo = infos.fieldInfo(field);
      assert fieldInfo != null;
      fieldInfo.setDocValuesGen(nextDocValuesGen);
      final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo });
      // separately also track which files were created for this gen
      final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
      final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
      try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
        // write the numeric updates to a new gen'd docvalues file
        fieldsConsumer.addNumericField(fieldInfo, new Iterable<Number>() {
          final NumericDocValues currentValues = reader.getNumericDocValues(field);
          final Bits docsWithField = reader.getDocsWithField(field);
          final int maxDoc = reader.maxDoc();
          final NumericDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator();
          @Override
          public Iterator<Number> iterator() {
            updatesIter.reset();
            return new Iterator<Number>() {

              int curDoc = -1;
              int updateDoc = updatesIter.nextDoc();

              @Override
              public boolean hasNext() {
                return curDoc < maxDoc - 1;
              }

              @Override
              public Number next() {
                if (++curDoc >= maxDoc) {
                  throw new NoSuchElementException("no more documents to return values for");
                }
                if (curDoc == updateDoc) { // this document has an updated value
                  Long value = updatesIter.value(); // either null (unset value) or updated value
                  updateDoc = updatesIter.nextDoc(); // prepare for next round
                  return value;
                } else {
                  // no update for this document
                  assert curDoc < updateDoc;
                  if (currentValues != null && docsWithField.get(curDoc)) {
                    // only read the current value if the document had a value before
                    return currentValues.get(curDoc);
                  } else {
                    return null;
                  }
                }
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException("this iterator does not support removing elements");
              }
            };
          }
        });
      }
      info.advanceDocValuesGen();
      assert !fieldFiles.containsKey(fieldInfo.number);
      fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles());
    }
  }

  @SuppressWarnings("synthetic-access")
  private void handleBinaryDVUpdates(FieldInfos infos, Map<String,BinaryDocValuesFieldUpdates> updates,
      TrackingDirectoryWrapper dir, DocValuesFormat dvFormat, final SegmentReader reader,
      Map<Integer,Set<String>> fieldFiles) throws IOException {
    for (Entry<String,BinaryDocValuesFieldUpdates> e : updates.entrySet()) {
      final String field = e.getKey();
      final BinaryDocValuesFieldUpdates fieldUpdates = e.getValue();

      final long nextDocValuesGen = info.getNextDocValuesGen();
      final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX);
      final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() * info.info.maxDoc();
      final IOContext updatesContext = new IOContext(new FlushInfo(info.info.maxDoc(), estUpdatesSize));
      final FieldInfo fieldInfo = infos.fieldInfo(field);
      assert fieldInfo != null;
      fieldInfo.setDocValuesGen(nextDocValuesGen);
      final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo });
      // separately also track which files were created for this gen
      final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
      final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
      try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
        // write the binary updates to a new gen'd docvalues file
        fieldsConsumer.addBinaryField(fieldInfo, new Iterable<BytesRef>() {
          final BinaryDocValues currentValues = reader.getBinaryDocValues(field);
          final Bits docsWithField = reader.getDocsWithField(field);
          final int maxDoc = reader.maxDoc();
          final BinaryDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator();
          @Override
          public Iterator<BytesRef> iterator() {
            updatesIter.reset();
            return new Iterator<BytesRef>() {

              int curDoc = -1;
              int updateDoc = updatesIter.nextDoc();

              @Override
              public boolean hasNext() {
                return curDoc < maxDoc - 1;
              }

              @Override
              public BytesRef next() {
                if (++curDoc >= maxDoc) {
                  throw new NoSuchElementException("no more documents to return values for");
                }
                if (curDoc == updateDoc) { // this document has an updated value
                  BytesRef value = updatesIter.value(); // either null (unset value) or updated value
                  updateDoc = updatesIter.nextDoc(); // prepare for next round
                  return value;
                } else {
                  // no update for this document
                  assert curDoc < updateDoc;
                  if (currentValues != null && docsWithField.get(curDoc)) {
                    // only read the current value if the document had a value before
                    return currentValues.get(curDoc);
                  } else {
                    return null;
                  }
                }
              }

              @Override
              public void remove() {
                throw new UnsupportedOperationException("this iterator does not support removing elements");
              }
            };
          }
        });
      }
      info.advanceDocValuesGen();
      assert !fieldFiles.containsKey(fieldInfo.number);
      fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles());
    }
  }

  private Set<String> writeFieldInfosGen(FieldInfos fieldInfos, Directory dir, DocValuesFormat dvFormat,
      FieldInfosFormat infosFormat) throws IOException {
    final long nextFieldInfosGen = info.getNextFieldInfosGen();
    final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
    // we write approximately that many bytes (based on Lucene46DVF):
    // HEADER + FOOTER: 40
    // 90 bytes per-field (over estimating long name and attributes map)
    final long estInfosSize = 40 + 90 * fieldInfos.size();
    final IOContext infosContext = new IOContext(new FlushInfo(info.info.maxDoc(), estInfosSize));
    // separately also track which files were created for this gen
    final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
    infosFormat.write(trackingDir,
                      info.info, segmentSuffix, fieldInfos, infosContext);
    info.advanceFieldInfosGen();
    return trackingDir.getCreatedFiles();
  }

  // Writes field updates (new _X_N updates files) to the directory
  public synchronized void writeFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates) throws IOException {
    assert Thread.holdsLock(writer);
    //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates);

    assert dvUpdates.any();

    // Do this so we can delete any created files on
    // exception; this saves all codecs from having to do
    // it:
    TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);

    final Map<Integer,Set<String>> newDVFiles = new HashMap<>();
    Set<String> fieldInfosFiles = null;
    FieldInfos fieldInfos = null;
    boolean success = false;
    try {
      final Codec codec = info.info.getCodec();

      // reader could be null e.g. for a just merged segment (from
      // IndexWriter.commitMergedDeletes).
      final SegmentReader reader = this.reader == null ? new SegmentReader(info, IOContext.READONCE) : this.reader;
      try {
        // clone FieldInfos so that we can update their dvGen separately from
        // the reader's infos and write them to a new fieldInfos_gen file
        FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap);
        // cannot use builder.add(reader.getFieldInfos()) because it does not
        // clone FI.attributes as well as FI.dvGen
        for (FieldInfo fi : reader.getFieldInfos()) {
          FieldInfo clone = builder.add(fi);
          // copy the stuff FieldInfos.Builder doesn't copy
          for (Entry<String,String> e : fi.attributes().entrySet()) {
            clone.putAttribute(e.getKey(), e.getValue());
          }
          clone.setDocValuesGen(fi.getDocValuesGen());
        }
        // create new fields or update existing ones to have NumericDV type
        for (String f : dvUpdates.numericDVUpdates.keySet()) {
          FieldInfo fieldInfo = builder.getOrAdd(f);
          fieldInfo.setDocValuesType(DocValuesType.NUMERIC);
        }
        // create new fields or update existing ones to have BinaryDV type
        for (String f : dvUpdates.binaryDVUpdates.keySet()) {
          FieldInfo fieldInfo = builder.getOrAdd(f);
          fieldInfo.setDocValuesType(DocValuesType.BINARY);
        }

        fieldInfos = builder.finish();
        final DocValuesFormat docValuesFormat = codec.docValuesFormat();

        // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates);
        handleNumericDVUpdates(fieldInfos, dvUpdates.numericDVUpdates, trackingDir, docValuesFormat, reader, newDVFiles);

        // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates);
        handleBinaryDVUpdates(fieldInfos, dvUpdates.binaryDVUpdates, trackingDir, docValuesFormat, reader, newDVFiles);

        // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: write fieldInfos; seg=" + info);
        fieldInfosFiles = writeFieldInfosGen(fieldInfos, trackingDir, docValuesFormat, codec.fieldInfosFormat());
      } finally {
        if (reader != this.reader) {
          // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader);
          reader.close();
        }
      }

      success = true;
    } finally {
      if (!success) {
        // Advance only the nextWriteFieldInfosGen and nextWriteDocValuesGen, so
        // that a 2nd attempt to write will write to a new file
        info.advanceNextWriteFieldInfosGen();
        info.advanceNextWriteDocValuesGen();

        // Delete any partially created file(s):
        for (String fileName : trackingDir.getCreatedFiles()) {
          IOUtils.deleteFilesIgnoringExceptions(dir, fileName);
        }
      }
    }
    // copy all the updates to mergingDVUpdates, so they can later be applied
    // to the merged segment
    if (isMerging) {
      for (Entry<String,NumericDocValuesFieldUpdates> e : dvUpdates.numericDVUpdates.entrySet()) {
        DocValuesFieldUpdates updates = mergingDVUpdates.get(e.getKey());
        if (updates == null) {
          mergingDVUpdates.put(e.getKey(), e.getValue());
        } else {
          updates.merge(e.getValue());
        }
      }
      for (Entry<String,BinaryDocValuesFieldUpdates> e : dvUpdates.binaryDVUpdates.entrySet()) {
        DocValuesFieldUpdates updates = mergingDVUpdates.get(e.getKey());
        if (updates == null) {
          mergingDVUpdates.put(e.getKey(), e.getValue());
        } else {
          updates.merge(e.getValue());
        }
      }
    }

    // writing field updates succeeded
    assert fieldInfosFiles != null;
    info.setFieldInfosFiles(fieldInfosFiles);

    // update the doc-values updates files. the files map each field to its set
    // of files, hence we copy from the existing map all fields w/ updates that
    // were not updated in this session, and add new mappings for fields that
    // were updated now.
    assert !newDVFiles.isEmpty();
    for (Entry<Integer,Set<String>> e : info.getDocValuesUpdatesFiles().entrySet()) {
      if (!newDVFiles.containsKey(e.getKey())) {
        newDVFiles.put(e.getKey(), e.getValue());
      }
    }
    info.setDocValuesUpdatesFiles(newDVFiles);

    // wrote new files, should checkpoint()
    writer.checkpoint();

    // if there is a reader open, reopen it to reflect the updates
    if (reader != null) {
      SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.info.maxDoc() - info.getDelCount() - pendingDeleteCount);
      boolean reopened = false;
      try {
        reader.decRef();
        reader = newReader;
        reopened = true;
      } finally {
        if (!reopened) {
          newReader.decRef();
        }
      }
    }
  }

  /**
   * Returns a reader for merge. This method applies field updates if there are
   * any and marks that this segment is currently merging.
   */
  synchronized SegmentReader getReaderForMerge(IOContext context) throws IOException {
    assert Thread.holdsLock(writer);
    // must execute these two statements as atomic operation, otherwise we
    // could lose updates if e.g. another thread calls writeFieldUpdates in
    // between, or the updates are applied to the obtained reader, but then
    // re-applied in IW.commitMergedDeletes (unnecessary work and potential
    // bugs).
    isMerging = true;
    return getReader(context);
  }

  /**
   * Drops all merging updates. Called from IndexWriter after this segment
   * finished merging (whether successfully or not).
   */
  public synchronized void dropMergingUpdates() {
    mergingDVUpdates.clear();
    isMerging = false;
  }

  /** Returns updates that came in while this segment was merging. */
  public synchronized Map<String,DocValuesFieldUpdates> getMergingFieldUpdates() {
    return mergingDVUpdates;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("ReadersAndUpdates(seg=").append(info);
    sb.append(" pendingDeleteCount=").append(pendingDeleteCount);
    sb.append(" liveDocsShared=").append(liveDocsShared);
    sb.append(')');
    return sb.toString();
  }
}