All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.lucene.index.ReadersAndUpdates Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;


import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.LiveDocsFormat;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.MutableBits;

// Used by IndexWriter to hold open SegmentReaders (for
// searching or merging), plus pending deletes and updates,
// for a given segment
class ReadersAndUpdates {
  // The segment this instance holds readers and pending changes for.
  // Declared final; each constructor assigns it exactly once.
  public final SegmentCommitInfo info;

  // Tracks how many consumers are using this instance. Starts at 1 and is
  // adjusted by incRef()/decRef():
  private final AtomicInteger refCount = new AtomicInteger(1);

  // The IndexWriter passed to the constructor. writeFieldUpdates and
  // getReaderForMerge assert that the calling thread holds its monitor.
  private final IndexWriter writer;

  // Null until a reader is available (the SegmentReader constructor sets it
  // right away).
  // NOTE(review): this is not strictly "set once" -- writeFieldUpdates
  // decRefs the current reader and replaces it with a reopened SegmentReader
  // when one is open.
  private SegmentReader reader;

  // Holds the current shared (readable and writable)
  // liveDocs.  This is null when there are no deleted
  // docs, and it's copy-on-write (cloned whenever we need
  // to change it but it's been shared to an external NRT
  // reader).
  private Bits liveDocs;

  // How many further deletions we've done against
  // liveDocs vs when we loaded it or last wrote it:
  private int pendingDeleteCount;

  // True if the current liveDocs is referenced by an
  // external NRT reader. Both constructors initialize it to true:
  private boolean liveDocsShared;

  // Indicates whether this segment is currently being merged. Set by
  // getReaderForMerge and cleared by dropMergingUpdates. While a segment
  // is merging, writeFieldUpdates also copies every numeric and binary
  // doc-values update it writes into the mergingDVUpdates map (merging it
  // into the entry already registered for that field, if any).
  // That way, when the segment is done merging, IndexWriter can apply the
  // updates on the merged segment too.
  private boolean isMerging = false;
  
  // Updates collected while isMerging is set, keyed by the same keys (field
  // names) as the numeric/binary update maps they were copied from. Returned
  // as-is by getMergingFieldUpdates and emptied by dropMergingUpdates.
  private final Map mergingDVUpdates = new HashMap<>();
  
  /**
   * Creates an instance for the given segment without attaching a reader;
   * the {@code reader} and {@code liveDocs} fields keep their default
   * {@code null} values.
   *
   * @param writer the IndexWriter this instance belongs to
   * @param info   the segment to track
   */
  public ReadersAndUpdates(IndexWriter writer, SegmentCommitInfo info) {
    this.info = info;
    this.writer = writer;
    // Start out flagged as shared, matching the reader-based constructor.
    this.liveDocsShared = true;
  }

  /** Init from a previously opened SegmentReader.
   *
   * 

NOTE: steals incoming ref from reader. */ public ReadersAndUpdates(IndexWriter writer, SegmentReader reader) { this.writer = writer; this.reader = reader; info = reader.getSegmentInfo(); liveDocs = reader.getLiveDocs(); liveDocsShared = true; pendingDeleteCount = reader.numDeletedDocs() - info.getDelCount(); assert pendingDeleteCount >= 0: "got " + pendingDeleteCount + " reader.numDeletedDocs()=" + reader.numDeletedDocs() + " info.getDelCount()=" + info.getDelCount() + " maxDoc=" + reader.maxDoc() + " numDocs=" + reader.numDocs(); } public void incRef() { final int rc = refCount.incrementAndGet(); assert rc > 1; } public void decRef() { final int rc = refCount.decrementAndGet(); assert rc >= 0; } public int refCount() { final int rc = refCount.get(); assert rc >= 0; return rc; } public synchronized int getPendingDeleteCount() { return pendingDeleteCount; } // Call only from assert! public synchronized boolean verifyDocCounts() { int count; if (liveDocs != null) { count = 0; for(int docID=0;docID updates, Directory dir, DocValuesFormat dvFormat, final SegmentReader reader, Map> fieldFiles) throws IOException { for (Entry e : updates.entrySet()) { final String field = e.getKey(); final NumericDocValuesFieldUpdates fieldUpdates = e.getValue(); final long nextDocValuesGen = info.getNextDocValuesGen(); final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX); final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() * info.info.maxDoc(); final IOContext updatesContext = new IOContext(new FlushInfo(info.info.maxDoc(), estUpdatesSize)); final FieldInfo fieldInfo = infos.fieldInfo(field); assert fieldInfo != null; fieldInfo.setDocValuesGen(nextDocValuesGen); final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo }); // separately also track which files were created for this gen final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); final SegmentWriteState state = new SegmentWriteState(null, trackingDir, 
info.info, fieldInfos, null, updatesContext, segmentSuffix); try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) { // write the numeric updates to a new gen'd docvalues file fieldsConsumer.addNumericField(fieldInfo, new Iterable() { final NumericDocValues currentValues = reader.getNumericDocValues(field); final Bits docsWithField = reader.getDocsWithField(field); final int maxDoc = reader.maxDoc(); final NumericDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator(); @Override public Iterator iterator() { updatesIter.reset(); return new Iterator() { int curDoc = -1; int updateDoc = updatesIter.nextDoc(); @Override public boolean hasNext() { return curDoc < maxDoc - 1; } @Override public Number next() { if (++curDoc >= maxDoc) { throw new NoSuchElementException("no more documents to return values for"); } if (curDoc == updateDoc) { // this document has an updated value Long value = updatesIter.value(); // either null (unset value) or updated value updateDoc = updatesIter.nextDoc(); // prepare for next round return value; } else { // no update for this document assert curDoc < updateDoc; if (currentValues != null && docsWithField.get(curDoc)) { // only read the current value if the document had a value before return currentValues.get(curDoc); } else { return null; } } } @Override public void remove() { throw new UnsupportedOperationException("this iterator does not support removing elements"); } }; } }); } info.advanceDocValuesGen(); assert !fieldFiles.containsKey(fieldInfo.number); fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles()); } } @SuppressWarnings("synthetic-access") private void handleBinaryDVUpdates(FieldInfos infos, Map updates, TrackingDirectoryWrapper dir, DocValuesFormat dvFormat, final SegmentReader reader, Map> fieldFiles) throws IOException { for (Entry e : updates.entrySet()) { final String field = e.getKey(); final BinaryDocValuesFieldUpdates fieldUpdates = e.getValue(); final long 
nextDocValuesGen = info.getNextDocValuesGen(); final String segmentSuffix = Long.toString(nextDocValuesGen, Character.MAX_RADIX); final long estUpdatesSize = fieldUpdates.ramBytesPerDoc() * info.info.maxDoc(); final IOContext updatesContext = new IOContext(new FlushInfo(info.info.maxDoc(), estUpdatesSize)); final FieldInfo fieldInfo = infos.fieldInfo(field); assert fieldInfo != null; fieldInfo.setDocValuesGen(nextDocValuesGen); final FieldInfos fieldInfos = new FieldInfos(new FieldInfo[] { fieldInfo }); // separately also track which files were created for this gen final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix); try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) { // write the binary updates to a new gen'd docvalues file fieldsConsumer.addBinaryField(fieldInfo, new Iterable() { final BinaryDocValues currentValues = reader.getBinaryDocValues(field); final Bits docsWithField = reader.getDocsWithField(field); final int maxDoc = reader.maxDoc(); final BinaryDocValuesFieldUpdates.Iterator updatesIter = fieldUpdates.iterator(); @Override public Iterator iterator() { updatesIter.reset(); return new Iterator() { int curDoc = -1; int updateDoc = updatesIter.nextDoc(); @Override public boolean hasNext() { return curDoc < maxDoc - 1; } @Override public BytesRef next() { if (++curDoc >= maxDoc) { throw new NoSuchElementException("no more documents to return values for"); } if (curDoc == updateDoc) { // this document has an updated value BytesRef value = updatesIter.value(); // either null (unset value) or updated value updateDoc = updatesIter.nextDoc(); // prepare for next round return value; } else { // no update for this document assert curDoc < updateDoc; if (currentValues != null && docsWithField.get(curDoc)) { // only read the current value if the document had a value before return 
currentValues.get(curDoc); } else { return null; } } } @Override public void remove() { throw new UnsupportedOperationException("this iterator does not support removing elements"); } }; } }); } info.advanceDocValuesGen(); assert !fieldFiles.containsKey(fieldInfo.number); fieldFiles.put(fieldInfo.number, trackingDir.getCreatedFiles()); } } private Set writeFieldInfosGen(FieldInfos fieldInfos, Directory dir, DocValuesFormat dvFormat, FieldInfosFormat infosFormat) throws IOException { final long nextFieldInfosGen = info.getNextFieldInfosGen(); final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX); // we write approximately that many bytes (based on Lucene46DVF): // HEADER + FOOTER: 40 // 90 bytes per-field (over estimating long name and attributes map) final long estInfosSize = 40 + 90 * fieldInfos.size(); final IOContext infosContext = new IOContext(new FlushInfo(info.info.maxDoc(), estInfosSize)); // separately also track which files were created for this gen final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); infosFormat.write(trackingDir, info.info, segmentSuffix, fieldInfos, infosContext); info.advanceFieldInfosGen(); return trackingDir.getCreatedFiles(); } // Writes field updates (new _X_N updates files) to the directory public synchronized void writeFieldUpdates(Directory dir, DocValuesFieldUpdates.Container dvUpdates) throws IOException { assert Thread.holdsLock(writer); //System.out.println("rld.writeFieldUpdates: seg=" + info + " numericFieldUpdates=" + numericFieldUpdates); assert dvUpdates.any(); // Do this so we can delete any created files on // exception; this saves all codecs from having to do // it: TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir); final Map> newDVFiles = new HashMap<>(); Set fieldInfosFiles = null; FieldInfos fieldInfos = null; boolean success = false; try { final Codec codec = info.info.getCodec(); // reader could be null e.g. 
for a just merged segment (from // IndexWriter.commitMergedDeletes). final SegmentReader reader = this.reader == null ? new SegmentReader(info, IOContext.READONCE) : this.reader; try { // clone FieldInfos so that we can update their dvGen separately from // the reader's infos and write them to a new fieldInfos_gen file FieldInfos.Builder builder = new FieldInfos.Builder(writer.globalFieldNumberMap); // cannot use builder.add(reader.getFieldInfos()) because it does not // clone FI.attributes as well FI.dvGen for (FieldInfo fi : reader.getFieldInfos()) { FieldInfo clone = builder.add(fi); // copy the stuff FieldInfos.Builder doesn't copy for (Entry e : fi.attributes().entrySet()) { clone.putAttribute(e.getKey(), e.getValue()); } clone.setDocValuesGen(fi.getDocValuesGen()); } // create new fields or update existing ones to have NumericDV type for (String f : dvUpdates.numericDVUpdates.keySet()) { FieldInfo fieldInfo = builder.getOrAdd(f); fieldInfo.setDocValuesType(DocValuesType.NUMERIC); } // create new fields or update existing ones to have BinaryDV type for (String f : dvUpdates.binaryDVUpdates.keySet()) { FieldInfo fieldInfo = builder.getOrAdd(f); fieldInfo.setDocValuesType(DocValuesType.BINARY); } fieldInfos = builder.finish(); final DocValuesFormat docValuesFormat = codec.docValuesFormat(); // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeFieldUpdates: applying numeric updates; seg=" + info + " updates=" + numericFieldUpdates); handleNumericDVUpdates(fieldInfos, dvUpdates.numericDVUpdates, trackingDir, docValuesFormat, reader, newDVFiles); // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: applying binary updates; seg=" + info + " updates=" + dvUpdates.binaryDVUpdates); handleBinaryDVUpdates(fieldInfos, dvUpdates.binaryDVUpdates, trackingDir, docValuesFormat, reader, newDVFiles); // System.out.println("[" + Thread.currentThread().getName() + "] RAU.writeFieldUpdates: write fieldInfos; seg=" + info); 
fieldInfosFiles = writeFieldInfosGen(fieldInfos, trackingDir, docValuesFormat, codec.fieldInfosFormat()); } finally { if (reader != this.reader) { // System.out.println("[" + Thread.currentThread().getName() + "] RLD.writeLiveDocs: closeReader " + reader); reader.close(); } } success = true; } finally { if (!success) { // Advance only the nextWriteFieldInfosGen and nextWriteDocValuesGen, so // that a 2nd attempt to write will write to a new file info.advanceNextWriteFieldInfosGen(); info.advanceNextWriteDocValuesGen(); // Delete any partially created file(s): for (String fileName : trackingDir.getCreatedFiles()) { IOUtils.deleteFilesIgnoringExceptions(dir, fileName); } } } // copy all the updates to mergingUpdates, so they can later be applied to the merged segment if (isMerging) { for (Entry e : dvUpdates.numericDVUpdates.entrySet()) { DocValuesFieldUpdates updates = mergingDVUpdates.get(e.getKey()); if (updates == null) { mergingDVUpdates.put(e.getKey(), e.getValue()); } else { updates.merge(e.getValue()); } } for (Entry e : dvUpdates.binaryDVUpdates.entrySet()) { DocValuesFieldUpdates updates = mergingDVUpdates.get(e.getKey()); if (updates == null) { mergingDVUpdates.put(e.getKey(), e.getValue()); } else { updates.merge(e.getValue()); } } } // writing field updates succeeded assert fieldInfosFiles != null; info.setFieldInfosFiles(fieldInfosFiles); // update the doc-values updates files. the files map each field to its set // of files, hence we copy from the existing map all fields w/ updates that // were not updated in this session, and add new mappings for fields that // were updated now. 
assert !newDVFiles.isEmpty(); for (Entry> e : info.getDocValuesUpdatesFiles().entrySet()) { if (!newDVFiles.containsKey(e.getKey())) { newDVFiles.put(e.getKey(), e.getValue()); } } info.setDocValuesUpdatesFiles(newDVFiles); // wrote new files, should checkpoint() writer.checkpoint(); // if there is a reader open, reopen it to reflect the updates if (reader != null) { SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.info.maxDoc() - info.getDelCount() - pendingDeleteCount); boolean reopened = false; try { reader.decRef(); reader = newReader; reopened = true; } finally { if (!reopened) { newReader.decRef(); } } } } /** * Returns a reader for merge. This method applies field updates if there are * any and marks that this segment is currently merging. */ synchronized SegmentReader getReaderForMerge(IOContext context) throws IOException { assert Thread.holdsLock(writer); // must execute these two statements as atomic operation, otherwise we // could lose updates if e.g. another thread calls writeFieldUpdates in // between, or the updates are applied to the obtained reader, but then // re-applied in IW.commitMergedDeletes (unnecessary work and potential // bugs). isMerging = true; return getReader(context); } /** * Drops all merging updates. Called from IndexWriter after this segment * finished merging (whether successfully or not). */ public synchronized void dropMergingUpdates() { mergingDVUpdates.clear(); isMerging = false; } /** Returns updates that came in while this segment was merging. */ public synchronized Map getMergingFieldUpdates() { return mergingDVUpdates; } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("ReadersAndLiveDocs(seg=").append(info); sb.append(" pendingDeleteCount=").append(pendingDeleteCount); sb.append(" liveDocsShared=").append(liveDocsShared); return sb.toString(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy