All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.lucene.index.LiveIndexWriterConfig Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.util.Collections;
import java.util.Comparator;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.Version;

/**
 * Holds all the configuration used by {@link IndexWriter} with few setters for settings that can be
 * changed on an {@link IndexWriter} instance "live".
 *
 * @since 4.0
 */
public class LiveIndexWriterConfig {

  private final Analyzer analyzer;

  private volatile int maxBufferedDocs;
  private volatile double ramBufferSizeMB;
  private volatile IndexReaderWarmer mergedSegmentWarmer;

  // modified by IndexWriterConfig
  /** {@link IndexDeletionPolicy} controlling when commit points are deleted. */
  protected volatile IndexDeletionPolicy delPolicy;

  /** {@link IndexCommit} that {@link IndexWriter} is opened on. */
  protected volatile IndexCommit commit;

  /** {@link OpenMode} that {@link IndexWriter} is opened with. */
  protected volatile OpenMode openMode;

  /** Compatibility version to use for this index. */
  protected int createdVersionMajor = Version.LATEST.major;

  /** {@link Similarity} to use when encoding norms. */
  protected volatile Similarity similarity;

  /** {@link MergeScheduler} to use for running merges. */
  protected volatile MergeScheduler mergeScheduler;

  /** {@link Codec} used to write new segments. */
  protected volatile Codec codec;

  /** {@link InfoStream} for debugging messages. */
  protected volatile InfoStream infoStream;

  /** {@link MergePolicy} for selecting merges. */
  protected volatile MergePolicy mergePolicy;

  /** True if readers should be pooled. */
  protected volatile boolean readerPooling;

  /** {@link FlushPolicy} to control when segments are flushed. */
  protected volatile FlushPolicy flushPolicy;

  /**
   * Sets the hard upper bound on RAM usage for a single segment, after which the segment is forced
   * to flush.
   */
  protected volatile int perThreadHardLimitMB;

  /** True if segment flushes should use compound file format */
  protected volatile boolean useCompoundFile;

  /** True if calls to {@link IndexWriter#close()} should first do a commit. */
  protected boolean commitOnClose = IndexWriterConfig.DEFAULT_COMMIT_ON_CLOSE;

  /** The sort order to use to write merged segments. */
  protected Sort indexSort = null;

  /** The comparator for sorting leaf readers. */
  protected Comparator leafSorter;

  /** The field names involved in the index sort */
  protected Set indexSortFields = Collections.emptySet();

  /** parent document field */
  protected String parentField = null;

  /**
   * if an indexing thread should check for pending flushes on update in order to help out on a full
   * flush
   */
  protected volatile boolean checkPendingFlushOnUpdate = true;

  /** soft deletes field */
  protected String softDeletesField = null;

  /** Amount of time to wait for merges returned by {@code MergePolicy.findFullFlushMerges(...)}. */
  protected volatile long maxFullFlushMergeWaitMillis;

  /** The IndexWriter event listener to record key events * */
  protected IndexWriterEventListener eventListener;

  // used by IndexWriterConfig
  LiveIndexWriterConfig(Analyzer analyzer) {
    this.analyzer = analyzer;
    ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
    maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
    mergedSegmentWarmer = null;
    delPolicy = new KeepOnlyLastCommitDeletionPolicy();
    commit = null;
    useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
    openMode = OpenMode.CREATE_OR_APPEND;
    similarity = IndexSearcher.getDefaultSimilarity();
    mergeScheduler = new ConcurrentMergeScheduler();
    codec = Codec.getDefault();
    if (codec == null) {
      throw new NullPointerException();
    }
    infoStream = InfoStream.getDefault();
    mergePolicy = new TieredMergePolicy();
    flushPolicy = new FlushByRamOrCountsPolicy();
    readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING;
    perThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
    maxFullFlushMergeWaitMillis = IndexWriterConfig.DEFAULT_MAX_FULL_FLUSH_MERGE_WAIT_MILLIS;
    eventListener = IndexWriterEventListener.NO_OP_LISTENER;
  }

  /** Returns the default analyzer to use for indexing documents. */
  public Analyzer getAnalyzer() {
    return analyzer;
  }

  /**
   * Determines the amount of RAM that may be used for buffering added documents and deletions
   * before they are flushed to the Directory. Generally for faster indexing performance it's best
   * to flush by RAM usage instead of document count and use as large a RAM buffer as you can.
   *
   * 

When this is set, the writer will flush whenever buffered documents and deletions use this * much RAM. Pass in {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent triggering a flush * due to RAM usage. Note that if flushing by document count is also enabled, then the flush will * be triggered by whichever comes first. * *

The maximum RAM limit is inherently determined by the JVMs available memory. Yet, an {@link * IndexWriter} session can consume a significantly larger amount of memory than the given RAM * limit since this limit is just an indicator when to flush memory resident documents to the * Directory. Flushes are likely happen concurrently while other threads adding documents to the * writer. For application stability the available memory in the JVM should be significantly * larger than the RAM buffer used for indexing. * *

NOTE: the account of RAM usage for pending deletions is only approximate. * Specifically, if you delete by Query, Lucene currently has no way to measure the RAM usage of * individual Queries so the accounting will under-estimate and you should compensate by either * calling commit() or refresh() periodically yourself. * *

NOTE: It's not guaranteed that all memory resident documents are flushed once this * limit is exceeded. Depending on the configured {@link FlushPolicy} only a subset of the * buffered documents are flushed and therefore only parts of the RAM buffer is released. * *

The default value is {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB}. * *

Takes effect immediately, but only the next time a document is added, updated or deleted. * * @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int) * @throws IllegalArgumentException if ramBufferSize is enabled but non-positive, or it disables * ramBufferSize when maxBufferedDocs is already disabled */ public synchronized LiveIndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) { if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) { throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled"); } if (ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH && maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH) { throw new IllegalArgumentException( "at least one of ramBufferSize and maxBufferedDocs must be enabled"); } this.ramBufferSizeMB = ramBufferSizeMB; return this; } /** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */ public double getRAMBufferSizeMB() { return ramBufferSizeMB; } /** * Determines the minimal number of documents required before the buffered in-memory documents are * flushed as a new Segment. Large values generally give faster indexing. * *

When this is set, the writer will flush every maxBufferedDocs added documents. Pass in * {@link IndexWriterConfig#DISABLE_AUTO_FLUSH} to prevent triggering a flush due to number of * buffered documents. Note that if flushing by RAM usage is also enabled, then the flush will be * triggered by whichever comes first. * *

Disabled by default (writer flushes by RAM usage). * *

Takes effect immediately, but only the next time a document is added, updated or deleted. * * @see #setRAMBufferSizeMB(double) * @throws IllegalArgumentException if maxBufferedDocs is enabled but smaller than 2, or it * disables maxBufferedDocs when ramBufferSize is already disabled */ public synchronized LiveIndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) { if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) { throw new IllegalArgumentException("maxBufferedDocs must at least be 2 when enabled"); } if (maxBufferedDocs == IndexWriterConfig.DISABLE_AUTO_FLUSH && ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH) { throw new IllegalArgumentException( "at least one of ramBufferSize and maxBufferedDocs must be enabled"); } this.maxBufferedDocs = maxBufferedDocs; return this; } /** * Returns the number of buffered added documents that will trigger a flush if enabled. * * @see #setMaxBufferedDocs(int) */ public int getMaxBufferedDocs() { return maxBufferedDocs; } /** * Expert: {@link MergePolicy} is invoked whenever there are changes to the segments in the index. * Its role is to select which merges to do, if any, and return a {@link * MergePolicy.MergeSpecification} describing the merges. It also selects merges to do for * forceMerge. * *

Takes effect on subsequent merge selections. Any merges in flight or any merges already * registered by the previous {@link MergePolicy} are not affected. */ public LiveIndexWriterConfig setMergePolicy(MergePolicy mergePolicy) { if (mergePolicy == null) { throw new IllegalArgumentException("mergePolicy must not be null"); } this.mergePolicy = mergePolicy; return this; } /** * Set the merged segment warmer. See {@link IndexReaderWarmer}. * *

Takes effect on the next merge. */ public LiveIndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergeSegmentWarmer) { this.mergedSegmentWarmer = mergeSegmentWarmer; return this; } /** Returns the current merged segment warmer. See {@link IndexReaderWarmer}. */ public IndexReaderWarmer getMergedSegmentWarmer() { return mergedSegmentWarmer; } /** Returns the {@link OpenMode} set by {@link IndexWriterConfig#setOpenMode(OpenMode)}. */ public OpenMode getOpenMode() { return openMode; } /** * Return the compatibility version to use for this index. * * @see IndexWriterConfig#setIndexCreatedVersionMajor */ public int getIndexCreatedVersionMajor() { return createdVersionMajor; } /** * Returns the {@link IndexDeletionPolicy} specified in {@link * IndexWriterConfig#setIndexDeletionPolicy(IndexDeletionPolicy)} or the default {@link * KeepOnlyLastCommitDeletionPolicy}/ */ public IndexDeletionPolicy getIndexDeletionPolicy() { return delPolicy; } /** * Returns the {@link IndexCommit} as specified in {@link * IndexWriterConfig#setIndexCommit(IndexCommit)} or the default, {@code null} which specifies to * open the latest index commit point. */ public IndexCommit getIndexCommit() { return commit; } /** Expert: returns the {@link Similarity} implementation used by this {@link IndexWriter}. */ public Similarity getSimilarity() { return similarity; } /** * Returns the {@link MergeScheduler} that was set by {@link * IndexWriterConfig#setMergeScheduler(MergeScheduler)}. */ public MergeScheduler getMergeScheduler() { return mergeScheduler; } /** Returns the current {@link Codec}. */ public Codec getCodec() { return codec; } /** * Returns the current MergePolicy in use by this writer. * * @see IndexWriterConfig#setMergePolicy(MergePolicy) */ public MergePolicy getMergePolicy() { return mergePolicy; } /** * Returns {@code true} if {@link IndexWriter} should pool readers even if {@link * DirectoryReader#open(IndexWriter)} has not been called. 
*/ public boolean getReaderPooling() { return readerPooling; } /** * Returns the max amount of memory each {@link DocumentsWriterPerThread} can consume until * forcefully flushed. * * @see IndexWriterConfig#setRAMPerThreadHardLimitMB(int) */ public int getRAMPerThreadHardLimitMB() { return perThreadHardLimitMB; } /** * @see IndexWriterConfig#setFlushPolicy(FlushPolicy) */ FlushPolicy getFlushPolicy() { return flushPolicy; } /** * Returns {@link InfoStream} used for debugging. * * @see IndexWriterConfig#setInfoStream(InfoStream) */ public InfoStream getInfoStream() { return infoStream; } /** * Sets if the {@link IndexWriter} should pack newly written segments in a compound file. Default * is true. * *

Use false for batch indexing with very large ram buffer settings. * *

Note: To control compound file usage during segment merges see {@link * MergePolicy#setNoCFSRatio(double)} and {@link MergePolicy#setMaxCFSSegmentSizeMB(double)}. This * setting only applies to newly created segments. */ public LiveIndexWriterConfig setUseCompoundFile(boolean useCompoundFile) { this.useCompoundFile = useCompoundFile; return this; } /** * Returns true iff the {@link IndexWriter} packs newly written segments in a * compound file. Default is true. */ public boolean getUseCompoundFile() { return useCompoundFile; } /** * Returns true if {@link IndexWriter#close()} should first commit before closing. */ public boolean getCommitOnClose() { return commitOnClose; } /** Get the index-time {@link Sort} order, applied to all (flushed and merged) segments. */ public Sort getIndexSort() { return indexSort; } /** Returns the field names involved in the index sort */ public Set getIndexSortFields() { return indexSortFields; } /** * Returns a comparator for sorting leaf readers. If not {@code null}, this comparator is used to * sort leaf readers within {@code DirectoryReader} opened from the {@code IndexWriter} of this * configuration. * * @return a comparator for sorting leaf readers */ public Comparator getLeafSorter() { return leafSorter; } /** * Expert: Returns if indexing threads check for pending flushes on update in order to help our * flushing indexing buffers to disk * * @lucene.experimental */ public boolean isCheckPendingFlushOnUpdate() { return checkPendingFlushOnUpdate; } /** * Expert: sets if indexing threads check for pending flushes on update in order to help our * flushing indexing buffers to disk. As a consequence, threads calling {@link * DirectoryReader#openIfChanged(DirectoryReader, IndexWriter)} or {@link IndexWriter#flush()} * will be the only thread writing segments to disk unless flushes are falling behind. If indexing * is stalled due to too many pending flushes indexing threads will help our writing pending * segment flushes to disk. 
* * @lucene.experimental */ public LiveIndexWriterConfig setCheckPendingFlushUpdate(boolean checkPendingFlushOnUpdate) { this.checkPendingFlushOnUpdate = checkPendingFlushOnUpdate; return this; } /** * Returns the soft deletes field or null if soft-deletes are disabled. See {@link * IndexWriterConfig#setSoftDeletesField(String)} for details. */ public String getSoftDeletesField() { return softDeletesField; } /** * Expert: return the amount of time to wait for merges returned by by * MergePolicy.findFullFlushMerges(...). If this time is reached, we proceed with the commit based * on segments merged up to that point. The merges are not cancelled, and may still run to * completion independent of the commit. */ public long getMaxFullFlushMergeWaitMillis() { return maxFullFlushMergeWaitMillis; } /** Returns the IndexWriterEventListener callback that tracks the key IndexWriter operations. */ public IndexWriterEventListener getIndexWriterEventListener() { return eventListener; } /** Returns the parent document field name if configured. */ public String getParentField() { return parentField; } @Override public String toString() { StringBuilder sb = new StringBuilder(); sb.append("analyzer=") .append(analyzer == null ? "null" : analyzer.getClass().getName()) .append("\n"); sb.append("ramBufferSizeMB=").append(getRAMBufferSizeMB()).append("\n"); sb.append("maxBufferedDocs=").append(getMaxBufferedDocs()).append("\n"); sb.append("mergedSegmentWarmer=").append(getMergedSegmentWarmer()).append("\n"); sb.append("delPolicy=").append(getIndexDeletionPolicy().getClass().getName()).append("\n"); IndexCommit commit = getIndexCommit(); sb.append("commit=").append(commit == null ? 
"null" : commit).append("\n"); sb.append("openMode=").append(getOpenMode()).append("\n"); sb.append("similarity=").append(getSimilarity().getClass().getName()).append("\n"); sb.append("mergeScheduler=").append(getMergeScheduler()).append("\n"); sb.append("codec=").append(getCodec()).append("\n"); sb.append("infoStream=").append(getInfoStream().getClass().getName()).append("\n"); sb.append("mergePolicy=").append(getMergePolicy()).append("\n"); sb.append("readerPooling=").append(getReaderPooling()).append("\n"); sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n"); sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n"); sb.append("commitOnClose=").append(getCommitOnClose()).append("\n"); sb.append("indexSort=").append(getIndexSort()).append("\n"); sb.append("checkPendingFlushOnUpdate=").append(isCheckPendingFlushOnUpdate()).append("\n"); sb.append("softDeletesField=").append(getSoftDeletesField()).append("\n"); sb.append("maxFullFlushMergeWaitMillis=").append(getMaxFullFlushMergeWaitMillis()).append("\n"); sb.append("leafSorter=").append(getLeafSorter()).append("\n"); sb.append("eventListener=").append(getIndexWriterEventListener()).append("\n"); sb.append("parentField=").append(getParentField()).append("\n"); return sb.toString(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy