All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.index.DocumentsWriterPerThreadPool Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.index;

import java.io.Closeable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import java.util.function.Supplier;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.ThreadInterruptedException;

/**
 * {@link DocumentsWriterPerThreadPool} controls {@link DocumentsWriterPerThread} instances and
 * their thread assignments during indexing. Each {@link DocumentsWriterPerThread} is once a
 * obtained from the pool exclusively used for indexing a single document or list of documents by
 * the obtaining thread. Each indexing thread must obtain such a {@link DocumentsWriterPerThread} to
 * make progress. Depending on the {@link DocumentsWriterPerThreadPool} implementation {@link
 * DocumentsWriterPerThread} assignments might differ from document to document.
 *
 * 

Once a {@link DocumentsWriterPerThread} is selected for flush the {@link * DocumentsWriterPerThread} will be checked out of the thread pool and won't be reused for * indexing. See {@link #checkout(DocumentsWriterPerThread)}. */ final class DocumentsWriterPerThreadPool implements Iterable, Closeable { private final Set dwpts = Collections.newSetFromMap(new IdentityHashMap<>()); private final LockableConcurrentApproximatePriorityQueue freeList = new LockableConcurrentApproximatePriorityQueue<>(); private final Supplier dwptFactory; private int takenWriterPermits = 0; private volatile boolean closed; DocumentsWriterPerThreadPool(Supplier dwptFactory) { this.dwptFactory = dwptFactory; } /** Returns the active number of {@link DocumentsWriterPerThread} instances. */ synchronized int size() { return dwpts.size(); } synchronized void lockNewWriters() { // this is similar to a semaphore - we need to acquire all permits ie. takenWriterPermits must // be == 0 // any call to lockNewWriters() must be followed by unlockNewWriters() otherwise we will // deadlock at some // point assert takenWriterPermits >= 0; takenWriterPermits++; } synchronized void unlockNewWriters() { assert takenWriterPermits > 0; takenWriterPermits--; if (takenWriterPermits == 0) { notifyAll(); } } /** * Returns a new already locked {@link DocumentsWriterPerThread} * * @return a new {@link DocumentsWriterPerThread} */ private synchronized DocumentsWriterPerThread newWriter() { assert takenWriterPermits >= 0; while (takenWriterPermits > 0) { // we can't create new DWPTs while not all permits are available try { wait(); } catch (InterruptedException ie) { throw new ThreadInterruptedException(ie); } } // we must check if we are closed since this might happen while we are waiting for the writer // permit // and if we miss that we might release a new DWPT even though the pool is closed. 
Yet, that // wouldn't be the // end of the world it's violating the contract that we don't release any new DWPT after this // pool is closed ensureOpen(); DocumentsWriterPerThread dwpt = dwptFactory.get(); dwpt.lock(); // lock so nobody else will get this DWPT dwpts.add(dwpt); return dwpt; } // TODO: maybe we should try to do load leveling here: we want roughly even numbers // of items (docs, deletes, DV updates) to most take advantage of concurrency while flushing /** * This method is used by DocumentsWriter/FlushControl to obtain a DWPT to do an indexing * operation (add/updateDocument). */ DocumentsWriterPerThread getAndLock() { ensureOpen(); DocumentsWriterPerThread dwpt = freeList.lockAndPoll(); if (dwpt != null) { return dwpt; } // newWriter() adds the DWPT to the `dwpts` set as a side-effect. However it is not added to // `freeList` at this point, it will be added later on once DocumentsWriter has indexed a // document into this DWPT and then gives it back to the pool by calling // #marksAsFreeAndUnlock. 
return newWriter(); } private void ensureOpen() { if (closed) { throw new AlreadyClosedException("DWPTPool is already closed"); } } private synchronized boolean contains(DocumentsWriterPerThread state) { return dwpts.contains(state); } void marksAsFreeAndUnlock(DocumentsWriterPerThread state) { final long ramBytesUsed = state.ramBytesUsed(); assert state.isFlushPending() == false && state.isAborted() == false && state.isQueueAdvanced() == false : "DWPT has pending flush: " + state.isFlushPending() + " aborted=" + state.isAborted() + " queueAdvanced=" + state.isQueueAdvanced(); assert contains(state) : "we tried to add a DWPT back to the pool but the pool doesn't know about this DWPT"; freeList.addAndUnlock(state, ramBytesUsed); } @Override public synchronized Iterator iterator() { // copy on read - this is a quick op since num states is low return List.copyOf(dwpts).iterator(); } /** * Filters all DWPTs the given predicate applies to and that can be checked out of the pool via * {@link #checkout(DocumentsWriterPerThread)}. All DWPTs returned from this method are already * locked and {@link #isRegistered(DocumentsWriterPerThread)} will return true for * all returned DWPTs */ List filterAndLock(Predicate predicate) { List list = new ArrayList<>(); for (DocumentsWriterPerThread perThread : this) { if (predicate.test(perThread)) { perThread.lock(); if (isRegistered(perThread)) { list.add(perThread); } else { // somebody else has taken this DWPT out of the pool. // unlock and let it go perThread.unlock(); } } } return Collections.unmodifiableList(list); } /** * Removes the given DWPT from the pool unless it's already been removed before. * * @return true iff the given DWPT has been removed. Otherwise false */ synchronized boolean checkout(DocumentsWriterPerThread perThread) { // The DWPT must be held by the current thread. 
This guarantees that concurrent calls to // #getAndLock cannot pull this DWPT out of the pool since #getAndLock does a DWPT#tryLock to // check if the DWPT is available. assert perThread.isHeldByCurrentThread(); if (dwpts.remove(perThread)) { freeList.remove(perThread); } else { assert freeList.contains(perThread) == false; return false; } return true; } /** Returns true if this DWPT is still part of the pool */ synchronized boolean isRegistered(DocumentsWriterPerThread perThread) { return dwpts.contains(perThread); } @Override public synchronized void close() { this.closed = true; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy