org.apache.lucene.index.DocumentsWriterPerThreadPool Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.util.ThreadInterruptedException;
/**
* {@link DocumentsWriterPerThreadPool} controls {@link ThreadState} instances
* and their thread assignments during indexing. Each {@link ThreadState} holds
* a reference to a {@link DocumentsWriterPerThread} that is once a
* {@link ThreadState} is obtained from the pool exclusively used for indexing a
* single document by the obtaining thread. Each indexing thread must obtain
* such a {@link ThreadState} to make progress. Depending on the
* {@link DocumentsWriterPerThreadPool} implementation {@link ThreadState}
* assignments might differ from document to document.
*
* Once a {@link DocumentsWriterPerThread} is selected for flush the thread pool
* is reusing the flushing {@link DocumentsWriterPerThread}s ThreadState with a
* new {@link DocumentsWriterPerThread} instance.
*
*/
final class DocumentsWriterPerThreadPool {
/**
* {@link ThreadState} references and guards a
* {@link DocumentsWriterPerThread} instance that is used during indexing to
* build a in-memory index segment. {@link ThreadState} also holds all flush
* related per-thread data controlled by {@link DocumentsWriterFlushControl}.
*
* A {@link ThreadState}, its methods and members should only accessed by one
* thread a time. Users must acquire the lock via {@link ThreadState#lock()}
* and release the lock in a finally block via {@link ThreadState#unlock()}
* before accessing the state.
*/
@SuppressWarnings("serial")
final static class ThreadState extends ReentrantLock {
DocumentsWriterPerThread dwpt;
// TODO this should really be part of DocumentsWriterFlushControl
// write access guarded by DocumentsWriterFlushControl
volatile boolean flushPending = false;
// TODO this should really be part of DocumentsWriterFlushControl
// write access guarded by DocumentsWriterFlushControl
long bytesUsed = 0;
// set by DocumentsWriter after each indexing op finishes
volatile long lastSeqNo;
ThreadState(DocumentsWriterPerThread dpwt) {
this.dwpt = dpwt;
}
private void reset() {
assert this.isHeldByCurrentThread();
this.dwpt = null;
this.bytesUsed = 0;
this.flushPending = false;
}
boolean isInitialized() {
assert this.isHeldByCurrentThread();
return dwpt != null;
}
/**
* Returns the number of currently active bytes in this ThreadState's
* {@link DocumentsWriterPerThread}
*/
public long getBytesUsedPerThread() {
assert this.isHeldByCurrentThread();
// public for FlushPolicy
return bytesUsed;
}
/**
* Returns this {@link ThreadState}s {@link DocumentsWriterPerThread}
*/
public DocumentsWriterPerThread getDocumentsWriterPerThread() {
assert this.isHeldByCurrentThread();
// public for FlushPolicy
return dwpt;
}
/**
* Returns true
iff this {@link ThreadState} is marked as flush
* pending otherwise false
*/
public boolean isFlushPending() {
return flushPending;
}
}
private final List threadStates = new ArrayList<>();
private final List freeList = new ArrayList<>();
private boolean aborted;
/**
* Returns the active number of {@link ThreadState} instances.
*/
synchronized int getActiveThreadStateCount() {
return threadStates.size();
}
synchronized void setAbort() {
aborted = true;
}
synchronized void clearAbort() {
aborted = false;
notifyAll();
}
/**
* Returns a new {@link ThreadState} iff any new state is available otherwise
* null
.
*
* NOTE: the returned {@link ThreadState} is already locked iff non-
* null
.
*
* @return a new {@link ThreadState} iff any new state is available otherwise
* null
*/
private synchronized ThreadState newThreadState() {
while (aborted) {
try {
wait();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}
ThreadState threadState = new ThreadState(null);
threadState.lock(); // lock so nobody else will get this ThreadState
threadStates.add(threadState);
return threadState;
}
DocumentsWriterPerThread reset(ThreadState threadState) {
assert threadState.isHeldByCurrentThread();
final DocumentsWriterPerThread dwpt = threadState.dwpt;
threadState.reset();
return dwpt;
}
void recycle(DocumentsWriterPerThread dwpt) {
// don't recycle DWPT by default
}
// TODO: maybe we should try to do load leveling here: we want roughly even numbers
// of items (docs, deletes, DV updates) to most take advantage of concurrency while flushing
/** This method is used by DocumentsWriter/FlushControl to obtain a ThreadState to do an indexing operation (add/updateDocument). */
ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter) {
ThreadState threadState = null;
synchronized (this) {
if (freeList.isEmpty()) {
// ThreadState is already locked before return by this method:
return newThreadState();
} else {
// Important that we are LIFO here! This way if number of concurrent indexing threads was once high, but has now reduced, we only use a
// limited number of thread states:
threadState = freeList.remove(freeList.size()-1);
if (threadState.dwpt == null) {
// This thread-state is not initialized, e.g. it
// was just flushed. See if we can instead find
// another free thread state that already has docs
// indexed. This way if incoming thread concurrency
// has decreased, we don't leave docs
// indefinitely buffered, tying up RAM. This
// will instead get those thread states flushed,
// freeing up RAM for larger segment flushes:
for(int i=0;iith active {@link ThreadState} where i is the
* given ord.
*
* @param ord
* the ordinal of the {@link ThreadState}
* @return the ith active {@link ThreadState} where i is the
* given ord.
*/
synchronized ThreadState getThreadState(int ord) {
return threadStates.get(ord);
}
// TODO: merge this with getActiveThreadStateCount: they are the same!
synchronized int getMaxThreadStates() {
return threadStates.size();
}
}