// Source: org.apache.solr.core.IndexDeletionPolicyWrapper (from the solr-core artifact)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BinaryOperator;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.solr.core.snapshots.SolrSnapshotMetaDataManager;
import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A wrapper for an IndexDeletionPolicy instance.
 *
 * <p>Provides features for looking up IndexCommit given a version. Allows reserving index commit
 * points for certain amounts of time to support features such as index replication or snapshotting
 * directly out of a live index directory.
 *
 * <p><b>NOTE</b>: The {@link #clone()} method returns <code>this</code> in order to make this
 * {@link IndexDeletionPolicy} instance trackable across {@link IndexWriter} instantiations. This is
 * correct because each core has its own {@link IndexDeletionPolicy} and never has more than one
 * open {@link IndexWriter}.
 *
 * @see org.apache.lucene.index.IndexDeletionPolicy
 */
public final class IndexDeletionPolicyWrapper extends IndexDeletionPolicy {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /**
   * Orders two {@link System#nanoTime} readings by the sign of their difference.
   *
   * <p>{@code nanoTime} values have an arbitrary origin (they may be negative) and sums such as
   * {@code now + duration} may overflow, so they must be compared via {@code a - b} rather than
   * with {@code <} or the natural ordering of {@link Long}.
   */
  private static final Comparator<Long> NANO_TIME_ORDER = (a, b) -> Long.signum(a - b);

  private final IndexDeletionPolicy deletionPolicy;
  private final SolrSnapshotMetaDataManager snapshotMgr;

  /**
   * The set of all known commits <em>after</em> the last completed call to {@link #onInit} or
   * {@link #onCommit} on our {@link #getWrappedDeletionPolicy()}.
   *
   * <p>This map is atomically replaced by {@link #updateKnownCommitPoints}. The keys are the {@link
   * IndexCommit#getGeneration} of each commit
   *
   * @see #getAndSaveCommitPoint
   * @see #getCommits
   * @see #updateKnownCommitPoints
   */
  private volatile Map<Long, IndexCommit> knownCommits = new ConcurrentHashMap<>();

  /**
   * The most recent commit included in call to {@link #onInit} or {@link #onCommit} <em>before</em>
   * delegating to our {@link #getWrappedDeletionPolicy()}.
   *
   * <p><b>NOTE:</b> This may be null if there is not yet a single commit to our index.
   *
   * <p>This commit is implicitly protected from deletion in {@link IndexCommitWrapper#delete}
   *
   * @see #getLatestCommit
   * @see #updateLatestCommit
   */
  private volatile IndexCommit latestCommit;

  /**
   * The set of all commit generations that have been reserved for some amount of time.
   *
   * <p>The keys are the {@link IndexCommit#getGeneration} of a commit, the values are the {@link
   * System#nanoTime} that the commit should be reserved until.
   *
   * @see #setReserveDuration
   * @see #cleanReserves
   */
  private final Map<Long, Long> reserves = new ConcurrentHashMap<>();

  /**
   * The set of all commit generations that have been saved until explicitly released
   *
   * <p>The keys are the {@link IndexCommit#getGeneration} of a commit, the values are a reference
   * count of the number of callers who have "saved" this commit. {@link #releaseCommitPoint}
   * automatically removes mappings once the ref count reaches 0.
   *
   * @see #getAndSaveLatestCommit
   * @see #saveCommitPoint
   * @see #releaseCommitPoint
   */
  private final Map<Long, AtomicInteger> savedCommits = new ConcurrentHashMap<>();

  /**
   * Creates a wrapper around the given policy.
   *
   * @param deletionPolicy the policy that {@link #onInit} and {@link #onCommit} delegate to
   * @param snapshotMgr consulted in {@link IndexCommitWrapper#delete} so that snapshotted
   *     generations are never deleted
   */
  public IndexDeletionPolicyWrapper(
      IndexDeletionPolicy deletionPolicy, SolrSnapshotMetaDataManager snapshotMgr) {
    this.deletionPolicy = deletionPolicy;
    this.snapshotMgr = snapshotMgr;
  }

  /**
   * Returns the most recent commit point.
   *
   * <p><b>NOTE:</b> This method makes no guarantee that the commit returned still exists as the
   * moment this method completes. Callers are encouraged to use {@link #getAndSaveLatestCommit}
   * instead.
   *
   * @return the most recent commit point, or null if there have not been any commits
   * @see #getAndSaveLatestCommit
   */
  public IndexCommit getLatestCommit() {
    return latestCommit;
  }

  /**
   * Atomically Saves (via reference counting) &amp; Returns the most recent commit point.
   *
   * <p>If the return value is non-null, then the caller <em>MUST</em> call {@link
   * #releaseCommitPoint} when finished using it in order to decrement the reference count, or the
   * commit will be preserved in the Directory forever.
   *
   * @return the most recent commit point, or null if there have not been any commits
   * @see #saveCommitPoint
   * @see #releaseCommitPoint
   */
  public synchronized IndexCommit getAndSaveLatestCommit() {
    final IndexCommit commit = getLatestCommit();
    if (null != commit) {
      saveCommitPoint(commit.getGeneration());
    }
    return commit;
  }

  /**
   * Atomically Saves (via reference counting) &amp; Returns the specified commit if available.
   *
   * <p>If the return value is non-null, then the caller <em>MUST</em> call {@link
   * #releaseCommitPoint} when finished using it in order to decrement the reference count, or the
   * commit will be preserved in the Directory forever.
   *
   * <p><b>NOTE:</b> when the generation is not currently known but is also not older than the
   * latest commit, the reference count for that generation is still incremented and null is
   * returned; {@link #saveCommitPoint} relies on this.
   *
   * @param generation the generation of the commit to save; must not be null
   * @return the commit point with the specified generation, or null if not available
   * @throws NullPointerException if generation is null
   * @throws IllegalStateException if generation is already too old to be saved
   * @see #saveCommitPoint
   * @see #releaseCommitPoint
   */
  public synchronized IndexCommit getAndSaveCommitPoint(Long generation) {
    Objects.requireNonNull(generation, "generation to get and save must not be null");
    final IndexCommit commit = knownCommits.get(generation);
    // Refuse if the commit is known-but-deleted, or unknown and older than the latest commit.
    if ((null != commit && commit.isDeleted())
        || (null == commit && null != latestCommit && generation < latestCommit.getGeneration())) {
      throw new IllegalStateException(
          "Specified index generation is too old to be saved: " + generation);
    }
    final AtomicInteger refCount =
        savedCommits.computeIfAbsent(generation, gen -> new AtomicInteger());
    final int currentCount = refCount.incrementAndGet();
    log.debug("Saving generation={}, refCount={}", generation, currentCount);
    return commit;
  }

  /** Returns the policy instance that {@link #onInit} and {@link #onCommit} delegate to. */
  public IndexDeletionPolicy getWrappedDeletionPolicy() {
    return deletionPolicy;
  }

  /**
   * Set the duration for which commit point is to be reserved by the deletion policy.
   *
   * <p><b>NOTE:</b> This method does not make any guarantees that the specified index generation
   * exists, or that the specified generation has not already been deleted. The only guarantee is
   * that <em>if</em> the specified generation exists now, or is created at some point in the
   * future, then it will be reserved for <em>at least</em> the specified <code>reserveTime</code>.
   *
   * @param indexGen gen of the commit point to be reserved
   * @param reserveTime duration in milliseconds (relative to 'now') for which the commit point is
   *     to be reserved
   */
  public void setReserveDuration(Long indexGen, long reserveTime) {
    // since 'reserves' is a concurrent HashMap, we don't need to synchronize this method as long as
    // all operations on 'reserves' are done atomically.
    //
    // Here we'll use Map.merge to ensure that we atomically replace any existing timestamp if
    // and only if our new reservation timestamp is later.
    final long reserveAsNanoTime = System.nanoTime() + TimeUnit.MILLISECONDS.toNanos(reserveTime);
    reserves.merge(indexGen, reserveAsNanoTime, BinaryOperator.maxBy(NANO_TIME_ORDER));
  }

  /** Drops every reservation whose deadline is already in the past. */
  private void cleanReserves() {
    final long currentNanoTime = System.nanoTime();
    // use removeIf to ensure we're removing "old" entries atomically;
    // compare by difference so the check is immune to nanoTime sign/overflow
    reserves.entrySet().removeIf(e -> e.getValue() - currentNanoTime < 0);
  }

  /**
   * Reports whether the given generation has an unexpired reservation.
   *
   * <p>An absent entry means "not reserved"; no sentinel timestamp is used because any fixed
   * value could be ahead of a (possibly negative) {@link System#nanoTime} reading.
   */
  private boolean isReserved(Long gen) {
    final Long reservedUntil = reserves.get(gen);
    return null != reservedUntil && reservedUntil - System.nanoTime() > 0;
  }

  /** Wraps each commit in an {@link IndexCommitWrapper}, preserving the order of the input. */
  private List<IndexCommitWrapper> wrap(List<? extends IndexCommit> list) {
    final List<IndexCommitWrapper> result = new ArrayList<>(list.size());
    for (IndexCommit indexCommit : list) {
      result.add(new IndexCommitWrapper(indexCommit));
    }
    return result;
  }

  /**
   * Permanently prevent this commit point from being deleted (if it has not already) using a
   * reference count.
   *
   * <p><b>NOTE:</b> Callers <em>MUST</em> call {@link #releaseCommitPoint} when finished using it
   * in order to decrement the reference count, or the commit will be preserved in the Directory
   * forever.
   *
   * @param generation the generation of the IndexCommit to save until released
   * @see #getAndSaveLatestCommit
   * @see #getAndSaveCommitPoint
   * @see #releaseCommitPoint
   * @throws IllegalStateException if generation is already too old to be saved
   */
  public synchronized void saveCommitPoint(Long generation) {
    getAndSaveCommitPoint(generation); // will handle the logic for us, just ignore the results
  }

  /**
   * Release a previously saved commit point.
   *
   * <p>This is a convenience wrapper around {@link #releaseCommitPoint(Long)} that will ignore null
   * input.
   */
  public synchronized void releaseCommitPoint(IndexCommit commit) {
    if (null != commit) {
      releaseCommitPoint(commit.getGeneration());
    }
  }

  /**
   * Release a previously saved commit point.
   *
   * <p>This method does not enforce that the specified generation has previously been saved, or
   * even that it's 'non-null'. But if both are true then it will decrement the reference count for
   * the specified generation.
   */
  public synchronized void releaseCommitPoint(Long generation) {
    if (null == generation) {
      return;
    }
    final AtomicInteger refCount = savedCommits.get(generation);
    if (null != refCount) { // null shouldn't happen if balanced save/release calls in callers
      final int currentCount = refCount.decrementAndGet();
      log.debug("Released generation={}, refCount={}", generation, currentCount);
      if (currentCount <= 0) {
        savedCommits.remove(generation); // counter no longer needed;
      }
    }
  }

  /**
   * Internal use for Lucene... do not explicitly call.
   *
   * <p>This Impl passes the list of commits to the delegate Policy <em>AFTER</em> wrapping each
   * commit in a proxy class that only proxies {@link IndexCommit#delete} if they are not already
   * saved.
   */
  @Override
  public void onInit(List<? extends IndexCommit> list) throws IOException {
    final List<IndexCommitWrapper> wrapperList = wrap(list);
    updateLatestCommit(wrapperList); // must happen before delegating, see updateLatestCommit
    deletionPolicy.onInit(wrapperList);
    updateKnownCommitPoints(wrapperList);
    cleanReserves();
  }

  /**
   * Internal use for Lucene... do not explicitly call.
   *
   * <p>This Impl passes the list of commits to the delegate Policy <em>AFTER</em> wrapping each
   * commit in a proxy class that only proxies {@link IndexCommit#delete} if they are not already
   * saved.
   */
  @Override
  public void onCommit(List<? extends IndexCommit> list) throws IOException {
    final List<IndexCommitWrapper> wrapperList = wrap(list);
    updateLatestCommit(wrapperList); // must happen before delegating, see updateLatestCommit
    deletionPolicy.onCommit(wrapperList);
    updateKnownCommitPoints(wrapperList);
    cleanReserves();
  }

  /**
   * Wrapper class that synchronizes the {@link IndexCommit#delete} calls and only passes them to
   * the wrapped commit if they should not be saved or reserved.
   */
  private class IndexCommitWrapper extends IndexCommit {
    final IndexCommit delegate;

    IndexCommitWrapper(IndexCommit delegate) {
      this.delegate = delegate;
    }

    @Override
    public String getSegmentsFileName() {
      return delegate.getSegmentsFileName();
    }

    @Override
    public Collection<String> getFileNames() throws IOException {
      return delegate.getFileNames();
    }

    @Override
    public Directory getDirectory() {
      return delegate.getDirectory();
    }

    /**
     * Deletes the wrapped commit unless its generation is reserved, saved, snapshotted, or is the
     * generation of the current {@link #latestCommit}.
     */
    @Override
    public void delete() {
      // Box it now to prevent multiple autoboxing when doing multiple map lookups
      final Long gen = delegate.getGeneration();

      // synchronize on the policy wrapper so that we don't delegate the delete call
      // concurrently with another thread trying to save this commit
      synchronized (IndexDeletionPolicyWrapper.this) {
        if (isReserved(gen)
            || savedCommits.containsKey(gen)
            || snapshotMgr.isSnapshotted(gen)
            || (null != latestCommit && gen.longValue() == latestCommit.getGeneration())) {
          return; // skip deletion
        }
        log.debug("Deleting generation={}", gen);
        delegate.delete(); // delegate deletion
      }
    }

    @Override
    public int getSegmentCount() {
      return delegate.getSegmentCount();
    }

    @Override
    public boolean equals(Object o) {
      return delegate.equals(o);
    }

    @Override
    public int hashCode() {
      return delegate.hashCode();
    }

    @Override
    public long getGeneration() {
      return delegate.getGeneration();
    }

    @Override
    public boolean isDeleted() {
      // same monitor as delete(), so a concurrent delete is never observed half-way
      synchronized (IndexDeletionPolicyWrapper.this) {
        return delegate.isDeleted();
      }
    }

    @Override
    public Map<String, String> getUserData() throws IOException {
      return delegate.getUserData();
    }
  }

  /**
   * Returns the commit with the specified generation <em>if it is known</em>.
   *
   * <p><b>NOTE:</b> This method makes no guarantee that the commit returned still exists as the
   * moment this method completes. Callers are encouraged to use {@link #getAndSaveLatestCommit}
   * instead.
   *
   * @param gen the generation of the commit point requested
   * @return a commit point corresponding to the given version if available, or null if not yet
   *     created or already deleted
   * @deprecated use {@link #getAndSaveCommitPoint} instead
   */
  @Deprecated
  public IndexCommit getCommitPoint(Long gen) {
    return knownCommits.get(gen);
  }

  /**
   * Returns a Map of all currently known commits, keyed by their generation.
   *
   * <p><b>NOTE:</b> This map instance may change between commits and commit points may be deleted.
   * This API is intended for "informational purposes" only, to provide an "at the moment" view of
   * the current list of known commits. Callers that need to ensure commits exist for an extended
   * period must wrap this call and all subsequent usage of the results in a synchronization block.
   *
   * @return a Map of generation to commit points
   */
  public Map<Long, IndexCommit> getCommits() {
    return Collections.unmodifiableMap(knownCommits);
  }

  /**
   * Updates {@link #latestCommit}.
   *
   * <p>This is handled special, and not included in {@link #updateKnownCommitPoints}, because we
   * need to ensure this happens <em>before</em> delegating calls to {@link #onInit} or {@link
   * #onCommit} to our inner Policy. Doing this ensures that we can always protect {@link
   * #latestCommit} from being deleted.
   *
   * <p>If we did not do this, and waited to update <code>latestCommit</code> in <code>
   * updateKnownCommitPoints()</code> then we would need to wrap synchronization completely around
   * the (delegated) <code>onInit()</code> and <code>onCommit()</code> calls, to ensure there was no
   * window of time when {@link #getAndSaveLatestCommit} might return the "old" latest commit, after
   * our delegate Policy had already deleted it.
   *
   * <p>(Since Saving/Reserving (other) commits is handled indirectly ("by reference") via the
   * generation callers can still safely (try) to reserve "old" commits using an explicit generation
   * since {@link IndexCommitWrapper#delete} is synchronized on <code>this</code>)
   *
   * @see #latestCommit
   * @see #updateKnownCommitPoints
   */
  private synchronized void updateLatestCommit(final List<IndexCommitWrapper> list) {
    // NOTE: There's a hypothetical, not necessarily possible/plausible, situation that
    // could lead to this combination of updateLatestCommit + updateKnownCommitPoints not
    // being as thread safe as completely synchronizing in onInit/onCommit...
    //  - knownCommits==(1, 2, 3, 4), latestCommit==4
    //  - onCommit(1, 2, 3, 4, 5, 6, 7) - we immediately update latestCommit=7
    //  - before knownCommits is updated, some client calls getAndSaveCommitPoint(6)
    //  - call fails "too old to be saved" even though it's in flight
    // (this assumes some future caller/use-case that doesn't currently exist)
    //
    // The upside of this current approach, and not completely synchronizing onInit/onCommit
    // is that we have no control over what delegate is used, or how long those calls might take.
    //
    // If the hypothetical situation above ever becomes problematic, then an alternative approach
    // might be to *add* to the Set/Map of all known commits *before* delegating, then *remove*
    // everything except the new (non-deleted) commits *after* delegating.

    assert null != list;
    if (list.isEmpty()) {
      return;
    }
    final IndexCommitWrapper newLast = list.get(list.size() - 1);
    assert !newLast.isDeleted()
        : "Code flaw: Last commit already deleted, call this method before delegating onCommit/onInit";
    latestCommit = newLast.delegate;
  }

  /**
   * Updates the state of all "current" commits.
   *
   * <p>This method is safe to call <em>after</em> delegating to our inner <code>
   * IndexDeletionPolicy</code> (w/o synchronizing the delegate calls) because even if the delegate
   * decides to {@link IndexCommit#delete} a commit that a concurrent thread may wish to
   * reserve/save, that {@link IndexCommitWrapper} will ensure that call is synchronized.
   *
   * @see #updateLatestCommit
   */
  private synchronized void updateKnownCommitPoints(final List<IndexCommitWrapper> list) {
    assert null != list;
    assert (list.isEmpty() || null != latestCommit) : "Code flaw: How is latestCommit not set yet?";
    assert (null == latestCommit || !latestCommit.isDeleted())
        : "Code flaw: How did the latestCommit get set but deleted?";
    assert (list.isEmpty() || Objects.equals(latestCommit, list.get(list.size() - 1).delegate))
        : "Code flaw, updateLatestCommit() should have already been called";

    // Build a fresh map of the surviving (unwrapped) commits, then publish it in one volatile write.
    final Map<Long, IndexCommit> map = new ConcurrentHashMap<>();
    for (IndexCommitWrapper wrapper : list) {
      if (!wrapper.isDeleted()) {
        map.put(wrapper.delegate.getGeneration(), wrapper.delegate);
      }
    }
    knownCommits = map;
  }

  /**
   * Helper method for unpacking the timestamp info from the user data
   *
   * @param commit the commit whose user data is inspected
   * @return the value stored under {@link SolrIndexWriter#COMMIT_TIME_MSEC_KEY} parsed as a long,
   *     or 0 if the key is absent
   * @throws IOException if the commit's user data cannot be read
   * @throws NumberFormatException if the stored value is not a parsable long
   * @see SolrIndexWriter#COMMIT_TIME_MSEC_KEY
   * @see IndexCommit#getUserData
   */
  public static long getCommitTimestamp(IndexCommit commit) throws IOException {
    final Map<String, String> commitData = commit.getUserData();
    final String commitTime = commitData.get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
    return (commitTime != null) ? Long.parseLong(commitTime) : 0;
  }
}