// Source listing of org.apache.cassandra.index.SecondaryIndexManager from the cassandra-all artifact
// (Maven / Gradle / Ivy).
//
// The Apache Cassandra Project develops a highly scalable second-generation distributed database,
// bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
//
// Note: there is a newer version of this artifact: 5.0.2.
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.index;

import java.lang.reflect.Constructor;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.primitives.Longs;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.SettableFuture;

import org.apache.cassandra.utils.Throwables;
import org.apache.commons.lang3.StringUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.concurrent.JMXEnabledThreadPoolExecutor;
import org.apache.cassandra.concurrent.NamedThreadFactory;
import org.apache.cassandra.concurrent.Stage;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.cql3.statements.schema.IndexTarget;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.compaction.CompactionManager;
import org.apache.cassandra.db.filter.RowFilter;
import org.apache.cassandra.db.lifecycle.SSTableSet;
import org.apache.cassandra.db.lifecycle.View;
import org.apache.cassandra.db.marshal.ValueAccessor;
import org.apache.cassandra.db.partitions.*;
import org.apache.cassandra.db.rows.*;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.index.Index.IndexBuildingSupport;
import org.apache.cassandra.index.internal.CassandraIndex;
import org.apache.cassandra.index.transactions.*;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.notifications.INotification;
import org.apache.cassandra.notifications.INotificationConsumer;
import org.apache.cassandra.notifications.SSTableAddedNotification;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.schema.IndexMetadata;
import org.apache.cassandra.schema.Indexes;
import org.apache.cassandra.service.pager.SinglePartitionPager;
import org.apache.cassandra.tracing.Tracing;
import org.apache.cassandra.transport.ProtocolVersion;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.JVMStabilityInspector;
import org.apache.cassandra.utils.concurrent.Refs;

import static org.apache.cassandra.utils.ExecutorUtils.awaitTermination;
import static org.apache.cassandra.utils.ExecutorUtils.shutdown;

/**
 * Handles the core maintenance functionality associated with indexes: adding/removing them to or from
 * a table, (re)building during bootstrap or other streaming operations, flushing, reloading metadata
 * and so on.
 * 


 * The Index interface defines a number of methods which return {@code Callable}. These are primarily the
 * management tasks for an index implementation. Most of them are currently executed in a blocking
 * fashion via submission to SIM's blockingExecutor. This provides the desired behaviour in pretty
 * much all cases, as tasks like flushing an index need to be executed synchronously to avoid potentially
 * deadlocking on the FlushWriter or PostFlusher. Several of these {@code Callable} returning methods on Index could
 * therefore be defined as void and called directly from SIM (rather than being run via the executor service).
 * Separating the task definition from execution gives us greater flexibility though, so that in future, for example,
 * if the flush process allows it we leave open the possibility of executing more of these tasks asynchronously.
 * 


 * The primary exception to the above is the Callable returned from Index#addIndexedColumn. This may
 * involve a significant effort, building a new index over any existing data. We perform this task asynchronously;
 * as it is called as part of a schema update, which we do not want to block for a long period. Building non-custom
 * indexes is performed on the CompactionManager.
 * 


 * This class also provides instances of processors which listen to updates to the base table and forward to
 * registered Indexes the info required to keep those indexes up to date.
 * There are two variants of these processors, each with a factory method provided by SIM:
 * IndexTransaction: deals with updates generated on the regular write path.
 * CleanupTransaction: used when partitions are modified during compaction or cleanup operations.
 * Further details on their usage and lifecycles can be found in the interface definitions below.
 * 


 * The bestIndexFor method is used at query time to identify the most selective index of those able
 * to satisfy any search predicates defined by a ReadCommand's RowFilter. It returns a thin IndexAccessor object
 * which enables the ReadCommand to access the appropriate functions of the Index at various stages in its lifecycle.
 * e.g. the getEstimatedResultRows is required when StorageProxy calculates the initial concurrency factor for
 * distributing requests to replicas, whereas a Searcher instance is needed when the ReadCommand is executed locally on
 * a target replica.
 * 


 * Finally, this class provides a clear and safe lifecycle to manage index builds, either full rebuilds via
 * {@link #rebuildIndexesBlocking(Set)} or builds of new sstables
 * added via {@link org.apache.cassandra.notifications.SSTableAddedNotification}s, guaranteeing
 * the following:
 * 
 * The initialization task and any subsequent successful (re)build mark the index as built.
 * If any (re)build operation fails, the index is not marked as built, and only another full rebuild can mark the
 * index as built.
 * Full rebuilds cannot be run concurrently with other full or sstable (re)builds.
 * SSTable builds can always be run concurrently with any other builds.
 * 
 */
public class SecondaryIndexManager implements IndexRegistry, INotificationConsumer
{
    private static final Logger logger = LoggerFactory.getLogger(SecondaryIndexManager.class);

    // default page size (in rows) when rebuilding the index for a whole partition
    public static final int DEFAULT_PAGE_SIZE = 10000;

    /**
     * All registered indexes, keyed by index name.
     */
    private final Map<String, Index> indexes = Maps.newConcurrentMap();

    /**
     * The names of the indexes that had a build failure.
     */
    private final Set<String> needsFullRebuild = Sets.newConcurrentHashSet();

    /**
     * The names of the indexes that are available for querying.
     */
    private final Set<String> queryableIndexes = Sets.newConcurrentHashSet();

    /**
     * The indexes that are available for writing, keyed by index name.
     */
    private final Map<String, Index> writableIndexes = Maps.newConcurrentMap();

    /**
     * The count of pending index builds for each index, keyed by index name.
     */
    private final Map<String, AtomicInteger> inProgressBuilds = Maps.newConcurrentMap();

    // executes tasks returned by Indexer#addIndexColumn which may require index(es) to be (re)built
    private static final ListeningExecutorService asyncExecutor = MoreExecutors.listeningDecorator(
    new JMXEnabledThreadPoolExecutor(1,
                                     Stage.KEEP_ALIVE_SECONDS,
                                     TimeUnit.SECONDS,
                                     new LinkedBlockingQueue<>(),
                                     new NamedThreadFactory("SecondaryIndexManagement"),
                                     "internal"));

    // executes all blocking tasks produced by Indexers e.g. getFlushTask, getMetadataReloadTask etc
    private static final ListeningExecutorService blockingExecutor = MoreExecutors.newDirectExecutorService();

    /**
     * The underlying column family containing the source data for these indexes
     */
    public final ColumnFamilyStore baseCfs;
    private final Keyspace keyspace;

    /**
     * Creates the index manager for the given base table and subscribes it to that table's tracker.
     *
     * @param baseCfs the table containing the source data for the managed indexes
     */
    public SecondaryIndexManager(ColumnFamilyStore baseCfs)
    {
        this.keyspace = baseCfs.keyspace;
        this.baseCfs = baseCfs;
        this.baseCfs.getTracker().subscribe(this);
    }

    /**
     * Synchronizes the registered indexes with the index definitions of the base table's current metadata:
     * indexes no longer defined are removed, and every defined index is passed to
     * {@link #addIndex(IndexMetadata, boolean)}.
     */
    public void reload()
    {
        Indexes schemaIndexes = baseCfs.metadata().indexes;

        // drop every registered index whose definition is gone from the table metadata
        for (String registeredName : indexes.keySet())
        {
            if (!schemaIndexes.has(registeredName))
                removeIndex(registeredName);
        }

        // add is invoked for each definition, since some of them may only exist in the schema
        // and not have been created here yet
        for (IndexMetadata definition : schemaIndexes)
            addIndex(definition, false);
    }

    /**
     * Runs the metadata reload task of an already registered index on the blocking executor.
     *
     * @param indexDef the (possibly updated) metadata of the index; its name must match a registered index
     * @return the future of the submitted reload task, or an already completed future if the index
     * provides no reload task
     */
    private Future<?> reloadIndex(IndexMetadata indexDef)
    {
        Index index = indexes.get(indexDef.name);
        Callable<?> reloadTask = index.getMetadataReloadTask(indexDef);
        return reloadTask == null
               ? Futures.immediateFuture(null)
               : blockingExecutor.submit(reloadTask);
    }

    /**
     * Instantiates a new index, registers it, marks it as building and runs its initialization task, if any.
     * <p>
     * The index is recorded as writable right after registration. If there is no initialization task the index
     * is marked as built immediately; otherwise the task is submitted to the asynchronous executor and the index
     * is marked as built or failed from a callback once that task completes.
     *
     * @param indexDef the metadata describing the index to create
     * @param isNewCF  passed through to {@link #markIndexesBuilding(Set, boolean, boolean)}
     * @return a future completed with the outcome of the initialization task, or an already completed future
     * if no initialization was needed
     */
    @SuppressWarnings("unchecked")
    private synchronized Future<?> createIndex(IndexMetadata indexDef, boolean isNewCF)
    {
        final Index index = createInstance(indexDef);
        index.register(this);
        if (writableIndexes.put(index.getIndexMetadata().name, index) == null)
            logger.info("Index [{}] registered and writable.", index.getIndexMetadata().name);

        markIndexesBuilding(ImmutableSet.of(index), true, isNewCF);

        Callable<?> initialBuildTask = null;
        // if the index didn't register itself, we can probably assume that no initialization needs to happen
        if (indexes.containsKey(indexDef.name))
        {
            try
            {
                initialBuildTask = index.getInitializationTask();
            }
            catch (Throwable t)
            {
                // obtaining the task failed: record the failure before propagating it to the caller
                logAndMarkIndexesFailed(Collections.singleton(index), t, true);
                throw t;
            }
        }

        // if there's no initialization, just mark as built and return:
        if (initialBuildTask == null)
        {
            markIndexBuilt(index, true);
            return Futures.immediateFuture(null);
        }

        // otherwise run the initialization task asynchronously with a callback to mark it built or failed;
        // the raw SettableFuture/FutureCallback are deliberate, see the @SuppressWarnings above
        final SettableFuture initialization = SettableFuture.create();
        Futures.addCallback(asyncExecutor.submit(initialBuildTask), new FutureCallback()
        {
            @Override
            public void onFailure(Throwable t)
            {
                logAndMarkIndexesFailed(Collections.singleton(index), t, true);
                initialization.setException(t);
            }

            @Override
            public void onSuccess(Object o)
            {
                markIndexBuilt(index, true);
                initialization.set(o);
            }
        }, MoreExecutors.directExecutor());

        return initialization;
    }

    /**
     * Adds and builds an index, or reloads its metadata if an index with the same name is already registered.
     *
     * @param indexDef the IndexMetadata describing the index
     * @param isNewCF true if the index is added as part of a new table/columnfamily (i.e. loading a CF at startup),
     * false for all other cases (i.e. newly added index)
     * @return the future returned by the metadata reload (existing index) or by the index creation (new index)
     */
    public synchronized Future<?> addIndex(IndexMetadata indexDef, boolean isNewCF)
    {
        if (indexes.containsKey(indexDef.name))
            return reloadIndex(indexDef);
        else
            return createIndex(indexDef, isNewCF);
    }

    /**
     * Tells whether the given index is currently recorded as queryable.
     *
     * @param index the index to check
     * @return true if the name of the index is in the set of queryable indexes, false otherwise
     */
    public boolean isIndexQueryable(Index index)
    {
        String indexName = index.getIndexMetadata().name;
        return queryableIndexes.contains(indexName);
    }
    
    /**
     * Tells whether the given index is currently recorded as writable.
     *
     * @param index the index to check
     * @return true if the name of the index is a key of the writable indexes, false otherwise
     */
    public boolean isIndexWritable(Index index)
    {
        String indexName = index.getIndexMetadata().name;
        return writableIndexes.containsKey(indexName);
    }

    /**
     * Tells whether at least one build task is currently pending for the named index.
     *
     * @param indexName the index name
     * @return {@code true} if a positive in-progress build count is recorded for the index,
     * {@code false} if there is no count or it is not positive
     */
    @VisibleForTesting
    public synchronized boolean isIndexBuilding(String indexName)
    {
        AtomicInteger pendingBuilds = inProgressBuilds.get(indexName);
        if (pendingBuilds == null)
            return false;

        return pendingBuilds.get() > 0;
    }

    /**
     * Unregisters the named index and, if it was registered, marks it as removed and runs its
     * invalidation task in a blocking fashion.
     *
     * @param indexName the name of the index to remove
     */
    public synchronized void removeIndex(String indexName)
    {
        Index removed = unregisterIndex(indexName);
        if (removed == null)
            return; // nothing was registered under that name

        markIndexRemoved(indexName);
        executeBlocking(removed.getInvalidateTask(), null);
    }


    /**
     * Collects the metadata of every registered index that depends on the given column.
     *
     * @param column the column to check the registered indexes against
     * @return the metadata of the dependent indexes; an immutable empty set if no index is registered at all
     */
    public Set<IndexMetadata> getDependentIndexes(ColumnMetadata column)
    {
        if (indexes.isEmpty())
            return Collections.emptySet();

        Set<IndexMetadata> dependentIndexes = new HashSet<>();
        for (Index index : indexes.values())
            if (index.dependsOn(column))
                dependentIndexes.add(index.getIndexMetadata());

        return dependentIndexes;
    }

    /**
     * Marks every index reported by {@code getBuiltIndexNames()} as removed.
     * Called when dropping a Table.
     */
    public void markAllIndexesRemoved()
    {
        getBuiltIndexNames().forEach(builtIndexName -> markIndexRemoved(builtIndexName));
    }

    /**
     * Does a blocking full rebuild/recovery of the specified indexes from all the sstables in the base table.
     * Note also that this method of (re)building/recovering indexes:
     * a) takes a set of index *names* rather than Indexers
     * b) marks existing indexes removed prior to rebuilding
     * c) fails if such marking operation conflicts with any ongoing index builds, as full rebuilds cannot be run
     * concurrently
     * <p>
     * Only registered indexes whose name is in the given set and which report
     * {@code shouldBuildBlocking()} are rebuilt; if none qualifies the method just logs and returns.
     *
     * @param indexNames the names of the indexes to be rebuilt
     */
    public void rebuildIndexesBlocking(Set<String> indexNames)
    {
        // Get the set of indexes that require blocking build
        Set<Index> toRebuild = indexes.values()
                                      .stream()
                                      .filter(index -> indexNames.contains(index.getIndexMetadata().name))
                                      .filter(Index::shouldBuildBlocking)
                                      .collect(Collectors.toSet());

        if (toRebuild.isEmpty())
        {
            logger.info("No defined indexes with the supplied names: {}", Joiner.on(',').join(indexNames));
            return;
        }

        // Optimistically mark the indexes as writable, so we don't miss incoming writes
        boolean needsFlush = false;
        for (Index index : toRebuild)
        {
            String name = index.getIndexMetadata().name;
            if (writableIndexes.put(name, index) == null)
            {
                logger.info("Index [{}] became writable starting recovery.", name);
                needsFlush = true;
            }
        }

        // Once we are tracking new writes, flush any memtable contents to not miss them from the sstable-based rebuild
        if (needsFlush)
            baseCfs.forceBlockingFlush();

        // Now that we are tracking new writes and we haven't left untracked contents on the memtables, we are ready to
        // index the sstables
        try (ColumnFamilyStore.RefViewFragment viewFragment = baseCfs.selectAndReference(View.selectFunction(SSTableSet.CANONICAL));
             Refs<SSTableReader> allSSTables = viewFragment.refs)
        {
            buildIndexesBlocking(allSSTables, toRebuild, true);
        }
    }

    /**
     * Tells whether the given {@link ColumnFamilyStore} is a secondary index, judging by its name.
     *
     * @param cfs the ColumnFamilyStore to check.
     * @return the result of {@link #isIndexColumnFamily(String)} for the name of the store
     */
    public static boolean isIndexColumnFamilyStore(ColumnFamilyStore cfs)
    {
        String storeName = cfs.name;
        return isIndexColumnFamily(storeName);
    }

    /**
     * Tells whether the given ColumnFamilyStore name denotes a secondary index, i.e. whether it
     * contains the secondary index name separator.
     *
     * @param cfName the name of the ColumnFamilyStore to check.
     * @return true if the name contains {@code Directories.SECONDARY_INDEX_NAME_SEPARATOR},
     * false otherwise.
     */
    public static boolean isIndexColumnFamily(String cfName)
    {
        return cfName.indexOf(Directories.SECONDARY_INDEX_NAME_SEPARATOR) >= 0;
    }

    /**
     * Looks up the parent (base table) store of the given index {@link ColumnFamilyStore} in its keyspace.
     *
     * @param cfs the ColumnFamilyStore of a secondary index
     * @return the store registered in the same keyspace under the parent name of {@code cfs}
     */
    public static ColumnFamilyStore getParentCfs(ColumnFamilyStore cfs)
    {
        return cfs.keyspace.getColumnFamilyStore(getParentCfsName(cfs.name));
    }

    /**
     * Extracts the parent name from the name of a secondary index {@link ColumnFamilyStore}: the part
     * preceding the secondary index name separator.
     *
     * @param cfName the ColumnFamilyStore name; asserted to be a secondary index name
     * @return the parent name of the specified ColumnFamilyStore
     */
    public static String getParentCfsName(String cfName)
    {
        assert isIndexColumnFamily(cfName);
        String separator = Directories.SECONDARY_INDEX_NAME_SEPARATOR;
        return StringUtils.substringBefore(cfName, separator);
    }

    /**
     * Extracts the index name from the name of the given secondary index {@link ColumnFamilyStore}.
     *
     * @param cfs the ColumnFamilyStore of a secondary index
     * @return the result of {@link #getIndexName(String)} for the name of the store
     */
    public static String getIndexName(ColumnFamilyStore cfs)
    {
        String storeName = cfs.name;
        return getIndexName(storeName);
    }

    /**
     * Extracts the index name from the name of a secondary index ColumnFamilyStore: the part
     * following the secondary index name separator.
     *
     * @param cfName the ColumnFamilyStore name; asserted to be a secondary index name
     * @return the index name
     */
    public static String getIndexName(String cfName)
    {
        assert isIndexColumnFamily(cfName);
        String separator = Directories.SECONDARY_INDEX_NAME_SEPARATOR;
        return StringUtils.substringAfter(cfName, separator);
    }

    /**
     * Performs a blocking (re)indexing/recovery of the specified SSTables for the specified indexes.
     * <p>
     * The indexes are first marked as building, then grouped by the {@link IndexBuildingSupport} they provide:
     * {@link Index#getBuildTaskSupport()} when invoked as a full rebuild, {@link Index#getRecoveryTaskSupport()}
     * otherwise. One build task per group is submitted to the {@link CompactionManager}, and this method waits
     * for all of them. Each index ends up marked as either built or failed, and the indexes that built
     * successfully are flushed before returning.
     *
     * @param sstables      the SSTables to be (re)indexed
     * @param indexes       the indexes to be (re)built for the specified SSTables
     * @param isFullRebuild True if this method is invoked as a full index rebuild, false otherwise
     */
    @SuppressWarnings({ "unchecked" })
    private void buildIndexesBlocking(Collection<SSTableReader> sstables, Set<Index> indexes, boolean isFullRebuild)
    {
        if (indexes.isEmpty())
            return;

        // Mark all indexes as building: this step must happen first, because if any index can't be marked, the whole
        // process needs to abort
        markIndexesBuilding(indexes, isFullRebuild, false);

        // Build indexes in a try/catch, so that any index not marked as either built or failed will be marked as failed:
        final Set<Index> builtIndexes = Sets.newConcurrentHashSet();
        final Set<Index> unbuiltIndexes = Sets.newConcurrentHashSet();

        // Any exception thrown during index building that could be suppressed by the finally block
        Exception accumulatedFail = null;

        try
        {
            logger.info("Submitting index {} of {} for data in {}",
                        isFullRebuild ? "recovery" : "build",
                        indexes.stream().map(i -> i.getIndexMetadata().name).collect(Collectors.joining(",")),
                        sstables.stream().map(SSTableReader::toString).collect(Collectors.joining(",")));

            // Group all building tasks
            Map<IndexBuildingSupport, Set<Index>> byType = new HashMap<>();
            for (Index index : indexes)
            {
                IndexBuildingSupport buildOrRecoveryTask = isFullRebuild
                                                           ? index.getBuildTaskSupport()
                                                           : index.getRecoveryTaskSupport();
                Set<Index> stored = byType.computeIfAbsent(buildOrRecoveryTask, i -> new HashSet<>());
                stored.add(index);
            }

            // Schedule all index building tasks with a callback to mark them as built or failed
            List<Future<?>> futures = new ArrayList<>(byType.size());
            byType.forEach((buildingSupport, groupedIndexes) ->
                           {
                               SecondaryIndexBuilder builder = buildingSupport.getIndexBuildTask(baseCfs, groupedIndexes, sstables);
                               // raw SettableFuture/FutureCallback are deliberate, see the @SuppressWarnings above
                               final SettableFuture build = SettableFuture.create();
                               Futures.addCallback(CompactionManager.instance.submitIndexBuild(builder), new FutureCallback()
                               {
                                   private void doOnFailure(Throwable t)
                                   {
                                       logAndMarkIndexesFailed(groupedIndexes, t, false);
                                       unbuiltIndexes.addAll(groupedIndexes);
                                       build.setException(t);
                                   }

                                   @Override
                                   public void onFailure(Throwable t)
                                   {
                                       // release the builder's resources, merging any close failure into t
                                       if (builder instanceof AutoCloseable)
                                           t = Throwables.close(t, Arrays.asList((AutoCloseable) builder));

                                       doOnFailure(t);
                                   }

                                   @Override
                                   public void onSuccess(Object o)
                                   {
                                       // a failure to close the builder turns the successful build into a failed one
                                       if (builder instanceof AutoCloseable)
                                       {
                                           Throwable t = Throwables.close(null, Arrays.asList((AutoCloseable) builder));
                                           if (t != null)
                                           {
                                               doOnFailure(t);
                                               return;
                                           }
                                       }
                                       groupedIndexes.forEach(i -> markIndexBuilt(i, isFullRebuild));
                                       logger.info("Index build of {} completed", getIndexNames(groupedIndexes));
                                       builtIndexes.addAll(groupedIndexes);
                                       build.set(o);
                                   }
                               }, MoreExecutors.directExecutor());
                               futures.add(build);
                           });

            // Finally wait for the index builds to finish and flush the indexes that built successfully
            FBUtilities.waitOnFutures(futures);
        }
        catch (Exception e)
        {
            accumulatedFail = e;
            throw e;
        }
        finally
        {
            try
            {
                // Fail any indexes that couldn't be marked; accumulatedFail is null here if nothing was caught above
                Set<Index> failedIndexes = Sets.difference(indexes, Sets.union(builtIndexes, unbuiltIndexes));
                if (!failedIndexes.isEmpty())
                {
                    logAndMarkIndexesFailed(failedIndexes, accumulatedFail, false);
                }

                // Flush all built indexes with an asynchronous callback to log the success or failure of the flush
                final String builtIndexNames = getIndexNames(builtIndexes);
                flushIndexesBlocking(builtIndexes, new FutureCallback()
                {
                    @Override
                    public void onFailure(Throwable ignored)
                    {
                        logger.info("Index flush of {} failed", builtIndexNames);
                    }

                    @Override
                    public void onSuccess(Object ignored)
                    {
                        logger.info("Index flush of {} completed", builtIndexNames);
                    }
                });
            }
            catch (Exception e)
            {
                // never let a failure of the cleanup hide the original build failure
                if (accumulatedFail != null)
                {
                    accumulatedFail.addSuppressed(e);
                }
                else
                {
                    throw e;
                }
            }
        }
    }

    /**
     * Joins the names of the given indexes into a single comma separated string.
     *
     * @param indexes the indexes whose names are to be joined
     * @return the index names separated by {@code ','}
     */
    private String getIndexNames(Set<Index> indexes)
    {
        List<String> indexNames = indexes.stream()
                                         .map(i -> i.getIndexMetadata().name)
                                         .collect(Collectors.toList());
        return StringUtils.join(indexNames, ',');
    }

    /**
     * Marks the specified indexes as (re)building if:
     * 1) There's no in progress rebuild of any of the given indexes.
     * 2) There's an in progress rebuild but the caller is not a full rebuild.
     * <p>
     * Otherwise, this method invocation fails, as it is not possible to run full rebuilds while other concurrent rebuilds
     * are in progress. Please note this is checked atomically against all given indexes; that is, no index will be marked
     * if even a single one fails.
     * <p>
     * Marking an index as "building" practically means:
     * 1) The index is removed from the "failed" set if this is a full rebuild.
     * 2) The index is removed from the system keyspace built indexes; this only happens if this method is not invoked
     * for a new table initialization, as in such case there's no need to remove it (it is either already not present,
     * or already present because already built).
     * <p>
     * Thread safety is guaranteed by having all methods managing index builds synchronized: being synchronized on
     * the SecondaryIndexManager instance, it means all invocations for all different indexes will go through the same
     * lock, but this is fine as the work done while holding such lock is trivial.
     * <p>
     * {@link #markIndexBuilt(Index, boolean)} or {@link #markIndexFailed(Index, boolean)} should be always called after
     * the rebuilding has finished, so that the index build state can be correctly managed and the index rebuilt.
     *
     * @param indexes the indexes to be marked as building
     * @param isFullRebuild {@code true} if this method is invoked as a full index rebuild, {@code false} otherwise
     * @param isNewCF {@code true} if this method is invoked when initializing a new table/columnfamily (i.e. loading a CF at startup),
     * {@code false} for all other cases (i.e. newly added index)
     * @throws IllegalStateException if this is a full rebuild and a build of any of the given indexes is in progress
     */
    private synchronized void markIndexesBuilding(Set<Index> indexes, boolean isFullRebuild, boolean isNewCF)
    {
        String keyspaceName = baseCfs.keyspace.getName();

        // First step is to validate against concurrent rebuilds; it would be more optimized to do everything on a single
        // step, but we're not really expecting a very high number of indexes, and this isn't on any hot path, so
        // we're favouring readability over performance
        indexes.forEach(index ->
                        {
                            String indexName = index.getIndexMetadata().name;
                            AtomicInteger counter = inProgressBuilds.computeIfAbsent(indexName, ignored -> new AtomicInteger(0));

                            if (counter.get() > 0 && isFullRebuild)
                                throw new IllegalStateException(String.format("Cannot rebuild index %s as another index build for the same index is currently in progress.", indexName));
                        });

        // Second step is the actual marking:
        indexes.forEach(index ->
                        {
                            String indexName = index.getIndexMetadata().name;
                            AtomicInteger counter = inProgressBuilds.computeIfAbsent(indexName, ignored -> new AtomicInteger(0));

                            if (isFullRebuild)
                                needsFullRebuild.remove(indexName);

                            // only the first pending build of an index clears its "built" marker
                            if (counter.getAndIncrement() == 0 && DatabaseDescriptor.isDaemonInitialized() && !isNewCF)
                                SystemKeyspace.setIndexRemoved(keyspaceName, indexName);
                        });
    }

    /**
     * Records the successful completion of one build of the given index.
     * <p>
     * On a full rebuild the index becomes queryable and writable. The pending build count of the index is then
     * decremented; once it reaches zero the count is dropped and, unless the index still needs a full rebuild,
     * the index is recorded as built in the system keyspace (only when the daemon is initialized).
     * {@link #markIndexesBuilding(Set, boolean, boolean)} should always be invoked before this method.
     *
     * @param index the index to be marked as built
     * @param isFullRebuild {@code true} if this method is invoked as a full index rebuild, {@code false} otherwise
     */
    private synchronized void markIndexBuilt(Index index, boolean isFullRebuild)
    {
        final String indexName = index.getIndexMetadata().name;

        if (isFullRebuild)
        {
            boolean becameQueryable = queryableIndexes.add(indexName);
            if (becameQueryable)
                logger.info("Index [{}] became queryable after successful build.", indexName);

            boolean becameWritable = writableIndexes.put(indexName, index) == null;
            if (becameWritable)
                logger.info("Index [{}] became writable after successful build.", indexName);
        }

        AtomicInteger pendingBuilds = inProgressBuilds.get(indexName);
        if (pendingBuilds == null)
            return;

        assert pendingBuilds.get() > 0;
        if (pendingBuilds.decrementAndGet() != 0)
            return; // other builds of this index are still running

        inProgressBuilds.remove(indexName);
        if (!needsFullRebuild.contains(indexName) && DatabaseDescriptor.isDaemonInitialized())
            SystemKeyspace.setIndexBuilt(baseCfs.keyspace.getName(), indexName);
    }

    /**
     * Records the failure of one build of the given index.
     * <p>
     * Nothing happens unless a pending build count exists for the index. Otherwise the count is decremented,
     * the index is removed from the system keyspace built indexes (only when the daemon is initialized) and
     * flagged as needing a full rebuild. It also stops being writable and/or queryable when the load type
     * the index supports on failure does not support writes and/or reads.
     * {@link #markIndexesBuilding(Set, boolean, boolean)} should always be invoked before this method.
     *
     * @param index the index to be marked as failed
     * @param isInitialBuild {@code true} if the index failed during its initial build, {@code false} otherwise
     */
    private synchronized void markIndexFailed(Index index, boolean isInitialBuild)
    {
        final String indexName = index.getIndexMetadata().name;

        AtomicInteger pendingBuilds = inProgressBuilds.get(indexName);
        if (pendingBuilds == null)
            return;

        assert pendingBuilds.get() > 0;
        pendingBuilds.decrementAndGet();

        if (DatabaseDescriptor.isDaemonInitialized())
            SystemKeyspace.setIndexRemoved(baseCfs.keyspace.getName(), indexName);

        needsFullRebuild.add(indexName);

        boolean writesSupported = index.getSupportedLoadTypeOnFailure(isInitialBuild).supportsWrites();
        if (!writesSupported && writableIndexes.remove(indexName) != null)
            logger.info("Index [{}] became not-writable because of failed build.", indexName);

        boolean readsSupported = index.getSupportedLoadTypeOnFailure(isInitialBuild).supportsReads();
        if (!readsSupported && queryableIndexes.remove(indexName))
            logger.info("Index [{}] became not-queryable because of failed build.", indexName);
    }

    /**
     * Logs a failed index build and marks every affected index as failed.
     *
     * @param indexes the indexes whose build failed
     * @param indexBuildFailure the cause of the failure, or {@code null} when no cause is available
     * @param isInitialBuild {@code true} if the failure happened during the initial build of the indexes,
     *                       {@code false} otherwise
     */
    private void logAndMarkIndexesFailed(Set<Index> indexes, Throwable indexBuildFailure, boolean isInitialBuild)
    {
        if (indexBuildFailure != null)
        {
            // Inspect only an actual throwable: this method explicitly supports a null cause.
            // NOTE(review): whether inspectThrowable tolerates null is not visible from here.
            JVMStabilityInspector.inspectThrowable(indexBuildFailure);
            logger.warn("Index build of {} failed. Please run full index rebuild to fix it.", getIndexNames(indexes), indexBuildFailure);
        }
        else
        {
            logger.warn("Index build of {} failed. Please run full index rebuild to fix it.", getIndexNames(indexes));
        }

        for (Index index : indexes)
            markIndexFailed(index, isInitialBuild);
    }

    /**
     * Marks the specified index as removed: the removal is recorded in the system keyspace and every piece of
     * state this manager tracks under the index name (queryable, writable, needs-full-rebuild and in-progress
     * builds) is discarded.
     *
     * @param indexName the index name
     */
    private synchronized void markIndexRemoved(String indexName)
    {
        final String keyspaceName = baseCfs.keyspace.getName();
        SystemKeyspace.setIndexRemoved(keyspaceName, indexName);

        // Drop the in-memory bookkeeping kept for this index name.
        queryableIndexes.remove(indexName);
        writableIndexes.remove(indexName);
        needsFullRebuild.remove(indexName);
        inProgressBuilds.remove(indexName);
    }

    /**
     * Looks up a registered index by its name.
     *
     * @param indexName the name of the index to look up
     * @return the result of looking the name up in the registered indexes (presumably {@code null} when no index
     *         is registered under that name; verify against the type of {@code indexes})
     */
    public Index getIndexByName(String indexName)
    {
        final Index registered = indexes.get(indexName);
        return registered;
    }

    /**
     * Instantiates the {@link Index} implementation described by the given metadata.
     * <p>
     * For a custom index the implementation class name is read from the
     * {@link IndexTarget#CUSTOM_INDEX_OPTION_NAME} option and the class is instantiated reflectively through its
     * public {@code (ColumnFamilyStore, IndexMetadata)} constructor. Any other index is created through
     * {@code CassandraIndex.newIndex}.
     *
     * @param indexDef the metadata of the index to instantiate
     * @return the new index instance, created against {@code baseCfs}
     * @throws RuntimeException wrapping any failure to load the custom class, to verify that it is an
     *                          {@link Index}, to find the expected constructor or to invoke it
     */
    private Index createInstance(IndexMetadata indexDef)
    {
        if (!indexDef.isCustom())
            return CassandraIndex.newIndex(baseCfs, indexDef);

        assert indexDef.options != null;
        String className = indexDef.options.get(IndexTarget.CUSTOM_INDEX_OPTION_NAME);
        assert !Strings.isNullOrEmpty(className);
        try
        {
            Class<?> loadedClass = FBUtilities.classForName(className, "Index");
            // asSubclass throws ClassCastException for a class that is not an Index, so such a class is rejected
            // before any instance of it is created; the failure is wrapped below like every other one.
            Class<? extends Index> indexClass = loadedClass.asSubclass(Index.class);
            Constructor<? extends Index> ctor = indexClass.getConstructor(ColumnFamilyStore.class, IndexMetadata.class);
            return ctor.newInstance(baseCfs, indexDef);
        }
        catch (Exception e)
        {
            throw new RuntimeException(e);
        }
    }

    /**
     * Truncates all indexes by running the truncate task of every registered index through
     * {@code executeAllBlocking}.
     *
     * @param truncatedAt the truncation timestamp handed to each index's truncate task
     */
    public void truncateAllIndexesBlocking(final long truncatedAt)
    {
        executeAllBlocking(indexes.values().stream(),
                           idx -> idx.getTruncateTask(truncatedAt),
                           null);
    }

    /**
     * Removes all indexes: every index is first marked as removed and then all indexes are invalidated through
     * {@link #invalidateAllIndexesBlocking()}.
     */
    public void dropAllIndexes()
    {
        // Step 1: mark all indexes as removed.
        markAllIndexesRemoved();

        // Step 2: run the invalidation tasks of all indexes.
        invalidateAllIndexesBlocking();
    }

    /**
     * Runs the invalidate task of every registered index through {@code executeAllBlocking}.
     */
    @VisibleForTesting
    public void invalidateAllIndexesBlocking()
    {
        executeAllBlocking(indexes.values().stream(),
                           idx -> idx.getInvalidateTask(),
                           null);
    }

    /**
     * Performs a blocking flush of all registered indexes, working on an immutable copy of the currently
     * registered index instances.
     */
    public void flushAllIndexesBlocking()
    {
        ImmutableSet<Index> registered = ImmutableSet.copyOf(indexes.values());
        flushIndexesBlocking(registered);
    }

    /**
     * Performs a blocking flush of the selected indexes, without any completion callback.
     *
     * @param indexes the indexes to flush
     */
    public void flushIndexesBlocking(Set indexes)
    {
        // Delegate to the two-argument overload; null means no callback is supplied.
        flushIndexesBlocking(indexes, null);
    }

    /**
     * Performs a blocking flush of every registered index that reports no backing table, by running the blocking
     * flush task of each such index through {@code executeAllBlocking}.
     */
    public void flushAllNonCFSBackedIndexesBlocking()
    {
        executeAllBlocking(indexes.values().stream().filter(idx -> !idx.getBackingTable().isPresent()),
                           idx -> idx.getBlockingFlushTask(),
                           null);
    }

    /**
     * Performs a blocking execution of the pre-join tasks of all registered indexes.
     *
     * @param hadBootstrap passed on to each index's pre-join task; when {@code true} the log line describes the
     *                     tasks as post-bootstrap
     */
    public void executePreJoinTasksBlocking(boolean hadBootstrap)
    {
        final String phase = hadBootstrap ? " post-bootstrap" : "";
        logger.info("Executing pre-join{} tasks for: {}", phase, this.baseCfs);

        executeAllBlocking(indexes.values().stream(),
                           idx -> idx.getPreJoinTask(hadBootstrap),
                           null);
    }

    private void flushIndexesBlocking(Set indexes, FutureCallback