org.apache.cassandra.cql3.statements.SelectStatement Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of cassandra-all Show documentation
The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
There is a newer version: 5.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.cql3.statements;

import java.nio.ByteBuffer;
import java.util.*;
import java.util.stream.Collectors;
import java.util.concurrent.TimeUnit;

import javax.annotation.concurrent.ThreadSafe;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.audit.AuditLogContext;
import org.apache.cassandra.audit.AuditLogEntryType;
import org.apache.cassandra.auth.Permission;
import org.apache.cassandra.cql3.restrictions.SingleRestriction;
import org.apache.cassandra.db.guardrails.Guardrails;
import org.apache.cassandra.index.Index;
import org.apache.cassandra.schema.ColumnMetadata;
import org.apache.cassandra.schema.Schema;
import org.apache.cassandra.schema.SchemaConstants;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.schema.TableMetadataRef;
import org.apache.cassandra.cql3.*;
import org.apache.cassandra.cql3.functions.Function;
import org.apache.cassandra.cql3.restrictions.StatementRestrictions;
import org.apache.cassandra.cql3.selection.RawSelector;
import org.apache.cassandra.cql3.selection.ResultSetBuilder;
import org.apache.cassandra.cql3.selection.Selectable;
import org.apache.cassandra.cql3.selection.Selectable.WithFunction;
import org.apache.cassandra.cql3.selection.Selection;
import org.apache.cassandra.cql3.selection.Selection.Selectors;
import org.apache.cassandra.cql3.selection.Selector;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.aggregation.AggregationSpecification;
import org.apache.cassandra.db.aggregation.GroupMaker;
import org.apache.cassandra.db.filter.*;
import org.apache.cassandra.db.marshal.CompositeType;
import org.apache.cassandra.db.marshal.Int32Type;
import org.apache.cassandra.db.partitions.PartitionIterator;
import org.apache.cassandra.db.rows.Row;
import org.apache.cassandra.db.rows.RowIterator;
import org.apache.cassandra.db.view.View;
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.exceptions.*;
import org.apache.cassandra.metrics.ClientRequestSizeMetrics;
import org.apache.cassandra.index.IndexRegistry;
import org.apache.cassandra.serializers.MarshalException;
import org.apache.cassandra.service.ClientState;
import org.apache.cassandra.service.ClientWarn;
import org.apache.cassandra.service.QueryState;
import org.apache.cassandra.service.StorageProxy;
import org.apache.cassandra.service.pager.AggregationQueryPager;
import org.apache.cassandra.service.pager.PagingState;
import org.apache.cassandra.service.pager.QueryPager;
import org.apache.cassandra.transport.ProtocolVersion;
import org.apache.cassandra.transport.messages.ResultMessage;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.NoSpamLogger;

import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;

import static java.lang.String.format;
import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse;
import static org.apache.cassandra.cql3.statements.RequestValidations.checkNotNull;
import static org.apache.cassandra.cql3.statements.RequestValidations.checkNull;
import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue;
import static org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest;
import static org.apache.cassandra.utils.ByteBufferUtil.UNSET_BYTE_BUFFER;
import static org.apache.cassandra.utils.Clock.Global.nanoTime;

/**
 * Encapsulates a completely parsed SELECT query, including the target
 * column family, expression, result count, and ordering clause.
 *
 * A number of public methods here are only used internally. However,
 * many of these are made accessible for the benefit of custom
 * QueryHandler implementations, so before reducing their accessibility
 * due consideration should be given.
 *
 * Note that select statements can be accessed by multiple threads, so we cannot rely on mutable attributes.
 */
@ThreadSafe
public class SelectStatement implements CQLStatement.SingleKeyspaceCqlStatement
{
    private static final Logger logger = LoggerFactory.getLogger(SelectStatement.class);
    private static final NoSpamLogger noSpamLogger = NoSpamLogger.getLogger(SelectStatement.logger, 1, TimeUnit.MINUTES);

    public static final int DEFAULT_PAGE_SIZE = 10000;
    public static final String TOPK_CONSISTENCY_LEVEL_ERROR = "Top-K queries can only be run with consistency level ONE/LOCAL_ONE. Consistency level %s was used.";
    public static final String TOPK_LIMIT_ERROR = "Top-K queries must have a limit specified and the limit must be less than the query page size";
    public static final String TOPK_PARTITION_LIMIT_ERROR = "Top-K queries do not support per-partition limits";
    public static final String TOPK_AGGREGATION_ERROR = "Top-K queries can not be run with aggregation";
    public static final String TOPK_CONSISTENCY_LEVEL_WARNING = "Top-K queries can only be run with consistency level ONE " +
                                                                "/ LOCAL_ONE / NODE_LOCAL. Consistency level %s was requested. " +
                                                                "Downgrading the consistency level to %s.";
    public static final String TOPK_PAGE_SIZE_WARNING = "Top-K queries do not support paging and the page size is set to %d, " +
                                                        "which is less than LIMIT %d. The page size has been set to %> orderingComparator;

    // Used by forSelection below
    private static final Parameters defaultParameters = new Parameters(Collections.emptyList(),
                                                                       Collections.emptyList(),
                                                                       false,
                                                                       false,
                                                                       false);

    public SelectStatement(TableMetadata table,
                           VariableSpecifications bindVariables,
                           Parameters parameters,
                           Selection selection,
                           StatementRestrictions restrictions,
                           boolean isReversed,
                           AggregationSpecification.Factory aggregationSpecFactory,
                           ColumnComparator> orderingComparator,
                           Term limit,
                           Term perPartitionLimit)
    {
        this.table = table;
        this.bindVariables = bindVariables;
        this.selection = selection;
        this.restrictions = restrictions;
        this.isReversed = isReversed;
        this.aggregationSpecFactory = aggregationSpecFactory;
        this.orderingComparator = orderingComparator;
        this.parameters = parameters;
        this.limit = limit;
        this.perPartitionLimit = perPartitionLimit;
    }

    @Override
    public List getBindVariables()
    {
        return bindVariables.getBindVariables();
    }

    @Override
    public short[] getPartitionKeyBindVariableIndexes()
    {
        return bindVariables.getPartitionKeyBindVariableIndexes(table);
    }

    @Override
    public Iterable getFunctions()
    {
        List functions = new ArrayList<>();
        addFunctionsTo(functions);
        return functions;
    }

    private void addFunctionsTo(List functions)
    {
        selection.addFunctionsTo(functions);
        restrictions.addFunctionsTo(functions);

        if (aggregationSpecFactory != null)
            aggregationSpecFactory.addFunctionsTo(functions);

        if (limit != null)
            limit.addFunctionsTo(functions);

        if (perPartitionLimit != null)
            perPartitionLimit.addFunctionsTo(functions);
    }

    /**
     * The columns to fetch internally for this SELECT statement (which can be more than the one selected by the
     * user as it also include any restricted column in particular).
     */
    public ColumnFilter queriedColumns()
    {
        return selection.newSelectors(QueryOptions.DEFAULT).getColumnFilter();
    }

    // Creates a simple select based on the given selection.
    // Note that the results select statement should not be used for actual queries, but only for processing already
    // queried data through processColumnFamily.
    static SelectStatement forSelection(TableMetadata table, Selection selection)
    {
        return new SelectStatement(table,
                                   VariableSpecifications.empty(),
                                   defaultParameters,
                                   selection,
                                   StatementRestrictions.empty(StatementType.SELECT, table),
                                   false,
                                   null,
                                   null,
                                   null,
                                   null);
    }

    public ResultSet.ResultMetadata getResultMetadata()
    {
        return selection.getResultMetadata();
    }

    public void authorize(ClientState state) throws InvalidRequestException, UnauthorizedException
    {
        if (table.isView())
        {
            TableMetadataRef baseTable = View.findBaseTable(keyspace(), table());
            if (baseTable != null)
                state.ensureTablePermission(baseTable, Permission.SELECT);
        }
        else
        {
            state.ensureTablePermission(table, Permission.SELECT);
        }

        for (Function function : getFunctions())
            state.ensurePermission(Permission.EXECUTE, function);

        if (!state.hasTablePermission(table, Permission.UNMASK) &&
            !state.hasTablePermission(table, Permission.SELECT_MASKED))
        {
            List queriedMaskedColumns = table.columns()
                                                             .stream()
                                                             .filter(ColumnMetadata::isMasked)
                                                             .filter(restrictions::isRestricted)
                                                             .collect(Collectors.toList());

            if (!queriedMaskedColumns.isEmpty())
                throw new UnauthorizedException(format("User %s has no UNMASK nor SELECT_MASKED permission on table %s.%s, " +
                                                       "cannot query masked columns %s",
                                                       state.getUser().getName(), keyspace(), table(), queriedMaskedColumns));
        }
    }

    public void validate(ClientState state) throws InvalidRequestException
    {
        if (parameters.allowFiltering && !SchemaConstants.isSystemKeyspace(table.keyspace))
            Guardrails.allowFilteringEnabled.ensureEnabled(state);
    }

    public ResultMessage.Rows execute(QueryState state, QueryOptions options, long queryStartNanoTime)
    {
        ConsistencyLevel cl = options.getConsistency();
        checkNotNull(cl, "Invalid empty consistency level");

        cl.validateForRead();
        Guardrails.readConsistencyLevels.guard(EnumSet.of(cl), state.getClientState());

        long nowInSec = options.getNowInSeconds(state);
        int userLimit = getLimit(options);
        int userPerPartitionLimit = getPerPartitionLimit(options);
        int pageSize = options.getPageSize();
        boolean unmask = !table.hasMaskedColumns() || state.getClientState().hasTablePermission(table, Permission.UNMASK);

        Selectors selectors = selection.newSelectors(options);
        AggregationSpecification aggregationSpec = getAggregationSpec(options);
        DataLimits limit = getDataLimits(userLimit, userPerPartitionLimit, pageSize, aggregationSpec);

        // Handle additional validation for topK queries
        if (restrictions.isTopK())
        {
            checkFalse(aggregationSpec != null, TOPK_AGGREGATION_ERROR);

            // We aren't going to allow SERIAL at all, so we can error out on those.
            checkFalse(options.getConsistency() == ConsistencyLevel.LOCAL_SERIAL ||
                       options.getConsistency() == ConsistencyLevel.SERIAL,
                       String.format(TOPK_CONSISTENCY_LEVEL_ERROR, options.getConsistency()));

            if (options.getConsistency() != ConsistencyLevel.ONE &&
                options.getConsistency() != ConsistencyLevel.LOCAL_ONE &&
                options.getConsistency() != ConsistencyLevel.NODE_LOCAL)
            {
                ConsistencyLevel supplied = options.getConsistency();
                ConsistencyLevel downgrade = supplied.isDatacenterLocal() ? ConsistencyLevel.LOCAL_ONE : ConsistencyLevel.ONE;

                options = QueryOptions.withConsistencyLevel(options, downgrade);

                ClientWarn.instance.warn(String.format(TOPK_CONSISTENCY_LEVEL_WARNING, supplied, downgrade));
            }

            checkFalse(limit.isUnlimited(), TOPK_LIMIT_ERROR);

            checkFalse(limit.perPartitionCount() != DataLimits.NO_LIMIT, TOPK_PARTITION_LIMIT_ERROR);

            if (pageSize > 0 && pageSize < limit.count())
            {
                int oldPageSize = pageSize;
                pageSize = limit.count();
                limit = getDataLimits(userLimit, userPerPartitionLimit, pageSize, aggregationSpec);
                options = QueryOptions.withPageSize(options, pageSize);
                ClientWarn.instance.warn(String.format(TOPK_PAGE_SIZE_WARNING, oldPageSize, limit.count()));
            }
        }

        ReadQuery query = getQuery(options, state.getClientState(), selectors.getColumnFilter(), nowInSec, limit);

        if (options.isReadThresholdsEnabled())
            query.trackWarnings();
        ResultMessage.Rows rows;

        if (aggregationSpec == null && (pageSize <= 0 || (query.limits().count() <= pageSize) || query.isTopK()))
        {
            rows = execute(query, options, state.getClientState(), selectors, nowInSec, userLimit, null, queryStartNanoTime, unmask);
        }
        else
        {
            QueryPager pager = getPager(query, options);

            rows = execute(state,
                           Pager.forDistributedQuery(pager, cl, state.getClientState()),
                           options,
                           selectors,
                           pageSize,
                           nowInSec,
                           userLimit,
                           aggregationSpec,
                           queryStartNanoTime,
                           unmask);
        }
        if (!SchemaConstants.isSystemKeyspace(table.keyspace))
            ClientRequestSizeMetrics.recordReadResponseMetrics(rows, restrictions, selection);

        return rows;
    }


    public AggregationSpecification getAggregationSpec(QueryOptions options)
    {
        return aggregationSpecFactory == null ? null : aggregationSpecFactory.newInstance(options);
    }

    public ReadQuery getQuery(QueryOptions options, long nowInSec) throws RequestValidationException
    {
        Selectors selectors = selection.newSelectors(options);
        return getQuery(options,
                        ClientState.forInternalCalls(),
                        selectors.getColumnFilter(),
                        nowInSec,
                        getLimit(options),
                        getPerPartitionLimit(options),
                        options.getPageSize(),
                        getAggregationSpec(options));
    }

    public ReadQuery getQuery(QueryOptions options,
                              ClientState state,
                              ColumnFilter columnFilter,
                              long nowInSec,
                              int userLimit,
                              int perPartitionLimit,
                              int pageSize,
                              AggregationSpecification aggregationSpec)
    {
        DataLimits limit = getDataLimits(userLimit, perPartitionLimit, pageSize, aggregationSpec);

        return getQuery(options, state, columnFilter, nowInSec, limit);
    }

    public ReadQuery getQuery(QueryOptions options,
                              ClientState state,
                              ColumnFilter columnFilter,
                              long nowInSec,
                              DataLimits limit)
    {
        boolean isPartitionRangeQuery = restrictions.isKeyRange() || restrictions.usesSecondaryIndexing();

        if (isPartitionRangeQuery)
            return getRangeCommand(options, state, columnFilter, limit, nowInSec);

        return getSliceCommands(options, state, columnFilter, limit, nowInSec);
    }

    private ResultMessage.Rows execute(ReadQuery query,
                                       QueryOptions options,
                                       ClientState state,
                                       Selectors selectors,
                                       long nowInSec,
                                       int userLimit,
                                       AggregationSpecification aggregationSpec,
                                       long queryStartNanoTime,
                                       boolean unmask)
    {
        try (PartitionIterator data = query.execute(options.getConsistency(), state, queryStartNanoTime))
        {
            return processResults(data, options, selectors, nowInSec, userLimit, aggregationSpec, unmask);
        }
    }

    @Override
    public AuditLogContext getAuditLogContext()
    {
        return new AuditLogContext(AuditLogEntryType.SELECT, keyspace(), table.name);
    }

    // Simple wrapper class to avoid some code duplication
    private static abstract class Pager
    {
        protected QueryPager pager;

        protected Pager(QueryPager pager)
        {
            this.pager = pager;
        }

        public static Pager forInternalQuery(QueryPager pager, ReadExecutionController executionController)
        {
            return new InternalPager(pager, executionController);
        }

        public static Pager forDistributedQuery(QueryPager pager, ConsistencyLevel consistency, ClientState clientState)
        {
            return new NormalPager(pager, consistency, clientState);
        }

        public boolean isExhausted()
        {
            return pager.isExhausted();
        }

        public PagingState state()
        {
            return pager.state();
        }

        public abstract PartitionIterator fetchPage(int pageSize, long queryStartNanoTime);

        public static class NormalPager extends Pager
        {
            private final ConsistencyLevel consistency;
            private final ClientState clientState;

            private NormalPager(QueryPager pager, ConsistencyLevel consistency, ClientState clientState)
            {
                super(pager);
                this.consistency = consistency;
                this.clientState = clientState;
            }

            public PartitionIterator fetchPage(int pageSize, long queryStartNanoTime)
            {
                return pager.fetchPage(pageSize, consistency, clientState, queryStartNanoTime);
            }
        }

        public static class InternalPager extends Pager
        {
            private final ReadExecutionController executionController;

            private InternalPager(QueryPager pager, ReadExecutionController executionController)
            {
                super(pager);
                this.executionController = executionController;
            }

            public PartitionIterator fetchPage(int pageSize, long queryStartNanoTime)
            {
                return pager.fetchPageInternal(pageSize, executionController);
            }
        }
    }

    private ResultMessage.Rows execute(QueryState state,
                                       Pager pager,
                                       QueryOptions options,
                                       Selectors selectors,
                                       int pageSize,
                                       long nowInSec,
                                       int userLimit,
                                       AggregationSpecification aggregationSpec,
                                       long queryStartNanoTime,
                                       boolean unmask)
    {
        Guardrails.pageSize.guard(pageSize, table(), false, state.getClientState());

        if (aggregationSpecFactory != null)
        {
            if (!restrictions.hasPartitionKeyRestrictions())
            {
                warn("Aggregation query used without partition key");
                noSpamLogger.warn(String.format("Aggregation query used without partition key on table %s.%s, aggregation type: %s",
                                                 keyspace(), table(), aggregationSpec.kind()));
            }
            else if (restrictions.keyIsInRelation())
            {
                warn("Aggregation query used on multiple partition keys (IN restriction)");
                noSpamLogger.warn(String.format("Aggregation query used on multiple partition keys (IN restriction) on table %s.%s, aggregation type: %s",
                                                 keyspace(), table(), aggregationSpec.kind()));
            }
        }

        // We can't properly do post-query ordering if we page (see #6722)
        // For GROUP BY or aggregation queries we always page internally even if the user has turned paging off
        checkFalse(pageSize > 0 && needsPostQueryOrdering(),
                  "Cannot page queries with both ORDER BY and a IN restriction on the partition key;"
                  + " you must either remove the ORDER BY or the IN and sort client side, or disable paging for this query");

        ResultMessage.Rows msg;
        try (PartitionIterator page = pager.fetchPage(pageSize, queryStartNanoTime))
        {
            msg = processResults(page, options, selectors, nowInSec, userLimit, aggregationSpec, unmask);
        }

        // Please note that the isExhausted state of the pager only gets updated when we've closed the page, so this
        // shouldn't be moved inside the 'try' above.
        if (!pager.isExhausted() && !pager.pager.isTopK())
            msg.result.metadata.setHasMorePages(pager.state());

        return msg;
    }

    private void warn(String msg)
    {
        logger.warn(msg);
        ClientWarn.instance.warn(msg);
    }

    private ResultMessage.Rows processResults(PartitionIterator partitions,
                                              QueryOptions options,
                                              Selectors selectors,
                                              long nowInSec,
                                              int userLimit,
                                              AggregationSpecification aggregationSpec,
                                              boolean unmask) throws RequestValidationException
    {
        ResultSet rset = process(partitions, options, selectors, nowInSec, userLimit, aggregationSpec, unmask);
        return new ResultMessage.Rows(rset);
    }

    public ResultMessage.Rows executeLocally(QueryState state, QueryOptions options) throws RequestExecutionException, RequestValidationException
    {
        return executeInternal(state, options, options.getNowInSeconds(state), nanoTime());
    }

    public ResultMessage.Rows executeInternal(QueryState state,
                                              QueryOptions options,
                                              long nowInSec,
                                              long queryStartNanoTime)
    {
        int userLimit = getLimit(options);
        int userPerPartitionLimit = getPerPartitionLimit(options);
        int pageSize = options.getPageSize();
        boolean unmask = state.getClientState().hasTablePermission(table, Permission.UNMASK);

        Selectors selectors = selection.newSelectors(options);
        AggregationSpecification aggregationSpec = getAggregationSpec(options);
        ReadQuery query = getQuery(options,
                                   state.getClientState(),
                                   selectors.getColumnFilter(),
                                   nowInSec,
                                   userLimit,
                                   userPerPartitionLimit,
                                   pageSize,
                                   aggregationSpec);

        try (ReadExecutionController executionController = query.executionController())
        {
            if (aggregationSpec == null && (pageSize <= 0 || (query.limits().count() <= pageSize) || query.isTopK()))
            {
                try (PartitionIterator data = query.executeInternal(executionController))
                {
                    return processResults(data, options, selectors, nowInSec, userLimit, null, unmask);
                }
            }

            QueryPager pager = getPager(query, options);

            return execute(state,
                           Pager.forInternalQuery(pager, executionController),
                           options,
                           selectors,
                           pageSize,
                           nowInSec,
                           userLimit,
                           aggregationSpec,
                           queryStartNanoTime,
                           unmask);
        }
    }

    private QueryPager getPager(ReadQuery query, QueryOptions options)
    {
        QueryPager pager = query.getPager(options.getPagingState(), options.getProtocolVersion());

        if (aggregationSpecFactory == null || query.isEmpty())
            return pager;

        return new AggregationQueryPager(pager, query.limits());
    }

    public Map> executeRawInternal(QueryOptions options, ClientState state, long nowInSec) throws RequestExecutionException, RequestValidationException
    {
        int userLimit = getLimit(options);
        int userPerPartitionLimit = getPerPartitionLimit(options);
        if (options.getPageSize() > 0)
            throw new IllegalStateException();
        if (aggregationSpecFactory != null)
            throw new IllegalStateException();

        Selectors selectors = selection.newSelectors(options);
        ReadQuery query = getQuery(options, state, selectors.getColumnFilter(), nowInSec, userLimit, userPerPartitionLimit, Integer.MAX_VALUE, null);

        Map> result = Collections.emptyMap();
        try (ReadExecutionController executionController = query.executionController())
        {
            try (PartitionIterator data = query.executeInternal(executionController))
            {
                while (data.hasNext())
                {
                    try (RowIterator in = data.next())
                    {
                        List out = Collections.emptyList();
                        while (in.hasNext())
                        {
                            switch (out.size())
                            {
                                case 0:  out = Collections.singletonList(in.next()); break;
                                case 1:  out = new ArrayList<>(out);
                                default: out.add(in.next());
                            }
                        }
                        switch (result.size())
                        {
                            case 0:  result = Collections.singletonMap(in.partitionKey(), out); break;
                            case 1:  result = new TreeMap<>(result);
                            default: result.put(in.partitionKey(), out);
                        }
                    }
                }
                return result;
            }
        }
    }

    public ResultSet process(PartitionIterator partitions, long nowInSec, boolean unmask) throws InvalidRequestException
    {
        QueryOptions options = QueryOptions.DEFAULT;
        Selectors selectors = selection.newSelectors(options);
        return process(partitions, options, selectors, nowInSec, getLimit(options), getAggregationSpec(options), unmask);
    }

    @Override
    public String keyspace()
    {
        return table.keyspace;
    }

    public String table()
    {
        return table.name;
    }

    /**
     * May be used by custom QueryHandler implementations
     */
    public Selection getSelection()
    {
        return selection;
    }

    /**
     * May be used by custom QueryHandler implementations
     */
    public StatementRestrictions getRestrictions()
    {
        return restrictions;
    }

    private ReadQuery getSliceCommands(QueryOptions options, ClientState state, ColumnFilter columnFilter,
                                       DataLimits limit, long nowInSec)
    {
        Collection keys = restrictions.getPartitionKeys(options, state);
        if (keys.isEmpty())
            return ReadQuery.empty(table);

        if (restrictions.keyIsInRelation())
        {
            Guardrails.partitionKeysInSelect.guard(keys.size(), table.name, false, state);
        }

        ClusteringIndexFilter filter = makeClusteringIndexFilter(options, state, columnFilter);
        if (filter == null || filter.isEmpty(table.comparator))
            return ReadQuery.empty(table);

        RowFilter rowFilter = getRowFilter(options);

        List decoratedKeys = new ArrayList<>(keys.size());
        for (ByteBuffer key : keys)
        {
            QueryProcessor.validateKey(key);
            decoratedKeys.add(table.partitioner.decorateKey(ByteBufferUtil.clone(key)));
        }

        return SinglePartitionReadQuery.createGroup(table, nowInSec, columnFilter, rowFilter, limit, decoratedKeys, filter);
    }

    /**
     * Returns the slices fetched by this SELECT, assuming an internal call (no bound values in particular).
     * 
     * Note that if the SELECT intrinsically selects rows by names, we convert them into equivalent slices for
     * the purpose of this method. This is used for MVs to restrict what needs to be read when we want to read
     * everything that could be affected by a given view (and so, if the view SELECT statement has restrictions
     * on the clustering columns, we can restrict what we read).
     */
    public Slices clusteringIndexFilterAsSlices()
    {
        QueryOptions options = QueryOptions.forInternalCalls(Collections.emptyList());
        ClientState state = ClientState.forInternalCalls();
        ColumnFilter columnFilter = selection.newSelectors(options).getColumnFilter();
        ClusteringIndexFilter filter = makeClusteringIndexFilter(options, state, columnFilter);
        if (filter instanceof ClusteringIndexSliceFilter)
            return ((ClusteringIndexSliceFilter)filter).requestedSlices();

        Slices.Builder builder = new Slices.Builder(table.comparator);
        for (Clustering clustering: ((ClusteringIndexNamesFilter)filter).requestedRows())
            builder.add(Slice.make(clustering));
        return builder.build();
    }

    /**
     * Returns a read command that can be used internally to query all the rows queried by this SELECT for a
     * give key (used for materialized views).
     */
    public SinglePartitionReadCommand internalReadForView(DecoratedKey key, long nowInSec)
    {
        QueryOptions options = QueryOptions.forInternalCalls(Collections.emptyList());
        ClientState state = ClientState.forInternalCalls();
        ColumnFilter columnFilter = selection.newSelectors(options).getColumnFilter();
        ClusteringIndexFilter filter = makeClusteringIndexFilter(options, state, columnFilter);
        RowFilter rowFilter = getRowFilter(options);
        return SinglePartitionReadCommand.create(table, nowInSec, columnFilter, rowFilter, DataLimits.NONE, key, filter);
    }

    /**
     * The {@code RowFilter} for this SELECT, assuming an internal call (no bound values in particular).
     */
    public RowFilter rowFilterForInternalCalls()
    {
        return getRowFilter(QueryOptions.forInternalCalls(Collections.emptyList()));
    }

    private ReadQuery getRangeCommand(QueryOptions options, ClientState state, ColumnFilter columnFilter, DataLimits limit, long nowInSec)
    {
        ClusteringIndexFilter clusteringIndexFilter = makeClusteringIndexFilter(options, state, columnFilter);
        if (clusteringIndexFilter == null)
            return ReadQuery.empty(table);

        RowFilter rowFilter = getRowFilter(options);

        // The LIMIT provided by the user is the number of CQL row he wants returned.
        // We want to have getRangeSlice to count the number of columns, not the number of keys.
        AbstractBounds keyBounds = restrictions.getPartitionKeyBounds(options);
        if (keyBounds == null)
            return ReadQuery.empty(table);

        ReadQuery command =
            PartitionRangeReadQuery.create(table, nowInSec, columnFilter, rowFilter, limit, new DataRange(keyBounds, clusteringIndexFilter));

        // If there's a secondary index that the command can use, have it validate the request parameters.
        command.maybeValidateIndex();

        return command;
    }

    private ClusteringIndexFilter makeClusteringIndexFilter(QueryOptions options, ClientState state, ColumnFilter columnFilter)
    {
        if (parameters.isDistinct)
        {
            // We need to be able to distinguish between partition having live rows and those that don't. But
            // doing so is not trivial since "having a live row" depends potentially on
            //   1) when the query is performed, due to TTLs
            //   2) how thing reconcile together between different nodes
            // so that it's hard to really optimize properly internally. So to keep it simple, we simply query
            // for the first row of the partition and hence uses Slices.ALL. We'll limit it to the first live
            // row however in getLimit().
            return new ClusteringIndexSliceFilter(Slices.ALL, false);
        }

        if (restrictions.isColumnRange())
        {
            Slices slices = makeSlices(options);
            if (slices == Slices.NONE && !selection.containsStaticColumns())
                return null;

            return new ClusteringIndexSliceFilter(slices, isReversed);
        }

        NavigableSet> clusterings = getRequestedRows(options, state);
        // We can have no clusterings if either we're only selecting the static columns, or if we have
        // a 'IN ()' for clusterings. In that case, we still want to query if some static columns are
        // queried. But we're fine otherwise.
        if (clusterings.isEmpty() && columnFilter.fetchedColumns().statics.isEmpty())
            return null;

        return new ClusteringIndexNamesFilter(clusterings, isReversed);
    }

    @VisibleForTesting
    public Slices makeSlices(QueryOptions options)
    throws InvalidRequestException
    {
        SortedSet> startBounds = restrictions.getClusteringColumnsBounds(Bound.START, options);
        SortedSet> endBounds = restrictions.getClusteringColumnsBounds(Bound.END, options);
        assert startBounds.size() == endBounds.size();

        // The case where startBounds == 1 is common enough that it's worth optimizing
        if (startBounds.size() == 1)
        {
            ClusteringBound start = startBounds.first();
            ClusteringBound end = endBounds.first();
            return Slice.isEmpty(table.comparator, start, end)
                 ? Slices.NONE
                 : Slices.with(table.comparator, Slice.make(start, end));
        }

        Slices.Builder builder = new Slices.Builder(table.comparator, startBounds.size());
        Iterator> startIter = startBounds.iterator();
        Iterator> endIter = endBounds.iterator();
        while (startIter.hasNext() && endIter.hasNext())
        {
            ClusteringBound start = startIter.next();
            ClusteringBound end = endIter.next();

            // Ignore slices that are nonsensical
            if (Slice.isEmpty(table.comparator, start, end))
                continue;

            builder.add(start, end);
        }

        return builder.build();
    }

    private DataLimits getDataLimits(int userLimit,
                                     int perPartitionLimit,
                                     int pageSize,
                                     AggregationSpecification aggregationSpec)
    {
        int cqlRowLimit = DataLimits.NO_LIMIT;
        int cqlPerPartitionLimit = DataLimits.NO_LIMIT;

        // If we do post ordering we need to get all the results sorted before we can trim them.
        if (aggregationSpec != AggregationSpecification.AGGREGATE_EVERYTHING)
        {
            // If we aren't need post-query ordering but we are doing index ordering (currently ANN only) then
            // we do need to use the user limit.
            if (!needsPostQueryOrdering() || needIndexOrdering())
                cqlRowLimit = userLimit;
            cqlPerPartitionLimit = perPartitionLimit;
        }

        // Group by and aggregation queries will always be paged internally to avoid OOM.
        // If the user provided a pageSize we'll use that to page internally (because why not), otherwise we use our default
        if (pageSize <= 0)
            pageSize = DEFAULT_PAGE_SIZE;

        // Aggregation queries work fine on top of the group by paging but to maintain
        // backward compatibility we need to use the old way.
        if (aggregationSpec != null && aggregationSpec != AggregationSpecification.AGGREGATE_EVERYTHING)
        {
            if (parameters.isDistinct)
                return DataLimits.distinctLimits(cqlRowLimit);

            return DataLimits.groupByLimits(cqlRowLimit,
                                            cqlPerPartitionLimit,
                                            pageSize,
                                            aggregationSpec);
        }

        if (parameters.isDistinct)
            return cqlRowLimit == DataLimits.NO_LIMIT ? DataLimits.DISTINCT_NONE : DataLimits.distinctLimits(cqlRowLimit);

        return DataLimits.cqlLimits(cqlRowLimit, cqlPerPartitionLimit);
    }

    /**
     * Returns the limit specified by the user.
     * May be used by custom QueryHandler implementations
     *
     * @return the limit specified by the user or DataLimits.NO_LIMIT if no value
     * as been specified.
     */
    public int getLimit(QueryOptions options)
    {
        return getLimit(limit, options);
    }

    /**
     * Returns the per partition limit specified by the user.
     * May be used by custom QueryHandler implementations
     *
     * @return the per partition limit specified by the user or DataLimits.NO_LIMIT if no value
     * as been specified.
     */
    public int getPerPartitionLimit(QueryOptions options)
    {
        return getLimit(perPartitionLimit, options);
    }

    private int getLimit(Term limit, QueryOptions options)
    {
        int userLimit = DataLimits.NO_LIMIT;

        if (limit != null)
        {
            ByteBuffer b = checkNotNull(limit.bindAndGet(options), "Invalid null value of limit");
            // treat UNSET limit value as 'unlimited'
            if (b != UNSET_BYTE_BUFFER)
            {
                try
                {
                    Int32Type.instance.validate(b);
                    userLimit = Int32Type.instance.compose(b);
                    checkTrue(userLimit > 0, "LIMIT must be strictly positive");
                }
                catch (MarshalException e)
                {
                    throw new InvalidRequestException("Invalid limit value");
                }
            }
        }
        return userLimit;
    }

    private NavigableSet> getRequestedRows(QueryOptions options, ClientState state) throws InvalidRequestException
    {
        // Note: getRequestedColumns don't handle static columns, but due to CASSANDRA-5762
        // we always do a slice for CQL3 tables, so it's ok to ignore them here
        assert !restrictions.isColumnRange();
        return restrictions.getClusteringColumns(options, state);
    }

    /**
     * May be used by custom QueryHandler implementations
     */
    public RowFilter getRowFilter(QueryOptions options) throws InvalidRequestException
    {
        IndexRegistry indexRegistry = IndexRegistry.obtain(table);
        return restrictions.getRowFilter(indexRegistry, options);
    }

    private ResultSet process(PartitionIterator partitions,
                              QueryOptions options,
                              Selectors selectors,
                              long nowInSec,
                              int userLimit,
                              AggregationSpecification aggregationSpec,
                              boolean unmask) throws InvalidRequestException
    {
        GroupMaker groupMaker = aggregationSpec == null ? null : aggregationSpec.newGroupMaker();
        ResultSetBuilder result = new ResultSetBuilder(getResultMetadata(), selectors, unmask, groupMaker);

        while (partitions.hasNext())
        {
            try (RowIterator partition = partitions.next())
            {
                processPartition(partition, options, result, nowInSec);
            }
        }

        ResultSet cqlRows = result.build();
        maybeWarn(result, options);

        orderResults(cqlRows, options);

        cqlRows.trim(userLimit);

        return cqlRows;
    }

    public static ByteBuffer[] getComponents(TableMetadata metadata, DecoratedKey dk)
    {
        ByteBuffer key = dk.getKey();
        if (metadata.partitionKeyColumns().size() == 1)
            return new ByteBuffer[]{ key };
        if (metadata.partitionKeyType instanceof CompositeType)
        {
            return ((CompositeType)metadata.partitionKeyType).split(key);
        }
        else
        {
            return new ByteBuffer[]{ key };
        }
    }

    private void maybeWarn(ResultSetBuilder result, QueryOptions options)
    {
        if (!options.isReadThresholdsEnabled())
            return;
        ColumnFamilyStore store = cfs();
        if (store != null)
            store.metric.coordinatorReadSize.update(result.getSize());
        if (result.shouldWarn(options.getCoordinatorReadSizeWarnThresholdBytes()))
        {
            String msg = String.format("Read on table %s has exceeded the size warning threshold of %,d bytes", table, options.getCoordinatorReadSizeWarnThresholdBytes());
            ClientState state = ClientState.forInternalCalls();
            ClientWarn.instance.warn(msg + " with " + loggableTokens(options, state));
            logger.warn("{} with query {}", msg, asCQL(options, state));
            if (store != null)
                store.metric.coordinatorReadSizeWarnings.mark();
        }
    }

    private void maybeFail(ResultSetBuilder result, QueryOptions options)
    {
        if (!options.isReadThresholdsEnabled())
            return;
        if (result.shouldReject(options.getCoordinatorReadSizeAbortThresholdBytes()))
        {
            String msg = String.format("Read on table %s has exceeded the size failure threshold of %,d bytes", table, options.getCoordinatorReadSizeAbortThresholdBytes());
            ClientState state = ClientState.forInternalCalls();
            String clientMsg = msg + " with " + loggableTokens(options, state);
            ClientWarn.instance.warn(clientMsg);
            logger.warn("{} with query {}", msg, asCQL(options, state));
            ColumnFamilyStore store = cfs();
            if (store != null)
            {
                store.metric.coordinatorReadSizeAborts.mark();
                store.metric.coordinatorReadSize.update(result.getSize());
            }
            // read errors require blockFor and recieved (its in the protocol message), but this isn't known;
            // to work around this, treat the coordinator as the only response we care about and mark it failed
            ReadSizeAbortException exception = new ReadSizeAbortException(clientMsg, options.getConsistency(), 0, 1, true,
                                                                          ImmutableMap.of(FBUtilities.getBroadcastAddressAndPort(), RequestFailureReason.READ_SIZE));
            StorageProxy.recordReadRegularAbort(options.getConsistency(), exception);
            throw exception;
        }
    }

    private ColumnFamilyStore cfs()
    {
        return Schema.instance.getColumnFamilyStoreInstance(table.id);
    }

    // Used by ModificationStatement for CAS operations
    public void processPartition(RowIterator partition, QueryOptions options, ResultSetBuilder result, long nowInSec)
    throws InvalidRequestException
    {
        maybeFail(result, options);
        ProtocolVersion protocolVersion = options.getProtocolVersion();

        ByteBuffer[] keyComponents = getComponents(table, partition.partitionKey());

        Row staticRow = partition.staticRow();
        // If there is no rows, we include the static content if we should and we're done.
        if (!partition.hasNext())
        {
            if (!staticRow.isEmpty() && restrictions.returnStaticContentOnPartitionWithNoRows())
            {
                result.newRow(protocolVersion, partition.partitionKey(), staticRow.clustering(), selection.getColumns());
                maybeFail(result, options);
                for (ColumnMetadata def : selection.getColumns())
                {
                    switch (def.kind)
                    {
                        case PARTITION_KEY:
                            result.add(keyComponents[def.position()]);
                            break;
                        case STATIC:
                            result.add(partition.staticRow().getColumnData(def), nowInSec);
                            break;
                        default:
                            result.add((ByteBuffer)null);
                    }
                }
            }
            return;
        }

        while (partition.hasNext())
        {
            Row row = partition.next();
            result.newRow(protocolVersion, partition.partitionKey(), row.clustering(), selection.getColumns());

            // reads aren't failed as soon the size exceeds the failure threshold, they're failed once the failure
            // threshold has been exceeded and we start adding more data. We're slightly more permissive to avoid
            // cases where a row can never be read. Since we only warn/fail after entire rows are read, this will
            // still allow the entire dataset to be read with LIMIT 1 queries, even if every row is oversized
            maybeFail(result, options);

            // Respect selection order
            for (ColumnMetadata def : selection.getColumns())
            {
                switch (def.kind)
                {
                    case PARTITION_KEY:
                        result.add(keyComponents[def.position()]);
                        break;
                    case CLUSTERING:
                        result.add(row.clustering().bufferAt(def.position()));
                        break;
                    case REGULAR:
                        result.add(row.getColumnData(def), nowInSec);
                        break;
                    case STATIC:
                        result.add(staticRow.getColumnData(def), nowInSec);
                        break;
                }
            }
        }
    }

    private boolean needsPostQueryOrdering()
    {
        // We need post-query ordering only for queries with IN on the partition key and an ORDER BY or index restriction reordering
        return restrictions.keyIsInRelation() && !parameters.orderings.isEmpty() || needIndexOrdering();
    }

    private boolean needIndexOrdering()
    {
        return orderingComparator != null && orderingComparator.indexOrdering();
    }

    /**
     * Orders results when multiple keys are selected (using IN).
     * 
     * In the case of ANN ordering the rows are first ordered in index column order and then by primary key.
     */
    private void orderResults(ResultSet cqlRows, QueryOptions options)
    {
        if (cqlRows.size() == 0 || !needsPostQueryOrdering())
            return;

        Comparator> comparator = orderingComparator.prepareFor(table, getRowFilter(options), options);
        if (comparator != null)
            cqlRows.rows.sort(comparator);
    }

    public static class RawStatement extends QualifiedStatement
    {
        public final Parameters parameters;
        public final List selectClause;
        public final WhereClause whereClause;
        public final Term.Raw limit;
        public final Term.Raw perPartitionLimit;
        private ClientState state;

        public RawStatement(QualifiedName cfName,
                            Parameters parameters,
                            List selectClause,
                            WhereClause whereClause,
                            Term.Raw limit,
                            Term.Raw perPartitionLimit)
        {
            super(cfName);
            this.parameters = parameters;
            this.selectClause = selectClause;
            this.whereClause = whereClause;
            this.limit = limit;
            this.perPartitionLimit = perPartitionLimit;
        }

        public SelectStatement prepare(ClientState state)
        {
            // Cache locally for use by Guardrails
            this.state = state;
            return prepare(state, false);
        }

        public SelectStatement prepare(ClientState state, boolean forView) throws InvalidRequestException
        {
            TableMetadata table = Schema.instance.validateTable(keyspace(), name());

            List selectables = RawSelector.toSelectables(selectClause, table);
            boolean containsOnlyStaticColumns = selectOnlyStaticColumns(table, selectables);

            List orderings = getOrderings(table);
            StatementRestrictions restrictions = prepareRestrictions(state, table, bindVariables, orderings, containsOnlyStaticColumns, forView);

            // If we order post-query, the sorted column needs to be in the ResultSet for sorting,
            // even if we don't ultimately ship them to the client (CASSANDRA-4911).
            Map orderingColumns = getOrderingColumns(orderings);
            Set resultSetOrderingColumns = getResultSetOrdering(restrictions, orderingColumns);

            Selection selection = prepareSelection(table,
                                                   selectables,
                                                   bindVariables,
                                                   resultSetOrderingColumns,
                                                   restrictions);

            if (parameters.isDistinct)
            {
                checkNull(perPartitionLimit, "PER PARTITION LIMIT is not allowed with SELECT DISTINCT queries");
                validateDistinctSelection(table, selection, restrictions);
            }

            AggregationSpecification.Factory aggregationSpecFactory = getAggregationSpecFactory(table,
                                                                                                bindVariables,
                                                                                                selection,
                                                                                                restrictions,
                                                                                                parameters.isDistinct);

            checkFalse(aggregationSpecFactory == AggregationSpecification.AGGREGATE_EVERYTHING_FACTORY
                       && perPartitionLimit != null,
                       "PER PARTITION LIMIT is not allowed with aggregate queries.");

            ColumnComparator> orderingComparator = null;
            boolean isReversed = false;

            if (!orderingColumns.isEmpty())
            {
                assert !forView;
                verifyOrderingIsAllowed(restrictions, orderingColumns);
                orderingComparator = getOrderingComparator(selection, restrictions, orderingColumns);
                isReversed = isReversed(table, orderingColumns, restrictions);
                if (isReversed && orderingComparator != null)
                    orderingComparator = orderingComparator.reverse();
           }

            checkNeedsFiltering(table, restrictions);

            return new SelectStatement(table,
                                       bindVariables,
                                       parameters,
                                       selection,
                                       restrictions,
                                       isReversed,
                                       aggregationSpecFactory,
                                       orderingComparator,
                                       prepareLimit(bindVariables, limit, keyspace(), limitReceiver()),
                                       prepareLimit(bindVariables, perPartitionLimit, keyspace(), perPartitionLimitReceiver()));
        }

        private Set getResultSetOrdering(StatementRestrictions restrictions, Map orderingColumns)
        {
            if (restrictions.keyIsInRelation() || orderingColumns.values().stream().anyMatch(o -> o.expression.hasNonClusteredOrdering()))
                return orderingColumns.keySet();
            return Collections.emptySet();
        }

        private Selection prepareSelection(TableMetadata table,
                                           List selectables,
                                           VariableSpecifications boundNames,
                                           Set resultSetOrderingColumns,
                                           StatementRestrictions restrictions)
        {
            boolean hasGroupBy = !parameters.groups.isEmpty();

            if (hasGroupBy)
                Guardrails.groupByEnabled.ensureEnabled(state);

            boolean isJson = parameters.isJson;
            boolean returnStaticContentOnPartitionWithNoRows = restrictions.returnStaticContentOnPartitionWithNoRows();

            if (selectables.isEmpty()) // wildcard query
            {
                return hasGroupBy || table.hasMaskedColumns()
                       ? Selection.wildcardWithGroupByOrMaskedColumns(table, boundNames, resultSetOrderingColumns, isJson, returnStaticContentOnPartitionWithNoRows)
                       : Selection.wildcard(table, isJson, returnStaticContentOnPartitionWithNoRows);
            }

            return Selection.fromSelectors(table,
                                           selectables,
                                           boundNames,
                                           resultSetOrderingColumns,
                                           restrictions.nonPKRestrictedColumns(false),
                                           hasGroupBy,
                                           isJson,
                                           returnStaticContentOnPartitionWithNoRows);
        }

        /**
         * Checks if the specified selectables select only partition key columns or static columns
         *
         * @param table the table metadata
         * @param selectables the selectables to check
         * @return {@code true} if the specified selectables select only partition key columns or static columns,
         * {@code false} otherwise.
         */
        private boolean selectOnlyStaticColumns(TableMetadata table, List selectables)
        {
            if (table.isStaticCompactTable())
                return false;

            if (!table.hasStaticColumns() || selectables.isEmpty())
                return false;

            return Selectable.selectColumns(selectables, (column) -> column.isStatic())
                    && !Selectable.selectColumns(selectables, (column) -> !column.isPartitionKey() && !column.isStatic());
        }

        /**
         * Returns the columns in the same order as given list used to order the data.
         * @return the columns in the same order as given list used to order the data.
         */
        private Map getOrderingColumns(List orderings)
        {
            if (orderings.isEmpty())
                return Collections.emptyMap();

            Map orderingColumns = new LinkedHashMap<>();
            for (Ordering ordering : orderings)
            {
                ColumnMetadata column = ordering.expression.getColumn();
                orderingColumns.put(column, ordering);
            }
            return orderingColumns;
        }

        private List getOrderings(TableMetadata table)
        {
            return parameters.orderings.stream()
                                       .map(o -> o.bind(table, bindVariables))
                                       .collect(Collectors.toList());
        }

        /**
         * Prepares the restrictions.
         *
         * @param metadata the column family meta data
         * @param boundNames the variable specifications
         * @param selectsOnlyStaticColumns {@code true} if the query select only static columns, {@code false} otherwise.
         * @return the restrictions
         * @throws InvalidRequestException if a problem occurs while building the restrictions
         */
        private StatementRestrictions prepareRestrictions(ClientState state,
                                                          TableMetadata metadata,
                                                          VariableSpecifications boundNames,
                                                          List orderings,
                                                          boolean selectsOnlyStaticColumns,
                                                          boolean forView) throws InvalidRequestException
        {
            return new StatementRestrictions(state,
                                             StatementType.SELECT,
                                             metadata,
                                             whereClause,
                                             boundNames,
                                             orderings,
                                             selectsOnlyStaticColumns,
                                             parameters.allowFiltering,
                                             forView);
        }

        /** Returns a Term for the limit or null if no limit is set */
        private Term prepareLimit(VariableSpecifications boundNames, Term.Raw limit,
                                  String keyspace, ColumnSpecification limitReceiver) throws InvalidRequestException
        {
            if (limit == null)
                return null;

            Term prepLimit = limit.prepare(keyspace, limitReceiver);
            prepLimit.collectMarkerSpecification(boundNames);
            return prepLimit;
        }

        private static void verifyOrderingIsAllowed(StatementRestrictions restrictions, Map orderingColumns) throws InvalidRequestException
        {
            if (orderingColumns.values().stream().anyMatch(o -> o.expression.hasNonClusteredOrdering()))
                return;
            checkFalse(restrictions.usesSecondaryIndexing(), "ORDER BY with 2ndary indexes is not supported, except for ANN queries.");
            checkFalse(restrictions.isKeyRange(), "ORDER BY is only supported when the partition key is restricted by an EQ or an IN.");
        }

        private static void validateDistinctSelection(TableMetadata metadata,
                                                      Selection selection,
                                                      StatementRestrictions restrictions)
                                                      throws InvalidRequestException
        {
            checkFalse(restrictions.hasClusteringColumnsRestrictions() ||
                       (restrictions.hasNonPrimaryKeyRestrictions() && !restrictions.nonPKRestrictedColumns(true).stream().allMatch(ColumnMetadata::isStatic)),
                       "SELECT DISTINCT with WHERE clause only supports restriction by partition key and/or static columns.");

            Collection requestedColumns = selection.getColumns();
            for (ColumnMetadata def : requestedColumns)
                checkFalse(!def.isPartitionKey() && !def.isStatic(),
                           "SELECT DISTINCT queries must only request partition key columns and/or static columns (not %s)",
                           def.name);

            // If it's a key range, we require that all partition key columns are selected so we don't have to bother
            // with post-query grouping.
            if (!restrictions.isKeyRange())
                return;

            for (ColumnMetadata def : metadata.partitionKeyColumns())
                checkTrue(requestedColumns.contains(def),
                          "SELECT DISTINCT queries must request all the partition key columns (missing %s)", def.name);
        }

        /**
         * Creates the {@code AggregationSpecification.Factory} used to make the aggregates.
         *
         * @param metadata the table metadata
         * @param selection the selection
         * @param restrictions the restrictions
         * @param isDistinct true if the query is a DISTINCT one. 
         * @return the {@code AggregationSpecification.Factory} used to make the aggregates
         */
        private AggregationSpecification.Factory getAggregationSpecFactory(TableMetadata metadata,
                                                                           VariableSpecifications boundNames,
                                                                           Selection selection,
                                                                           StatementRestrictions restrictions,
                                                                           boolean isDistinct)
        {
            if (parameters.groups.isEmpty())
                return selection.isAggregate() ? AggregationSpecification.AGGREGATE_EVERYTHING_FACTORY
                                               : null;

            int clusteringPrefixSize = 0;

            Iterator pkColumns = metadata.primaryKeyColumns().iterator();
            List columns = null;
            Selector.Factory selectorFactory = null;
            for (Selectable.Raw raw : parameters.groups)
            {
                Selectable selectable = raw.prepare(metadata);
                ColumnMetadata def = null;

                // For GROUP BY we only allow column names or functions at the higher level.
                if (selectable instanceof WithFunction)
                {
                    WithFunction withFunction = (WithFunction) selectable;
                    validateGroupByFunction(withFunction);
                    columns = new ArrayList();
                    selectorFactory = selectable.newSelectorFactory(metadata, null, columns, boundNames);
                    checkFalse(columns.isEmpty(), "GROUP BY functions must have one clustering column name as parameter");
                    if (columns.size() > 1)
                        throw invalidRequest("GROUP BY functions accept only one clustering column as parameter, got: %s",
                                             columns.stream().map(c -> c.name.toCQLString()).collect(Collectors.joining(",")));

                    def = columns.get(0);
                    checkTrue(def.isClusteringColumn(),
                              "Group by functions are only supported on clustering columns, got %s", def.name);
                }
                else
                {
                    def = (ColumnMetadata) selectable;
                    checkTrue(def.isPartitionKey() || def.isClusteringColumn(),
                              "Group by is currently only supported on the columns of the PRIMARY KEY, got %s", def.name);
                    checkNull(selectorFactory, "Functions are only supported on the last element of the GROUP BY clause");
                }

                while (true)
                {
                    checkTrue(pkColumns.hasNext(),
                              "Group by currently only support groups of columns following their declared order in the PRIMARY KEY");

                    ColumnMetadata pkColumn = pkColumns.next();

                    if (pkColumn.isClusteringColumn())
                        clusteringPrefixSize++;

                    // As we do not support grouping on only part of the partition key, we only need to know
                    // which clustering columns need to be used to build the groups
                    if (pkColumn.equals(def))
                        break;

                    checkTrue(restrictions.isColumnRestrictedByEq(pkColumn),
                              "Group by currently only support groups of columns following their declared order in the PRIMARY KEY");
                }
            }

            checkFalse(pkColumns.hasNext() && pkColumns.next().isPartitionKey(),
                       "Group by is not supported on only a part of the partition key");

            checkFalse(clusteringPrefixSize > 0 && isDistinct,
                       "Grouping on clustering columns is not allowed for SELECT DISTINCT queries");

            return selectorFactory == null ? AggregationSpecification.aggregatePkPrefixFactory(metadata.comparator, clusteringPrefixSize)
                                           : AggregationSpecification.aggregatePkPrefixFactoryWithSelector(metadata.comparator,
                                                                                                           clusteringPrefixSize,
                                                                                                           selectorFactory,
                                                                                                           columns);
        }

        /**
         * Checks that the function used is a valid one for the GROUP BY clause.
         *
         * @param withFunction the {@code Selectable} from which the function must be retrieved.
         * @return the monotonic scalar function that must be used for determining the groups.
         */
        private void validateGroupByFunction(WithFunction withFunction)
        {
            Function f = withFunction.function;
            checkFalse(f.isAggregate(), "Aggregate functions are not supported within the GROUP BY clause, got: %s", f.name());
        }

        private ColumnComparator> getOrderingComparator(Selection selection,
                                                                         StatementRestrictions restrictions,
                                                                         Map orderingColumns) throws InvalidRequestException
        {
            for (Map.Entry e : orderingColumns.entrySet())
            {
                if (e.getValue().expression.hasNonClusteredOrdering())
                {
                    Preconditions.checkState(orderingColumns.size() == 1);
                    return new IndexColumnComparator(e.getValue().expression.toRestriction(), selection.getOrderingIndex(e.getKey()));
                }
            }

            if (!restrictions.keyIsInRelation())
                return null;

            List idToSort = new ArrayList<>(orderingColumns.size());
            List> sorters = new ArrayList<>(orderingColumns.size());

            for (ColumnMetadata orderingColumn : orderingColumns.keySet())
            {
                idToSort.add(selection.getOrderingIndex(orderingColumn));
                sorters.add(orderingColumn.type);
            }
            return idToSort.size() == 1 ? new SingleColumnComparator(idToSort.get(0), sorters.get(0))
                    : new CompositeComparator(sorters, idToSort);
        }

        private boolean isReversed(TableMetadata table, Map orderingColumns, StatementRestrictions restrictions) throws InvalidRequestException
        {
            if (orderingColumns.values().stream().anyMatch(o -> o.expression.hasNonClusteredOrdering()))
                return false;
            Boolean[] reversedMap = new Boolean[table.clusteringColumns().size()];
            int i = 0;
            for (var entry : orderingColumns.entrySet())
            {
                ColumnMetadata def = entry.getKey();
                Ordering ordering = entry.getValue();
                boolean reversed = ordering.direction == Ordering.Direction.DESC;

                checkTrue(def.isClusteringColumn(),
                          "Order by is currently only supported on the clustered columns of the PRIMARY KEY, got %s", def.name);

                while (i != def.position())
                {
                    checkTrue(restrictions.isColumnRestrictedByEq(table.clusteringColumns().get(i++)),
                              "Order by currently only supports the ordering of columns following their declared order in the PRIMARY KEY");
                }
                i++;
                reversedMap[def.position()] = (reversed != def.isReversedType());
            }

            // Check that all boolean in reversedMap, if set, agrees
            Boolean isReversed = null;
            for (Boolean b : reversedMap)
            {
                // Column on which order is specified can be in any order
                if (b == null)
                    continue;

                if (isReversed == null)
                {
                    isReversed = b;
                    continue;
                }
                checkTrue(isReversed.equals(b), "Unsupported order by relation");
            }
            assert isReversed != null;
            return isReversed;
        }

        /** If ALLOW FILTERING was not specified, this verifies that it is not needed */
        private void checkNeedsFiltering(TableMetadata table, StatementRestrictions restrictions) throws InvalidRequestException
        {
            // non-key-range non-indexed queries cannot involve filtering underneath
            if (!parameters.allowFiltering && (restrictions.isKeyRange() || restrictions.usesSecondaryIndexing()))
            {
                // We will potentially filter data if the row filter is not the identity and there isn't any index group
                // supporting all the expressions in the filter.
                if (restrictions.requiresAllowFilteringIfNotSpecified())
                    checkFalse(restrictions.needFiltering(table), StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE);
            }
        }

        private ColumnSpecification limitReceiver()
        {
            return new ColumnSpecification(keyspace(), name(), new ColumnIdentifier("[limit]", true), Int32Type.instance);
        }

        private ColumnSpecification perPartitionLimitReceiver()
        {
            return new ColumnSpecification(keyspace(), name(), new ColumnIdentifier("[per_partition_limit]", true), Int32Type.instance);
        }

        @Override
        public String toString()
        {
            return MoreObjects.toStringHelper(this)
                              .add("name", qualifiedName)
                              .add("selectClause", selectClause)
                              .add("whereClause", whereClause)
                              .add("isDistinct", parameters.isDistinct)
                              .toString();
        }
    }

    public static class Parameters
    {
        // Public because CASSANDRA-9858
        public final List orderings;
        public final List groups;
        public final boolean isDistinct;
        public final boolean allowFiltering;
        public final boolean isJson;

        public Parameters(List orderings,
                          List groups,
                          boolean isDistinct,
                          boolean allowFiltering,
                          boolean isJson)
        {
            this.orderings = orderings;
            this.groups = groups;
            this.isDistinct = isDistinct;
            this.allowFiltering = allowFiltering;
            this.isJson = isJson;
        }
    }

    private static abstract class ColumnComparator implements Comparator
    {
        protected final int compare(Comparator comparator, ByteBuffer aValue, ByteBuffer bValue)
        {
            if (aValue == null)
                return bValue == null ? 0 : -1;

            return bValue == null ? 1 : comparator.compare(aValue, bValue);
        }

        public ColumnComparator reverse()
        {
            return new ReversedColumnComparator<>(this);
        }

        /**
         * @return true if ordering is performed by index
         */
        public boolean indexOrdering()
        {
            return false;
        }

        /**
         * Produces a prepared {@link ColumnComparator} for current table and query-options
         */
        public Comparator prepareFor(TableMetadata table, RowFilter rowFilter, QueryOptions options)
        {
            return this;
        }
    }

    private static class ReversedColumnComparator extends ColumnComparator
    {
        private final ColumnComparator wrapped;

        public ReversedColumnComparator(ColumnComparator wrapped)
        {
            this.wrapped = wrapped;
        }

        @Override
        public int compare(T o1, T o2)
        {
            return wrapped.compare(o2, o1);
        }
    }

    /**
     * Used in orderResults(...) method when single 'ORDER BY' condition where given
     */
    private static class SingleColumnComparator extends ColumnComparator>
    {
        private final int index;
        private final Comparator comparator;

        public SingleColumnComparator(int columnIndex, Comparator orderer)
        {
            index = columnIndex;
            comparator = orderer;
        }

        public int compare(List a, List b)
        {
            return compare(comparator, a.get(index), b.get(index));
        }
    }

    private static class IndexColumnComparator extends ColumnComparator>
    {
        private final SingleRestriction restriction;
        private final int columnIndex;

        public IndexColumnComparator(SingleRestriction restriction, int columnIndex)
        {
            this.restriction = restriction;
            this.columnIndex = columnIndex;
        }

        @Override
        public boolean indexOrdering()
        {
            return true;
        }

        @Override
        public Comparator> prepareFor(TableMetadata table, RowFilter rowFilter, QueryOptions options)
        {
            if (table.indexes.isEmpty() || rowFilter.isEmpty())
                return this;

            Index.QueryPlan indexQueryPlan = Keyspace.openAndGetStore(table).indexManager.getBestIndexQueryPlanFor(rowFilter);

            Index index = restriction.findSupportingIndexFromQueryPlan(indexQueryPlan);
            assert index != null;
            Comparator comparator = index.getPostQueryOrdering(restriction, options);
            return (a, b) -> compare(comparator, a.get(columnIndex), b.get(columnIndex));
        }

        @Override
        public int compare(List o1, List o2)
        {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Used in orderResults(...) method when multiple 'ORDER BY' conditions where given
     */
    private static class CompositeComparator extends ColumnComparator>
    {
        private final List> orderTypes;
        private final List positions;

        private CompositeComparator(List> orderTypes, List positions)
        {
            this.orderTypes = orderTypes;
            this.positions = positions;
        }

        public int compare(List a, List b)
        {
            for (int i = 0; i < positions.size(); i++)
            {
                Comparator type = orderTypes.get(i);
                int columnPos = positions.get(i);

                int comparison = compare(type, a.get(columnPos), b.get(columnPos));

                if (comparison != 0)
                    return comparison;
            }

            return 0;
        }
    }
    
    @Override
    public String toString()
    {
        return ToStringBuilder.reflectionToString(this, ToStringStyle.SHORT_PREFIX_STYLE);
    }

    private String loggableTokens(QueryOptions options, ClientState state)
    {
        if (restrictions.isKeyRange() || restrictions.usesSecondaryIndexing())
        {
            AbstractBounds bounds = restrictions.getPartitionKeyBounds(options);
            return "token range: " + (bounds.inclusiveLeft() ? '[' : '(') +
                   bounds.left.getToken().toString() + ", " +
                   bounds.right.getToken().toString() +
                   (bounds.inclusiveRight() ? ']' : ')');
        }
        else
        {
            Collection keys = restrictions.getPartitionKeys(options, state);
            if (keys.size() == 1)
            {
                return "token: " + table.partitioner.getToken(Iterables.getOnlyElement(keys)).toString();
            }
            else
            {
                StringBuilder sb = new StringBuilder("tokens: [");
                boolean isFirst = true;
                for (ByteBuffer key : keys)
                {
                    if (!isFirst) sb.append(", ");
                    sb.append(table.partitioner.getToken(key).toString());
                    isFirst = false;
                }
                return sb.append(']').toString();
            }
        }
    }

    private String asCQL(QueryOptions options, ClientState state)
    {
        ColumnFilter columnFilter = selection.newSelectors(options).getColumnFilter();
        StringBuilder sb = new StringBuilder();

        sb.append("SELECT ").append(queriedColumns().toCQLString());
        sb.append(" FROM ").append(table.keyspace).append('.').append(table.name);
        if (restrictions.isKeyRange() || restrictions.usesSecondaryIndexing())
        {
            // partition range
            ClusteringIndexFilter clusteringIndexFilter = makeClusteringIndexFilter(options, state, columnFilter);
            if (clusteringIndexFilter == null)
                return "EMPTY";

            RowFilter rowFilter = getRowFilter(options);

            // The LIMIT provided by the user is the number of CQL row he wants returned.
            // We want to have getRangeSlice to count the number of columns, not the number of keys.
            AbstractBounds keyBounds = restrictions.getPartitionKeyBounds(options);
            if (keyBounds == null)
                return "EMPTY";

            DataRange dataRange = new DataRange(keyBounds, clusteringIndexFilter);

            if (!dataRange.isUnrestricted(table) || !rowFilter.isEmpty())
            {
                sb.append(" WHERE ");
                // We put the row filter first because the data range can end by "ORDER BY"
                if (!rowFilter.isEmpty())
                {
                    sb.append(rowFilter);
                    if (!dataRange.isUnrestricted(table))
                        sb.append(" AND ");
                }
                if (!dataRange.isUnrestricted(table))
                    sb.append(dataRange.toCQLString(table, rowFilter));
            }
        }
        else
        {
            // single partition
            Collection keys = restrictions.getPartitionKeys(options, state);
            if (keys.isEmpty())
                return "EMPTY";
            ClusteringIndexFilter filter = makeClusteringIndexFilter(options, state, columnFilter);
            if (filter == null)
                return "EMPTY";

            sb.append(" WHERE ");


            boolean compoundPk = table.partitionKeyColumns().size() > 1;
            if (compoundPk) sb.append('(');
            sb.append(ColumnMetadata.toCQLString(table.partitionKeyColumns()));
            if (compoundPk) sb.append(')');
            if (keys.size() == 1)
            {
                sb.append(" = ");
                if (compoundPk) sb.append('(');
                DataRange.appendKeyString(sb, table.partitionKeyType, Iterables.getOnlyElement(keys));
                if (compoundPk) sb.append(')');
            }
            else
            {
                sb.append(" IN (");
                boolean first = true;
                for (ByteBuffer key : keys)
                {
                    if (!first)
                        sb.append(", ");

                    if (compoundPk) sb.append('(');
                    DataRange.appendKeyString(sb, table.partitionKeyType, key);
                    if (compoundPk) sb.append(')');
                    first = false;
                }

                sb.append(')');
            }

            RowFilter rowFilter = getRowFilter(options);
            if (!rowFilter.isEmpty())
                sb.append(" AND ").append(rowFilter);

            String filterString = filter.toCQLString(table, rowFilter);
            if (!filterString.isEmpty())
                sb.append(" AND ").append(filterString);
        }

        DataLimits limits = getDataLimits(getLimit(options), getPerPartitionLimit(options), options.getPageSize(), getAggregationSpec(options));
        if (limits != DataLimits.NONE)
            sb.append(' ').append(limits);
        return sb.toString();
    }
}