All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bazaarvoice.emodb.table.db.astyanax.CQLStashTableDAO Maven / Gradle / Ivy

package com.bazaarvoice.emodb.table.db.astyanax;

import com.bazaarvoice.emodb.common.dropwizard.guice.SystemTablePlacement;
import com.bazaarvoice.emodb.common.json.JsonHelper;
import com.bazaarvoice.emodb.datacenter.api.DataCenters;
import com.datastax.driver.core.BatchStatement;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.DataType;
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.querybuilder.QueryBuilder;
import com.datastax.driver.core.schemabuilder.SchemaBuilder;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.netflix.astyanax.model.ByteBufferRange;

import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static java.util.Objects.requireNonNull;

/**
 * This class is used to create and query a snapshot of tables for Stash.  For example, if a table is deleted, moved or
 * has its metadata altered mid-Stash the output should still include the content as it would have appeared at the beginning
 * of Stash.  It also provides an efficient API for querying which tables in that snapshot fall within a specific
 * token range.
 *
 * Since this class and {@link com.bazaarvoice.emodb.table.db.TableDAO} both provide low level access to tables here's an
 * explanation for why Stash-related table operations have been split from that class.  First, it's cleaner to separate
 * the concerns for Stash from the needs of general table management, which are quite complex in their own right.
 * Furthermore, TableDAO is used for both system of record and blob tables while Stash only applies to the former, so
 * keeping the Stash portion here reinforces this separation.  For these reasons TableDAO focuses on general table management
 * and delegates Stash operations to this class.  Consequently this class works exclusively with the serializable form of
 * tables, {@link TableJson}, leaving the domain knowledge of converting these back into tables to TableDAO.
 */
public class CQLStashTableDAO {

    // Name of the system table holding the Stash token range snapshot.  Protected and non-final,
    // presumably so tests or subclasses can substitute an alternate table name — TODO confirm.
    protected String STASH_TOKEN_RANGE_TABLE = "stash_token_range";
    // Clean up stash tables if they aren't explicitly cleaned after 3 days.  No Stash should take over 1 day
    // so this should provide ample buffer.
    protected int TTL = (int) TimeUnit.DAYS.toSeconds(3);

    // Column names for the stash token range table.  The table's primary key is
    // ((stash_id, data_center), placement, range_token, is_start_token); see ensureStashTokenRangeTableExists().
    private final static String STASH_ID_COLUMN = "stash_id";
    private final static String DATA_CENTER_COLUMN = "data_center";
    private final static String PLACEMENT_COLUMN = "placement";
    private final static String RANGE_TOKEN_COLUMN = "range_token";
    private final static String IS_START_TOKEN_COLUMN = "is_start_token";
    private final static String TABLE_JSON_COLUMN = "table_json";

    // Placement in which the system token-range table lives, and caches for resolving it to a CQL session.
    private final String _systemTablePlacement;
    private final PlacementCache _placementCache;
    private final DataCenters _dataCenters;
    // Volatile flag for double-checked locking in ensureStashTokenRangeTableExists()
    private volatile boolean _verifiedStashTokenRangeTableExists;

    /**
     * Creates the DAO.
     *
     * @param systemTablePlacement placement in which the stash token range system table lives
     * @param placementCache       resolves placements to keyspaces and CQL sessions
     * @param dataCenters          supplies the name of the local data center
     * @throws NullPointerException if any argument is null
     */
    @Inject
    public CQLStashTableDAO(@SystemTablePlacement String systemTablePlacement,
                            PlacementCache placementCache, DataCenters dataCenters) {
        this._systemTablePlacement = requireNonNull(systemTablePlacement, "systemTablePlacement");
        this._placementCache = requireNonNull(placementCache, "placementCache");
        this._dataCenters = requireNonNull(dataCenters, "dataCenters");
    }

    /**
     * Records the Stash snapshot rows for a single table.  For every shard of the table two rows are written: one
     * marking the inclusive start token of the shard and one marking (exclusively) the end token.  Each row carries
     * the table's metadata serialized as JSON so the table appears in Stash as it existed at snapshot time, even if
     * it is later dropped, moved, or altered mid-Stash.
     *
     * @param stashId     unique identifier for the Stash run being snapshotted
     * @param readStorage storage for the table whose shard token ranges are being recorded
     * @param tableJson   serializable form of the table's metadata at snapshot time
     */
    public void addTokenRangesForTable(String stashId, AstyanaxStorage readStorage, TableJson tableJson) {
        String placement = readStorage.getPlacementName();
        ensureStashTokenRangeTableExists();

        String tableInfo = JsonHelper.asJson(tableJson.getRawJson());
        // Hoisted out of the loop: the local data center name is the same for every row written below
        String dataCenter = _dataCenters.getSelf().getName();
        Session session = _placementCache.get(_systemTablePlacement).getKeyspace().getCqlSession();

        // Add two records for each shard for the table: one which identifies the start token for the shard, and
        // one that identifies (exclusively) the end token for the shard.  This will allow for efficient range queries
        // later on.

        Iterator<ByteBufferRange> tableTokenRanges = readStorage.scanIterator(null);
        // To prevent sending over-large batches split into groups of 8 ranges which results in 16 statements per batch
        Iterators.partition(tableTokenRanges, 8).forEachRemaining(ranges -> {
            BatchStatement batchStatement = new BatchStatement();
            for (ByteBufferRange range : ranges) {
                batchStatement.add(QueryBuilder.insertInto(STASH_TOKEN_RANGE_TABLE)
                        .value(STASH_ID_COLUMN, stashId)
                        .value(DATA_CENTER_COLUMN, dataCenter)
                        .value(PLACEMENT_COLUMN, placement)
                        .value(RANGE_TOKEN_COLUMN, range.getStart())
                        .value(IS_START_TOKEN_COLUMN, true)
                        .value(TABLE_JSON_COLUMN, tableInfo));

                batchStatement.add(QueryBuilder.insertInto(STASH_TOKEN_RANGE_TABLE)
                        .value(STASH_ID_COLUMN, stashId)
                        .value(DATA_CENTER_COLUMN, dataCenter)
                        .value(PLACEMENT_COLUMN, placement)
                        .value(RANGE_TOKEN_COLUMN, range.getEnd())
                        .value(IS_START_TOKEN_COLUMN, false)
                        .value(TABLE_JSON_COLUMN, tableInfo));
            }

            session.execute(batchStatement.setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM));
        });
    }

    /**
     * Returns the snapshot token ranges in the given placement which intersect the query range
     * [fromInclusive, toExclusive), along with the serialized table metadata associated with each range.
     *
     * @param stashId       identifier for the Stash run whose snapshot is queried
     * @param placement     placement whose token ranges are queried
     * @param fromInclusive inclusive lower bound of the query range
     * @param toExclusive   exclusive upper bound of the query range
     * @return lazy iterator of matching ranges; empty if no snapshot tables intersect the range
     */
    public Iterator<ProtoStashTokenRange> getTokenRangesBetween(String stashId, String placement,
                                                                ByteBuffer fromInclusive, ByteBuffer toExclusive) {
        ensureStashTokenRangeTableExists();

        // Because of the way the stash token range table is laid out the query range must cross the start or end token for a
        // shard in order to be included in the results.  If the entire query range is for the same table and shard then
        // querying the table naively won't return any results, even if it matches a table.
        //
        // To account for this first check if the query range is within a single shard, then perform the appropriate
        // query based on the result.
        if (fromSameShard(fromInclusive, toExclusive)) {
            return getTokenRangesBetweenIntraShard(stashId, placement, fromInclusive, toExclusive);
        } else {
            return getTokenRangesBetweenInterShard(stashId, placement, fromInclusive, toExclusive);
        }
    }

    /**
     * Two tokens are from the same shard if the following are both true:
     * 
    *
  1. Both tokens are at least 9 bytes long (1 shard + 8 table uuid bytes)
  2. *
  3. The shard and table uuid for both tokens are identical
  4. *
*/ private boolean fromSameShard(ByteBuffer fromInclusive, ByteBuffer toExclusive) { return fromInclusive.remaining() >= 9 && toExclusive.remaining() >= 9 && RowKeyUtils.getShardId(fromInclusive) == RowKeyUtils.getShardId(toExclusive) && RowKeyUtils.getTableUuid(fromInclusive) == RowKeyUtils.getTableUuid(toExclusive); } private Iterator getTokenRangesBetweenIntraShard(String stashId, String placement, ByteBuffer fromInclusive, ByteBuffer toExclusive) { // Since the range falls entirely within a single shard run a targeted query that only looks for the beginning // of that shard. ByteBuffer startToken = RowKeyUtils.getRowKeyRaw(RowKeyUtils.getShardId(fromInclusive), RowKeyUtils.getTableUuid(fromInclusive), new byte[0]); ResultSet resultSet = _placementCache.get(_systemTablePlacement) .getKeyspace() .getCqlSession() .execute( QueryBuilder.select(TABLE_JSON_COLUMN) .from(STASH_TOKEN_RANGE_TABLE) .where(QueryBuilder.eq(STASH_ID_COLUMN, stashId)) .and(QueryBuilder.eq(DATA_CENTER_COLUMN, _dataCenters.getSelf().getName())) .and(QueryBuilder.eq(PLACEMENT_COLUMN, placement)) .and(QueryBuilder.eq(RANGE_TOKEN_COLUMN, startToken)) .and(QueryBuilder.eq(IS_START_TOKEN_COLUMN, true)) .limit(1) .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM)); Row row = resultSet.one(); if (row == null) { return Collections.emptyIterator(); } TableJson tableJson = toTableJson(row.getString(0)); return Iterators.singletonIterator(new ProtoStashTokenRange(fromInclusive, toExclusive, tableJson)); } private Iterator getTokenRangesBetweenInterShard(String stashId, String placement, ByteBuffer fromInclusive, ByteBuffer toExclusive) { // Since the range crosses the boundary between at least two tables it is guaranteed that if any table has a shard // whose token range intersects the query range then by querying for all start and end tokens within that range // either the start token, end token, or both will be returned in the results. 
ResultSet resultSet = _placementCache.get(_systemTablePlacement) .getKeyspace() .getCqlSession() .execute( QueryBuilder.select(RANGE_TOKEN_COLUMN, IS_START_TOKEN_COLUMN, TABLE_JSON_COLUMN) .from(STASH_TOKEN_RANGE_TABLE) .where(QueryBuilder.eq(STASH_ID_COLUMN, stashId)) .and(QueryBuilder.eq(DATA_CENTER_COLUMN, _dataCenters.getSelf().getName())) .and(QueryBuilder.eq(PLACEMENT_COLUMN, placement)) .and(QueryBuilder.gte(RANGE_TOKEN_COLUMN, fromInclusive)) .and(QueryBuilder.lt(RANGE_TOKEN_COLUMN, toExclusive)) .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM) .setFetchSize(100)); final Iterator resultSetIterator = resultSet.iterator(); return new AbstractIterator() { TableJson currentTable; ByteBuffer currentTableStartToken; @Override protected ProtoStashTokenRange computeNext() { ProtoStashTokenRange range = null; while (range == null) { if (resultSetIterator.hasNext()) { Row row = resultSetIterator.next(); TableJson table = getTableJson(row); if (isStartToken(row)) { if (currentTable == null) { currentTable = table; currentTableStartToken = getToken(row); } else { throw new IllegalStateException("Overlapping table range start rows found"); } } else if (currentTable == null) { // We're starting in the middle of a table's token range. range = new ProtoStashTokenRange(fromInclusive, getToken(row), table); } else if (currentTable.getTable().equals(table.getTable())) { range = new ProtoStashTokenRange(currentTableStartToken, getToken(row), table); currentTable = null; currentTableStartToken = null; } else { throw new IllegalStateException("Overlapping table range end rows found"); } } else if (currentTable != null) { // Were ending in the middle of a table's token range. 
range = new ProtoStashTokenRange(currentTableStartToken, toExclusive, currentTable); currentTable = null; currentTableStartToken = null; } else { return endOfData(); } } return range; } private ByteBuffer getToken(Row row) { return row.getBytesUnsafe(0); } private boolean isStartToken(Row row) { return row.getBool(1); } private TableJson getTableJson(Row row) { return toTableJson(row.getString(2)); } }; } private TableJson toTableJson(String tableJsonString) { Map tableJsonMap = JsonHelper.fromJson(tableJsonString, new TypeReference>() {}); return new TableJson(tableJsonMap); } public void clearTokenRanges(String stashId) { ensureStashTokenRangeTableExists(); _placementCache.get(_systemTablePlacement) .getKeyspace() .getCqlSession() .execute( QueryBuilder.delete() .from(STASH_TOKEN_RANGE_TABLE) .where(QueryBuilder.eq(STASH_ID_COLUMN, stashId)) .and(QueryBuilder.eq(DATA_CENTER_COLUMN, _dataCenters.getSelf().getName())) .setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM)); } private void ensureStashTokenRangeTableExists() { if (!_verifiedStashTokenRangeTableExists) { synchronized(this) { if (!_verifiedStashTokenRangeTableExists) { // Primary key is ((stash_id, data_center), placement, range_token, is_start_token). // Note that Cassandra performs unsigned byte comparison for "range_token" and sorts False before // True for "is_start_token". The latter is necessary because it sorts two tables with // adjacent UUIDs correctly, returning the exclusive "to" token for the previous table before the // inclusive "from" token for the next table. 
_placementCache.get(_systemTablePlacement) .getKeyspace() .getCqlSession() .execute(SchemaBuilder.createTable(STASH_TOKEN_RANGE_TABLE) .ifNotExists() .addPartitionKey(STASH_ID_COLUMN, DataType.text()) .addPartitionKey(DATA_CENTER_COLUMN, DataType.text()) .addClusteringColumn(PLACEMENT_COLUMN, DataType.text()) .addClusteringColumn(RANGE_TOKEN_COLUMN, DataType.blob()) .addClusteringColumn(IS_START_TOKEN_COLUMN, DataType.cboolean()) .addColumn(TABLE_JSON_COLUMN, DataType.text()) .withOptions() // The following cluster orders should be the defaults but for clarity let's be explicit .clusteringOrder(PLACEMENT_COLUMN, SchemaBuilder.Direction.ASC) .clusteringOrder(RANGE_TOKEN_COLUMN, SchemaBuilder.Direction.ASC) .clusteringOrder(IS_START_TOKEN_COLUMN, SchemaBuilder.Direction.ASC) .compactStorage() .defaultTimeToLive(TTL)); _verifiedStashTokenRangeTableExists = true; } } } } /** * Similar to {@link com.bazaarvoice.emodb.table.db.stash.StashTokenRange} except it utilizes TableJson. It is * up to the caller to convert that serialized form into a Table. */ public static class ProtoStashTokenRange { private ByteBuffer _from; private ByteBuffer _to; private TableJson _tableJson; public ProtoStashTokenRange(ByteBuffer from, ByteBuffer to, TableJson tableJson) { _from = from; _to = to; _tableJson = tableJson; } public ByteBuffer getFrom() { return _from; } public ByteBuffer getTo() { return _to; } public TableJson getTableJson() { return _tableJson; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy