All downloads are free. Search and download functionality uses the official Maven repository.

com.thinkaurelius.titan.diskstorage.cassandra.thrift.CassandraThriftKeyColumnValueStore Maven / Gradle / Ivy

The newest version!
package com.thinkaurelius.titan.diskstorage.cassandra.thrift;

import static com.thinkaurelius.titan.diskstorage.cassandra.CassandraTransaction.getTx;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.Nullable;

import org.apache.cassandra.dht.AbstractByteOrderedPartitioner;
import org.apache.cassandra.dht.BytesToken;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.dht.RandomPartitioner;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.Column;
import org.apache.cassandra.thrift.ColumnOrSuperColumn;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.thrift.ConsistencyLevel;
import org.apache.cassandra.thrift.InvalidRequestException;
import org.apache.cassandra.thrift.KeyRange;
import org.apache.cassandra.thrift.KeySlice;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.thrift.TimedOutException;
import org.apache.cassandra.thrift.UnavailableException;
import org.apache.commons.lang.ArrayUtils;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterators;
import com.thinkaurelius.titan.diskstorage.PermanentStorageException;
import com.thinkaurelius.titan.diskstorage.StaticBuffer;
import com.thinkaurelius.titan.diskstorage.StorageException;
import com.thinkaurelius.titan.diskstorage.TemporaryStorageException;
import com.thinkaurelius.titan.diskstorage.cassandra.thrift.thriftpool.CTConnection;
import com.thinkaurelius.titan.diskstorage.cassandra.thrift.thriftpool.CTConnectionPool;
import com.thinkaurelius.titan.diskstorage.cassandra.utils.CassandraHelper;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.ByteBufferEntry;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.Entry;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.KCVMutation;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.KeyColumnValueStore;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.KeyIterator;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.KeyRangeQuery;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.KeySliceQuery;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.SliceQuery;
import com.thinkaurelius.titan.diskstorage.keycolumnvalue.StoreTransaction;
import com.thinkaurelius.titan.diskstorage.util.ByteBufferUtil;
import com.thinkaurelius.titan.diskstorage.util.RecordIterator;
import com.thinkaurelius.titan.diskstorage.util.StaticByteBuffer;

/**
 * A Titan {@code KeyColumnValueStore} backed by Cassandra.
 * This uses the Cassandra Thrift API.
 *
 * @author Dan LaRocque 
 * @see CassandraThriftStoreManager
 */
public class CassandraThriftKeyColumnValueStore implements KeyColumnValueStore {

    private static final Logger logger =
            LoggerFactory.getLogger(CassandraThriftKeyColumnValueStore.class);

    private static final Pattern BROKEN_BYTE_TOKEN_PATTERN = Pattern.compile("^Token\\(bytes\\[(.+)\\]\\)$");

    // Cassandra access
    private final CassandraThriftStoreManager storeManager;
    private final String keyspace;
    private final String columnFamily;
    private final CTConnectionPool pool;

    public CassandraThriftKeyColumnValueStore(String keyspace, String columnFamily, CassandraThriftStoreManager storeManager,
                                              CTConnectionPool pool) {
        this.storeManager = storeManager;
        this.keyspace = keyspace;
        this.columnFamily = columnFamily;
        this.pool = pool;
    }

    /**
     * Call Cassandra's Thrift get_slice() method.
     * 

* When columnEnd equals columnStart, and both startInclusive * and endInclusive are true, then this method calls * {@link #get(java.nio.ByteBuffer, java.nio.ByteBuffer, com.thinkaurelius.titan.diskstorage.keycolumnvalue.StoreTransaction)} * instead of calling Thrift's getSlice() method and returns * a one-element list containing the result. *

* When columnEnd equals columnStart and either startInclusive * or endInclusive is false (or both are false), then this * method returns an empty list without making any Thrift calls. *

* If columnEnd = columnStart + 1, and both startInclusive and * startExclusive are false, then the arguments effectively form * an empty interval. In this case, as in the one previous, * an empty list is returned. However, it may not necessarily * be handled efficiently; a Thrift call might still be made * before returning the empty list. * * @throws com.thinkaurelius.titan.diskstorage.StorageException * when columnEnd < columnStart */ @Override public List getSlice(KeySliceQuery query, StoreTransaction txh) throws StorageException { ByteBuffer key = query.getKey().asByteBuffer(); List slice = getNamesSlice(Arrays.asList(query.getKey()), query, txh).get(key); return (slice == null) ? Collections.emptyList() : slice; } @Override public List> getSlice(List keys, SliceQuery query, StoreTransaction txh) throws StorageException { return CassandraHelper.order(getNamesSlice(keys, query, txh), keys); } public Map> getNamesSlice(List keys, SliceQuery query, StoreTransaction txh) throws StorageException { Preconditions.checkArgument(query.getLimit() >= 0); if (0 == query.getLimit()) return Collections.emptyMap(); ColumnParent parent = new ColumnParent(columnFamily); /* * Cassandra cannot handle columnStart = columnEnd. * Cassandra's Thrift getSlice() throws InvalidRequestException * if columnStart = columnEnd. */ if (ByteBufferUtil.compare(query.getSliceStart(), query.getSliceEnd()) >= 0) { // Check for invalid arguments where columnEnd < columnStart if (ByteBufferUtil.isSmallerThan(query.getSliceEnd(), query.getSliceStart())) { throw new PermanentStorageException("columnStart=" + query.getSliceStart() + " is greater than columnEnd=" + query.getSliceEnd() + ". 
" + "columnStart must be less than or equal to columnEnd"); } if (0 != query.getSliceStart().length() && 0 != query.getSliceEnd().length()) { logger.debug("Return empty list due to columnEnd==columnStart and neither empty"); return Collections.emptyMap(); } } // true: columnStart < columnEnd ConsistencyLevel consistency = getTx(txh).getReadConsistencyLevel().getThriftConsistency(); SlicePredicate predicate = new SlicePredicate(); SliceRange range = new SliceRange(); range.setCount(query.getLimit()); range.setStart(query.getSliceStart().asByteBuffer()); range.setFinish(query.getSliceEnd().asByteBuffer()); predicate.setSlice_range(range); CTConnection conn = null; try { conn = pool.borrowObject(keyspace); Cassandra.Client client = conn.getClient(); List requestKeys = new ArrayList(keys.size()); { for (StaticBuffer key : keys) { requestKeys.add(key.asByteBuffer()); } } Map> rows = client.multiget_slice(requestKeys, parent, predicate, consistency); /* * The final size of the "result" List may be at most rows.size(). 
* However, "result" could also be up to two elements smaller than * rows.size(), depending on startInclusive and endInclusive */ Map> results = new HashMap>(); ByteBuffer sliceEndBB = query.getSliceEnd().asByteBuffer(); for (ByteBuffer key : rows.keySet()) { results.put(key, excludeLastColumn(rows.get(key), sliceEndBB)); } return results; } catch (Exception e) { throw convertException(e); } finally { pool.returnObjectUnsafe(keyspace, conn); } } private static List excludeLastColumn(List row, ByteBuffer lastColumn) { List entries = new ArrayList(); for (ColumnOrSuperColumn r : row) { Column c = r.getColumn(); // Skip column if it is equal to columnEnd because columnEnd is exclusive if (lastColumn.equals(c.bufferForName())) break; entries.add(new ByteBufferEntry(c.bufferForName(), c.bufferForValue())); } return entries; } @Override public void close() { // Do nothing } @Override public boolean containsKey(StaticBuffer key, StoreTransaction txh) throws StorageException { ColumnParent parent = new ColumnParent(columnFamily); ConsistencyLevel consistency = getTx(txh).getReadConsistencyLevel().getThriftConsistency(); SlicePredicate predicate = new SlicePredicate(); SliceRange range = new SliceRange(); range.setCount(1); byte[] empty = new byte[0]; range.setStart(empty); range.setFinish(empty); predicate.setSlice_range(range); CTConnection conn = null; try { conn = pool.borrowObject(keyspace); Cassandra.Client client = conn.getClient(); List result = client.get_slice(key.asByteBuffer(), parent, predicate, consistency); return 0 < result.size(); } catch (Exception e) { throw convertException(e); } finally { pool.returnObjectUnsafe(keyspace, conn); } } @Override public void acquireLock(StaticBuffer key, StaticBuffer column, StaticBuffer expectedValue, StoreTransaction txh) throws StorageException { throw new UnsupportedOperationException(); } @Override public KeyIterator getKeys(@Nullable SliceQuery sliceQuery, StoreTransaction txh) throws StorageException { final 
IPartitioner> partitioner = storeManager.getCassandraPartitioner(); if (!(partitioner instanceof RandomPartitioner) && !(partitioner instanceof Murmur3Partitioner)) throw new PermanentStorageException("This operation is only allowed when random partitioner (md5 or murmur3) is used."); try { return new AllTokensIterator>(partitioner, sliceQuery, storeManager.getPageSize()); } catch (Exception e) { throw convertException(e); } } @Override public KeyIterator getKeys(KeyRangeQuery keyRangeQuery, StoreTransaction txh) throws StorageException { final IPartitioner> partitioner = storeManager.getCassandraPartitioner(); // see rant about the reason of this limitation in Astyanax implementation of this method. if (!(partitioner instanceof AbstractByteOrderedPartitioner)) throw new PermanentStorageException("This operation is only allowed when byte-ordered partitioner is used."); try { SliceQuery columnSlice = new SliceQuery( keyRangeQuery.getSliceStart(), keyRangeQuery.getSliceEnd()); return new KeyRangeIterator>(partitioner, columnSlice, storeManager.getPageSize(), keyRangeQuery.getKeyStart().asByteBuffer(), keyRangeQuery.getKeyEnd().asByteBuffer()); } catch (Exception e) { throw convertException(e); } } @Override public StaticBuffer[] getLocalKeyPartition() throws StorageException { throw new UnsupportedOperationException(); } @Override public String getName() { return columnFamily; } @Override public void mutate(StaticBuffer key, List additions, List deletions, StoreTransaction txh) throws StorageException { Map mutations = ImmutableMap.of(key, new KCVMutation(additions, deletions)); mutateMany(mutations, txh); } public void mutateMany(Map mutations, StoreTransaction txh) throws StorageException { storeManager.mutateMany(ImmutableMap.of(columnFamily, mutations), txh); } static StorageException convertException(Throwable e) { if (e instanceof TException) { return new PermanentStorageException(e); } else if (e instanceof TimedOutException) { return new 
TemporaryStorageException(e); } else if (e instanceof UnavailableException) { return new TemporaryStorageException(e); } else if (e instanceof InvalidRequestException) { return new PermanentStorageException(e); } else { return new PermanentStorageException(e); } } @Override public String toString() { return "CassandraThriftKeyColumnValueStore[ks=" + keyspace + ", cf=" + columnFamily + "]"; } private List getKeySlice(ByteBuffer startKey, ByteBuffer endKey, SliceQuery columnSlice, int count) throws StorageException { return getRangeSlices(new KeyRange().setStart_key(startKey).setEnd_key(endKey).setCount(count), columnSlice); } private > List getTokenSlice(T startToken, T endToken, SliceQuery sliceQuery, int count) throws StorageException { String st = sanitizeBrokenByteToken(startToken); String et = sanitizeBrokenByteToken(endToken); KeyRange kr = new KeyRange().setStart_token(st).setEnd_token(et).setCount(count); return getRangeSlices(kr, sliceQuery); } private String sanitizeBrokenByteToken(Token tok) { /* * Background: https://issues.apache.org/jira/browse/CASSANDRA-5566 * * This hack can go away when we upgrade to or past 1.2.5. But as I * write this comment, we're still stuck on 1.2.2 because Astyanax * hasn't upgraded and tries to call an undefined thrift constructor * when I try running against Cassandra 1.2.10. I haven't tried 1.2.5. * However, I think it's not worth breaking from Astyanax's supported * Cassandra version unless we can break all the way to the latest * Cassandra version, and 1.2.5 is not the latest anyway. 
*/ String st = tok.toString(); if (!(tok instanceof BytesToken)) return st; // Do a cheap 1-character startsWith before unleashing the regex if (st.startsWith("T")) { Matcher m = BROKEN_BYTE_TOKEN_PATTERN.matcher(st); if (!m.matches()) { logger.warn("Unknown token string format: \"{}\"", st); } else { String old = st; st = m.group(1); logger.debug("Rewrote token string: \"{}\" -> \"{}\"", old, st); } } return st; } private List getRangeSlices(KeyRange keyRange, @Nullable SliceQuery sliceQuery) throws StorageException { SliceRange sliceRange = new SliceRange(); if (sliceQuery == null) { sliceRange.setStart(ArrayUtils.EMPTY_BYTE_ARRAY) .setFinish(ArrayUtils.EMPTY_BYTE_ARRAY) .setCount(5); } else { sliceRange.setStart(sliceQuery.getSliceStart().asByteBuffer()) .setFinish(sliceQuery.getSliceEnd().asByteBuffer()) .setCount((sliceQuery.hasLimit()) ? sliceQuery.getLimit() : Integer.MAX_VALUE); } CTConnection connection = null; try { connection = pool.borrowObject(keyspace); List slices = connection.getClient().get_range_slices(new ColumnParent(columnFamily), new SlicePredicate() .setSlice_range(sliceRange), keyRange, ConsistencyLevel.QUORUM); for (KeySlice s : slices) { logger.debug("Key {}", ByteBufferUtil.toString(s.key, "-")); } /* Note: we need to fetch columns for each row as well to remove "range ghosts" */ List result = new ArrayList(slices.size()); KeyIterationPredicate pred = new KeyIterationPredicate(); for (KeySlice ks : slices) if (pred.apply(ks)) result.add(ks); return result; } catch (Exception e) { throw convertException(e); } finally { if (connection != null) pool.returnObjectUnsafe(keyspace, connection); } } private static class KeyIterationPredicate implements Predicate { @Override public boolean apply(@Nullable KeySlice row) { return (row != null) && row.getColumns().size() > 0; } } /** * Slices rows and columns using tokens. Recall that the partitioner turns * keys into tokens. For instance, under RandomPartitioner, tokens are the * MD5 hashes of keys. 
*/ public class AbstractBufferedRowIter> implements KeyIterator { private final int pageSize; private final SliceQuery columnSlice; private boolean isClosed; private boolean seenEnd; protected Iterator ksIter; private KeySlice mostRecentRow; private final IPartitioner partitioner; private T nextStartToken; private final T endToken; private ByteBuffer nextStartKey; private boolean omitEndToken; public AbstractBufferedRowIter(IPartitioner partitioner, SliceQuery columnSlice, int pageSize, T startToken, T endToken, boolean omitEndToken) { this.pageSize = pageSize; this.partitioner = partitioner; this.nextStartToken = startToken; this.endToken = endToken; this.columnSlice = columnSlice; this.seenEnd = false; this.isClosed = false; this.ksIter = Iterators.emptyIterator(); this.mostRecentRow = null; this.omitEndToken = omitEndToken; } @Override public boolean hasNext() { ensureOpen(); if (!ksIter.hasNext() && !seenEnd) { try { ksIter = rebuffer().iterator(); } catch (StorageException e) { throw new RuntimeException(e); } } return ksIter.hasNext(); } @Override public StaticBuffer next() { ensureOpen(); if (!hasNext()) throw new NoSuchElementException(); mostRecentRow = ksIter.next(); Preconditions.checkNotNull(mostRecentRow); return new StaticByteBuffer(mostRecentRow.bufferForKey()); } @Override public void close() { closeIterator(); } @Override public void remove() { throw new UnsupportedOperationException(); } @Override public RecordIterator getEntries() { ensureOpen(); return new RecordIterator() { final Iterator columns = excludeLastColumn(mostRecentRow.getColumns(), columnSlice.getSliceEnd().asByteBuffer()).iterator(); @Override public boolean hasNext() { ensureOpen(); return columns.hasNext(); } @Override public Entry next() { ensureOpen(); return columns.next(); } @Override public void close() { closeIterator(); } @Override public void remove() { throw new UnsupportedOperationException(); } }; } private void ensureOpen() { if (isClosed) throw new 
IllegalStateException("Iterator has been closed."); } private void closeIterator() { if (!isClosed) { isClosed = true; } } private List rebuffer() throws StorageException { Preconditions.checkArgument(!seenEnd); return checkFreshSlices(getNextKeySlices()); } protected List checkFreshSlices(List ks) { if (0 == ks.size()) { seenEnd = true; return Collections.emptyList(); } nextStartKey = ks.get(ks.size() - 1).bufferForKey(); nextStartToken = partitioner.getToken(nextStartKey); if (nextStartToken.equals(endToken)) { seenEnd = true; if (omitEndToken) ks.remove(ks.size() - 1); } return ks; } protected final List getNextKeySlices() throws StorageException { return getTokenSlice(nextStartToken, endToken, columnSlice, pageSize); } } private final class AllTokensIterator> extends AbstractBufferedRowIter { public AllTokensIterator(IPartitioner partitioner, SliceQuery columnSlice, int pageSize) { super(partitioner, columnSlice, pageSize, partitioner.getMinimumToken(), partitioner.getMinimumToken(), false); } } private final class KeyRangeIterator> extends AbstractBufferedRowIter { public KeyRangeIterator(IPartitioner partitioner, SliceQuery columnSlice, int pageSize, ByteBuffer startKey, ByteBuffer endKey) throws StorageException { super(partitioner, columnSlice, pageSize, partitioner.getToken(startKey), partitioner.getToken(endKey), true); Preconditions.checkArgument(partitioner instanceof AbstractByteOrderedPartitioner); // Get first slice with key range instead of token range. Token // ranges are start-exclusive, key ranges are start-inclusive. Both // are end-inclusive. If we don't make the call below, then we will // erroneously miss startKey. List ks = getKeySlice(startKey, endKey, columnSlice, pageSize); this.ksIter = checkFreshSlices(ks).iterator(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy