org.apache.beam.sdk.io.astra.db.transforms.split.TokenRangeReadStatementGenerator Maven / Gradle / Ivy
Show all versions of beam-sdks-java-io-astra Show documentation
/*
* Copyright DataStax, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.io.astra.db.transforms.split;
/*-
* #%L
* Beam SDK for Astra
* --
* Copyright (C) 2023 DataStax
* --
* Licensed under the Apache License, Version 2.0
* You may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.datastax.oss.driver.api.core.cql.SimpleStatement;
import com.datastax.oss.driver.api.core.cql.Statement;
import com.datastax.oss.driver.api.core.metadata.Metadata;
import com.datastax.oss.driver.api.core.metadata.TokenMap;
import com.datastax.oss.driver.api.core.metadata.schema.ColumnMetadata;
import com.datastax.oss.driver.api.core.metadata.schema.RelationMetadata;
import com.datastax.oss.driver.api.core.metadata.token.TokenRange;
import com.datastax.oss.driver.internal.core.metadata.token.Murmur3Token;
import edu.umd.cs.findbugs.annotations.NonNull;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;
/** Generates SELECT statements that read the entire table by token ranges. */
public class TokenRangeReadStatementGenerator {
private final RelationMetadata table;
private final TokenMap tokenMap;
/**
* @param table The table (or materialized view) to scan.
* @param metadata The cluster metadata to use.
*/
public TokenRangeReadStatementGenerator(@NonNull RelationMetadata table, @NonNull Metadata metadata) {
this.table = table;
tokenMap =
metadata
.getTokenMap()
.orElseThrow(() -> new IllegalStateException("Token metadata not present"));
}
/**
* Generates default SELECT statements to read the entire table, with a minimum of {@code
* splitCount} statements.
*
* For a given split / token range, the generated statement is a {@linkplain Statement
* statement} of the form: {@code SELECT col1, col2,... FROM table WHERE token(...) >
* [range.start()] AND token(...) <= [range.end()])}.
*
*
Note that the splitting algorithm doesn't guarantee an exact number of splits, but rather a
* minimum number. The number of resulting statements depends on the set of primary token ranges
* in the ring and how contiguous token ranges are distributed across the ring. In particular with
* vnodes, the total number of statements can be much higher than {@code splitCount}.
*
* @param splitCount The minimum desired number of statements to generate (on a best-effort
* basis).
* @return A list of SELECT statements to read the entire table.
*/
@NonNull
public Map generate(int splitCount) {
return generate(splitCount, this::generateSimpleStatement);
}
/**
* Generates SELECT statements to read the entire table, with a minimum of {@code splitCount}
* statements and using the given factory to generate statements.
*
* For each split / token range, the generated statement is a {@linkplain Statement statement}
* resulting from applying {@code statementFactory} to the token range; statement factories should
* typically generate a statement of the form: {@code SELECT col1, col2,... FROM table WHERE
* token(...) > ? AND token(...) <= ?)}. Please note that this method does not fully validate that
* the statements created by the factory are valid, and thus should be used with caution.
*
*
Note that the splitting algorithm doesn't guarantee an exact number of splits, but rather a
* minimum number. The number of resulting statements depends on the set of primary token ranges
* in the ring and how contiguous token ranges are distributed across the ring. In particular with
* vnodes, the total number of statements can be much higher than {@code splitCount}.
*
* @param
* The type of statement to generate.
* @param splitCount
* The minimum desired number of statements to generate (on a best-effort
* basis).
* @param statementFactory
* The factory to use to generate statements for each split.
* @return
* A list of SELECT statements to read the entire table.
*/
@NonNull
public > Map generate(
int splitCount, @NonNull Function statementFactory) {
AstraTokenFactory tokenFactory = new AstraTokenFactory();
PartitionGenerator generator = new PartitionGenerator(table.getKeyspace(), tokenMap, tokenFactory);
List partitions = generator.partition(splitCount);
Map statements = new TreeMap<>();
for (AstraTokenRange range : partitions) {
StatementT stmt = statementFactory.apply(range);
if (stmt.getKeyspace() != null) {
if (!stmt.getKeyspace().equals(table.getKeyspace())) {
throw new IllegalStateException(
String.format(
"Statement has different keyspace, expecting %s but got %s",
table.getKeyspace(), stmt.getKeyspace()));
}
} else {
stmt = stmt.setRoutingKeyspace(table.getKeyspace());
}
stmt = stmt.setRoutingToken(range.getEnd());
statements.put(range, stmt);
}
return statements;
}
private SimpleStatement generateSimpleStatement(TokenRange range) {
String all =
table.getColumns().keySet().stream()
.map(id -> id.asCql(true))
.collect(Collectors.joining(","));
String pks =
table.getPartitionKey().stream()
.map(ColumnMetadata::getName)
.map(id -> id.asCql(true))
.collect(Collectors.joining(","));
String query =
String.format(
"SELECT %s FROM %s.%s WHERE token(%s) > %s AND token(%s) <= %s",
all,
table.getKeyspace().asCql(true),
table.getName().asCql(true),
pks,
((Murmur3Token) range.getStart()).getValue(),
pks,
((Murmur3Token) range.getEnd()).getValue());
return SimpleStatement.newInstance(query);
}
}