// main.app.cash.backfila.client.misk.hibernate.internal.BoundingRangeStrategy.kt (Maven / Gradle / Ivy)
package app.cash.backfila.client.misk.hibernate.internal
import app.cash.backfila.client.misk.hibernate.HibernateBackfill
import app.cash.backfila.client.misk.hibernate.PartitionProvider
import com.google.common.collect.Ordering
import javax.persistence.Table
import misk.hibernate.DbEntity
import misk.hibernate.Session
import misk.hibernate.Transacter
import misk.hibernate.shards
import misk.hibernate.transaction
import misk.vitess.Keyspace
import org.hibernate.internal.SessionImpl
/**
 * The queries that are provided by the strategy are used to establish a primary key slice of
 * the table off which the backfill criteria is applied.
 *
 * @param E the entity type being backfilled.
 * @param Pkey the primary key type of [E].
 */
interface BoundingRangeStrategy<E : DbEntity<E>, Pkey : Any> {
  /**
   * Computes the raw table min and max based on the primary key. Returns null if the table is
   * empty.
   */
  fun computeAbsoluteRange(
    backfill: HibernateBackfill<E, Pkey, *>,
    partitionName: String,
  ): MinMax<Pkey>?

  /**
   * Computes a bound of size request.scan_size, to get a set of records that can be scanned for
   * records that match the criteria.
   *
   * Returns null if there are no more records left in the table.
   * The return value must be greater than or equal to [backfillRangeStart] and less than or equal
   * to [backfillRangeEnd] and greater than [previousEndKey].
   *
   * @param backfillRangeStart this is [MinMax.min] unless a specific range was specified.
   * @param backfillRangeEnd this is [MinMax.max] unless a specific range was specified.
   * @param previousEndKey is null at the start, or the result of a previous call to this function.
   */
  fun computeBoundingRangeMax(
    backfill: HibernateBackfill<E, Pkey, *>,
    partitionName: String,
    previousEndKey: Pkey?,
    backfillRangeStart: Pkey,
    backfillRangeEnd: Pkey,
    scanSize: Long?,
  ): Pkey?

  /**
   * Gets the min and count for the range of records.
   *
   * The returned [MinCount.min] value must be greater than [previousEndKey] and greater than or
   * equal to [backfillRangeStart].
   *
   * If [previousEndKey] is null:
   * The returned [MinCount.scannedCount] counts items in [backfillRangeStart] (inclusive) until
   * [end] (inclusive).
   *
   * If [previousEndKey] is non-null:
   * The returned [MinCount.scannedCount] counts items in [previousEndKey] (exclusive) until [end]
   * (inclusive).
   *
   * @param backfillRangeStart this is [MinMax.min] unless a specific range was specified.
   * @param end is the batch slice and is greater than or equal to [backfillRangeStart].
   * @param previousEndKey is null at the start, or the result of a previous call to
   * [computeBoundingRangeMax].
   */
  fun computeMinAndCountForRange(
    backfill: HibernateBackfill<E, Pkey, *>,
    session: Session,
    previousEndKey: Pkey?,
    backfillRangeStart: Pkey,
    end: Pkey,
  ): MinCount<Pkey>
}
/**
 * [BoundingRangeStrategy] that addresses the table through [schemaAndTable], i.e. qualified with
 * the schema whenever the entity's [Table] annotation declares one.
 *
 * The two range computations that receive a partition name open their own transaction through
 * [partitionProvider]; [computeMinAndCountForRange] runs on the session handed in by the caller.
 */
class UnshardedHibernateBoundingRangeStrategy<E : DbEntity<E>, Pkey : Any>(
  private val partitionProvider: PartitionProvider,
) : BoundingRangeStrategy<E, Pkey> {
  /** Returns the table-wide primary key min/max for [partitionName], or null for an empty table. */
  override fun computeAbsoluteRange(
    backfill: HibernateBackfill<E, Pkey, *>,
    partitionName: String,
  ): MinMax<Pkey>? {
    return partitionProvider.transaction(partitionName) { session ->
      selectMinAndMax(
        backfill,
        session,
        schemaAndTable(backfill),
      )
    }
  }

  /** Returns the upper primary key of the next scan window, or null when no rows remain. */
  override fun computeBoundingRangeMax(
    backfill: HibernateBackfill<E, Pkey, *>,
    partitionName: String,
    previousEndKey: Pkey?,
    backfillRangeStart: Pkey,
    backfillRangeEnd: Pkey,
    scanSize: Long?,
  ): Pkey? {
    return partitionProvider.transaction(partitionName) { session ->
      selectMaxBound(
        backfill = backfill,
        session = session,
        schemaAndTable = schemaAndTable(backfill),
        previousEndKey = previousEndKey,
        backfillRangeStart = backfillRangeStart,
        backfillRangeEnd = backfillRangeEnd,
        scanSize = scanSize,
      )
    }
  }

  /** Returns the smallest primary key and the row count of the window ending at [end]. */
  override fun computeMinAndCountForRange(
    backfill: HibernateBackfill<E, Pkey, *>,
    session: Session,
    previousEndKey: Pkey?,
    backfillRangeStart: Pkey,
    end: Pkey,
  ): MinCount<Pkey> {
    return selectMinAndCount(
      backfill = backfill,
      session = session,
      schemaAndTable = schemaAndTable(backfill),
      previousEndKey = previousEndKey,
      backfillRangeStart = backfillRangeStart,
      end = end,
    )
  }
}
/**
 * [BoundingRangeStrategy] that addresses the table by name only (see [onlyTable]); no schema is
 * provided because the queries run pinned to a shard.
 *
 * The two range computations that receive a partition name open their own transaction through
 * [partitionProvider]; [computeMinAndCountForRange] runs on the session handed in by the caller.
 */
class VitessShardedBoundingRangeStrategy<E : DbEntity<E>, Pkey : Any>(
  private val partitionProvider: PartitionProvider,
) : BoundingRangeStrategy<E, Pkey> {
  /** Returns the table-wide primary key min/max for [partitionName], or null for an empty table. */
  override fun computeAbsoluteRange(
    backfill: HibernateBackfill<E, Pkey, *>,
    partitionName: String,
  ): MinMax<Pkey>? {
    return partitionProvider.transaction(partitionName) { session ->
      selectMinAndMax(
        backfill,
        session,
        onlyTable(backfill),
      )
    }
  }

  /** Returns the upper primary key of the next scan window, or null when no rows remain. */
  override fun computeBoundingRangeMax(
    backfill: HibernateBackfill<E, Pkey, *>,
    partitionName: String,
    previousEndKey: Pkey?,
    backfillRangeStart: Pkey,
    backfillRangeEnd: Pkey,
    scanSize: Long?,
  ): Pkey? {
    return partitionProvider.transaction(partitionName) { session ->
      // We don't provide a schema when pinned to a shard.
      selectMaxBound(
        backfill = backfill,
        session = session,
        schemaAndTable = onlyTable(backfill),
        previousEndKey = previousEndKey,
        backfillRangeStart = backfillRangeStart,
        backfillRangeEnd = backfillRangeEnd,
        scanSize = scanSize,
      )
    }
  }

  /** Returns the smallest primary key and the row count of the window ending at [end]. */
  override fun computeMinAndCountForRange(
    backfill: HibernateBackfill<E, Pkey, *>,
    session: Session,
    previousEndKey: Pkey?,
    backfillRangeStart: Pkey,
    end: Pkey,
  ): MinCount<Pkey> {
    return selectMinAndCount(
      backfill = backfill,
      session = session,
      schemaAndTable = onlyTable(backfill),
      previousEndKey = previousEndKey,
      backfillRangeStart = backfillRangeStart,
      end = end,
    )
  }
}
/**
 * [BoundingRangeStrategy] that drives a single cursor over every shard of [keyspace].
 *
 * Transactions are opened directly on [transacter]; the `partitionName` arguments are not used
 * by this implementation. Tables are addressed by name only (see [onlyTable]).
 */
class VitessSingleCursorBoundingRangeStrategy<E : DbEntity<E>, Pkey : Any>(
  private val transacter: Transacter,
  private val keyspace: Keyspace,
) : BoundingRangeStrategy<E, Pkey> {
  /** Returns the table-wide primary key min/max, or null for an empty table. */
  override fun computeAbsoluteRange(
    backfill: HibernateBackfill<E, Pkey, *>,
    partitionName: String,
  ): MinMax<Pkey>? {
    return transacter.transaction { session ->
      selectMinAndMax(
        backfill,
        session,
        onlyTable(backfill),
      )
    }
  }

  /**
   * Computes a bounding range by scanning all shards and returning the minimum of MAX(pkey).
   *
   * Vitess does not support the nested select in `SELECT MAX(s.id) FROM (subquery)` used in
   * [UnshardedHibernateBoundingRangeStrategy]. This is fine for backfills that run on each shard
   * independently, as in [VitessShardedBoundingRangeStrategy]. To work around this for all-shard
   * backfills we have to call SELECT MAX per shard, then pick the minimum value, ensuring we have
   * at least scan_size records in the bound, but up to shard_count*scan_size in the bound. The
   * former when the next scan_size records are on one shard, and the latter when they are evenly
   * distributed across all shards.
   *
   * Returns null when no shard has any rows left in the requested range.
   */
  override fun computeBoundingRangeMax(
    backfill: HibernateBackfill<E, Pkey, *>,
    partitionName: String,
    previousEndKey: Pkey?,
    backfillRangeStart: Pkey,
    backfillRangeEnd: Pkey,
    scanSize: Long?,
  ): Pkey? {
    // Pkey must have a natural ordering; the cast fails at comparison time otherwise.
    @Suppress("UNCHECKED_CAST")
    val naturalOrder = Ordering.natural<Comparable<Pkey>>() as Comparator<Pkey>

    return transacter.shards(keyspace).parallelStream().map { shard ->
      transacter.transaction(shard) { session ->
        // We don't provide a schema when pinned to a shard.
        selectMaxBound(
          backfill = backfill,
          session = session,
          schemaAndTable = onlyTable(backfill),
          previousEndKey = previousEndKey,
          backfillRangeStart = backfillRangeStart,
          backfillRangeEnd = backfillRangeEnd,
          scanSize = scanSize,
        )
      }
    }.toList()
      // Shards with no remaining rows report null and do not take part in the minimum.
      .filterNotNull()
      .minWithOrNull(naturalOrder)
  }

  /** Returns the smallest primary key and the row count of the window ending at [end]. */
  override fun computeMinAndCountForRange(
    backfill: HibernateBackfill<E, Pkey, *>,
    session: Session,
    previousEndKey: Pkey?,
    backfillRangeStart: Pkey,
    end: Pkey,
  ): MinCount<Pkey> {
    return selectMinAndCount(
      backfill = backfill,
      session = session,
      schemaAndTable = onlyTable(backfill),
      previousEndKey = previousEndKey,
      backfillRangeStart = backfillRangeStart,
      end = end,
    )
  }
}
// NOTE(review): empty class with no members; nothing in the visible part of this file refers to
// it — confirm whether it is still needed (e.g. used as a marker elsewhere) before removing.
class SingleCursorVitess
/**
 * Reads the smallest and largest primary key of [schemaAndTable] with a single raw SQL query.
 *
 * @param backfill supplies the primary key column name and the primary key class.
 * @param session session whose connection is used to run the query.
 * @param schemaAndTable already-quoted table reference that is spliced into the SQL as is.
 * @return the min/max pair, or null when the table is empty (MIN is NULL).
 * @throws IllegalStateException if Hibernate has no basic type for the primary key class, or if
 * MAX is NULL while MIN is not.
 */
private fun <E : DbEntity<E>, Pkey : Any> selectMinAndMax(
  backfill: HibernateBackfill<E, Pkey, *>,
  session: Session,
  schemaAndTable: String,
): MinMax<Pkey>? {
  // This query uses raw sql to avoid bumping into hibernate features such as @Where and
  // @SQLRestriction.
  // All of [selectMaxBound], [selectMinAndMax] and [selectMinAndCount] must be raw SQL since
  // they depend on each other having the same view of the table.
  val pkeyName = backfill.primaryKeyName()
  val sql = """
    |SELECT MIN($pkeyName) as min, MAX($pkeyName) as max
    |FROM $schemaAndTable
  """.trimMargin()

  return session.useConnection { connection ->
    connection.prepareStatement(sql).use { ps ->
      val pkeyType = checkNotNull(
        session.hibernateSession.typeHelper.basic(backfill.pkeyClass.java),
      ) { "No Hibernate basic type registered for primary key class ${backfill.pkeyClass}" }
      val hibernateSession = session.hibernateSession as SessionImpl

      ps.executeQuery().use { rs ->
        // An aggregate query without GROUP BY yields exactly one row.
        rs.next()
        val min = pkeyType.nullSafeGet(rs, "min", hibernateSession, null)
        val max = pkeyType.nullSafeGet(rs, "max", hibernateSession, null)
        if (min == null) {
          // Empty table, no work to do for this partition.
          null
        } else {
          checkNotNull(max) { "Table max was null but min wasn't, this shouldn't happen" }
          @Suppress("UNCHECKED_CAST") // Return type from the query should always be Pkey.
          val range = MinMax(min as Pkey, max as Pkey)
          range
        }
      }
    }
  }
}
/**
 * Smallest and largest primary key of a range.
 *
 * @param Pkey the primary key type.
 * @property min the lowest primary key in the range.
 * @property max the highest primary key in the range.
 */
data class MinMax<Pkey : Any>(
  val min: Pkey,
  val max: Pkey,
)
/**
 * Finds the largest primary key among the next [scanSize] distinct keys of [schemaAndTable].
 *
 * The window starts strictly after [previousEndKey] when it is non-null, otherwise at
 * [backfillRangeStart] (inclusive), and never extends past [backfillRangeEnd] (inclusive).
 *
 * @param backfill supplies the primary key column name and the primary key class.
 * @param session session whose connection is used to run the query.
 * @param schemaAndTable already-quoted table reference that is spliced into the SQL as is.
 * @param scanSize maximum number of keys in the window; must not be null.
 * @return the upper key of the window, or null when the window contains no rows.
 * @throws IllegalArgumentException if [scanSize] is null.
 * @throws IllegalStateException if Hibernate has no basic type for the primary key class.
 */
private fun <E : DbEntity<E>, Pkey : Any> selectMaxBound(
  backfill: HibernateBackfill<E, Pkey, *>,
  session: Session,
  schemaAndTable: String,
  previousEndKey: Pkey?,
  backfillRangeStart: Pkey,
  backfillRangeEnd: Pkey,
  scanSize: Long?,
): Pkey? {
  // A null scanSize used to be interpolated verbatim, producing `LIMIT null`; fail up front with
  // an actionable message instead of sending that statement to the database.
  val limit = requireNotNull(scanSize) { "scanSize is required to compute a bounding range" }

  // Hibernate doesn't support subqueries in FROM, and we don't want to read in 100k+ records,
  // so we use raw SQL here.
  // All of [selectMaxBound], [selectMinAndMax] and [selectMinAndCount] must be raw SQL since
  // they depend on each other having the same view of the table.
  val pkeyName = backfill.primaryKeyName()

  // Resume strictly after the previous window, or start at the range start on the first call.
  val lowerBound = if (previousEndKey != null) "$pkeyName > ?" else "$pkeyName >= ?"
  val params: List<Pkey> = listOf(previousEndKey ?: backfillRangeStart, backfillRangeEnd)

  val sql = """
    |SELECT MAX(s.$pkeyName) as result FROM
    | (SELECT DISTINCT $pkeyName FROM $schemaAndTable
    | WHERE $lowerBound AND $pkeyName <= ?
    | ORDER BY $pkeyName
    | LIMIT $limit) s
  """.trimMargin()

  val max = session.useConnection { connection ->
    connection.prepareStatement(sql).use { ps ->
      val pkeyType = checkNotNull(
        session.hibernateSession.typeHelper.basic(backfill.pkeyClass.java),
      ) { "No Hibernate basic type registered for primary key class ${backfill.pkeyClass}" }
      val hibernateSession = session.hibernateSession as SessionImpl

      // JDBC parameters are 1-indexed.
      params.forEachIndexed { index, pkey ->
        pkeyType.nullSafeSet(ps, pkey, index + 1, hibernateSession)
      }
      ps.executeQuery().use { rs ->
        // An aggregate query without GROUP BY yields exactly one row; MAX is NULL for no matches.
        rs.next()
        pkeyType.nullSafeGet(rs, "result", hibernateSession, null)
      }
    }
  }
  @Suppress("UNCHECKED_CAST") // Return type from the query should always match.
  return max as Pkey?
}
/**
 * Reads the smallest primary key and the number of rows of a window of [schemaAndTable].
 *
 * The window starts strictly after [previousEndKey] when it is non-null, otherwise at
 * [backfillRangeStart] (inclusive), and ends at [end] (inclusive).
 *
 * @param backfill supplies the primary key column name and the primary key class.
 * @param session session whose connection is used to run the query.
 * @param schemaAndTable already-quoted table reference that is spliced into the SQL as is.
 * @return the window's minimum key together with its row count.
 * @throws IllegalStateException if Hibernate has no basic type for the primary key class, or if
 * the window contains no rows (MIN is NULL), since [MinCount.min] cannot be null.
 */
private fun <E : DbEntity<E>, Pkey : Any> selectMinAndCount(
  backfill: HibernateBackfill<E, Pkey, *>,
  session: Session,
  schemaAndTable: String,
  previousEndKey: Pkey?,
  backfillRangeStart: Pkey,
  end: Pkey,
): MinCount<Pkey> {
  // This query uses raw sql to avoid bumping into hibernate features such as @Where and
  // @SQLRestriction.
  // All of [selectMaxBound], [selectMinAndMax] and [selectMinAndCount] must be raw SQL since
  // they depend on each other having the same view of the table.
  val pkeyName = backfill.primaryKeyName()

  // Resume strictly after the previous window, or start at the range start on the first call.
  val lowerBound = if (previousEndKey != null) "$pkeyName > ?" else "$pkeyName >= ?"
  val params: List<Pkey> = listOf(previousEndKey ?: backfillRangeStart, end)

  val sql = """
    |SELECT MIN($pkeyName) as start, COUNT(*) as scannedCount
    |FROM $schemaAndTable
    |WHERE $lowerBound AND $pkeyName <= ?
  """.trimMargin()

  return session.useConnection { connection ->
    connection.prepareStatement(sql).use { ps ->
      val pkeyType = checkNotNull(
        session.hibernateSession.typeHelper.basic(backfill.pkeyClass.java),
      ) { "No Hibernate basic type registered for primary key class ${backfill.pkeyClass}" }
      val hibernateSession = session.hibernateSession as SessionImpl

      // JDBC parameters are 1-indexed.
      params.forEachIndexed { index, pkey ->
        pkeyType.nullSafeSet(ps, pkey, index + 1, hibernateSession)
      }
      ps.executeQuery().use { rs ->
        // An aggregate query without GROUP BY yields exactly one row.
        rs.next()
        // MIN is NULL when the window has no rows; surface that explicitly rather than through a
        // bare cast failure.
        val start = checkNotNull(pkeyType.nullSafeGet(rs, "start", hibernateSession, null)) {
          "No rows found in $schemaAndTable for the requested range ending at $end"
        }
        @Suppress("UNCHECKED_CAST") // Return type from the query should always be a Pkey and Long.
        val minCount = MinCount(start as Pkey, rs.getLong("scannedCount"))
        minCount
      }
    }
  }
}
/**
 * Smallest primary key of a scanned window together with the number of rows in that window.
 *
 * @param Pkey the primary key type.
 * @property min the lowest primary key in the window.
 * @property scannedCount how many rows the window contains.
 */
data class MinCount<Pkey : Any>(
  val min: Pkey,
  val scannedCount: Long,
)
/**
 * Builds the backtick-quoted table reference for the backfill's entity class from its [Table]
 * annotation: `` `schema`.`table` `` when the annotation declares a schema, `` `table` `` otherwise.
 *
 * @throws IllegalStateException if the entity class has no [Table] annotation or the annotation
 * has an empty name.
 */
private fun <E : DbEntity<E>, Pkey : Any> schemaAndTable(
  backfill: HibernateBackfill<E, Pkey, *>,
): String {
  // getAnnotation returns null when the annotation is absent; treat that like an empty name so
  // the caller gets the explanatory error rather than a NullPointerException.
  val tableAnnotation = backfill.entityClass.java.getAnnotation(Table::class.java)
    ?.takeIf { it.name.isNotEmpty() }
    ?: error(
      """
      Entity class ${backfill.entityClass.simpleName} is missing Table name.
      We require a table name to encourage best practices. An entity is singular while your table
      is plural. Additionally, you probably want to name your classes in a way to make them
      obviously entity classes. Such as prefixing all entities with `DB`.
      You are welcome to create a copy of the bounding range strategy limited to your service
      if you absolutely cannot have table name annotation on your entity class.
      """.trimIndent(),
    )

  val schema = tableAnnotation.schema
  val table = tableAnnotation.name
  return when {
    schema.isEmpty() -> "`$table`"
    else -> "`$schema`.`$table`"
  }
}
/**
 * Builds the backtick-quoted table name (without schema) for the backfill's entity class from
 * its [Table] annotation.
 *
 * @throws IllegalStateException if the entity class has no [Table] annotation or the annotation
 * has an empty name.
 */
private fun <E : DbEntity<E>, Pkey : Any> onlyTable(
  backfill: HibernateBackfill<E, Pkey, *>,
): String {
  // getAnnotation returns null when the annotation is absent; treat that like an empty name so
  // the caller gets the explanatory error rather than a NullPointerException.
  val table = backfill.entityClass.java.getAnnotation(Table::class.java)
    ?.name
    .orEmpty()
    .ifEmpty {
      error(
        """
        Entity class ${backfill.entityClass.simpleName} is missing Table name.
        We require a table name to encourage best practices. An entity is singular while your table
        is plural. Additionally, you probably want to name your classes in a way to make them
        obviously entity classes. Such as prefixing all entities with `DB`.
        You are welcome to create a copy of the bounding range strategy limited to your service
        if you absolutely cannot have table name annotation on your entity class.
        """.trimIndent(),
      )
    }
  return "`$table`"
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy