
com.pingcap.tispark.statistics.StatisticsHelper.scala Maven / Gradle / Ivy
/*
*
* Copyright 2017 PingCAP, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.pingcap.tispark.statistics
import org.tikv.shade.com.google.common.primitives.UnsignedLong
import com.pingcap.tikv.expression.{ByItem, ColumnRef, ComparisonBinaryExpression, Constant}
import com.pingcap.tikv.key.Key
import com.pingcap.tikv.meta.TiDAGRequest.PushDownType
import com.pingcap.tikv.meta._
import com.pingcap.tikv.row.Row
import com.pingcap.tikv.statistics._
import com.pingcap.tikv.types.{BytesType, IntegerType}
import org.slf4j.LoggerFactory
import org.tikv.common.meta.TiTimestamp
import scala.collection.JavaConversions._
import scala.collection.mutable
object StatisticsHelper {
private final lazy val logger = LoggerFactory.getLogger(getClass.getName)
private val metaRequiredCols = Seq("version", "table_id", "modify_count", "count")
private val histRequiredCols =
Seq("table_id", "is_index", "hist_id", "distinct_count", "null_count", "version", "cm_sketch")
private val bucketRequiredCols = Seq(
"table_id",
"is_index",
"hist_id",
"bucket_id",
"count",
"repeats",
"lower_bound",
"upper_bound")
private[statistics] def isManagerReady: Boolean =
StatisticsManager.metaTable != null &&
StatisticsManager.bucketTable != null &&
StatisticsManager.histTable != null
private[statistics] def extractStatisticsDTO(
row: Row,
table: TiTableInfo,
loadAll: Boolean,
neededColIds: mutable.ArrayBuffer[Long],
histTable: TiTableInfo): StatisticsDTO = {
if (row.fieldCount() < 6) return null
if (row.getLong(0) != table.getId) {
// table id should be the same as what we fetched via coprocessor
logger.warn(s"table id not match ${row.getLong(0)}!=${table.getId}")
return null
}
val isIndex = row.getLong(1) > 0
val histID = row.getLong(2)
val distinct = row.getLong(3)
val nullCount = row.getLong(4)
val histVer = row.getUnsignedLong(5)
val cMSketch = if (checkColExists(histTable, "cm_sketch")) row.getBytes(6) else null
// get index/col info for StatisticsDTO
var indexInfos: mutable.Buffer[TiIndexInfo] = mutable.Buffer.empty[TiIndexInfo]
var colInfos: mutable.Buffer[TiColumnInfo] = mutable.Buffer.empty[TiColumnInfo]
var needed = true
// we should only query those columns that user specified before
if (!loadAll && !neededColIds.contains(histID)) needed = false
val (indexFlag, dataType) = if (isIndex) {
indexInfos = table.getIndices.filter { _.getId == histID }
if (indexInfos.isEmpty) {
logger.warn(
s"Cannot find index histogram id $histID in table info ${table.getName}[${table.getId}] now. It may be deleted.")
needed = false
(1, null)
} else {
(1, BytesType.BLOB)
}
} else {
colInfos = table.getColumns.filter { _.getId == histID }
if (colInfos.isEmpty) {
logger.warn(
s"Cannot find column histogram id $histID in table info ${table.getName}[${table.getId}] now. It may be deleted.")
needed = false
(0, null)
} else {
(0, colInfos.head.getType)
}
}
if (needed) {
StatisticsDTO(
histID,
indexFlag,
distinct,
histVer,
nullCount,
dataType,
cMSketch,
if (indexInfos.nonEmpty) indexInfos.head else null,
if (colInfos.nonEmpty) colInfos.head else null)
} else {
null
}
}
private def checkColExists(table: TiTableInfo, column: String): Boolean =
table.getColumns.exists { _.matchName(column) }
private[statistics] def shouldUpdateHistogram(
statistics: ColumnStatistics,
result: StatisticsResult): Boolean = {
if (statistics == null || result == null) return false
shouldUpdateHistogram(statistics.getHistogram, result.histogram)
}
/**
* Check whether histogram should be updated according to version
*/
private[statistics] def shouldUpdateHistogram(oldHis: Histogram, newHis: Histogram): Boolean = {
if (oldHis == null || newHis == null) return false
val oldVersion = UnsignedLong.fromLongBits(oldHis.getLastUpdateVersion)
val newVersion = UnsignedLong.fromLongBits(newHis.getLastUpdateVersion)
oldVersion.compareTo(newVersion) < 0
}
private[statistics] def shouldUpdateHistogram(
statistics: IndexStatistics,
result: StatisticsResult): Boolean = {
if (statistics == null || result == null) return false
shouldUpdateHistogram(statistics.getHistogram, result.histogram)
}
private[statistics] def extractStatisticResult(
histId: Long,
rows: Iterator[Row],
requests: Seq[StatisticsDTO]): StatisticsResult = {
val matches = requests.filter(_.colId == histId)
if (matches.nonEmpty) {
val matched = matches.head
var totalCount: Long = 0
val buckets = mutable.ArrayBuffer[Bucket]()
while (rows.hasNext) {
val row = rows.next()
val isRowIndex = if (row.getLong(1) > 0) true else false
val isRequestIndex = matched.isIndex > 0
// if required DTO type(index/non index) is the same with the row
if (isRequestIndex == isRowIndex) {
val count = row.getLong(4)
val repeats = row.getLong(5)
var lowerBound: Key = null
var upperBound: Key = null
// all bounds are stored as blob in bucketTable currently, decode using blob type
lowerBound = Key.toRawKey(row.getBytes(6))
upperBound = Key.toRawKey(row.getBytes(7))
totalCount += count
buckets += new Bucket(totalCount, repeats, lowerBound, upperBound)
}
}
// create histogram for column `colId`
val histogram = Histogram
.newBuilder()
.setId(matched.colId)
.setNDV(matched.distinct)
.setNullCount(matched.nullCount)
.setLastUpdateVersion(matched.version)
.setBuckets(buckets)
.build()
// parse CMSketch
val rawData = matched.rawCMSketch
val cMSketch = if (rawData == null || rawData.length <= 0) {
null
} else {
val sketch = com.pingcap.tidb.tipb.CMSketch.parseFrom(rawData)
val result =
CMSketch.newCMSketch(sketch.getRowsCount, sketch.getRows(0).getCountersCount)
for (i <- 0 until sketch.getRowsCount) {
val row = sketch.getRows(i)
result.setCount(0)
for (j <- 0 until row.getCountersCount) {
val counter = row.getCounters(j)
result.getTable()(i)(j) = counter
result.setCount(result.getCount + counter)
}
}
result
}
StatisticsResult(histId, histogram, cMSketch, matched.idxInfo, matched.colInfo)
} else {
null
}
}
private[statistics] def buildHistogramsRequest(
histTable: TiTableInfo,
targetTblId: Long,
startTs: TiTimestamp): TiDAGRequest =
buildRequest(histTable, histRequiredCols, targetTblId, startTs)
private def buildRequest(
tableInfo: TiTableInfo,
requiredCols: Seq[String],
targetTblId: Long,
startTs: TiTimestamp): TiDAGRequest = {
TiDAGRequest.Builder
.newBuilder()
.setFullTableScan(tableInfo)
.addFilter(
ComparisonBinaryExpression
.equal(
ColumnRef.create("table_id", IntegerType.BIGINT),
Constant.create(targetTblId, IntegerType.BIGINT)))
.addRequiredCols(requiredCols.filter(checkColExists(tableInfo, _)))
.setStartTs(startTs)
.build(PushDownType.NORMAL)
}
private[statistics] def buildMetaRequest(
metaTable: TiTableInfo,
targetTblId: Long,
startTs: TiTimestamp): TiDAGRequest =
buildRequest(metaTable, metaRequiredCols, targetTblId, startTs)
private[statistics] def buildBucketRequest(
bucketTable: TiTableInfo,
targetTblId: Long,
startTs: TiTimestamp): TiDAGRequest =
TiDAGRequest.Builder
.newBuilder()
.setFullTableScan(bucketTable)
.addFilter(
ComparisonBinaryExpression
.equal(
ColumnRef.create("table_id", IntegerType.BIGINT),
Constant.create(targetTblId, IntegerType.BIGINT)))
.setLimit(Int.MaxValue)
.addOrderBy(ByItem.create(ColumnRef.create("bucket_id", IntegerType.BIGINT), false))
.addRequiredCols(bucketRequiredCols.filter(checkColExists(bucketTable, _)))
.setStartTs(startTs)
.build(PushDownType.NORMAL)
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy