// org.apache.drill.exec.physical.base.ScanStats Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.base;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * Cost estimate for a scan. In general, relative costs are more important
 * than absolute costs. If a scan supports filter push-down, the cost of
 * the scan after the push-down must be less than the combined cost of
 * the scan + project before push-down, else Calcite will ignore the
 * push-down. Also, the estimated row count may influence whether the
 * table can be broadcast or hash partitioned. Otherwise, Calcite has
 * no real choices based on scan cost.
 * <p>
 * Instances are immutable value holders: all fields are set once in the
 * constructor and exposed through getters only.
 */
public class ScanStats {

  /**
   * Predefined stats: row count is not exact, 20 records, CPU cost 1, disk cost 1.
   */
  public static final ScanStats TRIVIAL_TABLE =
      new ScanStats(GroupScanProperty.NO_EXACT_ROW_COUNT, 20, 1, 1);

  /**
   * Predefined stats: exact row count of 0 records, CPU cost 1, disk cost 1.
   */
  public static final ScanStats ZERO_RECORD_TABLE =
      new ScanStats(GroupScanProperty.EXACT_ROW_COUNT, 0, 1, 1);

  /**
   * Describes how the counts and costs held by this object are to be interpreted.
   */
  @JsonProperty
  private final GroupScanProperty groupScanProperty;

  /**
   * Row count for the scan; whether it is exact is described by
   * {@link #groupScanProperty}.
   */
  @JsonProperty
  private final double recordCount;

  /**
   * CPU cost for the scan which should consider both row and column
   * count, and the effect of filters. Considered only if the group scan property is
   * set to {@link GroupScanProperty#ESTIMATED_TOTAL_COST}. Default
   * CPU cost is simply row count * column count.
   */
  @JsonProperty
  private final double cpuCost;

  /**
   * I/O cost for the scan. Considered only if the group scan property is
   * set to {@link GroupScanProperty#ESTIMATED_TOTAL_COST}. Drill does not
   * differentiate between network and disk I/O, despite the field name.
   */
  @JsonProperty
  private final double diskCost;

  /**
   * Creates a scan cost estimate. Also used by Jackson for deserialization;
   * the JSON property names match the parameter names.
   *
   * @param groupScanProperty how the counts and costs are to be interpreted
   * @param recordCount row count for the scan
   * @param cpuCost CPU cost for the scan
   * @param diskCost I/O cost for the scan
   */
  @JsonCreator
  public ScanStats(@JsonProperty("groupScanProperty") GroupScanProperty groupScanProperty,
                   @JsonProperty("recordCount") double recordCount,
                   @JsonProperty("cpuCost") double cpuCost,
                   @JsonProperty("diskCost") double diskCost) {
    this.groupScanProperty = groupScanProperty;
    this.recordCount = recordCount;
    this.cpuCost = cpuCost;
    this.diskCost = diskCost;
  }

  /**
   * Returns the property that tells whether the GroupScan knows the exact row
   * count in the result of a getSize() call. By default, a group scan does not
   * know the exact row count before it scans every row. Currently, the Parquet
   * group scan will return the exact row count.
   *
   * @return group scan property
   */
  public GroupScanProperty getGroupScanProperty() {
    return groupScanProperty;
  }

  /**
   * @return the row count for the scan
   */
  public double getRecordCount() {
    return recordCount;
  }

  /**
   * @return the CPU cost for the scan
   */
  public double getCpuCost() {
    return cpuCost;
  }

  /**
   * @return the I/O cost for the scan
   */
  public double getDiskCost() {
    return diskCost;
  }

  /**
   * Renders all four fields in the form
   * {@code ScanStats{recordCount=..., cpuCost=..., diskCost=..., groupScanProperty=...}}.
   */
  @Override
  public String toString() {
    return "ScanStats{" +
        "recordCount=" + recordCount +
        ", cpuCost=" + cpuCost +
        ", diskCost=" + diskCost +
        ", groupScanProperty=" + groupScanProperty +
        '}';
  }

  /**
   * Qualifies the numbers carried by a {@link ScanStats}: whether the row
   * count and column value count are exact, and whether the full cost
   * should be considered.
   */
  public enum GroupScanProperty {

    /**
     * Neither the row count nor the column value count is exact.
     */
    NO_EXACT_ROW_COUNT(false, false),

    /**
     * Both the row count and the column value count are exact.
     */
    EXACT_ROW_COUNT(true, true),

    /**
     * Tells the planner to consider the full cost represented
     * here. Else, the planner only looks at row count. However,
     * the actual row count is not known, so a COUNT(*) query must
     * still look at the input source if it wants an accurate count.
     */
    ESTIMATED_TOTAL_COST(false, true);

    // Enum constants are shared singletons, so their state is kept immutable.
    private final boolean hasExactRowCount;
    private final boolean hasExactColumnValueCount;

    GroupScanProperty(boolean hasExactRowCount, boolean hasExactColumnValueCount) {
      this.hasExactRowCount = hasExactRowCount;
      this.hasExactColumnValueCount = hasExactColumnValueCount;
    }

    /**
     * @return {@code true} if the row count is exact
     */
    public boolean hasExactRowCount() {
      return hasExactRowCount;
    }

    /**
     * @return {@code true} if the column value count is exact
     */
    public boolean hasExactColumnValueCount() {
      return hasExactColumnValueCount;
    }

    /**
     * @return {@code true} only for {@link #ESTIMATED_TOTAL_COST}
     */
    public boolean hasFullCost() {
      return this == ESTIMATED_TOTAL_COST;
    }
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy