io.questdb.griffin.engine.functions.groupby.ApproxCountDistinctIPv4GroupByFunction Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of questdb Show documentation
Show all versions of questdb Show documentation
QuestDB is high performance SQL time series database
/*******************************************************************************
* ___ _ ____ ____
* / _ \ _ _ ___ ___| |_| _ \| __ )
* | | | | | | |/ _ \/ __| __| | | | _ \
* | |_| | |_| | __/\__ \ |_| |_| | |_) |
* \__\_\\__,_|\___||___/\__|____/|____/
*
* Copyright (c) 2014-2019 Appsicle
* Copyright (c) 2019-2024 QuestDB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package io.questdb.griffin.engine.functions.groupby;
import io.questdb.cairo.ArrayColumnTypes;
import io.questdb.cairo.ColumnType;
import io.questdb.cairo.map.MapValue;
import io.questdb.cairo.sql.Function;
import io.questdb.cairo.sql.Record;
import io.questdb.griffin.engine.functions.GroupByFunction;
import io.questdb.griffin.engine.functions.LongFunction;
import io.questdb.griffin.engine.functions.UnaryFunction;
import io.questdb.griffin.engine.groupby.GroupByAllocator;
import io.questdb.griffin.engine.groupby.hyperloglog.HyperLogLog;
import io.questdb.std.Hash;
import io.questdb.std.Numbers;
/**
 * Group-by function exposed under the name {@code approx_count_distinct} that reads
 * IPv4 values from its argument and feeds their hashes into a {@link HyperLogLog}.
 * <p>
 * Each group occupies three consecutive map value slots:
 * <ul>
 *     <li>{@code valueIndex} (LONG) - either the most recently computed cardinality, an
 *     explicitly overwritten value, or {@link #NULL_VALUE} when nothing is cached;</li>
 *     <li>{@code valueIndex + 1} (LONG) - pointer to the group's HyperLogLog, 0 if none;</li>
 *     <li>{@code valueIndex + 2} (BOOLEAN) - set when the value slot was overwritten through
 *     {@link #setEmpty}, {@link #setLong} or {@link #setNull}.</li>
 * </ul>
 * The function reports itself as not thread-safe; it reuses two HyperLogLog instances
 * as cursors over whatever pointer is currently stored in the map value.
 */
public class ApproxCountDistinctIPv4GroupByFunction extends LongFunction implements UnaryFunction, GroupByFunction {
    // Marker stored in the value slot when no cardinality is cached for the group.
    private static final long NULL_VALUE = -1;
    private final Function arg;
    // Cursor used for all single-sketch operations and as the destination side of a merge.
    private final HyperLogLog hllA;
    // Cursor used only as the source side of a merge.
    private final HyperLogLog hllB;
    private int hllPtrIndex;
    private int overwrittenFlagIndex;
    private int valueIndex;

    /**
     * @param arg       function supplying the IPv4 values
     * @param precision precision handed to both internal HyperLogLog instances
     */
    public ApproxCountDistinctIPv4GroupByFunction(Function arg, int precision) {
        this.arg = arg;
        this.hllA = new HyperLogLog(precision);
        this.hllB = new HyperLogLog(precision);
    }

    /**
     * Creates the function with {@link HyperLogLog#DEFAULT_PRECISION}.
     *
     * @param arg function supplying the IPv4 values
     */
    public ApproxCountDistinctIPv4GroupByFunction(Function arg) {
        this(arg, HyperLogLog.DEFAULT_PRECISION);
    }

    /**
     * Resets the pointers held by both internal HyperLogLog instances.
     */
    @Override
    public void clear() {
        hllA.resetPtr();
        hllB.resetPtr();
    }

    /**
     * Initialises the group's slots from the first record of the group.
     * A null IPv4 leaves the group without a sketch (pointer 0, value {@link #NULL_VALUE});
     * otherwise a sketch is started from pointer 0 and the resulting cardinality is cached.
     * In both cases the overwritten flag is cleared.
     */
    @Override
    public void computeFirst(MapValue mapValue, Record record, long rowId) {
        final int ip = arg.getIPv4(record);
        if (ip == Numbers.IPv4_NULL) {
            mapValue.putLong(hllPtrIndex, 0);
            mapValue.putLong(valueIndex, NULL_VALUE);
        } else {
            final long ipHash = Hash.murmur3ToLong(ip);
            final long estimate = hllA.of(0).addAndComputeCardinalityFast(ipHash);
            mapValue.putLong(hllPtrIndex, hllA.ptr());
            mapValue.putLong(valueIndex, estimate);
        }
        mapValue.putBool(overwrittenFlagIndex, false);
    }

    /**
     * Folds a subsequent record into the group's sketch. Null IPv4 values are ignored
     * and leave every slot untouched.
     */
    @Override
    public void computeNext(MapValue mapValue, Record record, long rowId) {
        final int ip = arg.getIPv4(record);
        if (ip == Numbers.IPv4_NULL) {
            return;
        }
        final long ipHash = Hash.murmur3ToLong(ip);
        final long sketchPtr = mapValue.getLong(hllPtrIndex);
        final long estimate = hllA.of(sketchPtr).addAndComputeCardinalityFast(ipHash);
        // The pointer is written back after every add; hllA.ptr() is the authoritative value.
        mapValue.putLong(hllPtrIndex, hllA.ptr());
        mapValue.putLong(valueIndex, estimate);
    }

    @Override
    public Function getArg() {
        return arg;
    }

    /**
     * Resolves the group's result in this order: the overwritten value if the flag is set,
     * 0 if the group has no sketch, the cached cardinality if one is stored, and otherwise
     * a cardinality computed from the sketch.
     */
    @Override
    public long getLong(Record rec) {
        if (rec.getBool(overwrittenFlagIndex)) {
            return rec.getLong(valueIndex);
        }

        final long sketchPtr = rec.getLong(hllPtrIndex);
        if (sketchPtr == 0) {
            // No sketch was ever stored, e.g. the group only saw null IPv4 values.
            return 0;
        }

        final long cached = rec.getLong(valueIndex);
        if (cached == NULL_VALUE) {
            // merge() stores sketches without a cached cardinality; compute it on demand.
            hllA.of(sketchPtr);
            return hllA.computeCardinality();
        }
        return cached;
    }

    @Override
    public String getName() {
        return "approx_count_distinct";
    }

    @Override
    public int getValueIndex() {
        return valueIndex;
    }

    /**
     * Assigns the three consecutive slot indexes starting at {@code valueIndex}.
     */
    @Override
    public void initValueIndex(int valueIndex) {
        this.valueIndex = valueIndex;
        this.hllPtrIndex = valueIndex + 1;
        this.overwrittenFlagIndex = valueIndex + 2;
    }

    /**
     * Appends this function's three slot types to {@code columnTypes}, using the current
     * column count as the first slot index.
     */
    @Override
    public void initValueTypes(ArrayColumnTypes columnTypes) {
        initValueIndex(columnTypes.getColumnCount());
        columnTypes.add(ColumnType.LONG); // cached cardinality or overwritten value
        columnTypes.add(ColumnType.LONG); // pointer to HyperLogLog
        columnTypes.add(ColumnType.BOOLEAN); // flag denoting whether the value has been overwritten
    }

    @Override
    public boolean isConstant() {
        return false;
    }

    @Override
    public boolean isThreadSafe() {
        return false;
    }

    /**
     * Merges the source group's sketch into the destination group.
     * <p>
     * A source without a sketch contributes nothing. A destination without a sketch
     * (or one overwritten with 0 / null) simply takes over the source pointer. Otherwise
     * both sketches are merged and the resulting pointer is stored. Whenever the
     * destination is written, its cached cardinality is dropped and the overwritten
     * flag is cleared.
     */
    @Override
    public void merge(MapValue destValue, MapValue srcValue) {
        if (srcValue.getBool(overwrittenFlagIndex)) {
            final long srcCount = srcValue.getLong(valueIndex);
            if (isZeroOrNull(srcCount)) {
                return;
            }
            // Getting here would mean the value was overwritten by the interpolation
            // associated with SAMPLE BY. Since merge() is only called for parallel execution,
            // that cannot happen: to produce the correct result, interpolation can only run
            // on merged data (yielded by the merge phase), not on individual partitions.
            assert false : "merging overwritten values with HyperLogLog is unsupported";
        }

        final long srcPtr = srcValue.getLong(hllPtrIndex);
        if (srcPtr == 0) {
            return;
        }

        if (destValue.getBool(overwrittenFlagIndex)) {
            final long dstCount = destValue.getLong(valueIndex);
            if (isZeroOrNull(dstCount)) {
                storeSketch(destValue, srcPtr);
                return;
            }
            // Same reasoning as for the source side: this state is not expected during merge.
            assert false : "merging overwritten values with HyperLogLog is unsupported";
        }

        final long destPtr = destValue.getLong(hllPtrIndex);
        if (destPtr == 0) {
            storeSketch(destValue, srcPtr);
            return;
        }

        hllA.of(destPtr);
        hllB.of(srcPtr);
        storeSketch(destValue, HyperLogLog.merge(hllA, hllB));
    }

    /**
     * Passes the allocator on to both internal HyperLogLog instances.
     */
    @Override
    public void setAllocator(GroupByAllocator allocator) {
        hllA.setAllocator(allocator);
        hllB.setAllocator(allocator);
    }

    /**
     * Overwrites the group's value with 0.
     */
    @Override
    public void setEmpty(MapValue mapValue) {
        markOverwritten(mapValue, 0L);
    }

    /**
     * Overwrites the group's value with {@code value}.
     */
    @Override
    public void setLong(MapValue mapValue, long value) {
        markOverwritten(mapValue, value);
    }

    /**
     * Overwrites the group's value with {@link Numbers#LONG_NULL}.
     */
    @Override
    public void setNull(MapValue mapValue) {
        markOverwritten(mapValue, Numbers.LONG_NULL);
    }

    @Override
    public boolean supportsParallelism() {
        return true;
    }

    // True for the overwritten values that merge() treats as "nothing to merge".
    private static boolean isZeroOrNull(long count) {
        return count == 0 || count == Numbers.LONG_NULL;
    }

    // Stores an explicit value, drops the sketch pointer and raises the overwritten flag.
    private void markOverwritten(MapValue mapValue, long value) {
        mapValue.putLong(valueIndex, value);
        mapValue.putLong(hllPtrIndex, 0);
        mapValue.putBool(overwrittenFlagIndex, true);
    }

    // Points the group at the given sketch, clears the overwritten flag and invalidates
    // the cached cardinality so that getLong() recomputes it.
    private void storeSketch(MapValue mapValue, long sketchPtr) {
        mapValue.putBool(overwrittenFlagIndex, false);
        mapValue.putLong(hllPtrIndex, sketchPtr);
        mapValue.putLong(valueIndex, NULL_VALUE);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy