
io.druid.query.aggregation.datasketches.theta.SketchOperations Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of druid-datasketches Show documentation
Show all versions of druid-datasketches Show documentation
Druid Aggregators based on datasketches lib http://datasketches.github.io/
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.query.aggregation.datasketches.theta;
import com.google.common.base.Charsets;
import com.metamx.common.logger.Logger;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.memory.Memory;
import com.yahoo.sketches.memory.NativeMemory;
import com.yahoo.sketches.theta.AnotB;
import com.yahoo.sketches.theta.Intersection;
import com.yahoo.sketches.theta.SetOperation;
import com.yahoo.sketches.theta.Sketch;
import com.yahoo.sketches.theta.Sketches;
import com.yahoo.sketches.theta.Union;
import org.apache.commons.codec.binary.Base64;
/**
 * Static helpers for turning serialized theta sketches back into {@link Sketch} objects and for
 * combining sketches with set operations (union, intersection, A-not-B).
 */
public class SketchOperations
{
  private static final Logger LOG = new Logger(SketchOperations.class);

  /**
   * Compact form of a freshly built update sketch that has never been updated.
   */
  public static final Sketch EMPTY_SKETCH = Sketches.updateSketchBuilder().build().compact(true, null);

  /**
   * Set operations supported by {@link #sketchSetOperation(Func, int, Sketch...)}.
   */
  public enum Func
  {
    UNION,
    INTERSECT,
    NOT
  }

  /**
   * Converts a serialized sketch into a {@link Sketch}.
   *
   * @param serializedSketch a Base64-encoded {@link String}, a {@code byte[]}, or an already
   *                         materialized {@link Sketch} (which is returned as-is)
   *
   * @return the deserialized sketch
   *
   * @throws IllegalStateException if the argument is {@code null} or of any other type
   */
  public static Sketch deserialize(Object serializedSketch)
  {
    if (serializedSketch instanceof String) {
      return deserializeFromBase64EncodedString((String) serializedSketch);
    } else if (serializedSketch instanceof byte[]) {
      return deserializeFromByteArray((byte[]) serializedSketch);
    } else if (serializedSketch instanceof Sketch) {
      return (Sketch) serializedSketch;
    }

    // A null argument reaches this point too (instanceof is false for null), so the message must
    // be built without dereferencing it; otherwise the caller would see a NullPointerException
    // instead of the intended IllegalStateException.
    throw new IllegalStateException(
        "Object is not of a type that can deserialize to sketch: "
        + (serializedSketch == null ? null : serializedSketch.getClass())
    );
  }

  /**
   * Decodes a Base64 string (read as UTF-8 bytes) and deserializes the resulting bytes.
   *
   * @param str Base64-encoded serialized sketch
   *
   * @return the deserialized sketch
   */
  public static Sketch deserializeFromBase64EncodedString(String str)
  {
    return deserializeFromByteArray(
        Base64.decodeBase64(
            str.getBytes(Charsets.UTF_8)
        )
    );
  }

  /**
   * Wraps the given bytes in a {@link NativeMemory} and deserializes a sketch from it.
   *
   * @param data serialized sketch bytes
   *
   * @return the deserialized sketch
   */
  public static Sketch deserializeFromByteArray(byte[] data)
  {
    return deserializeFromMemory(new NativeMemory(data));
  }

  /**
   * Deserializes a sketch from memory. Images whose serialization version is below 3 are
   * heapified; all others are wrapped around the given memory.
   *
   * @param mem memory holding the serialized sketch
   *
   * @return the deserialized sketch
   */
  public static Sketch deserializeFromMemory(Memory mem)
  {
    if (Sketch.getSerializationVersion(mem) < 3) {
      return Sketches.heapifySketch(mem);
    } else {
      return Sketches.wrapSketch(mem);
    }
  }

  /**
   * Applies a set operation to the given sketches.
   *
   * <ul>
   * <li>{@code UNION}: union of all sketches.</li>
   * <li>{@code INTERSECT}: intersection of all sketches.</li>
   * <li>{@code NOT}: the first sketch minus every following sketch, applied left to right; with a
   * single sketch that sketch itself is returned.</li>
   * </ul>
   *
   * @param func       operation to apply
   * @param sketchSize size passed to the set operation builder
   * @param sketches   operands, in order
   *
   * @return the resulting sketch
   *
   * @throws IllegalArgumentException if {@code func} is {@code null} or unknown, or if
   *                                  {@code NOT} is requested without any sketch
   */
  public static Sketch sketchSetOperation(Func func, int sketchSize, Sketch... sketches)
  {
    // Switching on a null enum would fail with an anonymous NullPointerException; report it the
    // same way as any other unsupported operation.
    if (func == null) {
      throw new IllegalArgumentException("Unknown sketch operation " + func);
    }

    // Every branch below returns SetOp.getResult(false, null).
    // "false" yields an unordered sketch, which is faster to build. "true" would yield an ordered
    // sketch, which is slower to compute; ordered sketches are faster to "union" later, but since
    // this method is used in the final stages of query processing that would be of no use.
    switch (func) {
      case UNION:
        Union union = (Union) SetOperation.builder().build(sketchSize, Family.UNION);
        for (Sketch sketch : sketches) {
          union.update(sketch);
        }
        return union.getResult(false, null);
      case INTERSECT:
        // NOTE(review): with no input sketches the intersection is never updated before
        // getResult is called - confirm how the library behaves in that case.
        Intersection intersection = (Intersection) SetOperation.builder().build(sketchSize, Family.INTERSECTION);
        for (Sketch sketch : sketches) {
          intersection.update(sketch);
        }
        return intersection.getResult(false, null);
      case NOT:
        if (sketches.length < 1) {
          throw new IllegalArgumentException("A-Not-B requires at least 1 sketch");
        }

        // With exactly one sketch the loop body never runs and that sketch is returned unchanged.
        Sketch result = sketches[0];
        for (int i = 1; i < sketches.length; i++) {
          AnotB anotb = (AnotB) SetOperation.builder().build(sketchSize, Family.A_NOT_B);
          anotb.update(result, sketches[i]);
          result = anotb.getResult(false, null);
        }
        return result;
      default:
        throw new IllegalArgumentException("Unknown sketch operation " + func);
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy