All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.druid.query.aggregation.datasketches.theta.SketchOperations Maven / Gradle / Ivy

There is a newer version: 0.12.3
Show newest version
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.query.aggregation.datasketches.theta;

import com.google.common.base.Charsets;
import com.metamx.common.logger.Logger;
import com.yahoo.sketches.Family;
import com.yahoo.sketches.memory.Memory;
import com.yahoo.sketches.memory.NativeMemory;
import com.yahoo.sketches.theta.AnotB;
import com.yahoo.sketches.theta.Intersection;
import com.yahoo.sketches.theta.SetOperation;
import com.yahoo.sketches.theta.Sketch;
import com.yahoo.sketches.theta.Sketches;
import com.yahoo.sketches.theta.Union;
import org.apache.commons.codec.binary.Base64;

/**
 * Static helpers for turning serialized theta sketches back into {@link Sketch} objects and for
 * combining several sketches with a set operation (union, intersection, A-not-B).
 */
public class SketchOperations
{

  private static final Logger LOG = new Logger(SketchOperations.class);

  /**
   * A shared sketch built from a fresh update sketch that never received any update, then
   * compacted with {@code compact(true, null)}.
   */
  public static final Sketch EMPTY_SKETCH = Sketches.updateSketchBuilder().build().compact(true, null);

  /**
   * Set operations supported by {@link #sketchSetOperation(Func, int, Sketch...)}.
   */
  public enum Func
  {
    UNION,
    INTERSECT,
    NOT
  }

  /**
   * Converts a loosely typed value into a {@link Sketch}.
   *
   * <p>Accepted inputs are a Base64 encoded {@link String}, a raw {@code byte[]}, or an object that
   * already is a {@link Sketch} (returned as-is).
   *
   * @param serializedSketch the value to convert
   * @return the resulting sketch
   * @throws IllegalStateException if {@code serializedSketch} is {@code null} or of any other type
   */
  public static Sketch deserialize(Object serializedSketch)
  {
    // A null matches none of the instanceof checks below; without this guard the error message
    // construction would dereference it and surface a bare NullPointerException instead of the
    // intended IllegalStateException.
    if (serializedSketch == null) {
      throw new IllegalStateException(
          "Object is not of a type that can deserialize to sketch: null"
      );
    }

    if (serializedSketch instanceof String) {
      return deserializeFromBase64EncodedString((String) serializedSketch);
    } else if (serializedSketch instanceof byte[]) {
      return deserializeFromByteArray((byte[]) serializedSketch);
    } else if (serializedSketch instanceof Sketch) {
      return (Sketch) serializedSketch;
    }

    throw new IllegalStateException(
        "Object is not of a type that can deserialize to sketch: "
        + serializedSketch.getClass()
    );
  }

  /**
   * Decodes a Base64 string (its characters are read as UTF-8 bytes) and deserializes the
   * resulting bytes via {@link #deserializeFromByteArray(byte[])}.
   *
   * @param str Base64 encoded sketch bytes
   * @return the deserialized sketch
   */
  public static Sketch deserializeFromBase64EncodedString(String str)
  {
    return deserializeFromByteArray(
        Base64.decodeBase64(
            str.getBytes(Charsets.UTF_8)
        )
    );
  }

  /**
   * Wraps the given bytes in a {@link NativeMemory} and deserializes them via
   * {@link #deserializeFromMemory(Memory)}.
   *
   * @param data serialized sketch bytes
   * @return the deserialized sketch
   */
  public static Sketch deserializeFromByteArray(byte[] data)
  {
    return deserializeFromMemory(new NativeMemory(data));
  }

  /**
   * Produces a {@link Sketch} from a memory region holding a serialized sketch.
   *
   * <p>Images whose serialization version is below 3 are heapified; version 3 and above are
   * wrapped instead.
   *
   * @param mem memory holding the serialized sketch
   * @return the deserialized sketch
   */
  public static Sketch deserializeFromMemory(Memory mem)
  {
    if (Sketch.getSerializationVersion(mem) < 3) {
      return Sketches.heapifySketch(mem);
    } else {
      return Sketches.wrapSketch(mem);
    }
  }

  /**
   * Applies a set operation across the given sketches, in argument order.
   *
   * <ul>
   *   <li>{@code UNION}: every sketch is fed into one union.</li>
   *   <li>{@code INTERSECT}: every sketch is fed into one intersection.</li>
   *   <li>{@code NOT}: computes {@code ((s0 not s1) not s2) ...}; a single input sketch is
   *       returned unchanged.</li>
   * </ul>
   *
   * @param func       the operation to perform
   * @param sketchSize size handed to the set operation builder
   * @param sketches   the operands
   * @return the resulting sketch
   * @throws IllegalArgumentException if {@code func} is {@code NOT} and no sketch was supplied, or
   *                                  if {@code func} is not a handled operation
   */
  public static Sketch sketchSetOperation(Func func, int sketchSize, Sketch... sketches)
  {
    // Every result below is fetched with getResult(false, null).
    // "false" asks for an unordered sketch, which is faster to build.
    // "true" would give an ordered sketch, slower to compute but faster to "union" later.
    // Since this method is used in the final stages of query processing, an ordered sketch
    // would be of no use.
    switch (func) {
      case UNION:
        Union union = (Union) SetOperation.builder().build(sketchSize, Family.UNION);
        for (Sketch sketch : sketches) {
          union.update(sketch);
        }
        return union.getResult(false, null);
      case INTERSECT:
        Intersection intersection = (Intersection) SetOperation.builder().build(sketchSize, Family.INTERSECTION);
        for (Sketch sketch : sketches) {
          intersection.update(sketch);
        }
        return intersection.getResult(false, null);
      case NOT:
        if (sketches.length < 1) {
          throw new IllegalArgumentException("A-Not-B requires atleast 1 sketch");
        }

        // Nothing to subtract: the lone sketch is the answer.
        if (sketches.length == 1) {
          return sketches[0];
        }

        // Fold left: subtract each subsequent sketch from the running result, using a freshly
        // built A-not-B operation for every step.
        Sketch result = sketches[0];
        for (int i = 1; i < sketches.length; i++) {
          AnotB anotb = (AnotB) SetOperation.builder().build(sketchSize, Family.A_NOT_B);
          anotb.update(result, sketches[i]);
          result = anotb.getResult(false, null);
        }
        return result;
      default:
        throw new IllegalArgumentException("Unknown sketch operation " + func);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy