All downloads are free. Search and download functionality uses the official Maven repository.

com.yahoo.sketches.theta.PairwiseSetOperations Maven / Gradle / Ivy

There is a newer version: 0.6.0
Show newest version
/*
 * Copyright 2016, Yahoo! Inc. Licensed under the terms of the
 * Apache License 2.0. See LICENSE file at the project root for terms.
 */

package com.yahoo.sketches.theta;

import static com.yahoo.sketches.theta.SetOperation.createCompactSketch;

import java.util.Arrays;

import com.yahoo.sketches.SketchesArgumentException;
import com.yahoo.sketches.Util;

/**
 * Set Operations where the arguments are presented in pairs as in C = Op(A,B). These are
 * stateless operations and the result is returned immediately.
 *
 * 

These operations are designed for convenience and accept Sketches that may be either * Heap-based or Direct. * * @author Lee Rhodes */ public class PairwiseSetOperations { /** * This implements a stateless, pair-wise Intersect operation on sketches * that are either Heap-based or Direct. * If both inputs are null a null is returned. * * @param skA The first Sketch argument. * @param skB The second Sketch argument. * @return the result as an ordered CompactSketch on the heap. */ public static CompactSketch intersect(final Sketch skA, final Sketch skB) { if ((skA == null) && (skB == null)) { return null; } final short seedHash = (skA == null) ? skB.getSeedHash() : skA.getSeedHash(); final Intersection inter = new IntersectionImpl(seedHash); return inter.intersect(skA, skB, true, null); } /** * This implements a stateless, pair-wise A AND NOT B operation on Sketches * that are either Heap-based or Direct. * If both inputs are null a null is returned. * * @param skA The first Sketch argument. * @param skB The second Sketch argument. * @return the result as an ordered CompactSketch on the heap. */ public static CompactSketch aNotB(final Sketch skA, final Sketch skB) { if ((skA == null) && (skB == null)) { return null; } final short seedHash = (skA == null) ? skB.getSeedHash() : skA.getSeedHash(); final HeapAnotB anotb = new HeapAnotB(seedHash); return anotb.aNotB(skA, skB, true, null); } /** * This implements a stateless, pair-wise union operation on ordered, * CompactSketches that are either Heap-based or Direct. * If both inputs are null a null is returned. * If one is null the other is returned, which can be either Heap-based or Direct. * This is equivalent to union(skA, skB, k) where k is the default of 4096. * * @param skA The first ordered, CompactSketch argument. * @param skB The second ordered, CompactSketch argument * @return the result as an ordered CompactSketch. 
*/ public static CompactSketch union(final CompactSketch skA, final CompactSketch skB) { return union(skA, skB, Util.DEFAULT_NOMINAL_ENTRIES); } /** * This implements a stateless, pair-wise union operation on ordered, * CompactSketches that are either Heap-based or Direct. The returned sketch will be cutback to * k if required, similar to the regular Union operation. If a cutback is required, the returned * sketch will always be on the heap. * If both inputs are null a null is returned. If either sketch is empty its Theta is ignored. * If one is null the other is returned, which may be either Direct or heap-based if a cutback * is required. * * @param skA The first ordered, CompactSketch argument. * @param skB The second ordered, CompactSketch argument * @param k The upper bound of the number of entries to be retained by the sketch * @return the result as an ordered CompactSketch. */ @SuppressWarnings("null") public static CompactSketch union(final CompactSketch skA, final CompactSketch skB, final int k) { //Handle all corner cases with null or empty arguments //For backward compatibility, we must allow input empties with Theta < 1.0. final int swA, swB; if (skA == null) { swA = 1; } else { checkOrdered(skA); swA = skA.isEmpty() ? 2 : 3; } if (skB == null) { swB = 1; } else { checkOrdered(skB); swB = skB.isEmpty() ? 2 : 3; } final int sw = (swA << 2) | swB; switch (sw) { case 5: { //skA == null; skB == null; return null. Cannot determine seedhash. return null; } case 6: { //skA == null; skB == empty; return empty final long thetaLong = skB.getThetaLong(); //lgtm [java/dereferenced-value-may-be-null] return (thetaLong == Long.MAX_VALUE) ? 
skB : HeapCompactOrderedSketch.compact(new long[0], true, skB.getSeedHash(), 0, Long.MAX_VALUE); } case 7: { //skA == null; skB == valid; return skB return maybeCutback(skB, k); } case 9: { //skA == empty; skB == null; return empty final long thetaLong = skA.getThetaLong(); //lgtm [java/dereferenced-value-may-be-null] return (thetaLong == Long.MAX_VALUE) ? skA : HeapCompactOrderedSketch.compact(new long[0], true, skA.getSeedHash(), 0, Long.MAX_VALUE); } case 10: { //skA == empty; skB == empty; return empty final short seedHash = seedHashesCheck(skA, skB); long thetaLong = skA.getThetaLong(); //lgtm [java/dereferenced-value-may-be-null] if (thetaLong == Long.MAX_VALUE) { return skA; } thetaLong = skB.getThetaLong(); //lgtm [java/dereferenced-value-may-be-null] if (thetaLong == Long.MAX_VALUE) { return skB; } return HeapCompactOrderedSketch.compact(new long[0], true, seedHash, 0, Long.MAX_VALUE); } case 11: { //skA == empty; skB == valid; return skB seedHashesCheck(skA, skB); return maybeCutback(skB, k); } case 13: { //skA == valid; skB == null; return skA return maybeCutback(skA, k); } case 14: { //skA == valid; skB == empty; return skA seedHashesCheck(skA, skB); return maybeCutback(skA, k); } case 15: { //skA == valid; skB == valid; perform full union seedHashesCheck(skA, skB); break; } //default: cannot happen } //Both sketches are valid with matching seedhashes and ordered //Full Union operation: final long thetaLongA = skA.getThetaLong(); //lgtm [java/dereferenced-value-may-be-null] final long thetaLongB = skB.getThetaLong(); //lgtm [java/dereferenced-value-may-be-null] long thetaLong = Math.min(thetaLongA, thetaLongB); //Theta rule final long[] cacheA = (skA.hasMemory()) ? skA.getCache() : skA.getCache().clone(); final long[] cacheB = (skB.hasMemory()) ? 
skB.getCache() : skB.getCache().clone(); final int aLen = cacheA.length; final int bLen = cacheB.length; final long[] outCache = new long[aLen + bLen]; int indexA = 0; int indexB = 0; int indexOut = 0; long hashA = (aLen == 0) ? thetaLong : cacheA[indexA]; long hashB = (bLen == 0) ? thetaLong : cacheB[indexB]; while ((indexA < aLen) || (indexB < bLen)) { if (hashA == hashB) { if (hashA < thetaLong) { if (indexOut >= k) { thetaLong = hashA; break; } outCache[indexOut++] = hashA; hashA = (++indexA < aLen) ? cacheA[indexA] : thetaLong; hashB = (++indexB < bLen) ? cacheB[indexB] : thetaLong; continue; } break; } else if (hashA < hashB) { if (hashA < thetaLong) { if (indexOut >= k) { thetaLong = hashA; break; } outCache[indexOut++] = hashA; hashA = (++indexA < aLen) ? cacheA[indexA] : thetaLong; continue; } break; } else { //hashA > hashB if (hashB < thetaLong) { if (indexOut >= k) { thetaLong = hashB; break; } outCache[indexOut++] = hashB; hashB = (++indexB < bLen) ? cacheB[indexB] : thetaLong; continue; } break; } } int curCount = indexOut; final long[] outArr; if (indexOut > k) { outArr = Arrays.copyOf(outCache, k); //cutback to k curCount = k; } else { outArr = Arrays.copyOf(outCache, curCount); //copy only valid items } return createCompactSketch(outArr, false, skA.getSeedHash(), curCount, thetaLong, true, null); } private static CompactSketch maybeCutback(final CompactSketch csk, final int k) { final boolean empty = csk.isEmpty(); int curCount = csk.getRetainedEntries(true); long thetaLong = csk.getThetaLong(); if (curCount > k) { //cutback to k final long[] cache = (csk.hasMemory()) ? 
csk.getCache() : csk.getCache().clone(); thetaLong = cache[k]; final long[] arr = Arrays.copyOf(cache, k); curCount = k; return createCompactSketch(arr, empty, csk.getSeedHash(), curCount, thetaLong, true, null); } return csk; } private static void checkOrdered(final CompactSketch csk) { if (!csk.isOrdered()) { throw new SketchesArgumentException("Given sketch must be ordered."); } } private static short seedHashesCheck(final Sketch skA, final Sketch skB) { final short seedHashA = skA.getSeedHash(); //lgtm [java/dereferenced-value-may-be-null] final short seedHashB = skB.getSeedHash(); //lgtm [java/dereferenced-value-may-be-null] return Util.checkSeedHashes(seedHashA, seedHashB); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy