All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.datasketches.theta.UpdateSketchBuilder Maven / Gradle / Ivy

Go to download

Core sketch algorithms used alone and by other Java repositories in the DataSketches library.

There is a newer version: 7.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.theta;

import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import static org.apache.datasketches.Util.LS;
import static org.apache.datasketches.Util.MAX_LG_NOM_LONGS;
import static org.apache.datasketches.Util.MIN_LG_NOM_LONGS;
import static org.apache.datasketches.Util.TAB;
import static org.apache.datasketches.Util.ceilingPowerOf2;
import static org.apache.datasketches.Util.checkNomLongs;

import org.apache.datasketches.Family;
import org.apache.datasketches.ResizeFactor;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.SketchesStateException;
import org.apache.datasketches.memory.DefaultMemoryRequestServer;
import org.apache.datasketches.memory.MemoryRequestServer;
import org.apache.datasketches.memory.WritableMemory;

/**
 * For building a new UpdateSketch.
 *
 * @author Lee Rhodes
 */
public class UpdateSketchBuilder {
  private int bLgNomLongs;
  private long bSeed;
  private ResizeFactor bRF;
  private Family bFam;
  private float bP;
  private MemoryRequestServer bMemReqSvr;

  //Fields for concurrent theta sketch
  private int bNumPoolThreads;
  private int bLocalLgNomLongs;
  private boolean bPropagateOrderedCompact;
  private double bMaxConcurrencyError;
  private int bMaxNumLocalThreads;

  /**
   * Constructor for building a new UpdateSketch. The default configuration is
   * 
    *
  • Nominal Entries: {@value org.apache.datasketches.Util#DEFAULT_NOMINAL_ENTRIES}
  • *
  • Seed: {@value org.apache.datasketches.Util#DEFAULT_UPDATE_SEED}
  • *
  • Input Sampling Probability: 1.0
  • *
  • Family: {@link org.apache.datasketches.Family#QUICKSELECT}
  • *
  • Resize Factor: The default for sketches on the Java heap is {@link ResizeFactor#X8}. * For direct sketches, which are targeted for native memory off the Java heap, this value will * be fixed at either {@link ResizeFactor#X1} or {@link ResizeFactor#X2}.
  • *
  • MemoryRequestServer (Direct only): * {@link org.apache.datasketches.memory.DefaultMemoryRequestServer}.
  • *
* Parameters unique to the concurrent sketches only: *
    *
  • Number of local Nominal Entries: 4
  • *
  • Concurrent NumPoolThreads: 3
  • *
  • Concurrent PropagateOrderedCompact: true
  • *
  • Concurrent MaxConcurrencyError: 0
  • *
*/ public UpdateSketchBuilder() { bLgNomLongs = Integer.numberOfTrailingZeros(DEFAULT_NOMINAL_ENTRIES); bSeed = DEFAULT_UPDATE_SEED; bP = (float) 1.0; bRF = ResizeFactor.X8; bFam = Family.QUICKSELECT; bMemReqSvr = new DefaultMemoryRequestServer(); // Default values for concurrent sketch bNumPoolThreads = ConcurrentPropagationService.NUM_POOL_THREADS; bLocalLgNomLongs = 4; //default is smallest legal QS sketch bPropagateOrderedCompact = true; bMaxConcurrencyError = 0; bMaxNumLocalThreads = 1; } /** * Sets the Nominal Entries for this sketch. * This value is also used for building a shared concurrent sketch. * The minimum value is 16 (2^4) and the maximum value is 67,108,864 (2^26). * Be aware that sketches as large as this maximum value may not have been * thoroughly tested or characterized for performance. * * @param nomEntries Nominal Entries * This will become the ceiling power of 2 if the given value is not. * @return this UpdateSketchBuilder */ public UpdateSketchBuilder setNominalEntries(final int nomEntries) { bLgNomLongs = checkNomLongs(nomEntries); return this; } /** * Alternative method of setting the Nominal Entries for this sketch from the log_base2 value. * This value is also used for building a shared concurrent sketch. * The minimum value is 4 and the maximum value is 26. * Be aware that sketches as large as this maximum value may not have been * thoroughly tested or characterized for performance. * * @param lgNomEntries the Log Nominal Entries for the concurrent shared sketch * @return this UpdateSketchBuilder */ public UpdateSketchBuilder setLogNominalEntries(final int lgNomEntries) { bLgNomLongs = checkNomLongs(1 << lgNomEntries); return this; } /** * Returns Log-base 2 Nominal Entries * @return Log-base 2 Nominal Entries */ public int getLgNominalEntries() { return bLgNomLongs; } /** * Sets the Nominal Entries for the concurrent local sketch. The minimum value is 16 and the * maximum value is 67,108,864, which is 2^26. * Be aware that sketches as large as this maximum * value have not been thoroughly tested or characterized for performance. * * @param nomEntries Nominal Entries * This will become the ceiling power of 2 if it is not. * @return this ConcurrentThetaBuilder */ public UpdateSketchBuilder setLocalNominalEntries(final int nomEntries) { bLocalLgNomLongs = Integer.numberOfTrailingZeros(ceilingPowerOf2(nomEntries)); if ((bLocalLgNomLongs > MAX_LG_NOM_LONGS) || (bLocalLgNomLongs < MIN_LG_NOM_LONGS)) { throw new SketchesArgumentException( "Nominal Entries must be >= 16 and <= 67108864: " + nomEntries); } return this; } /** * Alternative method of setting the Nominal Entries for a local concurrent sketch from the * log_base2 value. * The minimum value is 4 and the maximum value is 26. * Be aware that sketches as large as this maximum * value have not been thoroughly tested or characterized for performance. * * @param lgNomEntries the Log Nominal Entries for a concurrent local sketch * @return this ConcurrentThetaBuilder */ public UpdateSketchBuilder setLocalLogNominalEntries(final int lgNomEntries) { bLocalLgNomLongs = lgNomEntries; if ((bLocalLgNomLongs > MAX_LG_NOM_LONGS) || (bLocalLgNomLongs < MIN_LG_NOM_LONGS)) { throw new SketchesArgumentException( "Log Nominal Entries must be >= 4 and <= 26: " + lgNomEntries); } return this; } /** * Returns Log-base 2 Nominal Entries for the concurrent local sketch * @return Log-base 2 Nominal Entries for the concurrent local sketch */ public int getLocalLgNominalEntries() { return bLocalLgNomLongs; } /** * Sets the long seed value that is required by the hashing function. * @param seed See seed * @return this UpdateSketchBuilder */ public UpdateSketchBuilder setSeed(final long seed) { bSeed = seed; return this; } /** * Returns the seed * @return the seed */ public long getSeed() { return bSeed; } /** * Sets the upfront uniform sampling probability, p * @param p See Sampling Probability, p * @return this UpdateSketchBuilder */ public UpdateSketchBuilder setP(final float p) { if ((p <= 0.0) || (p > 1.0)) { throw new SketchesArgumentException("p must be > 0 and <= 1.0: " + p); } bP = p; return this; } /** * Returns the pre-sampling probability p * @return the pre-sampling probability p */ public float getP() { return bP; } /** * Sets the cache Resize Factor. * @param rf See Resize Factor * @return this UpdateSketchBuilder */ public UpdateSketchBuilder setResizeFactor(final ResizeFactor rf) { bRF = rf; return this; } /** * Returns the Resize Factor * @return the Resize Factor */ public ResizeFactor getResizeFactor() { return bRF; } /** * Set the Family. * @param family the family for this builder * @return this UpdateSketchBuilder */ public UpdateSketchBuilder setFamily(final Family family) { bFam = family; return this; } /** * Returns the Family * @return the Family */ public Family getFamily() { return bFam; } /** * Set the MemoryRequestServer * @param memReqSvr the given MemoryRequestServer * @return this UpdateSketchBuilder */ public UpdateSketchBuilder setMemoryRequestServer(final MemoryRequestServer memReqSvr) { bMemReqSvr = memReqSvr; return this; } /** * Returns the MemoryRequestServer * @return the MemoryRequestServer */ public MemoryRequestServer getMemoryRequestServer() { return bMemReqSvr; } /** * Sets the number of pool threads used for background propagation in the concurrent sketches. * @param numPoolThreads the given number of pool threads */ public void setNumPoolThreads(final int numPoolThreads) { bNumPoolThreads = numPoolThreads; } /** * Gets the number of background pool threads used for propagation in the concurrent sketches. * @return the number of background pool threads */ public int getNumPoolThreads() { return bNumPoolThreads; } /** * Sets the Propagate Ordered Compact flag to the given value. Used with concurrent sketches. * * @param prop the given value * @return this UpdateSketchBuilder */ public UpdateSketchBuilder setPropagateOrderedCompact(final boolean prop) { bPropagateOrderedCompact = prop; return this; } /** * Gets the Propagate Ordered Compact flag used with concurrent sketches. * @return the Propagate Ordered Compact flag */ public boolean getPropagateOrderedCompact() { return bPropagateOrderedCompact; } /** * Sets the Maximum Concurrency Error. * @param maxConcurrencyError the given Maximum Concurrency Error. */ public void setMaxConcurrencyError(final double maxConcurrencyError) { bMaxConcurrencyError = maxConcurrencyError; } /** * Gets the Maximum Concurrency Error * @return the Maximum Concurrency Error */ public double getMaxConcurrencyError() { return bMaxConcurrencyError; } /** * Sets the Maximum Number of Local Threads. * This is used to set the size of the local concurrent buffers. * @param maxNumLocalThreads the given Maximum Number of Local Threads */ public void setMaxNumLocalThreads(final int maxNumLocalThreads) { bMaxNumLocalThreads = maxNumLocalThreads; } /** * Gets the Maximum Number of Local Threads. * @return the Maximum Number of Local Threads. */ public int getMaxNumLocalThreads() { return bMaxNumLocalThreads; } // BUILD FUNCTIONS /** * Returns an UpdateSketch with the current configuration of this Builder. * @return an UpdateSketch */ public UpdateSketch build() { return build(null); } /** * Returns an UpdateSketch with the current configuration of this Builder * with the specified backing destination Memory store. * Note: this cannot be used with the Alpha Family of sketches. * @param dstMem The destination Memory. * @return an UpdateSketch */ public UpdateSketch build(final WritableMemory dstMem) { UpdateSketch sketch = null; switch (bFam) { case ALPHA: { if (dstMem == null) { sketch = HeapAlphaSketch.newHeapInstance(bLgNomLongs, bSeed, bP, bRF); } else { throw new SketchesArgumentException("AlphaSketch cannot be made Direct to Memory."); } break; } case QUICKSELECT: { if (dstMem == null) { sketch = new HeapQuickSelectSketch(bLgNomLongs, bSeed, bP, bRF, false); } else { sketch = new DirectQuickSelectSketch( bLgNomLongs, bSeed, bP, bRF, bMemReqSvr, dstMem, false); } break; } default: { throw new SketchesArgumentException( "Given Family cannot be built as a Theta Sketch: " + bFam.toString()); } } return sketch; } /** * Returns an on-heap concurrent shared UpdateSketch with the current configuration of the * Builder. * *

The parameters unique to the shared concurrent sketch are: *

    *
  • Number of Pool Threads (default is 3)
  • *
  • Maximum Concurrency Error
  • *
* *

Key parameters that are in common with other Theta sketches: *

    *
  • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
  • *
* * @return an on-heap concurrent UpdateSketch with the current configuration of the Builder. */ public UpdateSketch buildShared() { return buildShared(null); } /** * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current * configuration of the Builder and the given destination WritableMemory. If the destination * WritableMemory is null, this defaults to an on-heap concurrent shared UpdateSketch. * *

The parameters unique to the shared concurrent sketch are: *

    *
  • Number of Pool Threads (default is 3)
  • *
  • Maximum Concurrency Error
  • *
* *

Key parameters that are in common with other Theta sketches: *

    *
  • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
  • *
  • Destination Writable Memory (if not null, returned sketch is Direct. Default is null.)
  • *
* * @param dstMem the given WritableMemory for Direct, otherwise null. * @return a concurrent UpdateSketch with the current configuration of the Builder * and the given destination WritableMemory. */ public UpdateSketch buildShared(final WritableMemory dstMem) { ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; if (dstMem == null) { return new ConcurrentHeapQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError); } else { return new ConcurrentDirectQuickSelectSketch(bLgNomLongs, bSeed, bMaxConcurrencyError, dstMem); } } /** * Returns a direct (potentially off-heap) concurrent shared UpdateSketch with the current * configuration of the Builder, the data from the given sketch, and the given destination * WritableMemory. If the destination WritableMemory is null, this defaults to an on-heap * concurrent shared UpdateSketch. * *

The parameters unique to the shared concurrent sketch are: *

    *
  • Number of Pool Threads (default is 3)
  • *
  • Maximum Concurrency Error
  • *
* *

Key parameters that are in common with other Theta sketches: *

    *
  • Nominal Entries or Log Nominal Entries (for the shared concurrent sketch)
  • *
  • Destination Writable Memory (if not null, returned sketch is Direct. Default is null.)
  • *
* * @param sketch a given UpdateSketch from which the data is used to initialize the returned * shared sketch. * @param dstMem the given WritableMemory for Direct, otherwise null. * @return a concurrent UpdateSketch with the current configuration of the Builder * and the given destination WritableMemory. */ public UpdateSketch buildSharedFromSketch(final UpdateSketch sketch, final WritableMemory dstMem) { ConcurrentPropagationService.NUM_POOL_THREADS = bNumPoolThreads; if (dstMem == null) { return new ConcurrentHeapQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError); } else { return new ConcurrentDirectQuickSelectSketch(sketch, bSeed, bMaxConcurrencyError, dstMem); } } /** * Returns a local, on-heap, concurrent UpdateSketch to be used as a per-thread local buffer * along with the given concurrent shared UpdateSketch and the current configuration of this * Builder. * *

The parameters unique to the local concurrent sketch are: *

    *
  • Local Nominal Entries or Local Log Nominal Entries
  • *
  • Propagate Ordered Compact flag
  • *
* * @param shared the concurrent shared sketch to be accessed via the concurrent local sketch. * @return an UpdateSketch to be used as a per-thread local buffer. */ public UpdateSketch buildLocal(final UpdateSketch shared) { if ((shared == null) || !(shared instanceof ConcurrentSharedThetaSketch)) { throw new SketchesStateException("The concurrent shared sketch must be built first."); } return new ConcurrentHeapThetaBuffer(bLocalLgNomLongs, bSeed, (ConcurrentSharedThetaSketch) shared, bPropagateOrderedCompact, bMaxNumLocalThreads); } @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("UpdateSketchBuilder configuration:").append(LS); sb.append("LgK:").append(TAB).append(bLgNomLongs).append(LS); sb.append("K:").append(TAB).append(1 << bLgNomLongs).append(LS); sb.append("LgLocalK:").append(TAB).append(bLocalLgNomLongs).append(LS); sb.append("LocalK:").append(TAB).append(1 << bLocalLgNomLongs).append(LS); sb.append("Seed:").append(TAB).append(bSeed).append(LS); sb.append("p:").append(TAB).append(bP).append(LS); sb.append("ResizeFactor:").append(TAB).append(bRF).append(LS); sb.append("Family:").append(TAB).append(bFam).append(LS); final String mrsStr = bMemReqSvr.getClass().getSimpleName(); sb.append("MemoryRequestServer:").append(TAB).append(mrsStr).append(LS); sb.append("Propagate Ordered Compact").append(TAB).append(bPropagateOrderedCompact).append(LS); sb.append("NumPoolThreads").append(TAB).append(bNumPoolThreads).append(LS); sb.append("MaxConcurrencyError").append(TAB).append(bMaxConcurrencyError).append(LS); sb.append("MaxNumLocalThreads").append(TAB).append(bMaxNumLocalThreads).append(LS); return sb.toString(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy