All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.datasketches.theta.CompactSketch Maven / Gradle / Ivy

Go to download

Core sketch algorithms used alone and by other Java repositories in the DataSketches library.

There is a newer version: 7.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.theta;

import static org.apache.datasketches.theta.PreambleUtil.SER_VER;
import static org.apache.datasketches.theta.PreambleUtil.insertCurCount;
import static org.apache.datasketches.theta.PreambleUtil.insertFamilyID;
import static org.apache.datasketches.theta.PreambleUtil.insertFlags;
import static org.apache.datasketches.theta.PreambleUtil.insertP;
import static org.apache.datasketches.theta.PreambleUtil.insertPreLongs;
import static org.apache.datasketches.theta.PreambleUtil.insertSeedHash;
import static org.apache.datasketches.theta.PreambleUtil.insertSerVer;
import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong;

import java.util.Arrays;

import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;

/**
 * The parent class of all the CompactSketches. CompactSketches are never created directly.
 * They are created as a result of the compact() method of an UpdateSketch or as a result of a
 * getResult() of a SetOperation.
 *
 * 

A CompactSketch is the simplest form of a Theta Sketch. It consists of a compact list * (i.e., no intervening spaces) of hash values, which may be ordered or not, a value for theta * and a seed hash. A CompactSketch is read-only, * and the space required when stored is only the space required for the hash values and 8 to 24 * bytes of preamble. An empty CompactSketch consumes only 8 bytes.

* * @author Lee Rhodes */ public abstract class CompactSketch extends Sketch { //Sketch @Override public CompactSketch compact() { return this; } @Override public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) { return this; } @Override public Family getFamily() { return Family.COMPACT; } @Override public boolean isCompact() { return true; } //restricted methods /** * Compact the given array. The source cache can be a hash table with interstitial zeros or * "dirty" values. * @param srcCache anything * @param curCount must be correct * @param thetaLong The correct * thetaLong. * @param dstOrdered true if output array must be sorted * @return the compacted array */ static final long[] compactCache(final long[] srcCache, final int curCount, final long thetaLong, final boolean dstOrdered) { if (curCount == 0) { return new long[0]; } final long[] cacheOut = new long[curCount]; final int len = srcCache.length; int j = 0; for (int i = 0; i < len; i++) { //scan the full srcCache final long v = srcCache[i]; if ((v <= 0L) || (v >= thetaLong) ) { continue; } //ignoring zeros or dirty values cacheOut[j++] = v; } assert curCount == j; if (dstOrdered && (curCount > 1)) { Arrays.sort(cacheOut); } return cacheOut; } /** * Compact first 2^lgArrLongs of given array * @param srcCache anything * @param lgArrLongs The correct * lgArrLongs. * @param curCount must be correct * @param thetaLong The correct * thetaLong. 
* @param dstOrdered true if output array must be sorted * @return the compacted array */ static final long[] compactCachePart(final long[] srcCache, final int lgArrLongs, final int curCount, final long thetaLong, final boolean dstOrdered) { if (curCount == 0) { return new long[0]; } final long[] cacheOut = new long[curCount]; final int len = 1 << lgArrLongs; int j = 0; for (int i = 0; i < len; i++) { final long v = srcCache[i]; if ((v <= 0L) || (v >= thetaLong) ) { continue; } cacheOut[j++] = v; } assert curCount == j; if (dstOrdered) { Arrays.sort(cacheOut); } return cacheOut; } //compactCache and dstMem must be valid static final Memory loadCompactMemory(final long[] compactCache, final short seedHash, final int curCount, final long thetaLong, final WritableMemory dstMem, final byte flags, final int preLongs) { assert (dstMem != null) && (compactCache != null); final int outLongs = preLongs + curCount; final int outBytes = outLongs << 3; final int dstBytes = (int) dstMem.getCapacity(); if (outBytes > dstBytes) { throw new SketchesArgumentException("Insufficient Memory: " + dstBytes + ", Need: " + outBytes); } final byte famID = (byte) Family.COMPACT.getID(); insertPreLongs(dstMem, preLongs); //RF not used = 0 insertSerVer(dstMem, SER_VER); insertFamilyID(dstMem, famID); //ignore lgNomLongs, lgArrLongs bytes for compact sketches insertFlags(dstMem, flags); insertSeedHash(dstMem, seedHash); if ((preLongs == 1) && (curCount == 1)) { //singleItem dstMem.putLong(8, compactCache[0]); return dstMem; } if (preLongs > 1) { insertCurCount(dstMem, curCount); insertP(dstMem, (float) 1.0); } if (preLongs > 2) { insertThetaLong(dstMem, thetaLong); } if (curCount > 0) { dstMem.putLongArray(preLongs << 3, compactCache, 0, curCount); } return dstMem; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy