org.apache.datasketches.theta.CompactSketch Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datasketches-java Show documentation
Show all versions of datasketches-java Show documentation
Core sketch algorithms used alone and by other Java repositories in the DataSketches library.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.theta;
import static org.apache.datasketches.theta.PreambleUtil.SER_VER;
import static org.apache.datasketches.theta.PreambleUtil.insertCurCount;
import static org.apache.datasketches.theta.PreambleUtil.insertFamilyID;
import static org.apache.datasketches.theta.PreambleUtil.insertFlags;
import static org.apache.datasketches.theta.PreambleUtil.insertP;
import static org.apache.datasketches.theta.PreambleUtil.insertPreLongs;
import static org.apache.datasketches.theta.PreambleUtil.insertSeedHash;
import static org.apache.datasketches.theta.PreambleUtil.insertSerVer;
import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong;
import java.util.Arrays;
import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
/**
* The parent class of all the CompactSketches. CompactSketches are never created directly.
* They are created as a result of the compact() method of an UpdateSketch or as a result of a
* getResult() of a SetOperation.
*
* A CompactSketch is the simplest form of a Theta Sketch. It consists of a compact list
* (i.e., no intervening spaces) of hash values, which may be ordered or not, a value for theta
* and a seed hash. A CompactSketch is read-only,
* and the space required when stored is only the space required for the hash values and 8 to 24
* bytes of preamble. An empty CompactSketch consumes only 8 bytes.
*
* @author Lee Rhodes
*/
public abstract class CompactSketch extends Sketch {

  //Sketch

  // This base implementation hands back the same instance.
  @Override
  public CompactSketch compact() {
    return this;
  }

  // This base implementation returns this instance and uses neither dstOrdered nor dstMem.
  // NOTE(review): presumably concrete subclasses override this where ordering or a destination
  // Memory matters -- confirm against the subclasses.
  @Override
  public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
    return this;
  }

  @Override
  public Family getFamily() {
    return Family.COMPACT;
  }

  @Override
  public boolean isCompact() {
    return true;
  }

  //restricted methods

  /**
   * Compact the given array. The source cache can be a hash table with interstitial zeros or
   * "dirty" values. The whole of <i>srcCache</i> is scanned and only values <i>v</i> with
   * <i>0 &lt; v &lt; thetaLong</i> are retained.
   *
   * <p>When <i>curCount</i> is zero an empty array is returned without touching
   * <i>srcCache</i>.</p>
   *
   * @param srcCache anything
   * @param curCount must be correct, i.e., equal to the number of retained values
   * @param thetaLong The correct thetaLong.
   * @param dstOrdered true if output array must be sorted
   * @return the compacted array
   */
  static final long[] compactCache(final long[] srcCache, final int curCount,
      final long thetaLong, final boolean dstOrdered) {
    if (curCount == 0) { //checked before srcCache is dereferenced
      return new long[0];
    }
    return compactRange(srcCache, srcCache.length, curCount, thetaLong, dstOrdered);
  }

  /**
   * Compact first 2^lgArrLongs of given array. Only values <i>v</i> with
   * <i>0 &lt; v &lt; thetaLong</i> are retained.
   *
   * <p>When <i>curCount</i> is zero an empty array is returned without touching
   * <i>srcCache</i>.</p>
   *
   * @param srcCache anything
   * @param lgArrLongs The correct lgArrLongs.
   * @param curCount must be correct, i.e., equal to the number of retained values
   * @param thetaLong The correct thetaLong.
   * @param dstOrdered true if output array must be sorted
   * @return the compacted array
   */
  static final long[] compactCachePart(final long[] srcCache, final int lgArrLongs,
      final int curCount, final long thetaLong, final boolean dstOrdered) {
    if (curCount == 0) {
      return new long[0];
    }
    return compactRange(srcCache, 1 << lgArrLongs, curCount, thetaLong, dstOrdered);
  }

  /**
   * Shared scan used by {@link #compactCache} and {@link #compactCachePart}: copies every value
   * <i>v</i> with <i>0 &lt; v &lt; thetaLong</i> found in the first <i>len</i> slots of
   * <i>srcCache</i> into a new array of exactly <i>curCount</i> longs, optionally sorting it.
   * The callers handle the <i>curCount == 0</i> case before calling this method.
   *
   * @param srcCache the source array
   * @param len the number of leading slots of srcCache to scan
   * @param curCount the number of valid values expected in the scanned range
   * @param thetaLong exclusive upper bound for a valid value
   * @param dstOrdered true if output array must be sorted
   * @return the compacted array
   */
  private static long[] compactRange(final long[] srcCache, final int len, final int curCount,
      final long thetaLong, final boolean dstOrdered) {
    final long[] cacheOut = new long[curCount];
    int j = 0;
    for (int i = 0; i < len; i++) {
      final long v = srcCache[i];
      if ((v <= 0L) || (v >= thetaLong)) { continue; } //ignoring zeros or dirty values
      cacheOut[j++] = v;
    }
    assert curCount == j;
    if (dstOrdered && (curCount > 1)) { //an array of one value is already ordered
      Arrays.sort(cacheOut);
    }
    return cacheOut;
  }

  /**
   * Writes a compact sketch image into <i>dstMem</i>: the preamble fields (preLongs, serial
   * version, family ID, flags, seed hash and, depending on <i>preLongs</i>, the current count,
   * p = 1.0 and thetaLong) followed by the first <i>curCount</i> values of <i>compactCache</i>
   * starting at byte offset <i>preLongs * 8</i>.
   *
   * <p>When <i>preLongs == 1</i> and <i>curCount == 1</i> (single item) the one hash value is
   * written at byte offset 8 and nothing else follows.</p>
   *
   * <p>compactCache and dstMem must be valid (non-null); this is only checked by assertion.</p>
   *
   * @param compactCache the already compacted hash values
   * @param seedHash the seed hash to store in the preamble
   * @param curCount the number of values of compactCache to write
   * @param thetaLong the value of theta as a long; written only when preLongs &gt; 2
   * @param dstMem the destination memory
   * @param flags the flags byte to store in the preamble
   * @param preLongs the number of preamble longs
   * @return dstMem
   * @throws SketchesArgumentException if the capacity of dstMem is less than
   * <i>(preLongs + curCount) * 8</i> bytes
   */
  static final Memory loadCompactMemory(final long[] compactCache, final short seedHash,
      final int curCount, final long thetaLong, final WritableMemory dstMem,
      final byte flags, final int preLongs) {
    assert (dstMem != null) && (compactCache != null);
    // Sizes are computed and compared as longs: int arithmetic would wrap for a very large
    // curCount, and narrowing the capacity to int would truncate capacities above 2GB.
    final long outBytes = ((long) preLongs + curCount) << 3;
    final long dstBytes = dstMem.getCapacity();
    if (outBytes > dstBytes) {
      throw new SketchesArgumentException("Insufficient Memory: " + dstBytes
          + ", Need: " + outBytes);
    }
    final byte famID = (byte) Family.COMPACT.getID();

    insertPreLongs(dstMem, preLongs); //RF not used = 0
    insertSerVer(dstMem, SER_VER);
    insertFamilyID(dstMem, famID);
    //ignore lgNomLongs, lgArrLongs bytes for compact sketches
    insertFlags(dstMem, flags);
    insertSeedHash(dstMem, seedHash);

    if ((preLongs == 1) && (curCount == 1)) { //singleItem
      dstMem.putLong(8, compactCache[0]);
      return dstMem;
    }
    if (preLongs > 1) {
      insertCurCount(dstMem, curCount);
      insertP(dstMem, (float) 1.0);
    }
    if (preLongs > 2) {
      insertThetaLong(dstMem, thetaLong);
    }
    if (curCount > 0) {
      dstMem.putLongArray(preLongs << 3, compactCache, 0, curCount);
    }
    return dstMem;
  }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy