// org.apache.datasketches.theta.Union Maven / Gradle / Ivy
// Show all versions of datasketches-java Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.theta;
import org.apache.datasketches.common.Family;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
/**
 * Computes the union of two or more theta sketches.
 * A newly created instance represents an empty set.
 *
 * @author Lee Rhodes
 */
public abstract class Union extends SetOperation {

  /**
   * Reports how many bytes of storage this union needs in its current state.
   *
   * @return the number of storage bytes required for this union in its current state.
   */
  public abstract int getCurrentBytes();

  /**
   * Identifies this set operation as a member of the UNION family.
   *
   * @return {@link Family#UNION}
   */
  @Override
  public Family getFamily() {
    final Family family = Family.UNION;
    return family;
  }

  /**
   * Reports the maximum number of storage bytes this union can require.
   *
   * @return the maximum required storage bytes for this union.
   */
  public abstract int getMaxUnionBytes();

  /**
   * Produces the result of this operation as an ordered CompactSketch on the Java heap.
   *
   * <p>The underlying data structure of the union is not disturbed, so it is OK to keep
   * updating the union after calling this method.
   *
   * @return the result of this operation as an ordered CompactSketch on the Java heap
   */
  public abstract CompactSketch getResult();

  /**
   * Produces the result of this operation as a CompactSketch of the chosen form.
   *
   * <p>The underlying data structure of the union is not disturbed, so it is OK to keep
   * updating the union after calling this method.
   *
   * @param dstOrdered selects whether the returned CompactSketch is ordered
   *        (see Destination Ordered).
   * @param dstMem the destination memory for the returned CompactSketch
   *        (see Destination Memory).
   * @return the result of this operation as a CompactSketch of the chosen form
   */
  public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem);

  /**
   * Resets this Union. The seed is kept as is; everything else reverts to its virgin state.
   */
  public abstract void reset();

  /**
   * Serializes this Union object into a byte array image.
   *
   * @return a byte array image of this Union object
   */
  public abstract byte[] toByteArray();

  /**
   * Stateless, pair-wise union of two sketches. The returned sketch will be cut back to
   * the smaller of the two k values if required.
   *
   * <p>This is a convenience form of
   * {@link #union(Sketch, Sketch, boolean, WritableMemory)} that always requests an ordered
   * result and passes no destination memory.
   *
   * <p>Nulls and empty sketches are ignored.
   *
   * @param sketchA The first argument
   * @param sketchB The second argument
   * @return the result ordered CompactSketch on the heap.
   */
  public CompactSketch union(final Sketch sketchA, final Sketch sketchB) {
    final boolean dstOrdered = true;     // always request an ordered result
    final WritableMemory dstMem = null;  // no destination memory is supplied
    return union(sketchA, sketchB, dstOrdered, dstMem);
  }

  /**
   * Stateless, pair-wise union of two sketches. The returned sketch will be cut back to
   * k if required, similar to the regular Union operation.
   *
   * <p>Nulls and empty sketches are ignored.
   *
   * @param sketchA The first argument
   * @param sketchB The second argument
   * @param dstOrdered If true, the returned CompactSketch will be ordered.
   * @param dstMem If not null, the returned CompactSketch will be placed in this WritableMemory.
   * @return the result CompactSketch.
   */
  public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstOrdered,
      WritableMemory dstMem);

  /**
   * Merges the given on-heap sketch of the Theta Family into this union.
   * This method is not valid for the older SetSketch, which was prior to Open Source
   * (August, 2015).
   *
   * <p>This method can be repeatedly called.
   *
   * <p>Nulls and empty sketches are ignored.
   *
   * @param sketchIn The incoming sketch.
   */
  public abstract void union(Sketch sketchIn);

  /**
   * Merges the given Memory image of any sketch of the Theta Family into this union.
   * The input image may be from earlier versions of the Theta Compact Sketch, called the
   * SetSketch (circa 2014), which was prior to Open Source and are compact and ordered.
   *
   * <p>This method can be repeatedly called.
   *
   * <p>Nulls and empty sketches are ignored.
   *
   * @param mem Memory image of sketch to be merged
   */
  public abstract void union(Memory mem);

  /**
   * Updates this union with the given long data item.
   *
   * @param datum The given long datum.
   */
  public abstract void update(long datum);

  /**
   * Updates this union with the given double (or float) data item.
   * The double is converted to a long using {@code Double.doubleToLongBits(datum)},
   * which normalizes all NaN values to a single NaN representation.
   * Plus and minus zero will be normalized to plus zero.
   * Each of the special floating-point values NaN and +/- Infinity is treated as distinct.
   *
   * @param datum The given double datum.
   */
  public abstract void update(double datum);

  /**
   * Updates this union with the given String data item.
   * The string is converted to a byte array using UTF8 encoding.
   * If the string is null or empty no update attempt is made and the method returns.
   *
   * <p>Note: this will not produce the same output hash values as the {@link #update(char[])}
   * method and will generally be a little slower depending on the complexity of the UTF8
   * encoding.
   *
   * <p>Note: this is not a Sketch Union operation. This treats the given string as a data item.
   *
   * @param datum The given String.
   */
  public abstract void update(String datum);

  /**
   * Updates this union with the given byte array item.
   * If the byte array is null or empty no update attempt is made and the method returns.
   *
   * <p>Note: this is not a Sketch Union operation. This treats the given byte array as a data
   * item.
   *
   * @param data The given byte array.
   */
  public abstract void update(byte[] data);

  /**
   * Updates this union with the given integer array item.
   * If the integer array is null or empty no update attempt is made and the method returns.
   *
   * <p>Note: this is not a Sketch Union operation. This treats the given integer array as a
   * data item.
   *
   * @param data The given int array.
   */
  public abstract void update(int[] data);

  /**
   * Updates this union with the given char array item.
   * If the char array is null or empty no update attempt is made and the method returns.
   *
   * <p>Note: this will not produce the same output hash values as the {@link #update(String)}
   * method but will be a little faster as it avoids the complexity of the UTF8 encoding.
   *
   * <p>Note: this is not a Sketch Union operation. This treats the given char array as a data
   * item.
   *
   * @param data The given char array.
   */
  public abstract void update(char[] data);

  /**
   * Updates this union with the given long array item.
   * If the long array is null or empty no update attempt is made and the method returns.
   *
   * <p>Note: this is not a Sketch Union operation. This treats the given long array as a data
   * item.
   *
   * @param data The given long array.
   */
  public abstract void update(long[] data);
}