All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.datasketches.quantiles.ItemsUtil Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.quantiles;

import static org.apache.datasketches.Util.LS;

import java.util.Arrays;
import java.util.Comparator;

import org.apache.datasketches.SketchesArgumentException;

/**
 * Utility class for generic quantiles sketch.
 *
 * 

This class contains a highly specialized sort called blockyTandemMergeSort(). * It also contains methods that are used while building histograms and other common * functions.

* * @author Kevin Lang * @author Alexander Saydakov */ final class ItemsUtil { private ItemsUtil() {} static final int ITEMS_SER_VER = 3; static final int PRIOR_ITEMS_SER_VER = 2; /** * Check the validity of the given serialization version * @param serVer the given serialization version */ static void checkItemsSerVer(final int serVer) { if ((serVer == ITEMS_SER_VER) || (serVer == PRIOR_ITEMS_SER_VER)) { return; } throw new SketchesArgumentException( "Possible corruption: Invalid Serialization Version: " + serVer); } /** * Checks the sequential validity of the given array of values. * They must be unique, monotonically increasing and not null. * @param the data type * @param values given array of values * @param comparator the comparator for data type T */ static final void validateValues(final T[] values, final Comparator comparator) { final int lenM1 = values.length - 1; for (int j = 0; j < lenM1; j++) { if ((values[j] != null) && (values[j + 1] != null) && (comparator.compare(values[j], values[j + 1]) < 0)) { continue; } throw new SketchesArgumentException( "Values must be unique, monotonically increasing and not null."); } } /** * Called when the base buffer has just acquired 2*k elements. * @param the data type * @param sketch the given quantiles sketch */ @SuppressWarnings("unchecked") static void processFullBaseBuffer(final ItemsSketch sketch) { final int bbCount = sketch.getBaseBufferCount(); final long n = sketch.getN(); assert bbCount == (2 * sketch.getK()); // internal consistency check // make sure there will be enough levels for the propagation ItemsUpdateImpl.maybeGrowLevels(sketch, n); // important: n_ was incremented by update before we got here // this aliasing is a bit dangerous; notice that we did it after the possible resizing final Object[] baseBuffer = sketch.getCombinedBuffer(); Arrays.sort((T[]) baseBuffer, 0, bbCount, sketch.getComparator()); ItemsUpdateImpl.inPlacePropagateCarry( 0, null, 0, // this null is okay (T[]) baseBuffer, 0, true, sketch); sketch.baseBufferCount_ = 0; Arrays.fill(baseBuffer, 0, 2 * sketch.getK(), null); // to release the discarded objects assert (n / (2L * sketch.getK())) == sketch.getBitPattern(); // internal consistency check } static String toString(final boolean sketchSummary, final boolean dataDetail, final ItemsSketch sketch) { final StringBuilder sb = new StringBuilder(); final String thisSimpleName = sketch.getClass().getSimpleName(); final int bbCount = sketch.getBaseBufferCount(); final int combAllocCount = sketch.getCombinedBufferAllocatedCount(); final int k = sketch.getK(); final long bitPattern = sketch.getBitPattern(); if (dataDetail) { sb.append(Util.LS).append("### ").append(thisSimpleName).append(" DATA DETAIL: ").append(Util.LS); final Object[] items = sketch.getCombinedBuffer(); //output the base buffer sb.append(" BaseBuffer :"); if (bbCount > 0) { for (int i = 0; i < bbCount; i++) { sb.append(' ').append(items[i]); } } sb.append(Util.LS); //output all the levels final int numItems = combAllocCount; if (numItems > (2 * k)) { sb.append(" Valid | Level"); for (int j = 2 * k; j < numItems; j++) { //output level data starting at 2K if ((j % k) == 0) { //start output of new level final int levelNum = j > (2 * k) ? (j - (2 * k)) / k : 0; final String validLvl = ((1L << levelNum) & bitPattern) > 0 ? " T " : " F "; final String lvl = String.format("%5d", levelNum); sb.append(Util.LS).append(" ").append(validLvl).append(" ").append(lvl).append(":"); } sb.append(' ').append(items[j]); } sb.append(Util.LS); } sb.append("### END DATA DETAIL").append(Util.LS); } if (sketchSummary) { final long n = sketch.getN(); final String nStr = String.format("%,d", n); final int numLevels = Util.computeNumLevelsNeeded(k, n); final String bufCntStr = String.format("%,d", combAllocCount); final int preBytes = sketch.isEmpty() ? Long.BYTES : 2 * Long.BYTES; final double epsPmf = Util.getNormalizedRankError(k, true); final String epsPmfPctStr = String.format("%.3f%%", epsPmf * 100.0); final double eps = Util.getNormalizedRankError(k, false); final String epsPctStr = String.format("%.3f%%", eps * 100.0); final int numSamples = sketch.getRetainedItems(); final String numSampStr = String.format("%,d", numSamples); sb.append(Util.LS).append("### ").append(thisSimpleName).append(" SUMMARY: ").append(Util.LS); sb.append(" K : ").append(k).append(Util.LS); sb.append(" N : ").append(nStr).append(Util.LS); sb.append(" BaseBufferCount : ").append(bbCount).append(Util.LS); sb.append(" CombinedBufferAllocatedCount : ").append(bufCntStr).append(Util.LS); sb.append(" Total Levels : ").append(numLevels).append(Util.LS); sb.append(" Valid Levels : ").append(Util.computeValidLevels(bitPattern)) .append(Util.LS); sb.append(" Level Bit Pattern : ").append(Long.toBinaryString(bitPattern)) .append(Util.LS); sb.append(" Valid Samples : ").append(numSampStr).append(Util.LS); sb.append(" Preamble Bytes : ").append(preBytes).append(Util.LS); sb.append(" Normalized Rank Error : ").append(epsPctStr).append(LS); sb.append(" Normalized Rank Error (PMF) : ").append(epsPmfPctStr).append(LS); sb.append(" Min Value : ").append(sketch.getMinValue()).append(Util.LS); sb.append(" Max Value : ").append(sketch.getMaxValue()).append(Util.LS); sb.append("### END SKETCH SUMMARY").append(Util.LS); } return sb.toString(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy