All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opensearch.search.aggregations.metrics.HyperLogLogPlusPlusSparse Maven / Gradle / Ivy

There is a newer version: 2.18.0
Show newest version
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.opensearch.search.aggregations.metrics;

import org.opensearch.common.lease.Releasable;
import org.opensearch.common.lease.Releasables;
import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.IntArray;

/**
 * AbstractHyperLogLogPlusPlus instance that only supports linear counting. The maximum number of hashes supported
 * by the structure is determined at construction time.
 * 

* This structure expects all the added values to be distinct and therefore there are no checks * if an element has been previously added. * * @opensearch.internal */ final class HyperLogLogPlusPlusSparse extends AbstractHyperLogLogPlusPlus implements Releasable { private final LinearCounting lc; /** * Create an sparse HLL++ algorithm where capacity is the maximum number of hashes this structure can hold * per bucket. */ HyperLogLogPlusPlusSparse(int precision, BigArrays bigArrays, int capacity, int initialSize) { super(precision); this.lc = new LinearCounting(precision, bigArrays, capacity, initialSize); } @Override public long maxOrd() { return lc.sizes.size(); } @Override public long cardinality(long bucketOrd) { return lc.cardinality(bucketOrd); } @Override protected boolean getAlgorithm(long bucketOrd) { return LINEAR_COUNTING; } @Override protected AbstractLinearCounting.HashesIterator getLinearCounting(long bucketOrd) { return lc.values(bucketOrd); } @Override protected AbstractHyperLogLog.RunLenIterator getHyperLogLog(long bucketOrd) { throw new IllegalArgumentException("Implementation does not support HLL structures"); } @Override public void collect(long bucket, long hash) { lc.collect(bucket, hash); } @Override public void close() { Releasables.close(lc); } protected void addEncoded(long bucket, int encoded) { lc.addEncoded(bucket, encoded); } /** * The plus plus * * @opensearch.internal */ private static class LinearCounting extends AbstractLinearCounting implements Releasable { private final int capacity; private final BigArrays bigArrays; private final LinearCountingIterator iterator; // We are actually using HyperLogLog's runLens array but interpreting it as a hash set for linear counting. // Number of elements stored. private IntArray values; private IntArray sizes; LinearCounting(int p, BigArrays bigArrays, int capacity, int initialSize) { super(p); this.bigArrays = bigArrays; this.capacity = capacity; IntArray values = null; IntArray sizes = null; boolean success = false; try { values = bigArrays.newIntArray(initialSize * capacity); sizes = bigArrays.newIntArray(initialSize); success = true; } finally { if (success == false) { Releasables.close(values, sizes); } } this.values = values; this.sizes = sizes; iterator = new LinearCountingIterator(this, capacity); } @Override protected int addEncoded(long bucketOrd, int encoded) { assert encoded != 0; return set(bucketOrd, encoded); } @Override protected int size(long bucketOrd) { if (bucketOrd >= sizes.size()) { return 0; } final int size = sizes.get(bucketOrd); assert size == recomputedSize(bucketOrd); return size; } @Override protected HashesIterator values(long bucketOrd) { iterator.reset(bucketOrd, size(bucketOrd)); return iterator; } private long index(long bucketOrd, int index) { return (bucketOrd * capacity) + index; } private int get(long bucketOrd, int index) { long globalIndex = index(bucketOrd, index); if (values.size() < globalIndex) { return 0; } return values.get(globalIndex); } private int set(long bucketOrd, int value) { int size = size(bucketOrd); if (size == 0) { sizes = bigArrays.grow(sizes, bucketOrd + 1); values = bigArrays.grow(values, (bucketOrd + 1) * capacity); } values.set(index(bucketOrd, size), value); return sizes.increment(bucketOrd, 1); } private int recomputedSize(long bucketOrd) { for (int i = 0; i < capacity; ++i) { final int v = get(bucketOrd, i); if (v == 0) { return i; } } return capacity; } @Override public void close() { Releasables.close(values, sizes); } } /** * The plus plus iterator * * @opensearch.internal */ private static class LinearCountingIterator implements AbstractLinearCounting.HashesIterator { private final LinearCounting lc; private final int capacity; long start; long end; private int value, size; private long pos; LinearCountingIterator(LinearCounting lc, int capacity) { this.lc = lc; this.capacity = capacity; } void reset(long bucketOrd, int size) { this.start = bucketOrd * capacity; this.size = size; this.end = start + size; this.pos = start; } @Override public int size() { return size; } @Override public boolean next() { if (pos < end) { value = lc.values.get(pos++); return true; } return false; } @Override public int value() { return value; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy