All Downloads are FREE. Search and download functionality uses the official Maven repository.

com.clearspring.analytics.stream.quantile.GroupTree Maven / Gradle / Ivy

Go to download

A library for summarizing data in streams for which it is infeasible to store all events

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.clearspring.analytics.stream.quantile;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Iterator;

import com.clearspring.analytics.util.AbstractIterator;
import com.clearspring.analytics.util.Preconditions;

/**
 * A depth-balanced binary tree containing TDigest.Group.  This adds to the normal
 * NavigableSet the ability to sum up the size of elements to the left of a
 * particular group.
 * <p>
 * Groups live only in leaf nodes.  An internal node keeps, in {@code leaf}, the
 * smallest group of its right subtree and uses it as the split key: groups that
 * compare less than {@code leaf} are in {@code left}, all others are in {@code right}.
 */
public class GroupTree implements Iterable<TDigest.Group> {

    // sum of Group.count() over the groups recorded in this subtree
    private int count;
    // number of groups in this subtree
    private int size;
    // 0 for an empty tree, 1 for a single leaf, otherwise 1 + max(child depths)
    private int depth;
    // the stored group for a leaf node; the first group of the right subtree otherwise
    private TDigest.Group leaf;
    // both null for an empty tree or a leaf node, both non-null for an internal node
    private GroupTree left, right;

    /**
     * Creates an empty tree.
     */
    public GroupTree() {
        count = size = depth = 0;
        leaf = null;
        left = right = null;
    }

    /**
     * Creates a tree holding exactly one group.
     *
     * @param leaf the group to store
     */
    public GroupTree(TDigest.Group leaf) {
        size = depth = 1;
        this.leaf = leaf;
        count = leaf.count();
        left = right = null;
    }

    /**
     * Creates an internal node from two non-empty subtrees and rebalances it.
     *
     * @param left  subtree whose groups all sort before those of {@code right}
     * @param right subtree whose groups all sort after those of {@code left}
     */
    public GroupTree(GroupTree left, GroupTree right) {
        this.left = left;
        this.right = right;
        count = left.count + right.count;
        size = left.size + right.size;
        rebalance();
        // rebalance() may have replaced this.right, so read the split key afterwards
        leaf = this.right.first();
    }

    /**
     * Inserts a group into the tree.
     *
     * @param group the group to add; it must not compare equal to a group already present
     * @throws IllegalArgumentException (via Preconditions.checkArgument) if an equal group
     *                                  is already stored
     */
    public void add(TDigest.Group group) {
        if (size == 0) {
            leaf = group;
            depth = 1;
            count = group.count();
            size = 1;
            return;
        } else if (size == 1) {
            int order = group.compareTo(leaf);
            // Without this check an equal group would leave both children null and the
            // depth computation below would fail with a NullPointerException.
            Preconditions.checkArgument(order != 0, "Element %s is already in the tree", group);
            if (order < 0) {
                left = new GroupTree(group);
                right = new GroupTree(leaf);
            } else {
                left = new GroupTree(leaf);
                right = new GroupTree(group);
                // the split key is always the first group of the right subtree
                leaf = group;
            }
        } else if (group.compareTo(leaf) < 0) {
            left.add(group);
        } else {
            right.add(group);
        }
        count += group.count();
        size++;
        depth = Math.max(left.depth, right.depth) + 1;

        rebalance();
    }

    /**
     * Restores the balance of this internal node if the child depths differ by more than
     * one, otherwise just refreshes {@code depth}.
     */
    private void rebalance() {
        int l = left.depth();
        int r = right.depth();
        if (l > r + 1) {
            // left side is too deep: split the deeper grandchild on the left
            if (left.left.depth() > left.right.depth()) {
                rotate(left.left.left, left.left.right, left.right, right);
            } else {
                rotate(left.left, left.right.left, left.right.right, right);
            }
        } else if (r > l + 1) {
            // right side is too deep: split the deeper grandchild on the right
            if (right.left.depth() > right.right.depth()) {
                rotate(left, right.left.left, right.left.right, right.right);
            } else {
                rotate(left, right.left, right.right.left, right.right.right);
            }
        } else {
            depth = Math.max(left.depth(), right.depth()) + 1;
        }
    }

    /**
     * Rebuilds this node from four subtrees given in ascending order: (a, b) becomes the
     * new left child and (c, d) the new right child.
     */
    private void rotate(GroupTree a, GroupTree b, GroupTree c, GroupTree d) {
        left = new GroupTree(a, b);
        right = new GroupTree(c, d);
        count = left.count + right.count;
        size = left.size + right.size;
        depth = Math.max(left.depth(), right.depth()) + 1;
        leaf = right.first();
    }

    private int depth() {
        return depth;
    }

    /**
     * @return the number of groups in this tree
     */
    public int size() {
        return size;
    }

    /**
     * @return the number of items strictly before the current element
     */
    public int headCount(TDigest.Group base) {
        if (size == 0) {
            return 0;
        } else if (left == null) {
            return leaf.compareTo(base) < 0 ? 1 : 0;
        } else {
            if (base.compareTo(leaf) < 0) {
                return left.headCount(base);
            } else {
                // everything on the left sorts before the split key and hence before base
                return left.size + right.headCount(base);
            }
        }
    }

    /**
     * @return the sum of the recorded counts for all elements strictly before the current element.
     */
    public int headSum(TDigest.Group base) {
        if (size == 0) {
            return 0;
        } else if (left == null) {
            return leaf.compareTo(base) < 0 ? count : 0;
        } else {
            if (base.compareTo(leaf) <= 0) {
                return left.headSum(base);
            } else {
                return left.count + right.headSum(base);
            }
        }
    }

    /**
     * @return the first Group in this set
     * @throws IllegalStateException (via Preconditions.checkState) if the tree is empty
     */
    public TDigest.Group first() {
        Preconditions.checkState(size > 0, "No first element of empty set");
        if (left == null) {
            return leaf;
        } else {
            return left.first();
        }
    }

    /**
     * Iterates through all groups in the tree.
     */
    @Override
    public Iterator<TDigest.Group> iterator() {
        return iterator(null);
    }

    /**
     * Iterates through all of the Groups in this tree in ascending order of means
     *
     * @param start The place to start this subset, or null to start at the first group.
     *              Remember that Groups are ordered by mean *and* id.
     * @return An iterator that goes through the groups in order of mean and id starting at or after the
     * specified Group.
     */
    private Iterator<TDigest.Group> iterator(final TDigest.Group start) {
        return new AbstractIterator<TDigest.Group>() {
            // pending subtrees, nearest (smallest) on top
            private final Deque<GroupTree> stack = new ArrayDeque<GroupTree>();

            {
                // An empty root has a null leaf; pushing it would surface a null element
                // (or dereference the null leaf when start is given), so skip it.
                if (GroupTree.this.size > 0) {
                    push(GroupTree.this, start);
                }
            }

            // recurses down to the leaf that is >= from
            // pending right hand branches on the way are put on the stack
            private void push(GroupTree z, TDigest.Group from) {
                while (z.left != null) {
                    if (from == null || from.compareTo(z.leaf) < 0) {
                        // remember we will have to process the right hand branch later
                        stack.push(z.right);
                        // note that there is no guarantee that z.left has any good data
                        z = z.left;
                    } else {
                        // if the left hand branch doesn't contain from, then no push
                        z = z.right;
                    }
                }
                // put the leaf value on the stack if it is valid
                if (from == null || z.leaf.compareTo(from) >= 0) {
                    stack.push(z);
                }
            }

            @Override
            protected TDigest.Group computeNext() {
                GroupTree r = stack.poll();
                while (r != null && r.left != null) {
                    // unpack r onto the stack
                    push(r, start);
                    r = stack.poll();
                }

                // at this point, r == null or r.left == null
                // if r == null, stack is empty and we are done
                // if r != null, then r.left == null and we have a result
                if (r != null) {
                    return r.leaf;
                }
                return endOfData();
            }
        };
    }

    /**
     * Removes a group from the tree.
     *
     * @param base the group to remove; its count() is subtracted from the running sums
     * @throws IllegalStateException    (via Preconditions.checkState) if the tree is empty
     * @throws IllegalArgumentException (via Preconditions.checkArgument) if no equal group is stored
     */
    public void remove(TDigest.Group base) {
        Preconditions.checkState(size > 0, "Cannot remove from empty set");
        if (size == 1) {
            Preconditions.checkArgument(base.compareTo(leaf) == 0, "Element %s not found", base);
            // back to the same state the no-argument constructor produces
            count = size = depth = 0;
            leaf = null;
        } else if (base.compareTo(leaf) < 0) {
            if (left.size > 1) {
                left.remove(base);
                count -= base.count();
                size--;
                rebalance();
            } else {
                // the left child is a single leaf; make sure it really is the one asked for
                Preconditions.checkArgument(base.compareTo(left.leaf) == 0, "Element %s not found", base);
                replaceWith(right);
            }
        } else {
            if (right.size > 1) {
                right.remove(base);
                // the first group of the right subtree may have been the one removed
                leaf = right.first();
                count -= base.count();
                size--;
                rebalance();
            } else {
                // the right child is a single leaf; make sure it really is the one asked for
                Preconditions.checkArgument(base.compareTo(right.leaf) == 0, "Element %s not found", base);
                replaceWith(left);
            }
        }
    }

    /**
     * Collapses this node into a copy of the given child, dropping the other child.
     */
    private void replaceWith(GroupTree child) {
        size = child.size;
        count = child.count;
        depth = child.depth;
        leaf = child.leaf;
        left = child.left;
        right = child.right;
    }

    /**
     * @return the largest element less than or equal to base, or null if there is none
     */
    public TDigest.Group floor(TDigest.Group base) {
        if (size == 0) {
            return null;
        } else {
            if (size == 1) {
                return base.compareTo(leaf) >= 0 ? leaf : null;
            } else {
                if (base.compareTo(leaf) < 0) {
                    return left.floor(base);
                } else {
                    TDigest.Group floor = right.floor(base);
                    if (floor == null) {
                        floor = left.last();
                    }
                    return floor;
                }
            }
        }
    }

    /**
     * @return the last Group in this set
     * @throws IllegalStateException (via Preconditions.checkState) if the tree is empty
     */
    public TDigest.Group last() {
        Preconditions.checkState(size > 0, "Cannot find last element of empty set");
        if (size == 1) {
            return leaf;
        } else {
            return right.last();
        }
    }

    /**
     * @return the smallest element greater than or equal to base, or null if there is none
     */
    public TDigest.Group ceiling(TDigest.Group base) {
        if (size == 0) {
            return null;
        } else if (size == 1) {
            return base.compareTo(leaf) <= 0 ? leaf : null;
        } else {
            if (base.compareTo(leaf) < 0) {
                TDigest.Group r = left.ceiling(base);
                if (r == null) {
                    r = right.first();
                }
                return r;
            } else {
                return right.ceiling(base);
            }
        }
    }

    /**
     * @return the subset of elements equal to or greater than base.
     */
    public Iterable<TDigest.Group> tailSet(final TDigest.Group start) {
        return new Iterable<TDigest.Group>() {
            @Override
            public Iterator<TDigest.Group> iterator() {
                return GroupTree.this.iterator(start);
            }
        };
    }

    /**
     * @return the sum of the recorded counts of all groups in this tree
     */
    public int sum() {
        return count;
    }

    /**
     * Recursively verifies the structural invariants of the tree: child depths differ by
     * less than two, and depth, size, count and split key agree with the children.
     *
     * @throws IllegalStateException (via Preconditions.checkState) on the first violation
     */
    public void checkBalance() {
        if (left != null) {
            Preconditions.checkState(Math.abs(left.depth() - right.depth()) < 2, "Imbalanced");
            int l = left.depth();
            int r = right.depth();
            Preconditions.checkState(depth == Math.max(l, r) + 1, "Depth doesn't match children");
            Preconditions.checkState(size == left.size + right.size, "Sizes don't match children");
            Preconditions.checkState(count == left.count + right.count, "Counts don't match children");
            // %s is used for every argument: "%.5d" is not a valid conversion for
            // java.util.Formatter and would mask the real failure with a format error.
            Preconditions.checkState(leaf.compareTo(right.first()) == 0, "Split is wrong %s != %s or %s != %s", leaf.mean(), right.first().mean(), leaf.id(), right.first().id());
            left.checkBalance();
            right.checkBalance();
        }
    }

    /**
     * Dumps the tree to standard output, one node per line, for debugging.  A leading
     * "* " marks a node whose children differ in depth by more than one and "+ " marks a
     * node whose split key is not the first group of its right subtree.
     *
     * @param depth indentation level of this node; each level prints one "| " prefix
     */
    public void print(int depth) {
        for (int i = 0; i < depth; i++) {
            System.out.print("| ");
        }
        int imbalance = Math.abs((left != null ? left.depth : 1) - (right != null ? right.depth : 1));
        System.out.printf("%s%s, %d, %d, %d\n", (imbalance > 1 ? "* " : "") + (right != null && leaf.compareTo(right.first()) != 0 ? "+ " : ""), leaf, size, count, this.depth);
        if (left != null) {
            left.print(depth + 1);
            right.print(depth + 1);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy