All downloads are free. Search and download functionality uses the official Maven repository.

smile.anomaly.IsolationTree Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */

package smile.anomaly;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import smile.math.MathEx;
import smile.stat.distribution.GaussianDistribution;

/**
 * Isolation tree.
 *
 * @author Haifeng Li
 */
public class IsolationTree implements Serializable {
    /**
     * Isolation tree node.
     * <p>
     * A node is either a leaf (both children are {@code null}) or an internal
     * node that splits samples with a hyperplane defined by {@link #slope}
     * and {@link #bias}.
     * <p>
     * NOTE(review): this is a non-static inner class, so every node keeps an
     * implicit reference to the enclosing tree. It is left that way on purpose:
     * turning it into a static nested class would change the serialized form.
     */
    class Node implements Serializable {
        /** The adjusted depth of node in the tree. */
        final double depth;
        /** The normal vector of the random splitting hyperplane ({@code null} for a leaf). */
        final double[] slope;
        /**
         * The intercept point, drawn at random from the range of values
         * present at each branching point ({@code null} for a leaf).
         */
        final double[] intercept;
        /** The dot product of slope and intercept. */
        final double bias;
        /** The left child branch ({@code null} for a leaf). */
        final Node left;
        /** The right child branch ({@code null} for a leaf). */
        final Node right;

        /**
         * Leaf node constructor.
         * @param depth the adjusted depth of node in the tree.
         */
        Node(double depth) {
            this(depth, null, null, 0.0, null, null);
        }

        /**
         * Constructor.
         * @param depth the adjusted depth of node in the tree.
         * @param slope the normal vector of random hyperplane.
         * @param intercept the intercept point.
         * @param bias the dot product of slope and intercept.
         * @param left the left child branch.
         * @param right the right child branch.
         */
        Node(double depth, double[] slope, double[] intercept, double bias, Node left, Node right) {
            this.depth = depth;
            this.slope = slope;
            this.intercept = intercept;
            this.bias = bias;
            this.left = left;
            this.right = right;
        }

        /**
         * Returns the path length from the root to the leaf node.
         * <p>
         * At each internal node the sample is projected onto the slope;
         * projections strictly below the bias descend left, all others
         * descend right. This is the same rule used to partition the
         * training data when the tree was built.
         *
         * @param x the sample.
         * @return the path length, i.e. the adjusted depth of the reached leaf.
         */
        public double path(double[] x) {
            if (left == null && right == null) {
                return depth;
            }

            double dot = MathEx.dot(x, slope);
            return dot < bias ? left.path(x) : right.path(x);
        }
    }

    /**
     * Tree root node.
     */
    private final Node root;

    /**
     * Constructor. Builds the whole tree recursively from the training data.
     *
     * @param data the training data, one sample per element.
     * @param maxDepth the maximum depth of the tree.
     * @param extensionLevel the extension level. Each split keeps
     *                       {@code extensionLevel + 1} randomly chosen
     *                       coordinates of the hyperplane normal and zeroes
     *                       the rest, so {@code 0} yields axis-parallel splits.
     */
    public IsolationTree(List<double[]> data, int maxDepth, int extensionLevel) {
        root = buildNode(data, maxDepth, extensionLevel, 0);
    }

    /**
     * Returns the path length from the root to the leaf node.
     * @param x the sample.
     * @return the path length.
     */
    public double path(double[] x) {
        return root.path(x);
    }

    /**
     * Builds an isolation tree node.
     * <p>
     * Recursion stops when the depth limit is reached or when at most one
     * sample remains. A leaf that still holds more than one sample has its
     * depth increased by {@code IsolationForest.factor(size)}.
     *
     * @param data the training data.
     * @param maxDepth the maximum depth of the tree.
     * @param extensionLevel the extension level.
     * @param depth the node depth in the tree.
     * @return the node.
     */
    private Node buildNode(List<double[]> data, int maxDepth, int extensionLevel, int depth) {
        if (depth >= maxDepth || data.size() <= 1) {
            double adjustedDepth = depth;
            if (data.size() > 1) {
                adjustedDepth += IsolationForest.factor(data.size());
            }
            return new Node(adjustedDepth);
        }

        // Per-coordinate bounding box of the samples at this node.
        // Both bounds start from the first sample, so a value can never be
        // below min and above max at once; the else-if is therefore safe.
        double[] min = data.get(0).clone();
        double[] max = data.get(0).clone();
        int p = min.length;
        for (double[] x : data) {
            for (int i = 0; i < p; i++) {
                if (x[i] < min[i]) {
                    min[i] = x[i];
                } else if (x[i] > max[i]) {
                    max[i] = x[i];
                }
            }
        }

        // Pick a random point on splitting hyperplane
        double[] intercept = new double[p];
        for (int i = 0; i < p; i++) {
            intercept[i] = MathEx.random(min[i], max[i]);
        }

        // Pick a random normal vector according to specified extension level
        GaussianDistribution gauss = GaussianDistribution.getInstance();
        double[] slope = new double[p];
        for (int i = 0; i < p; i++) {
            slope[i] = gauss.rand();
        }

        // Zero out (p - extensionLevel - 1) coordinates, picked via the
        // index array from MathEx.permutate, leaving extensionLevel + 1
        // coordinates that may be non-zero.
        int[] index = MathEx.permutate(p);
        for (int i = 0; i < p - extensionLevel - 1; i++) {
            slope[index[i]] = 0.0;
        }

        // Partition with the same "dot < bias" rule that Node.path applies.
        double bias = MathEx.dot(slope, intercept);
        List<double[]> leftData = new ArrayList<>();
        List<double[]> rightData = new ArrayList<>();
        for (double[] x : data) {
            double dot = MathEx.dot(x, slope);
            if (dot < bias) {
                leftData.add(x);
            } else {
                rightData.add(x);
            }
        }

        Node left = buildNode(leftData, maxDepth, extensionLevel, depth + 1);
        Node right = buildNode(rightData, maxDepth, extensionLevel, depth + 1);

        return new Node(depth, slope, intercept, bias, left, right);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy