All downloads are free. Search and download functionality uses the official Maven repository.

com.amazon.randomcutforest.RandomCutForestShingledBenchmark Maven / Gradle / Ivy

/*
 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package com.amazon.randomcutforest;

import java.util.List;
import java.util.Random;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import com.amazon.randomcutforest.returntypes.DensityOutput;
import com.amazon.randomcutforest.returntypes.DiVector;
import com.amazon.randomcutforest.returntypes.Neighbor;
import com.amazon.randomcutforest.testutils.ShingledMultiDimDataWithKeys;

/**
 * JMH benchmarks for a {@link RandomCutForest} built with internal shingling
 * enabled.
 *
 * <p>
 * Before every invocation a fresh forest is built and updated with the first
 * {@link #INITIAL_DATA_SIZE} points of the generated data. Each benchmark then
 * walks the remaining {@link #DATA_SIZE} points, which is why every benchmark
 * is annotated with {@code @OperationsPerInvocation(DATA_SIZE)}.
 */
@Warmup(iterations = 2)
@Measurement(iterations = 5)
@Fork(value = 1)
@State(Scope.Thread)
public class RandomCutForestShingledBenchmark {

    /** Number of points processed inside each benchmark method. */
    public static final int DATA_SIZE = 50_000;

    /** Number of leading points used to populate the forest before measuring. */
    public static final int INITIAL_DATA_SIZE = 25_000;

    /**
     * Shared benchmark state: the JMH parameters, the generated input data, and
     * the forest under test.
     */
    @State(Scope.Benchmark)
    public static class BenchmarkState {
        @Param({ "5" })
        int baseDimensions;

        @Param({ "8" })
        int shingleSize;

        @Param({ "30" })
        int numberOfTrees;

        @Param({ "1.0", "0.9", "0.8", "0.7", "0.6", "0.5", "0.4", "0.3", "0.2", "0.1", "0.0" })
        double boundingBoxCacheFraction;

        @Param({ "false", "true" })
        boolean parallel;

        double[][] data;
        RandomCutForest forest;

        /**
         * Generates {@code DATA_SIZE + INITIAL_DATA_SIZE} input points once per
         * trial.
         */
        @Setup(Level.Trial)
        public void setUpData() {
            data = ShingledMultiDimDataWithKeys.getMultiDimData(DATA_SIZE + INITIAL_DATA_SIZE, 50, 100, 5, 17,
                    baseDimensions).data;
        }

        /**
         * Builds a new forest from the current parameters and populates it with
         * the first {@code INITIAL_DATA_SIZE} points. Runs before every
         * invocation because the benchmark methods mutate the forest.
         */
        @Setup(Level.Invocation)
        public void setUpForest() {
            forest = RandomCutForest.builder().numberOfTrees(numberOfTrees).dimensions(baseDimensions * shingleSize)
                    .internalShinglingEnabled(true).shingleSize(shingleSize).parallelExecutionEnabled(parallel)
                    .boundingBoxCacheFraction(boundingBoxCacheFraction).randomSeed(99).build();

            // Populate with the leading points only; the benchmarks consume
            // data[INITIAL_DATA_SIZE..], so those points must not be fed here.
            for (int i = 0; i < INITIAL_DATA_SIZE; i++) {
                forest.update(data[i]);
            }
        }
    }

    // Holds the forest taken from the shared state for the current invocation.
    private RandomCutForest forest;

    /**
     * Updates the forest with each of the {@code DATA_SIZE} measured points.
     *
     * @param state shared data and pre-populated forest
     * @return the forest after all updates
     */
    @Benchmark
    @OperationsPerInvocation(DATA_SIZE)
    public RandomCutForest updateOnly(BenchmarkState state) {
        double[][] data = state.data;
        forest = state.forest;

        for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
            forest.update(data[i]);
        }

        return forest;
    }

    /**
     * Computes the anomaly score of each measured point and sums the scores.
     * The forest is updated only when a {@code Random} seeded with 0 draws a
     * value below 0.01, so updates are rare.
     *
     * @param state     shared data and pre-populated forest
     * @param blackhole sink for the accumulated score
     * @return the forest after the run
     */
    @Benchmark
    @OperationsPerInvocation(DATA_SIZE)
    public RandomCutForest scoreOnly(BenchmarkState state, Blackhole blackhole) {
        double[][] data = state.data;
        forest = state.forest;
        double score = 0.0;
        Random rnd = new Random(0);

        for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
            score += forest.getAnomalyScore(data[i]);
            if (rnd.nextDouble() < 0.01) {
                forest.update(data[i]); // this should execute sparingly
            }
        }

        blackhole.consume(score);
        return forest;
    }

    /**
     * Computes the anomaly score of each measured point and then updates the
     * forest with that point. The last score is handed to the blackhole.
     *
     * @param state     shared data and pre-populated forest
     * @param blackhole sink for the last computed score
     * @return the forest after the run
     */
    @Benchmark
    @OperationsPerInvocation(DATA_SIZE)
    public RandomCutForest scoreAndUpdate(BenchmarkState state, Blackhole blackhole) {
        double[][] data = state.data;
        forest = state.forest;
        double score = 0.0;

        for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
            score = forest.getAnomalyScore(data[i]);
            forest.update(data[i]);
        }

        blackhole.consume(score);
        return forest;
    }

    /**
     * Computes the anomaly attribution of each measured point and then updates
     * the forest with that point. The last attribution is handed to the
     * blackhole.
     *
     * @param state     shared data and pre-populated forest
     * @param blackhole sink for the last attribution vector
     * @return the forest after the run
     */
    @Benchmark
    @OperationsPerInvocation(DATA_SIZE)
    public RandomCutForest attributionAndUpdate(BenchmarkState state, Blackhole blackhole) {
        double[][] data = state.data;
        forest = state.forest;
        DiVector vector = new DiVector(forest.getDimensions());

        for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
            vector = forest.getAnomalyAttribution(data[i]);
            forest.update(data[i]);
        }

        blackhole.consume(vector);
        return forest;
    }

    /**
     * Computes the simple density of each measured point and then updates the
     * forest with that point. The last density output is handed to the
     * blackhole.
     *
     * @param state     shared data and pre-populated forest
     * @param blackhole sink for the last density output
     * @return the forest after the run
     */
    @Benchmark
    @OperationsPerInvocation(DATA_SIZE)
    public RandomCutForest basicDensityAndUpdate(BenchmarkState state, Blackhole blackhole) {
        double[][] data = state.data;
        forest = state.forest;
        DensityOutput output = new DensityOutput(forest.getDimensions(), forest.getSampleSize());

        for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
            output = forest.getSimpleDensity(data[i]);
            forest.update(data[i]);
        }

        blackhole.consume(output);
        return forest;
    }

    /**
     * Queries the near neighbors in the sample for each measured point and then
     * updates the forest with that point. The last neighbor list is handed to
     * the blackhole.
     *
     * @param state     shared data and pre-populated forest
     * @param blackhole sink for the last neighbor list
     * @return the forest after the run
     */
    @Benchmark
    @OperationsPerInvocation(DATA_SIZE)
    public RandomCutForest neighborAndUpdate(BenchmarkState state, Blackhole blackhole) {
        double[][] data = state.data;
        forest = state.forest;
        List<Neighbor> output = null;

        for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
            output = forest.getNearNeighborsInSample(data[i]);
            forest.update(data[i]);
        }

        blackhole.consume(output);
        return forest;
    }

    /**
     * Calls {@code imputeMissingValues} for each measured point, passing a
     * count of 1 and the single index {@code baseDimensions - 1}, and then
     * updates the forest with that point. The last result is handed to the
     * blackhole.
     *
     * @param state     shared data, parameters, and pre-populated forest
     * @param blackhole sink for the last imputed point
     * @return the forest after the run
     */
    @Benchmark
    @OperationsPerInvocation(DATA_SIZE)
    public RandomCutForest imputeAndUpdate(BenchmarkState state, Blackhole blackhole) {
        double[][] data = state.data;
        forest = state.forest;
        double[] output = null;

        for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
            output = forest.imputeMissingValues(data[i], 1, new int[] { state.baseDimensions - 1 });
            forest.update(data[i]);
        }

        blackhole.consume(output);
        return forest;
    }

    /**
     * Calls {@code extrapolate(1)} once per measured point and then updates the
     * forest with that point. The last extrapolation is handed to the
     * blackhole.
     *
     * @param state     shared data and pre-populated forest
     * @param blackhole sink for the last extrapolated values
     * @return the forest after the run
     */
    @Benchmark
    @OperationsPerInvocation(DATA_SIZE)
    public RandomCutForest extrapolateAndUpdate(BenchmarkState state, Blackhole blackhole) {
        double[][] data = state.data;
        forest = state.forest;
        double[] output = null;

        for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
            output = forest.extrapolate(1);
            forest.update(data[i]);
        }

        blackhole.consume(output);
        return forest;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy