All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.chen0040.lof.LOCI Maven / Gradle / Ivy

The newest version!
package com.github.chen0040.lof;

import com.github.chen0040.data.frame.DataFrame;
import com.github.chen0040.data.frame.DataRow;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.Setter;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.function.BiFunction;


/**
 * Created by xschen on 23/8/15.
 */
@Getter
@Setter
public class LOCI implements Cloneable {
    @Setter(AccessLevel.NONE)
    private double r_max;

    private double alpha = 0.5;
    private double kSigma = 3;

    @Setter(AccessLevel.NONE)
    private double[][] distanceMatrix;

    private BiFunction distanceMeasure;

    public void copy(LOCI that){
        r_max = that.r_max;
        alpha = that.alpha;
        kSigma = that.kSigma;
        distanceMatrix = that.distanceMatrix == null ? null : that.distanceMatrix.clone();
        distanceMeasure = that.distanceMeasure;
    }

    @Override
    public Object clone() throws CloneNotSupportedException {
        LOCI clone = (LOCI)super.clone();
        clone.copy(this);

        return clone;
    }

    public LOCI(){
    }

    public DataFrame fitAndTransform(DataFrame batch) {
        batch = batch.makeCopy();

        int m = batch.rowCount();

        distanceMatrix = new double[m][];
        for(int i=0; i < m; ++i) {
            distanceMatrix[i] = new double[m];
        }

        double maxDistance = Double.MIN_VALUE;
        for(int i=0; i < m; ++i){
            DataRow tuple_i = batch.row(i);
            for(int j=i+1; j < m; ++j){
                DataRow tuple_j = batch.row(j);
                double distance = DistanceMeasureService.getDistance(batch, tuple_i, tuple_j, distanceMeasure);
                distanceMatrix[i][j] = distance;
                distanceMatrix[j][i] = distance;
                maxDistance = Math.max(maxDistance, distance);
            }
        }

        r_max = maxDistance / alpha;

        List> D = new ArrayList<>();

        for(int i=0; i < m; ++i){
            List D_i = get_r_neighbors(i, r_max, distanceMatrix);
            D.add(D_i);
        }

        for(int i=0; i < m; ++i){
            List D_i = D.get(i);
            int n = D_i.size();
            boolean isOutlier = false;
            for(int j=0; j < n; ++j){
                double r = distanceMatrix[i][D_i.get(j)];
                double alphar = alpha * r;
                int n_pi_alphar = get_alphar_neighbor_count(i, alphar, D_i, distanceMatrix);
                double nhat_pi_r_alpha = get_nhat_pi_r_alpha(i, alpha, r, D, distanceMatrix);
                double sigma_nhat_pi_r_alpha = get_sigma_nhat_pi_r_alpha(i, alpha, r, D, distanceMatrix, nhat_pi_r_alpha);
                double MDEF = 1 - n_pi_alphar / nhat_pi_r_alpha;
                double sigma_MDEF = sigma_nhat_pi_r_alpha / nhat_pi_r_alpha;

                if(MDEF  >  kSigma *sigma_MDEF){
                    isOutlier = true;
                    break;
                }
            }

            DataRow tuple = batch.row(i);
            if(isOutlier){
                tuple.setCategoricalTargetCell("anomaly","1");
            }else{
                tuple.setCategoricalTargetCell("anomaly","0");
            }
        }



        return batch;
    }

    private double get_sigma_nhat_pi_r_alpha(int i, double alpha, double r, List> D, double[][] distanceMatrix, double n_hat){
        List D_i = D.get(i);
        int n_pi_r = D_i.size()+1; // including i itself
        double alphar = alpha * r;
        double sum = 0;
        for(Integer j : D_i){
            sum += Math.pow(get_alphar_neighbor_count(j, alphar, D.get(j), distanceMatrix) - n_hat, 2);
        }
        return Math.sqrt(sum / n_pi_r);
    }

    private double get_nhat_pi_r_alpha(int i, double alpha, double r, List> D, double[][] distanceMatrix) {
        List D_i = D.get(i);
        int n_pi_r = D_i.size()+1; // including i itself
        double alphar = alpha * r;
        double sum = 0;
        for(Integer j : D_i){
            sum += get_alphar_neighbor_count(j, alphar, D.get(j), distanceMatrix);
        }
        return sum / n_pi_r;
    }

    private int get_alphar_neighbor_count(int i, double alphar, List d_i, double[][] distanceMatrix) {
        int count = 1; // including i itself
        for(Integer j : d_i){
            double distance = distanceMatrix[i][j];
            if(distance < alphar){
                count++;
            }
        }

        return count;
    }

    public List get_r_neighbors(int i, double r, double[][] distanceMatrix){
        int m = distanceMatrix.length;
        List rnn = new ArrayList();
        for(int j=0; j < m; ++j){
            if(i==j) continue;
            double distance = distanceMatrix[i][j];
            if(distance < r){
                rnn.add(j);
            }
        }

        return rnn;
    }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy