All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.association.ARM Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */

package smile.association;

import java.util.*;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import smile.association.TotalSupportTree.Node;

/**
 * Association Rule Mining.
 * Let I = {i1, i2,..., in}
 * be a set of n binary attributes called items. Let
 * D = {t1, t2,..., tm}
 * be a set of transactions called the database. Each transaction in
 * D has a unique transaction ID and contains a subset of
 * the items in I. An association rule is defined as an
 * implication of the form X ⇒ Y
 * where X, Y ⊆ I and X ∩ Y = Ø.
 * The item sets X and Y are called antecedent
 * (left-hand-side or LHS)
 * and consequent (right-hand-side or RHS) of the rule, respectively.
 * The support supp(X) of an item set X is defined as
 * the proportion of transactions in the database which contain the item set.
 * Note that the support of an association rule X ⇒ Y is
 * supp(X ∪ Y). The confidence of a rule is defined as
 * conf(X ⇒ Y) = supp(X ∪ Y) / supp(X).
 * Confidence can be interpreted as an estimate of the probability
 * P(Y | X), the probability of finding the RHS of the
 * rule in transactions under the condition that these transactions
 * also contain the LHS. Association rules are usually required to
 * satisfy a user-specified minimum support and a user-specified
 * minimum confidence at the same time.
 * 
 * @author Haifeng Li
 */
public class ARM implements Iterable<AssociationRule> {

    /**
     * The number of transactions in the database.
     */
    private final int size;
    /**
     * The confidence threshold for association rules.
     */
    private final double confidence;
    /**
     * Compressed set enumeration tree.
     */
    private final TotalSupportTree ttree;

    /**
     * Constructor.
     * @param confidence the confidence threshold for association rules.
     * @param ttree the total support tree to mine; its {@code size()} is
     *              used as the number of transactions.
     */
    ARM(double confidence, TotalSupportTree ttree) {
        this.size = ttree.size();
        this.confidence = confidence;
        this.ttree = ttree;
    }

    /**
     * Returns an iterator that lazily mines association rules, one top-level
     * branch of the T-tree at a time. Each iterator owns its own result
     * buffer, so separate iterators over the same instance do not interfere.
     *
     * @return an iterator over the mined association rules.
     */
    @Override
    public Iterator<AssociationRule> iterator() {
        return new Iterator<AssociationRule>() {
            /** Rules generated but not yet handed out by {@link #next()}. */
            private final Queue<AssociationRule> buffer = new ArrayDeque<>();
            /** Index of the next child of the root to be expanded. */
            private int i = 0;

            @Override
            public boolean hasNext() {
                if (buffer.isEmpty()) {
                    Node root = ttree.root();
                    // Expand root children until one of them yields at
                    // least one rule or all of them have been visited.
                    while (i < root.children.length && buffer.isEmpty()) {
                        Node child = root.children[i];
                        if (child != null) {
                            int[] itemset = {child.id};
                            generate(itemset, i, child, buffer);
                        }
                        i++;
                    }
                }

                return !buffer.isEmpty();
            }

            @Override
            public AssociationRule next() {
                // hasNext() refills the buffer on demand, so next() also
                // works when the caller did not invoke hasNext() first.
                if (!hasNext()) {
                    throw new NoSuchElementException("No more association rules");
                }
                return buffer.poll();
            }
        };
    }

    /**
     * Mines the association rules.
     * @param confidence the confidence threshold for association rules.
     * @param tree the FP-tree.
     * @return the stream of association rules.
     */
    public static Stream<AssociationRule> apply(double confidence, FPTree tree) {
        TotalSupportTree ttree = new TotalSupportTree(tree);
        ARM arm = new ARM(confidence, ttree);
        return StreamSupport.stream(arm.spliterator(), false);
    }

    /**
     * Generates association rules from a T-tree.
     * @param itemset the label for a T-tree node as generated so far.
     * @param size the size of the current array level in the T-tree.
     * @param node the current node in the T-tree.
     * @param buffer the queue that receives the generated rules.
     */
    private void generate(int[] itemset, int size, Node node, Queue<AssociationRule> buffer) {
        if (node.children == null) {
            return;
        }

        for (int i = 0; i < size; i++) {
            Node child = node.children[i];
            if (child != null) {
                int[] newItemset = FPGrowth.insert(itemset, child.id);
                // Generate ARs for current large itemset
                generate(newItemset, child.support, buffer);
                // Continue generation process
                generate(newItemset, i, child, buffer);
            }
        }
    }

    /**
     * Generates all association rules for a given item set.
     * Every non-empty proper subset of the item set is tried as antecedent
     * with the remaining items as consequent; rules whose confidence reaches
     * the threshold are appended to the buffer.
     * @param itemset the given frequent item set.
     * @param support the associated support value for the item set.
     * @param buffer the queue that receives the generated rules.
     */
    private void generate(int[] itemset, int support, Queue<AssociationRule> buffer) {
        // Determine combinations
        int[][] combinations = getPowerSet(itemset);

        // Loop through combinations
        for (int[] combination : combinations) {
            // Find complement of combination in given itemSet
            int[] complement = getComplement(combination, itemset);
            // If complement is not empty generate rule
            if (complement != null) {
                double antecedentSupport = ttree.getSupport(combination);
                // Confidence: supp(X ∪ Y) / supp(X), computed on raw counts.
                double arc = support / antecedentSupport;
                if (arc >= confidence) {
                    double supp = (double) support / size;
                    double consequentSupport = ttree.getSupport(complement);
                    double lift = support / (antecedentSupport * consequentSupport / size);
                    double leverage = supp - (antecedentSupport / size) * (consequentSupport / size);
                    AssociationRule ar = new AssociationRule(combination, complement, supp, arc, lift, leverage);
                    buffer.offer(ar);
                }
            }
        }
    }

    /**
     * Returns the complement of subset.
     * @param subset the items to exclude.
     * @param fullset the full item set.
     * @return the items of {@code fullset} that are not in {@code subset},
     *         or null if {@code subset} is not shorter than {@code fullset}.
     */
    private static int[] getComplement(int[] subset, int[] fullset) {
        int size = fullset.length - subset.length;

        // Returns null if no complement
        if (size < 1) {
            return null;
        }

        // Otherwise define combination array and determine complement
        int[] complement = new int[size];
        int index = 0;
        for (int item : fullset) {
            boolean member = false;
            for (int i : subset) {
                if (item == i) {
                    member = true;
                    break;
                }
            }

            if (!member) {
                complement[index++] = item;
            }

        }

        return complement;
    }

    /**
     * Returns all possible subsets except null and full set.
     * @param set the input item set.
     * @return the non-empty proper subsets of the input set.
     */
    private static int[][] getPowerSet(int[] set) {
        int[][] sets = new int[getPowerSetSize(set.length)][];
        getPowerSet(set, 0, null, sets, 0);
        return sets;
    }

    /**
     * Recursively calculates all possible subsets.
     * @param set the input item set.
     * @param inputIndex the index within the input set marking current
     * element under consideration (0 at start).
     * @param sofar the current combination determined so far during the
     * recursion (null at start).
     * @param sets the power set to store all combinations when recursion ends.
     * @param outputIndex the current location in the output set.
     * @return revised output index.
     */
    private static int getPowerSet(int[] set, int inputIndex, int[] sofar, int[][] sets, int outputIndex) {
        int n = sofar == null ? 0 : sofar.length;
        // Extending sofar would produce the full set (or more): nothing to add.
        if (n >= set.length - 1) {
            return outputIndex;
        }

        for (int i = inputIndex; i < set.length; i++) {
            int[] subset = new int[n + 1];
            subset[n] = set[i];
            if (sofar != null) {
                System.arraycopy(sofar, 0, subset, 0, n);
            }

            sets[outputIndex] = subset;
            outputIndex = getPowerSet(set, i + 1, subset, sets, outputIndex + 1);
        }

        return outputIndex;
    }

    /**
     * Returns the size of power set except null and full set.
     * @param n the size of set.
     * @return {@code 2^n - 2}, or 0 when the set has fewer than two items
     *         and therefore no non-empty proper subset.
     * @throws IllegalArgumentException if the count does not fit in an int.
     */
    private static int getPowerSetSize(int n) {
        if (n < 2) {
            return 0;
        }
        if (n > 30) {
            throw new IllegalArgumentException("Item set too large to enumerate subsets: " + n);
        }
        return (1 << n) - 2;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy