All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.association.AssociationRule Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */

package smile.association;

import java.util.Arrays;

/**
 * Association rule object. Let
 * {@code I = {i1, i2,..., in}}
 * be a set of n binary attributes called items. Let
 * {@code D = {t1, t2,..., tm}}
 * be a set of transactions called the database. Each transaction in
 * {@code D} has a unique transaction ID and contains a subset
 * of the items in {@code I}. An association rule is defined
 * as an implication of the form X &rArr; Y
 * where X, Y &sube; I and X &cap; Y = &empty;.
 * The item sets X and Y are called
 * antecedent (left-hand-side or LHS) and consequent (right-hand-side or RHS)
 * of the rule, respectively.
 * <p>
 * The support supp(X) of an item set X is defined as the proportion of
 * transactions in the database which contain the item set. Note that the
 * support of an association rule X &rArr; Y is supp(X &cup; Y).
 * <p>
 * The confidence of a rule is defined as
 * conf(X &rArr; Y) = supp(X &cup; Y) / supp(X).
 * Confidence can be interpreted as an estimate of the probability
 * P(Y | X), the probability of finding the RHS of the rule
 * in transactions under the condition that these transactions also contain
 * the LHS.
 * <p>
 * Lift is a measure of the performance of a targeting model
 * (association rule) at predicting or classifying cases as having
 * an enhanced response (with respect to the population as a whole),
 * measured against a random choice targeting model. A targeting model
 * is doing a good job if the response within the target is much better
 * than the average for the population as a whole. Lift is simply the ratio
 * of these values: target response divided by average response.
 * For an association rule X &rArr; Y, if the lift is equal
 * to 1, it means that X and Y are independent. If the lift is higher
 * than 1, it means that X and Y are positively correlated.
 * If the lift is lower than 1, it means that X and Y are negatively
 * correlated.
 *
 * @author Haifeng Li
 */
public class AssociationRule {
    /**
     * Antecedent itemset.
     */
    public final int[] antecedent;
    /**
     * Consequent itemset.
     */
    public final int[] consequent;
    /**
     * The support value. The support supp(X) of an itemset X is defined as
     * the proportion of transactions in the database which contain the itemset.
     */
    public final double support;
    /**
     * The confidence value. The confidence of a rule is defined
     * conf(X &rArr; Y) = supp(X &cup; Y) / supp(X). Confidence can be
     * interpreted as an estimate of the probability P(Y | X), the probability
     * of finding the RHS of the rule in transactions under the condition
     * that these transactions also contain the LHS.
     */
    public final double confidence;
    /**
     * How many times more often antecedent and consequent occur together
     * than expected if they were statistically independent.
     * Lift is a measure of the performance of a targeting model
     * (association rule) at predicting or classifying cases as having
     * an enhanced response (with respect to the population as a whole),
     * measured against a random choice targeting model. A targeting model
     * is doing a good job if the response within the target is much better
     * than the average for the population as a whole.
     * <p>
     * Lift is simply the ratio of these values: target response divided by
     * average response.
     * <p>
     * For an association rule X &rArr; Y, if the lift is equal
     * to 1, it means that X and Y are independent. If the lift is higher
     * than 1, it means that X and Y are positively correlated.
     * If the lift is lower than 1, it means that X and Y are negatively
     * correlated.
     */
    public final double lift;
    /**
     * The difference between the probability of the rule and the expected
     * probability if the items were statistically independent.
     */
    public final double leverage;

    /**
     * Constructor. The arrays are stored by reference, not copied.
     *
     * @param antecedent the antecedent itemset (LHS) of the association rule.
     * @param consequent the consequent itemset (RHS) of the association rule.
     * @param support    the proportion of instances in the dataset that contain an itemset.
     * @param confidence the percentage of instances that contain the consequent
     *                   and antecedent together over the number of instances that
     *                   only contain the antecedent.
     * @param lift       how many times more often antecedent and consequent occur together
     *                   than expected if they were statistically independent.
     * @param leverage   the difference between the probability of the rule and the expected
     *                   probability if the items were statistically independent.
     */
    public AssociationRule(int[] antecedent, int[] consequent, double support, double confidence, double lift, double leverage) {
        this.antecedent = antecedent;
        this.consequent = consequent;
        this.support = support;
        this.confidence = confidence;
        this.lift = lift;
        this.leverage = leverage;
    }

    /**
     * Compares this rule with another object. Two rules are equal when their
     * antecedents, consequents, support and confidence are equal; lift and
     * leverage are not compared.
     *
     * @param o the object to compare with.
     * @return true if {@code o} is an equal association rule.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }

        if (!(o instanceof AssociationRule)) {
            return false;
        }

        AssociationRule that = (AssociationRule) o;
        // Double.compare (rather than ==) keeps equals reflexive for NaN and
        // consistent with hashCode, which hashes the raw bits and therefore
        // distinguishes 0.0 from -0.0.
        return Double.compare(support, that.support) == 0
                && Double.compare(confidence, that.confidence) == 0
                && Arrays.equals(antecedent, that.antecedent)
                && Arrays.equals(consequent, that.consequent);
    }

    /**
     * Returns a hash code built from the same fields that {@link #equals}
     * compares: antecedent, consequent, support and confidence.
     *
     * @return the hash code.
     */
    @Override
    public int hashCode() {
        int hash = 7;
        hash = 13 * hash + Arrays.hashCode(antecedent);
        hash = 13 * hash + Arrays.hashCode(consequent);
        // Double.hashCode(x) is (int) (bits ^ (bits >>> 32)) of doubleToLongBits(x).
        hash = 13 * hash + Double.hashCode(support);
        hash = 13 * hash + Double.hashCode(confidence);
        return hash;
    }

    /**
     * Formats the rule as {@code (a1, a2) => (c1, c2)} followed by the
     * support and confidence as percentages, the lift and the leverage.
     * An empty itemset is rendered as {@code ()}.
     *
     * @return the string representation of the rule.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append('(');
        appendItems(sb, antecedent);
        sb.append(") => (");
        appendItems(sb, consequent);
        sb.append(String.format(") support = %.2f%% confidence = %.2f%% lift = %.2f leverage = %.4f", 100*support, 100*confidence, lift, leverage));
        return sb.toString();
    }

    /** Appends the items separated by ", "; appends nothing for an empty itemset. */
    private static void appendItems(StringBuilder sb, int[] items) {
        for (int i = 0; i < items.length; i++) {
            if (i > 0) {
                sb.append(", ");
            }
            sb.append(items[i]);
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy