// smile.association.AssociationRule (Maven / Gradle / Ivy)
/*
* Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
*
* Smile is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Smile is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Smile. If not, see <https://www.gnu.org/licenses/>.
*/
package smile.association;
import java.util.Arrays;
/**
* Association rule object. Let
* I = {i1, i2,..., in}
* be a set of n
binary attributes called items. Let
* D = {t1, t2,..., tm}
* be a set of transactions called the database. Each transaction in
* D
has an unique transaction ID and contains a subset
* of the items in I
. An association rule is defined
* as an implication of the form X ⇒ Y
* where X, Y ⊆ I
and X ∩ Y = Ø
.
* The item sets X
and Y
are called
* antecedent (left-hand-side or LHS) and consequent (right-hand-side or RHS)
* of the rule, respectively.
*
* The support supp(X)
of an item
* set X
is defined as the proportion of transactions
* in the database which contain the item set. Note that the support of
* an association rule X ⇒ Y
is supp(X ∪ Y)
.
*
* The confidence of a rule is defined as
* conf(X ⇒ Y) = supp(X ∪ Y) / supp(X)
.
* Confidence can be interpreted as an estimate of the probability
* P(Y | X)
, the probability of finding the RHS of the rule
* in transactions under the condition that these transactions also contain
* the LHS.
*
* Lift is a measure of the performance of a targeting model
* (association rule) at predicting or classifying cases as having
* an enhanced response (with respect to the population as a whole),
* measured against a random choice targeting model. A targeting model
* is doing a good job if the response within the target is much better
* than the average for the population as a whole. Lift is simply the ratio
* of these values: target response divided by average response.
* For an association rule X ⇒ Y
, if the lift is equal
* to 1, it means that X and Y are independent. If the lift is higher
* than 1, it means that X and Y are positively correlated.
* If the lift is lower than 1, it means that X and Y are negatively
* correlated.
*
* @author Haifeng Li
*/
public class AssociationRule {
/**
* Antecedent itemset.
*/
public final int[] antecedent;
/**
* Consequent itemset.
*/
public final int[] consequent;
/**
* The support value. The support supp(X) of an itemset X is defined as
* the proportion of transactions in the database which contain the itemset.
*/
public final double support;
/**
* The confidence value. The confidence of a rule is defined
* conf(X ⇒ Y) = supp(X ∪ Y) / supp(X). Confidence can be
* interpreted as an estimate of the probability P(Y | X), the probability
* of finding the RHS of the rule in transactions under the condition
* that these transactions also contain the LHS.
*/
public final double confidence;
/**
* How many times more often antecedent and consequent occur together
* than expected if they were statistically independent.
* Lift is a measure of the performance of a targeting model
* (association rule) at predicting or classifying cases as having
* an enhanced response (with respect to the population as a whole),
* measured against a random choice targeting model. A targeting model
* is doing a good job if the response within the target is much better
* than the average for the population as a whole.
*
* Lift is simply the ratio of these values: target response divided by
* average response.
*
* For an association rule X ⇒ Y
, if the lift is equal
* to 1, it means that X and Y are independent. If the lift is higher
* than 1, it means that X and Y are positively correlated.
* If the lift is lower than 1, it means that X and Y are negatively
* correlated.
*/
public final double lift;
/**
* The difference between the probability of the rule and the expected
* probability if the items were statistically independent.
*/
public final double leverage;
/**
* Constructor.
* @param antecedent the antecedent itemset (LHS) of the association rule.
* @param consequent the consequent itemset (RHS) of the association rule.
* @param support the proportion of instances in the dataset that contain an itemset.
* @param confidence the percentage of instances that contain the consequent
* and antecedent together over the number of instances that
* only contain the antecedent.
* @param lift how many times more often antecedent and consequent occur together
* than expected if they were statistically independent.
* @param leverage the difference between the probability of the rule and the expected
* probability if the items were statistically independent.
*/
public AssociationRule(int[] antecedent, int[] consequent, double support, double confidence, double lift, double leverage) {
this.antecedent = antecedent;
this.consequent = consequent;
this.support = support;
this.confidence = confidence;
this.lift = lift;
this.leverage = leverage;
}
@Override
public boolean equals(Object o) {
if (o instanceof AssociationRule) {
AssociationRule a = (AssociationRule) o;
if (support != a.support) {
return false;
}
if (confidence != a.confidence) {
return false;
}
if (antecedent.length != a.antecedent.length) {
return false;
}
if (consequent.length != a.consequent.length) {
return false;
}
for (int i = 0; i < antecedent.length; i++) {
if (antecedent[i] != a.antecedent[i]) {
return false;
}
}
for (int i = 0; i < consequent.length; i++) {
if (consequent[i] != a.consequent[i]) {
return false;
}
}
return true;
}
return false;
}
@Override
public int hashCode() {
int hash = 7;
hash = 13 * hash + Arrays.hashCode(this.antecedent);
hash = 13 * hash + Arrays.hashCode(this.consequent);
hash = 13 * hash + (int) (Double.doubleToLongBits(this.support) ^ (Double.doubleToLongBits(this.support) >>> 32));
hash = 13 * hash + (int) (Double.doubleToLongBits(this.confidence) ^ (Double.doubleToLongBits(this.confidence) >>> 32));
return hash;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append('(');
sb.append(antecedent[0]);
for (int i = 1; i < antecedent.length; i++) {
sb.append(", ");
sb.append(antecedent[i]);
}
sb.append(") => (");
sb.append(consequent[0]);
for (int i = 1; i < consequent.length; i++) {
sb.append(", ");
sb.append(consequent[i]);
}
sb.append(String.format(") support = %.2f%% confidence = %.2f%% lift = %.2f leverage = %.4f", 100*support, 100*confidence, lift, leverage));
return sb.toString();
}
}