All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.association.TotalSupportTree Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see .
 */

package smile.association;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/**
 * Total support tree is a kind of compressed set enumeration tree so that we
 * can generate association rules in a storage efficient way.
 *
 * 

References

*
    *
  1. Frans Coenen, Paul Leng, and Shakil Ahmed. Data Structure for Association Rule Mining: T-Trees and P-Trees. IEEE TRANSACTIONS ON KNOWLEDGE AND DATA ENGINEERING, 16(6):774-778, 2004.
  2. *
* * @author Haifeng Li */ class TotalSupportTree implements Iterable { static class Node { /** * The id of item. */ int id = -1; /** * The support associate with the item set represented by the node. */ int support = 0; /** * The set of children nodes. */ Node[] children = null; /** * Constructor. */ Node() { } /** * Constructor. * @param id the id of item. */ Node(int id) { this.id = id; } } /** * The root of t-tree. */ private final Node root = new Node(); /** * The number transactions in the database. */ private final int numTransactions; /** * The required minimum support of item sets. */ private final int minSupport; /** * The index of items after sorting. */ private final int[] order; /** * The buffer to collect mining results. */ private final Queue buffer = new LinkedList<>(); /** * Constructor. */ public TotalSupportTree(FPTree tree) { this.numTransactions = tree.numTransactions; this.minSupport = tree.minSupport; this.order = tree.order; root.children = new Node[tree.numFreqItems]; FPGrowth.apply(tree).forEach(itemset -> add(itemset.items, itemset.support)); } /** * Returns the number transactions in the database. */ public int size() { return numTransactions; } /** * Returns the root node. */ public Node root() { return root; } @Override public Iterator iterator() { return new Iterator() { int i = 0; @Override public boolean hasNext() { if (buffer.isEmpty()) { for (; i < root.children.length; i++) { Node child = root.children[i]; if (child != null && child.support >= minSupport) { int[] itemset = {child.id}; generate(itemset, i, child); if (!buffer.isEmpty()) { i++; // we will miss i++ in for loop once break break; } } } } return !buffer.isEmpty(); } @Override public ItemSet next() { return buffer.poll(); } }; } /** * Adds an item set with its support value. * @param itemset the given item set. The items in the set has to be in the * descending order according to their frequency. * @param support the support value associated with the given item set. */ private void add(int[] itemset, int support) { add(root, 0, itemset.length - 1, itemset, support); } /** * Inserts a node into a T-tree. * @param node the root of subtree. * @param size the size of the current array in T-tree. * @param index the index of the last item in the item set, which is also * used as a level counter. * @param itemset the given item set. * @param support the support value associated with the given item set. */ private void add(Node node, int size, int index, int[] itemset, int support) { if (node.children == null) { node.children = new Node[size]; } int item = order[itemset[index]]; if (node.children[item] == null) { node.children[item] = new Node(itemset[index]); } if (index == 0) { node.children[item].support += support; } else { add(node.children[item], item, index - 1, itemset, support); } } /** * Returns the support value for the given item set. * @param itemset the given item set. The items in the set has to be in the * descending order according to their frequency. * @return the support value (0 if not found) */ public int getSupport(int[] itemset) { if (root.children != null) { return getSupport(itemset, itemset.length - 1, root); } else { return 0; } } /** * Returns the support value for the given item set if found in the T-tree * and 0 otherwise. * @param itemset the given item set. * @param index the current index in the given item set. * @return the support value (0 if not found) */ private int getSupport(int[] itemset, int index, Node node) { int item = order[itemset[index]]; Node child = node.children[item]; if (child != null) { // If the index is 0, then this is the last element (i.e the // input is a 1 itemset) and therefore item set found if (index == 0) { return child.support; } else { if (child.children != null) { return getSupport(itemset, index - 1, child); } } } return 0; } /** * Mines the frequent item sets. * @return the stream of frequent item sets */ public Stream stream() { return StreamSupport.stream(spliterator(), false); } /** * Returns the set of frequent item sets. * @param node the path from root to this node matches the given item set. * @param itemset the frequent item set generated so far. * @param size the length/size of the current array level in the T-tree. */ private void generate(int[] itemset, int size, Node node) { ItemSet set = new ItemSet(itemset, node.support); buffer.offer(set); if (node.children != null) { for (int i = 0; i < size; i++) { Node child = node.children[i]; if (child != null && child.support >= minSupport) { int[] newItemset = FPGrowth.insert(itemset, child.id); generate(newItemset, i, child); } } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy