// weka.associations.gsp.Sequence Maven / Gradle / Ivy
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* Sequence.java
* Copyright (C) 2007 Sebastian Beer
*
*/
package weka.associations.gsp;
import weka.core.FastVector;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import java.io.Serializable;
import java.util.Enumeration;
/**
* Class representing a sequence of elements/itemsets.
*
* @author Sebastian Beer
* @version $Revision: 1.2 $
*/
public class Sequence
  implements Cloneable, Serializable, RevisionHandler {

  /** for serialization */
  private static final long serialVersionUID = -5001018056339156390L;

  /** the support count of the Sequence */
  protected int m_SupportCount;

  /** ordered list of the comprised elements/itemsets */
  protected FastVector m_Elements;

  /**
   * Creates an empty Sequence with a support count of 0.
   */
  public Sequence() {
    m_SupportCount = 0;
    m_Elements = new FastVector();
  }

  /**
   * Creates a Sequence with a support count of 0 that uses the given list of
   * Elements. The list is stored by reference, not copied.
   *
   * @param elements 	the Elements of the Sequence
   */
  public Sequence(FastVector elements) {
    m_SupportCount = 0;
    m_Elements = elements;
  }

  /**
   * Creates an empty Sequence with the given support count.
   *
   * @param supportCount 	the support count to set
   */
  public Sequence(int supportCount) {
    m_SupportCount = supportCount;
    m_Elements = new FastVector();
  }

  /**
   * Generates all possible candidate k-Sequences and prunes the ones that
   * contain an infrequent (k-1)-Sequence.
   *
   * @param kMinusOneSequences 	the set of (k-1)-Sequences, used for verification
   * @return 			the generated set of k-candidates
   * @throws CloneNotSupportedException 	declared by the candidate generation
   * 					step; kept so existing callers compile
   */
  public static FastVector aprioriGen(FastVector kMinusOneSequences) throws CloneNotSupportedException {
    FastVector allCandidates = generateKCandidates(kMinusOneSequences);
    return pruneCadidates(allCandidates, kMinusOneSequences);
  }

  /**
   * Filters a set of Sequences, keeping only those whose support count
   * reaches the minimum support count threshold. The input set is not
   * modified; the kept Sequences are the same objects, not copies.
   *
   * @param sequences 		the set of Sequences to be checked
   * @param minSupportCount 	the minimum support count
   * @return 			a new set holding the Sequences with
   * 				support count >= minSupportCount
   */
  public static FastVector deleteInfrequentSequences(FastVector sequences, long minSupportCount) {
    FastVector frequentSequences = new FastVector();
    Enumeration seqEnum = sequences.elements();

    while (seqEnum.hasMoreElements()) {
      Sequence currentSeq = (Sequence) seqEnum.nextElement();
      if (currentSeq.getSupportCount() >= minSupportCount) {
        frequentSequences.addElement(currentSeq);
      }
    }
    return frequentSequences;
  }

  /**
   * Generates candidate k-Sequences on the basis of a given (k-1)-Sequence set.
   * Every ordered pair (i, j) of the input is considered, including i == j.
   * A pair is joined when the first Sequence without its first event equals
   * the second Sequence without its last event.
   *
   * @param kMinusOneSequences 	the set of (k-1)-Sequences
   * @return 			the set of candidate k-Sequences
   * @throws CloneNotSupportedException 	never thrown by this implementation;
   * 					kept so existing callers compile
   */
  protected static FastVector generateKCandidates(FastVector kMinusOneSequences) throws CloneNotSupportedException {
    FastVector candidates = new FastVector();
    int numSequences = kMinusOneSequences.size();

    for (int i = 0; i < numSequences; i++) {
      for (int j = 0; j < numSequences; j++) {
        // work on fresh copies: the merge result reuses the Elements of
        // seq1/seq2, so every candidate needs its own instances
        Sequence seq1 = ((Sequence) kMinusOneSequences.elementAt(i)).clone();
        Sequence seq2 = ((Sequence) kMinusOneSequences.elementAt(j)).clone();
        Sequence subseq1 = seq1.deleteEvent("first");
        Sequence subseq2 = seq2.deleteEvent("last");

        if (!subseq1.equals(subseq2)) {
          continue;
        }

        FastVector mergeResult;
        if ((subseq1.getElements().size() == 0) && (subseq2.getElements().size() == 0)) {
          // seq1 and seq2 are 1-sequences; the combined element <{x,y}> is
          // only requested for i >= j so that it is not produced twice
          mergeResult = merge(seq1, seq2, true, i >= j);
        } else {
          // seq1 and seq2 are k-sequences
          mergeResult = merge(seq1, seq2, false, false);
        }
        candidates.appendElements(mergeResult);
      }
    }
    return candidates;
  }

  /**
   * Merges two Sequences in the course of candidate generation. Differentiates
   * between merging 1-Sequences and k-Sequences, k > 1.
   *
   * @param seq1 		Sequence at first position
   * @param seq2 		Sequence at second position
   * @param oneElements 	true, if 1-Elements should be merged, else false
   * @param mergeElements 	true, if two 1-Elements were not already merged
   * 				(regardless of their position), else false;
   * 				only evaluated when oneElements is true
   * @return 			set of resulting Sequences
   */
  protected static FastVector merge(Sequence seq1, Sequence seq2, boolean oneElements, boolean mergeElements) {
    if (oneElements) {
      return mergeOneSequences(seq1, seq2, mergeElements);
    }
    return mergeKSequences(seq1, seq2);
  }

  /**
   * Merges two 1-Sequences. Always yields <{x}{y}>; additionally yields
   * <{x,y}> if requested and a combined Element could be built.
   *
   * @param seq1 		Sequence at first position
   * @param seq2 		Sequence at second position
   * @param mergeElements 	true, if the combined Element should be tried
   * @return 			set of resulting Sequences (one or two entries)
   */
  private static FastVector mergeOneSequences(Sequence seq1, Sequence seq2, boolean mergeElements) {
    FastVector mergeResult = new FastVector();
    Element element1 = (Element) seq1.getElements().firstElement();
    Element element2 = (Element) seq2.getElements().firstElement();
    Element element3 = null;

    if (mergeElements) {
      // a position holding a value > -1 is treated as a present event; the
      // scan stops as soon as both Elements have an event at the same position
      for (int i = 0; i < element1.getEvents().length; i++) {
        if (element1.getEvents()[i] > -1) {
          if (element2.getEvents()[i] > -1) {
            break;
          } else {
            element3 = Element.merge(element1, element2);
          }
        }
      }
    }

    //generate <{x}{y}>
    FastVector newElements1 = new FastVector();
    newElements1.addElement(element1);
    newElements1.addElement(element2);
    mergeResult.addElement(new Sequence(newElements1));

    //generate <{x,y}>
    if (element3 != null) {
      FastVector newElements2 = new FastVector();
      newElements2.addElement(element3);
      mergeResult.addElement(new Sequence(newElements2));
    }
    return mergeResult;
  }

  /**
   * Merges two k-Sequences, k > 1, into a single Sequence. If the last
   * Element of seq2 reports more than one event, the last Elements of both
   * Sequences are combined into one; otherwise the last Element of seq2 is
   * appended to the Elements of seq1.
   *
   * @param seq1 	Sequence at first position
   * @param seq2 	Sequence at second position
   * @return 		set holding the single resulting Sequence
   */
  private static FastVector mergeKSequences(Sequence seq1, Sequence seq2) {
    FastVector mergeResult = new FastVector();
    FastVector seq1Elements = seq1.getElements();
    Element lastElementSeq1 = (Element) seq1Elements.lastElement();
    Element lastElementSeq2 = (Element) seq2.getElements().lastElement();
    Sequence resultSeq = new Sequence();
    FastVector resultSeqElements = resultSeq.getElements();

    // true: the last two events/items belong to the same element/itemset
    boolean sameElement = lastElementSeq2.containsOverOneEvent();
    int numCopied = sameElement ? seq1Elements.size() - 1 : seq1Elements.size();

    for (int i = 0; i < numCopied; i++) {
      resultSeqElements.addElement(seq1Elements.elementAt(i));
    }
    if (sameElement) {
      resultSeqElements.addElement(Element.merge(lastElementSeq1, lastElementSeq2));
    } else {
      resultSeqElements.addElement(lastElementSeq2);
    }
    mergeResult.addElement(resultSeq);
    return mergeResult;
  }

  /**
   * Converts a set of 1-Elements into a set of 1-Sequences. Each resulting
   * Sequence holds exactly one of the given Elements (by reference).
   *
   * @param elements 	the set of 1-Elements
   * @return 		the set of 1-Sequences
   */
  public static FastVector oneElementsToSequences(FastVector elements) {
    FastVector sequences = new FastVector();
    Enumeration elementEnum = elements.elements();

    while (elementEnum.hasMoreElements()) {
      Sequence seq = new Sequence();
      seq.getElements().addElement(elementEnum.nextElement());
      sequences.addElement(seq);
    }
    return sequences;
  }

  /**
   * Prints a set of Sequences to standard output, each one prefixed with its
   * 1-based position in the set.
   *
   * @param setOfSequences 	the set of sequences
   */
  public static void printSetOfSequences(FastVector setOfSequences) {
    Enumeration seqEnum = setOfSequences.elements();
    int i = 1;

    while (seqEnum.hasMoreElements()) {
      Sequence seq = (Sequence) seqEnum.nextElement();
      System.out.print("[" + i++ + "]" + " " + seq.toString());
    }
  }

  /**
   * Prunes a k-Sequence of a given candidate set if one of its (k-1)-Sequences
   * is infrequent.
   *
   * Each (k-1)-subsequence is formed in place by temporarily setting one
   * event of the candidate to -1 and looked up in kMinusOneSequences via
   * indexOf. The candidate is restored after every successful lookup. A
   * candidate that fails a lookup is left in its modified state, but it is
   * not part of the returned set.
   *
   * @param allCandidates 	the set of all potential k-Sequences
   * @param kMinusOneSequences 	the set of (k-1)-Sequences for verification
   * @return 			the set of the pruned candidates
   */
  protected static FastVector pruneCadidates(FastVector allCandidates, FastVector kMinusOneSequences) {
    FastVector prunedCandidates = new FastVector();
    boolean isFrequent;
    //for each candidate
    for (int i = 0; i < allCandidates.size(); i++) {
      Sequence candidate = (Sequence) allCandidates.elementAt(i);
      isFrequent = true;
      FastVector canElements = candidate.getElements();
      //generate each possible (k-1)-sequence and verify if it's frequent
      for (int j = 0; j < canElements.size(); j++) {
        if(isFrequent) {
          Element origElement = (Element) canElements.elementAt(j);
          int[] origEvents = origElement.getEvents();

          for (int k = 0; k < origEvents.length; k++) {
            if (origEvents[k] > -1) {
              // remember the event so it can be put back after the lookup
              int helpEvent = origEvents[k];
              origEvents[k] = -1;

              if (origElement.isEmpty()) {
                // the element held only this event: drop the whole element
                // for the duration of the lookup
                canElements.removeElementAt(j);
                //check if the (k-1)-sequence is contained in the set of kMinusOneSequences
                int containedAt = kMinusOneSequences.indexOf(candidate);
                if (containedAt != -1) {
                  origEvents[k] = helpEvent;
                  canElements.insertElementAt(origElement, j);
                  // the element had a single event, nothing left to try here
                  break;
                } else {
                  isFrequent = false;
                  break;
                }
              } else {
                //check if the (k-1)-sequence is contained in the set of kMinusOneSequences
                int containedAt = kMinusOneSequences.indexOf(candidate);
                if (containedAt != -1) {
                  origEvents[k] = helpEvent;
                  continue;
                } else {
                  isFrequent = false;
                  break;
                }
              }
            }
          }
        } else {
          break;
        }
      }
      if (isFrequent) {
        prunedCandidates.addElement(candidate);
      }
    }
    return prunedCandidates;
  }

  /**
   * Returns a String representation of a set of Sequences where the numeric
   * value of each event/item is represented by its respective nominal value.
   *
   * If the first entry of filterAttributes is -1, or filterAttributes is
   * empty or null, every Sequence is output. Otherwise a Sequence is only
   * output if each of its Elements has an event (a value other than -1) at
   * every listed attribute index. Output Sequences are numbered
   * consecutively, starting at 1.
   *
   * @param setOfSequences 	the set of Sequences
   * @param dataSet 		the corresponding data set containing the header
   * 				information
   * @param filterAttributes	the attributes to filter out (Integer indices)
   * @return 			the String representation
   */
  public static String setOfSequencesToString(FastVector setOfSequences, Instances dataSet, FastVector filterAttributes) {
    StringBuilder resString = new StringBuilder();

    // the filter switch does not depend on the Sequence, so evaluate it once
    boolean filterActive = (filterAttributes != null)
      && (filterAttributes.size() > 0)
      && (((Integer) filterAttributes.elementAt(0)).intValue() != -1);

    Enumeration sequencesEnum = setOfSequences.elements();
    int i = 1;

    while (sequencesEnum.hasMoreElements()) {
      Sequence seq = (Sequence) sequencesEnum.nextElement();
      if (!filterActive || containsAllAttributes(seq, filterAttributes)) {
        resString.append('[').append(i++).append("] ").append(seq.toNominalString(dataSet));
      }
    }
    return resString.toString();
  }

  /**
   * Checks whether every Element of a Sequence has an event at each of the
   * given attribute indices.
   *
   * @param seq 		the Sequence to check
   * @param filterAttributes 	the attribute indices (Integer objects)
   * @return 			true, if no Element holds -1 at any of the
   * 				indices, else false
   */
  private static boolean containsAllAttributes(Sequence seq, FastVector filterAttributes) {
    FastVector seqElements = seq.getElements();

    for (int j = 0; j < filterAttributes.size(); j++) {
      int attrIndex = ((Integer) filterAttributes.elementAt(j)).intValue();
      for (int k = 0; k < seqElements.size(); k++) {
        int[] currentEvents = ((Element) seqElements.elementAt(k)).getEvents();
        if (currentEvents[attrIndex] == -1) {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * Updates the support count of a set of Sequence candidates according to a
   * given set of data sequences. The support count of a candidate is
   * increased by one for every data sequence it is a subsequence of.
   *
   * @param candidates 		the set of candidates
   * @param dataSequences 	the set of data sequences (Instances objects)
   */
  public static void updateSupportCount(FastVector candidates, FastVector dataSequences) {
    Enumeration canEnumeration = candidates.elements();

    while (canEnumeration.hasMoreElements()) {
      Sequence candidate = (Sequence) canEnumeration.nextElement();
      Enumeration dataSeqEnumeration = dataSequences.elements();

      while (dataSeqEnumeration.hasMoreElements()) {
        Instances dataSequence = (Instances) dataSeqEnumeration.nextElement();
        if (candidate.isSubsequenceOf(dataSequence)) {
          candidate.setSupportCount(candidate.getSupportCount() + 1);
        }
      }
    }
  }

  /**
   * Returns a deep clone of a Sequence: the list of Elements and each
   * contained Element are cloned, the support count is carried over.
   *
   * @return 	the cloned Sequence
   * @throws IllegalStateException 	if the object cannot be cloned, which is
   * 					not expected as the class is Cloneable
   */
  @Override
  public Sequence clone() {
    try {
      // super.clone() already copies the support count
      Sequence copy = (Sequence) super.clone();
      FastVector cloneElements = new FastVector(m_Elements.size());

      for (int i = 0; i < m_Elements.size(); i++) {
        Element helpElement = (Element) m_Elements.elementAt(i);
        cloneElements.addElement(helpElement.clone());
      }
      copy.setElements(cloneElements);
      return copy;
    } catch (CloneNotSupportedException exc) {
      // fail here instead of handing out null and failing later at the caller
      throw new IllegalStateException("Sequence could not be cloned", exc);
    }
  }

  /**
   * Deletes either the first or the last event/item of a Sequence. If the
   * deleted event/item is the only value in the Element, it is removed, as well.
   * This Sequence itself is left untouched; the deletion is carried out on a
   * clone.
   *
   * @param position 	the position of the event/item ("first" or "last")
   * @return 		a clone of the Sequence with either the first or the
   * 			last event/item deleted, or null if position is neither
   * 			"first" nor "last"
   */
  protected Sequence deleteEvent(String position) {
    Sequence cloneSeq = clone();
    FastVector cloneElements = cloneSeq.getElements();

    if ("first".equals(position)) {
      Element element = (Element) cloneElements.firstElement();
      element.deleteEvent("first");
      if (element.isEmpty()) {
        cloneElements.removeElementAt(0);
      }
      return cloneSeq;
    }
    if ("last".equals(position)) {
      Element element = (Element) cloneElements.lastElement();
      element.deleteEvent("last");
      if (element.isEmpty()) {
        // index into the list that is actually being modified
        cloneElements.removeElementAt(cloneElements.size() - 1);
      }
      return cloneSeq;
    }
    return null;
  }

  /**
   * Checks if two Sequences are equal. Two Sequences are equal if they
   * consist of the same number of Elements and the Elements at the same
   * position are equal. The support count is not taken into account.
   *
   * @param obj 	the object to compare with
   * @return 		true, if the two Sequences are equal, else false
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof Sequence)) {
      return false;
    }
    FastVector seq2Elements = ((Sequence) obj).getElements();

    // without this check a Sequence would equal any longer Sequence it is a
    // prefix of, and a longer Sequence would read past the end of a shorter one
    if (m_Elements.size() != seq2Elements.size()) {
      return false;
    }
    for (int i = 0; i < m_Elements.size(); i++) {
      Element thisElement = (Element) m_Elements.elementAt(i);
      Element seq2Element = (Element) seq2Elements.elementAt(i);
      if (!thisElement.equals(seq2Element)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns a hash code that is consistent with {@link #equals(Object)}.
   * Only the number of Elements is used, as equal Sequences always have the
   * same number of Elements and no assumption is made about the hash code of
   * the Elements themselves.
   *
   * @return 	the hash code
   */
  @Override
  public int hashCode() {
    return m_Elements.size();
  }

  /**
   * Returns the Elements of the Sequence. The internal list is returned,
   * not a copy.
   *
   * @return 	the Elements
   */
  protected FastVector getElements() {
    return m_Elements;
  }

  /**
   * Returns the support count of the Sequence.
   *
   * @return 	the support count
   */
  protected int getSupportCount() {
    return m_SupportCount;
  }

  /**
   * Checks if the Sequence is subsequence of a given data sequence. The
   * instances of the data sequence are scanned once, in order; each Element
   * has to be contained by an instance that comes after the instance
   * matching the previous Element. A Sequence without Elements is a
   * subsequence of every data sequence.
   *
   * @param dataSequence 	the data sequence to verify against
   * @return 			true, if the Sequence is subsequence of the data
   * 				sequence, else false
   */
  protected boolean isSubsequenceOf(Instances dataSequence) {
    Enumeration elementEnum = getElements().elements();

    if (!elementEnum.hasMoreElements()) {
      return true;
    }
    Element curElement = (Element) elementEnum.nextElement();

    for (int i = 0; i < dataSequence.numInstances(); i++) {
      if (curElement.isContainedBy(dataSequence.instance(i))) {
        if (!elementEnum.hasMoreElements()) {
          return true;
        }
        // the next Element has to be matched by a later instance
        curElement = (Element) elementEnum.nextElement();
      }
    }
    return false;
  }

  /**
   * Sets the Elements of the Sequence. The list is stored by reference.
   *
   * @param elements 	the Elements to set
   */
  protected void setElements(FastVector elements) {
    m_Elements = elements;
  }

  /**
   * Sets the support count of the Sequence.
   *
   * @param supportCount 	the support count to set
   */
  protected void setSupportCount(int supportCount) {
    m_SupportCount = supportCount;
  }

  /**
   * Returns a String representation of a Sequences where the numeric value
   * of each event/item is represented by its respective nominal value. The
   * result has the form "<...> (supportCount)" followed by a line break.
   *
   * @param dataSet 	the corresponding data set containing the header
   * 			information
   * @return 		the String representation
   */
  public String toNominalString(Instances dataSet) {
    StringBuilder result = new StringBuilder("<");

    for (int i = 0; i < m_Elements.size(); i++) {
      Element element = (Element) m_Elements.elementAt(i);
      result.append(element.toNominalString(dataSet));
    }
    result.append("> (").append(getSupportCount()).append(")\n");
    return result.toString();
  }

  /**
   * Returns a String representation of a Sequence, listing the support count
   * followed by the String representation of each contained Element.
   *
   * @return 	the String representation
   */
  @Override
  public String toString() {
    StringBuilder result = new StringBuilder();

    result.append("Sequence Output\n");
    result.append("------------------------------\n");
    result.append("Support Count: ").append(getSupportCount()).append("\n");
    result.append("contained elements/itemsets:\n");

    for (int i = 0; i < m_Elements.size(); i++) {
      Element element = (Element) m_Elements.elementAt(i);
      result.append(element.toString());
    }
    result.append("\n\n");
    return result.toString();
  }

  /**
   * Returns the revision string.
   *
   * @return		the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.2 $");
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy