All downloads are free. Search and download functionality uses the official Maven repository.

weka.associations.gsp.Sequence Maven / Gradle / Ivy

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Sequence.java
 * Copyright (C) 2007 Sebastian Beer
 *
 */

package weka.associations.gsp;

import weka.core.FastVector;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;

import java.io.Serializable;
import java.util.Enumeration;

/**
 * Class representing a sequence of elements/itemsets.
 * <p>
 * A Sequence is an ordered list of {@link Element}s together with a support
 * count. Throughout this class an event value of <code>-1</code> inside an
 * Element's event array is treated as "no event at this position".
 * 
 * @author  Sebastian Beer
 * @version $Revision: 1.2 $
 */
public class Sequence
  implements Cloneable, Serializable, RevisionHandler {

  /** for serialization */
  private static final long serialVersionUID = -5001018056339156390L;

  /** the support count of the Sequence */
  protected int m_SupportCount;

  /** ordered list of the comprised elements/itemsets */
  protected FastVector m_Elements;

  /**
   * Constructor. Creates an empty Sequence with a support count of 0.
   */
  public Sequence() {
    m_SupportCount = 0;
    m_Elements = new FastVector();
  }

  /**
   * Constructor accepting a set of elements as parameter. The given vector is
   * stored as is (no copy is made); the support count is set to 0.
   * 
   * @param elements 		the Elements of the Sequence
   */
  public Sequence(FastVector elements) {
    m_SupportCount = 0;
    m_Elements = elements;
  }

  /**
   * Constructor accepting an int value as parameter to set the support count.
   * The Sequence starts without any Elements.
   * 
   * @param supportCount 	the support count to set
   */
  public Sequence(int supportCount) {
    m_SupportCount = supportCount;
    m_Elements = new FastVector();
  }

  /**
   * Generates all possible candidate k-Sequences and prunes the ones that 
   * contain an infrequent (k-1)-Sequence.
   * 
   * @param kMinusOneSequences 	the set of (k-1)-Sequences, used for verification
   * @return 			the generated set of k-candidates
   * @throws CloneNotSupportedException	declared for backward compatibility
   */
  public static FastVector aprioriGen(FastVector kMinusOneSequences) throws CloneNotSupportedException {
    FastVector allCandidates = generateKCandidates(kMinusOneSequences);

    return pruneCadidates(allCandidates, kMinusOneSequences);
  }

  /**
   * Filters out the Sequences of a given set which don't meet the minimum 
   * support count threshold. The given set itself is not modified; the 
   * surviving Sequences are collected in a new vector.
   * 
   * @param sequences 		the set Sequences to be checked
   * @param minSupportCount 	the minimum support count
   * @return 			a new set containing only the Sequences whose 
   * 				support count is at least minSupportCount
   */
  public static FastVector deleteInfrequentSequences(FastVector sequences, long minSupportCount) {
    FastVector frequentSequences = new FastVector();
    Enumeration seqEnum = sequences.elements();

    while (seqEnum.hasMoreElements()) {
      Sequence currentSeq = (Sequence) seqEnum.nextElement();

      if (currentSeq.getSupportCount() >= minSupportCount) {
        frequentSequences.addElement(currentSeq);
      }
    }
    return frequentSequences;
  }

  /**
   * Generates candidate k-Sequences on the basis of a given (k-1)-Sequence set.
   * Two Sequences are joined if the first one without its first event equals 
   * the second one without its last event.
   * 
   * @param kMinusOneSequences 	the set of (k-1)-Sequences
   * @return 			the set of candidate k-Sequences
   * @throws CloneNotSupportedException	declared for backward compatibility
   */
  protected static FastVector generateKCandidates(FastVector kMinusOneSequences) throws CloneNotSupportedException {
    FastVector candidates = new FastVector();
    int numSequences = kMinusOneSequences.size();

    // deleteEvent() works on a clone and leaves the receiver untouched, so 
    // the "last event removed" variants only depend on j and can be computed 
    // once instead of once per (i, j) pair
    Sequence[] withoutLastEvent = new Sequence[numSequences];
    for (int j = 0; j < numSequences; j++) {
      Sequence original = (Sequence) kMinusOneSequences.elementAt(j);
      withoutLastEvent[j] = original.deleteEvent("last");
    }

    for (int i = 0; i < numSequences; i++) {
      Sequence originalSeq1 = (Sequence) kMinusOneSequences.elementAt(i);
      // only depends on i, hence computed outside of the inner loop
      Sequence subseq1 = originalSeq1.deleteEvent("first");

      for (int j = 0; j < numSequences; j++) {
        Sequence subseq2 = withoutLastEvent[j];

        if (!subseq1.equals(subseq2)) {
          continue;
        }

        // merge() reuses the Elements of its arguments in the result, so 
        // every join has to operate on fresh deep copies
        Sequence seq1 = originalSeq1.clone();
        Sequence seq2 = ((Sequence) kMinusOneSequences.elementAt(j)).clone();
        FastVector mergeResult;

        if ((subseq1.getElements().size() == 0) && (subseq2.getElements().size() == 0)) {
          // seq1 and seq2 are 1-sequences; the combined element <{x,y}> is 
          // only requested for i >= j, so that each unordered pair is merged 
          // into a single element at most once
          mergeResult = merge(seq1, seq2, true, i >= j);
        } else {
          // seq1 and seq2 are k-sequences
          mergeResult = merge(seq1, seq2, false, false);
        }
        candidates.appendElements(mergeResult);
      }
    }
    return candidates;
  }

  /**
   * Merges two Sequences in the course of candidate generation. Differentiates 
   * between merging 1-Sequences and k-Sequences, k > 1. The Elements of the 
   * given Sequences are reused in the result, i.e., they are not copied.
   * 
   * @param seq1 		Sequence at first position
   * @param seq2 		Sequence at second position
   * @param oneElements 	true, if 1-Elements should be merged, else false
   * @param mergeElements 	true, if two 1-Elements were not already merged 
   * 				(regardless of their position), else false
   * @return 			set of resulting Sequences
   */
  protected static FastVector merge(Sequence seq1, Sequence seq2, boolean oneElements, boolean mergeElements) {
    FastVector mergeResult = new FastVector();

    if (oneElements) {
      // merge 1-sequences
      Element element1 = (Element) seq1.getElements().firstElement();
      Element element2 = (Element) seq2.getElements().firstElement();
      Element combined = null;

      if (mergeElements) {
        for (int i = 0; i < element1.getEvents().length; i++) {
          if (element1.getEvents()[i] > -1) {
            if (element2.getEvents()[i] > -1) {
              // both elements have an event at the same position, stop here
              break;
            } else {
              combined = Element.merge(element1, element2);
            }
          }
        }
      }

      // generate <{x}{y}>
      FastVector separateElements = new FastVector();
      separateElements.addElement(element1);
      separateElements.addElement(element2);
      mergeResult.addElement(new Sequence(separateElements));

      // generate <{x,y}>
      if (combined != null) {
        FastVector combinedElements = new FastVector();
        combinedElements.addElement(combined);
        mergeResult.addElement(new Sequence(combinedElements));
      }
      return mergeResult;
    }

    // merge k-sequences, k > 1
    Element lastElementSeq1 = (Element) seq1.getElements().lastElement();
    Element lastElementSeq2 = (Element) seq2.getElements().lastElement();
    Sequence resultSeq = new Sequence();
    FastVector resultSeqElements = resultSeq.getElements();

    if (lastElementSeq2.containsOverOneEvent()) {
      // the last two events/items belong to the same element/itemset:
      // keep all but the last element of seq1 and append the merged element
      for (int i = 0; i < (seq1.getElements().size() - 1); i++) {
        resultSeqElements.addElement(seq1.getElements().elementAt(i));
      }
      resultSeqElements.addElement(Element.merge(lastElementSeq1, lastElementSeq2));
    } else {
      // the last two events/items belong to different elements/itemsets:
      // keep all elements of seq1 and append the last element of seq2
      for (int i = 0; i < seq1.getElements().size(); i++) {
        resultSeqElements.addElement(seq1.getElements().elementAt(i));
      }
      resultSeqElements.addElement(lastElementSeq2);
    }
    mergeResult.addElement(resultSeq);

    return mergeResult;
  }

  /**
   * Converts a set of 1-Elements into a set of 1-Sequences. Each resulting 
   * Sequence holds exactly one of the given Elements (not a copy of it).
   * 
   * @param elements 		the set of 1-Elements
   * @return 			the set of 1-Sequences
   */
  public static FastVector oneElementsToSequences(FastVector elements) {
    FastVector sequences = new FastVector();
    Enumeration elementEnum = elements.elements();

    while (elementEnum.hasMoreElements()) {
      Sequence seq = new Sequence();
      seq.getElements().addElement(elementEnum.nextElement());
      sequences.addElement(seq);
    }
    return sequences;
  }

  /**
   * Prints a set of Sequences to standard output, each one prefixed with its 
   * 1-based position in the set.
   * 
   * @param setOfSequences	the set of sequences
   */
  public static void printSetOfSequences(FastVector setOfSequences) {
    Enumeration seqEnum = setOfSequences.elements();
    int i = 1;

    while (seqEnum.hasMoreElements()) {
      Sequence seq = (Sequence) seqEnum.nextElement();
      System.out.print("[" + i++ + "]" + " " + seq.toString());
    }
  }

  /**
   * Prunes a k-Sequence of a given candidate set if one of its (k-1)-Sequences 
   * is infrequent. For the check, each event of a candidate is temporarily 
   * blanked out (set to -1) in place; candidates that pass are fully restored, 
   * candidates that fail are dropped and left in their modified state.
   * 
   * @param allCandidates 	the set of all potential k-Sequences
   * @param kMinusOneSequences 	the set of (k-1)-Sequences for verification
   * @return 			the set of the pruned candidates
   */
  protected static FastVector pruneCadidates(FastVector allCandidates, FastVector kMinusOneSequences) {
    FastVector prunedCandidates = new FastVector();

    // for each candidate
    for (int i = 0; i < allCandidates.size(); i++) {
      Sequence candidate = (Sequence) allCandidates.elementAt(i);
      FastVector canElements = candidate.getElements();
      boolean isFrequent = true;

      // generate each possible (k-1)-sequence and verify if it's frequent
      for (int j = 0; isFrequent && j < canElements.size(); j++) {
        Element origElement = (Element) canElements.elementAt(j);
        int[] origEvents = origElement.getEvents();

        for (int k = 0; k < origEvents.length; k++) {
          if (origEvents[k] <= -1) {
            continue;
          }
          int helpEvent = origEvents[k];
          origEvents[k] = -1;

          // an element that lost its only event has to leave the sequence, 
          // otherwise the comparison below would see an empty element
          boolean elementEmptied = origElement.isEmpty();
          if (elementEmptied) {
            canElements.removeElementAt(j);
          }

          // check if the (k-1)-sequence is contained in the set of kMinusOneSequences
          if (kMinusOneSequences.indexOf(candidate) == -1) {
            isFrequent = false;
            break;
          }

          // restore the candidate before testing the next event
          origEvents[k] = helpEvent;
          if (elementEmptied) {
            canElements.insertElementAt(origElement, j);
            // the element held a single event only, nothing left to test
            break;
          }
        }
      }
      if (isFrequent) {
        prunedCandidates.addElement(candidate);
      }
    }
    return prunedCandidates;
  }

  /**
   * Returns a String representation of a set of Sequences where the numeric 
   * value of each event/item is represented by its respective nominal value.
   * <p>
   * If filtering is active, a Sequence is only included if every one of its 
   * Elements has an event for every listed attribute index. Filtering is 
   * switched off if the first entry of filterAttributes is -1 or if 
   * filterAttributes is empty.
   * 
   * @param setOfSequences 	the set of Sequences
   * @param dataSet 		the corresponding data set containing the header 
   * 				information
   * @param filterAttributes	the attribute indices (as Integer objects) that 
   * 				each Element of a printed Sequence must contain
   * @return 			the String representation
   */
  public static String setOfSequencesToString(FastVector setOfSequences, Instances dataSet, FastVector filterAttributes) {
    StringBuilder resString = new StringBuilder();
    Enumeration sequencesEnum = setOfSequences.elements();
    int i = 1;

    while (sequencesEnum.hasMoreElements()) {
      Sequence seq = (Sequence) sequencesEnum.nextElement();

      if (passesAttributeFilter(seq, filterAttributes)) {
        resString.append("[" + i++ + "]" + " " + seq.toNominalString(dataSet));
      }
    }
    return resString.toString();
  }

  /**
   * Checks whether a Sequence satisfies the attribute filter used by 
   * setOfSequencesToString.
   * 
   * @param seq			the Sequence to check
   * @param filterAttributes	the attribute indices (as Integer objects)
   * @return			true, if no filter is active or every Element of 
   * 				the Sequence has an event for each attribute index
   */
  private static boolean passesAttributeFilter(Sequence seq, FastVector filterAttributes) {
    // no filter given at all, or explicitly disabled via the -1 marker
    if (filterAttributes.size() == 0) {
      return true;
    }
    if (((Integer) filterAttributes.elementAt(0)).intValue() == -1) {
      return true;
    }

    FastVector seqElements = seq.getElements();

    for (int j = 0; j < filterAttributes.size(); j++) {
      int attrIndex = ((Integer) filterAttributes.elementAt(j)).intValue();

      for (int k = 0; k < seqElements.size(); k++) {
        Element currentElement = (Element) seqElements.elementAt(k);

        if (currentElement.getEvents()[attrIndex] == -1) {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * Updates the support count of a set of Sequence candidates according to a 
   * given set of data sequences. The support count of a candidate is 
   * incremented by one for each data sequence it is a subsequence of.
   * 
   * @param candidates 		the set of candidates
   * @param dataSequences 	the set of data sequences
   */
  public static void updateSupportCount(FastVector candidates, FastVector dataSequences) {
    Enumeration canEnumeration = candidates.elements();

    while (canEnumeration.hasMoreElements()) {
      Sequence candidate = (Sequence) canEnumeration.nextElement();
      Enumeration dataSeqEnumeration = dataSequences.elements();

      while (dataSeqEnumeration.hasMoreElements()) {
        Instances dataSequence = (Instances) dataSeqEnumeration.nextElement();

        if (candidate.isSubsequenceOf(dataSequence)) {
          candidate.setSupportCount(candidate.getSupportCount() + 1);
        }
      }
    }
  }

  /**
   * Returns a deep clone of a Sequence: the Elements are cloned as well and 
   * stored in a new vector.
   * 
   * @return 		the cloned Sequence
   * @throws IllegalStateException	if cloning is not supported; this class 
   * 			implements Cloneable, so this indicates a broken 
   * 			class hierarchy rather than a recoverable condition
   */
  @Override
  public Sequence clone() {
    try {
      Sequence clone = (Sequence) super.clone();

      clone.setSupportCount(m_SupportCount);
      FastVector cloneElements = new FastVector(m_Elements.size());

      for (int i = 0; i < m_Elements.size(); i++) {
        Element helpElement = (Element) m_Elements.elementAt(i);
        cloneElements.addElement(helpElement.clone());
      }
      clone.setElements(cloneElements);

      return clone;
    } catch (CloneNotSupportedException exc) {
      // fail right here with the cause attached instead of handing out null 
      // and provoking an unrelated NullPointerException later on
      throw new IllegalStateException("Sequence could not be cloned", exc);
    }
  }

  /**
   * Deletes either the first or the last event/item of a Sequence. If the 
   * deleted event/item is the only value in the Element, it is removed, as well.
   * The operation is performed on a deep clone; this Sequence is not modified.
   * 
   * @param position 		the position of the event/item ("first" or "last")
   * @return 			a copy of the Sequence with either the first or 
   * 				the last event/item deleted; null, if position 
   * 				is neither "first" nor "last"
   */
  protected Sequence deleteEvent(String position) {
    Sequence cloneSeq = clone();
    FastVector cloneElements = cloneSeq.getElements();

    if (position.equals("first")) {
      Element element = (Element) cloneElements.firstElement();
      element.deleteEvent("first");
      if (element.isEmpty()) {
        cloneElements.removeElementAt(0);
      }
      return cloneSeq;
    }
    if (position.equals("last")) {
      Element element = (Element) cloneElements.lastElement();
      element.deleteEvent("last");
      if (element.isEmpty()) {
        // index into the vector that is actually being modified
        cloneElements.removeElementAt(cloneElements.size() - 1);
      }
      return cloneSeq;
    }
    return null;
  }

  /**
   * Checks if two Sequences are equal, i.e., if they consist of the same 
   * number of Elements and the Elements are pairwise equal. The support 
   * count is not taken into account.
   * 
   * @param obj			the object to compare with
   * @return 			true, if obj is a Sequence equal to this one, 
   * 				else false
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof Sequence)) {
      return false;
    }
    FastVector otherElements = ((Sequence) obj).getElements();

    // without this check a shorter Sequence would equal any Sequence it is a 
    // prefix of, and a longer one would run out of bounds
    if (m_Elements.size() != otherElements.size()) {
      return false;
    }

    for (int i = 0; i < m_Elements.size(); i++) {
      Element thisElement = (Element) m_Elements.elementAt(i);
      Element otherElement = (Element) otherElements.elementAt(i);
      if (!thisElement.equals(otherElement)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns a hash code that is consistent with equals.
   * 
   * @return 			the hash code
   */
  @Override
  public int hashCode() {
    // Equal Sequences always have the same number of Elements. The Elements 
    // themselves are deliberately left out, since it is not visible from 
    // here whether Element provides a hashCode matching its equals.
    return m_Elements.size();
  }

  /**
   * Returns the Elements of the Sequence. The internal vector is returned, 
   * not a copy.
   * 
   * @return 			the Elements
   */
  protected FastVector getElements() {
    return m_Elements;
  }

  /**
   * Returns the support count of the Sequence.
   * 
   * @return 			the support count
   */
  protected int getSupportCount() {
    return m_SupportCount;
  }

  /**
   * Checks if the Sequence is subsequence of a given data sequence, i.e., if 
   * the Elements of the Sequence are contained, in order, by distinct 
   * instances of the data sequence. A Sequence without Elements is regarded 
   * as subsequence of every data sequence.
   * 
   * @param dataSequence 	the data sequence to verify against
   * @return 			true, if the Sequence is subsequence of the data 
   * 				sequence, else false
   */
  protected boolean isSubsequenceOf(Instances dataSequence) {
    FastVector elements = getElements();

    // nothing to match
    if (elements.size() == 0) {
      return true;
    }

    Enumeration elementEnum = elements.elements();
    Element curElement = (Element) elementEnum.nextElement();

    for (int i = 0; i < dataSequence.numInstances(); i++) {
      if (curElement.isContainedBy(dataSequence.instance(i))) {
        if (!elementEnum.hasMoreElements()) {
          return true;
        }
        // matched, look for the next Element in the remaining instances
        curElement = (Element) elementEnum.nextElement();
      }
    }
    return false;
  }

  /**
   * Sets the Elements of the Sequence. The given vector is stored as is.
   * 
   * @param elements 		the Elements to set
   */
  protected void setElements(FastVector elements) {
    m_Elements = elements;
  }

  /**
   * Sets the support count of the Sequence.
   * 
   * @param supportCount 	the support count to set
   */
  protected void setSupportCount(int supportCount) {
    m_SupportCount = supportCount;
  }

  /**
   * Returns a String representation of a Sequence where the numeric value 
   * of each event/item is represented by its respective nominal value. The 
   * Elements are enclosed in angle brackets, followed by the support count 
   * in parentheses and a line break.
   * 
   * @param dataSet 		the corresponding data set containing the header 
   * 				information
   * @return 			the String representation
   */
  public String toNominalString(Instances dataSet) {
    StringBuilder result = new StringBuilder();

    result.append("<");

    for (int i = 0; i < m_Elements.size(); i++) {
      Element element = (Element) m_Elements.elementAt(i);
      result.append(element.toNominalString(dataSet));
    }
    result.append("> (" + getSupportCount() + ")\n");

    return result.toString();
  }

  /**
   * Returns a String representation of a Sequence, consisting of a header, 
   * the support count and the String representations of its Elements.
   * 
   * @return 			the String representation
   */
  @Override
  public String toString() {
    StringBuilder result = new StringBuilder();

    result.append("Sequence Output\n");
    result.append("------------------------------\n");
    result.append("Support Count: " + getSupportCount() + "\n");
    result.append("contained elements/itemsets:\n");

    for (int i = 0; i < m_Elements.size(); i++) {
      Element element = (Element) m_Elements.elementAt(i);
      result.append(element.toString());
    }
    result.append("\n\n");

    return result.toString();
  }

  /**
   * Returns the revision string.
   * 
   * @return		the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.2 $");
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy