All Downloads are FREE. Search and download functionalities are using the official Maven repository.

moa.clusterers.clustree.Entry Maven / Gradle / Ivy

Go to download

Massive On-line Analysis (MOA) is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. It is related to the WEKA project and is also written in Java, while scaling to more demanding problems.

There is a newer version: 2024.07.0
Show newest version
/*
 *    Entry.java
 *    Copyright (C) 2010 RWTH Aachen University, Germany
 *    @author Sanchez Villaamil ([email protected])
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *    
 *    
 */
package moa.clusterers.clustree;

import java.io.Serializable;

/**
 * One slot of a ClusTree {@link Node}. An entry holds a data cluster, a
 * buffer cluster, an optional reference to a child node, back references to
 * its parent entry and to the node that contains it, and the timestamp of its
 * last change.
 *
 * <p>Note for maintainers: the class is {@link Serializable} but declares no
 * {@code serialVersionUID}, so the serialized form depends on the
 * JVM-computed default. Adding or changing non-private members changes that
 * default.
 *
 * @see ClusKernel
 * @see Node
 */
public class Entry implements Serializable {

    /**
     * The actual entry data.
     */
    public ClusKernel data;
    /**
     * The buffer of this entry. It can also be seen as the buffer of the child
     * node; it is kept here just to simplify the insertion recursion.
     */
    private ClusKernel buffer;
    /**
     * A reference to the next node in the tree. {@code null} if we are at a
     * leaf, or if this entry is part of a lying Node.
     */
    private Node child;
    /**
     * A reference to the Entry's parent Entry.
     */
    private Entry parentEntry;
    /**
     * A reference to the Node that contains this Entry.
     */
    private Node node;
    /**
     * Last time this entry was changed.
     */
    private long timestamp;
    /**
     * The timestamp to be used when no operation has yet been done on this
     * entry.
     * @see #timestamp
     */
    private static final long DEFAULT_TIMESTAMP = 0;

    /**
     * Creates an empty entry. The timestamp is set to the default value
     * (zero), since there is no reason to know when an empty entry was
     * generated. The child is {@code null}; parent entry and containing node
     * are left unset.
     * @param numberDimensions The dimensionality of the data points in the
     * tree where this entry is used.
     */
    public Entry(int numberDimensions) {
        this.data = new ClusKernel(numberDimensions);
        this.buffer = new ClusKernel(numberDimensions);
        this.child = null;
        this.timestamp = Entry.DEFAULT_TIMESTAMP;
    }

    /**
     * Creates an Entry that points to the given node. The data of the new
     * entry is the sum of the data of the node's entries, read in order up to
     * (but excluding) the first empty one.
     *
     * <p>Side effect: every visited entry of {@code node}, including the first
     * empty one at which the scan stops, gets this new entry as its parent
     * entry. Entries after the first empty one are not touched.
     *
     * @param numberDimensions The dimensionality of the node.
     * @param node The node to which the new Entry should point. Must not be
     * {@code null}.
     * @param currentTime The timestamp for the moment where this Entry was
     * generated.
     * @param parentEntry The parent entry of the new Entry.
     * @param containerNode The node that contains the new Entry.
     * @see Node
     * @see #data
     */
    protected Entry(int numberDimensions,
            Node node, long currentTime, Entry parentEntry, Node containerNode) {
        this(numberDimensions);
        this.child = node;
        this.parentEntry = parentEntry;
        this.node = containerNode;
        for (Entry entry : node.getEntries()) {
            // The parent link is set before the emptiness check on purpose:
            // the first empty entry is re-parented too, then the scan stops.
            entry.setParentEntry(this);
            if (entry.isEmpty()) {
                break;
            }

            this.add(entry);
        }

        this.timestamp = currentTime;
    }

    /**
     * Creates an Entry with an empty buffer and the data given by the
     * cluster.
     * @param numberDimensions The dimensionality of the information in the
     * cluster.
     * @param cluster The cluster from which the information is to be extracted.
     * @param currentTime The timestamp for the moment where this Entry was
     * generated.
     * @see ClusKernel
     * @see #data
     */
    public Entry(int numberDimensions, ClusKernel cluster, long currentTime) {
        this(numberDimensions);
        this.data.add(cluster);
        this.timestamp = currentTime;
    }

    /**
     * Extended variant of {@link #Entry(int, ClusKernel, long)} that also
     * records the parent entry and the containing node.
     * @param numberDimensions The dimensionality of the information in the
     * cluster.
     * @param cluster The cluster from which the information is to be extracted.
     * @param currentTime The timestamp for the moment where this Entry was
     * generated.
     * @param parentEntry The parent entry of the new Entry.
     * @param containerNode The node that contains the new Entry.
     */
    protected Entry(int numberDimensions, ClusKernel cluster, long currentTime, Entry parentEntry, Node containerNode) {
        this(numberDimensions);
        this.parentEntry = parentEntry;
        this.data.add(cluster);
        this.node = containerNode;
        this.timestamp = currentTime;
    }

    /**
     * Copy constructor. The data and buffer clusters are copied through the
     * {@link ClusKernel} copy constructor; the parent entry, containing node
     * and child are shared by reference.
     *
     * <p>Side effect: if {@code other} has a child, the entries of that child
     * are re-parented to the new copy, so they no longer point back to
     * {@code other}.
     *
     * @param other The entry to copy.
     */
    protected Entry(Entry other) {
        this.parentEntry = other.parentEntry;
        this.node = other.node;
        this.buffer = new ClusKernel(other.buffer);
        this.data = new ClusKernel(other.data);
        this.timestamp = other.timestamp;
        this.child = other.child;
        adoptChildren(other.getChild());
    }

    /**
     * Makes this entry the parent entry of every entry in the given node.
     * Does nothing when the node is {@code null}.
     * @param childNode The node whose entries should point back to this entry.
     */
    private void adoptChildren(Node childNode) {
        if (childNode == null) {
            return;
        }
        for (Entry e : childNode.getEntries()) {
            e.setParentEntry(this);
        }
    }

    /**
     * Getter for the node that contains this entry.
     * @return The containing node.
     */
    public Node getNode() {
        return node;
    }

    /**
     * Setter for the node that contains this entry.
     * @param node The containing node.
     */
    public void setNode(Node node) {
        this.node = node;
    }

    /**
     * Clear the Entry. All points in the buffer and in the data cluster are
     * lost, the connection to the child is lost and the timestamp is set to
     * the default value. The parent entry and the containing node are kept.
     */
    protected void clear() {
        this.data.clear();
        this.buffer.clear();
        this.child = null;
        this.timestamp = Entry.DEFAULT_TIMESTAMP;
    }

    /**
     * Clear the data and the buffer cluster in this entry. This function does
     * not clear the child of this Entry and does not touch the timestamp.
     * @see #data
     * @see #buffer
     * @see ClusKernel
     */
    protected void shallowClear() {
        this.buffer.clear();
        this.data.clear();
    }

    /**
     * Calculates the distance from the data in this entry to a cluster.
     * @param cluster The cluster to which the distance is to be calculated.
     * @return The distance between the data cluster of this Entry and the
     * given cluster, as computed by {@link ClusKernel}.
     * @see ClusKernel
     * @see #data
     */
    protected double calcDistance(ClusKernel cluster) {
        return data.calcDistance(cluster);
    }

    /**
     * Calculates the distance from the data in this entry to the data in the
     * given entry.
     * @param other The Entry to which the distance is to be calculated.
     * @return The distance between the data cluster of this Entry and the
     * data cluster of the other Entry.
     * @see ClusKernel
     * @see #data
     */
    public double calcDistance(Entry other) {
        return this.getData().calcDistance(other.getData());
    }

    /**
     * When this entry is empty, give it its first values. It makes sense to
     * have this operation separated from the aggregation, because the
     * aggregation first weights the values in data, which makes no sense in
     * an empty entry.
     *
     * <p>Besides the data, this entry takes over the child of {@code other};
     * the entries of that child (if any) are re-parented to this entry.
     *
     * @param other The entry with the information to be used to initialize
     * this entry. Its buffer is expected to be empty.
     * @param currentTime The time at which this is happening.
     */
    protected void initializeEntry(Entry other, long currentTime) {
        assert (this.isEmpty());
        assert (other.getBuffer().isEmpty());
        this.data.add(other.data);
        this.timestamp = currentTime;
        this.child = other.child;
        adoptChildren(this.child);
    }

    /**
     * Add the data cluster of another entry to the data cluster of this entry.
     * By using this function the timestamp does not get updated, nor does this
     * entry get older.
     * @param other The entry of which the data cluster should be added to
     * the local data cluster.
     * @see #data
     * @see ClusKernel
     */
    public void add(Entry other) {
        this.data.add(other.data);
    }

    /**
     * Aggregate the data cluster of the other Entry into the data cluster of
     * this entry. The time elapsed since this entry's timestamp is handed to
     * the kernel's aggregation, and the timestamp is then set to
     * {@code currentTime}.
     * @param other The Entry to be aggregated.
     * @param currentTime The time at which the aggregation occurs.
     * @param negLambda A parameter needed to weight the current state of the
     * data cluster.
     * @see #data
     * @see ClusKernel
     */
    protected void aggregateEntry(Entry other, long currentTime,
            double negLambda) {
        this.data.aggregate(other.data, currentTime - this.timestamp,
                negLambda);
        this.timestamp = currentTime;
    }

    /**
     * Aggregate the given cluster into the data cluster of this entry. The
     * time elapsed since this entry's timestamp is handed to the kernel's
     * aggregation, and the timestamp is then set to {@code currentTime}.
     * @param otherData The cluster to be aggregated.
     * @param currentTime The time at which the aggregation occurs.
     * @param negLambda A parameter needed to weight the current state of the
     * data cluster.
     * @see #data
     * @see ClusKernel
     */
    protected void aggregateCluster(ClusKernel otherData, long currentTime,
            double negLambda) {
        this.getData().aggregate(otherData, currentTime - this.timestamp,
                negLambda);
        this.timestamp = currentTime;
    }

    /**
     * Aggregate the given cluster into the buffer cluster of this entry. The
     * timestamp is set to {@code currentTime} afterwards.
     * @param pointToInsert The cluster to aggregate to the buffer.
     * @param currentTime The time at which the aggregation occurs.
     * @param negLambda A parameter needed to weight the current state of the
     * buffer.
     */
    protected void aggregateToBuffer(ClusKernel pointToInsert, long currentTime,
            double negLambda) {
        ClusKernel currentBuffer = this.getBuffer();
        currentBuffer.aggregate(pointToInsert, currentTime - this.timestamp,
                negLambda);
        this.timestamp = currentTime;
    }

    /**
     * Merge this entry with the given Entry. This adds the data cluster of
     * the given Entry to the data cluster of this entry and sets the
     * timestamp to the newest one of the two entries.
     * @param other The entry from which the data cluster is added.
     * @see ClusKernel
     */
    protected void mergeWith(Entry other) {
        // We should only merge entries in leaves, and leaves should have empty
        // buffers.
        assert (this.child == null);
        assert (other.child == null);
        assert (other.buffer.isEmpty());

        this.data.add(other.data);
        if (this.timestamp < other.timestamp) {
            this.timestamp = other.timestamp;
        }
    }

    /**
     * Getter for the buffer. It is the real object, that means side effects are
     * possible!
     * @return A reference to the buffer in this entry.
     */
    protected ClusKernel getBuffer() {
        return buffer;
    }

    /**
     * Return the reference to the child of this Entry to navigate in the
     * tree.
     * @return A reference to the child of this Entry, possibly {@code null}.
     */
    public Node getChild() {
        return child;
    }

    /**
     * Getter for the data. It is the real object, that means side effects are
     * possible!
     * @return A reference to the data cluster in this entry.
     * @see ClusKernel
     */
    protected ClusKernel getData() {
        return data;
    }

    /**
     * Getter for the parent entry.
     * @return The parent entry of this Entry.
     */
    public Entry getParentEntry() {
        return parentEntry;
    }

    /**
     * Setter for the parent entry.
     * @param parent The new parent entry of this Entry.
     */
    public void setParentEntry(Entry parent) {
        this.parentEntry = parent;
    }

    /**
     * Setter for the child in this entry. Use to build the tree. The parent
     * links of the child's entries are not updated by this method.
     * @param child The Node that should be a child of this Entry.
     * @see Node
     */
    public void setChild(Node child) {
        this.child = child;
    }

    /**
     * Return the current timestamp.
     * @return The current timestamp.
     */
    public long getTimestamp() {
        return timestamp;
    }

    /**
     * Age the buffer up to {@code currentTime}, clear it and return a copy of
     * its aged content. No side effects on the returned object are possible
     * (given that the copy constructor of {@link ClusKernel} makes a deep
     * copy). The timestamp of this entry is not modified.
     * @param currentTime The time up to which the buffer is aged.
     * @param negLambda The aging constant.
     * @return A copy of the aged buffer.
     */
    protected ClusKernel emptyBuffer(long currentTime, double negLambda) {
        this.buffer.makeOlder(currentTime - this.timestamp, negLambda);
        ClusKernel bufferCopy = new ClusKernel(this.buffer);
        this.buffer.clear();
        return bufferCopy;
    }

    /**
     * Check if this Entry is empty or not. An Entry is empty if the data
     * cluster is empty, since then the buffer has to be empty.
     * @return {@code true} if the data cluster is empty, {@code false}
     * otherwise.
     */
    protected boolean isEmpty() {
        // Assert that if the data cluster is empty, the buffer cluster is
        // empty too.
        assert (!this.data.isEmpty() || this.buffer.isEmpty());

        return this.data.isEmpty();
    }

    /**
     * Overwrites the data cluster of this Entry with the data cluster of the
     * given Entry via {@code ClusKernel.overwriteOldCluster}. According to the
     * original documentation, that call overwrites LS, SS and weightedN but
     * adds N and classCount. This function is useful when the weight of an
     * entry becomes too small and we want to forget the information of the
     * old points.
     *
     * <p>In addition, {@code newEntry} gets the parent entry of this entry,
     * the entries of {@code newEntry}'s child (if any) are re-parented to this
     * entry, and this entry takes over {@code newEntry}'s child reference
     * (which may be {@code null}).
     *
     * @param newEntry The entry that should overwrite the information. Both
     * buffers are expected to be empty.
     */
    protected void overwriteOldEntry(Entry newEntry) {
        assert (this.getBuffer().isEmpty());
        assert (newEntry.getBuffer().isEmpty());
        this.data.overwriteOldCluster(newEntry.data);
        newEntry.setParentEntry(this.parentEntry);
        adoptChildren(newEntry.getChild());
        this.child = newEntry.child;
    }

    /**
     * Reads every entry in the child node and recalculates the data cluster
     * of this entry as the sum of their data clusters. In that case the
     * timestamp is not changed.
     *
     * <p>If this entry has no child, the whole entry is cleared instead (see
     * {@link #clear()}), which also resets the timestamp to the default
     * value.
     *
     * @see #data
     * @see ClusKernel
     */
    protected void recalculateData() {
        Node currentChild = this.getChild();
        if (currentChild != null) {
            ClusKernel currentData = this.getData();
            currentData.clear();
            for (Entry childEntry : currentChild.getEntries()) {
                currentData.add(childEntry.getData());
            }
        } else {
            this.clear();
        }
    }

    /**
     * Returns true if this entry is irrelevant with respect to the given
     * threshold. This is done by comparing the weight of the data cluster of
     * this Entry to the threshold; if it is strictly smaller, the entry is
     * deemed to be irrelevant.
     * @param threshold The threshold under which entries at leaves can be
     * erased.
     * @return True if this entry is deemed irrelevant, false otherwise.
     */
    protected boolean isIrrelevant(double threshold) {
        return this.getData().getWeight() < threshold;
    }

    /**
     * Ages this entry's data AND buffer according to the given time and aging
     * constant, then sets the timestamp to {@code currentTime}.
     * @param currentTime   the current time
     * @param negLambda     the aging constant
     */
    protected void makeOlder(long currentTime, double negLambda) {
        // No ordering between currentTime and the stored timestamp is
        // enforced here; the difference is passed to the kernels as is.
        long diff = currentTime - this.timestamp;
        this.buffer.makeOlder(diff, negLambda);
        this.data.makeOlder(diff, negLambda);
        this.timestamp = currentTime;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy