All downloads are free. The search and download functionality uses the official Maven repository.

streams.weka.Dataset Maven / Gradle / Ivy

/**
 * 
 */
package streams.weka;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import stream.Data;
import stream.Keys;
import stream.io.Stream;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.ProtectedProperties;

/**
 * A set of {@link Data} items together with the Weka {@link Instances} built
 * from them.
 * <p>
 * The attribute schema is derived from the key set of the <em>first</em> item
 * only: values that are {@link Number}s become numeric attributes, the key
 * {@code @label} becomes a nominal attribute with the values {@code gamma} and
 * {@code proton}, and every other value becomes a string attribute.
 *
 * @author Christian Bockermann
 */
public class Dataset {

    static Logger log = LoggerFactory.getLogger(Dataset.class);

    /** Key of the entry that is turned into the nominal label attribute. */
    private static final String LABEL_KEY = "@label";

    /** Initial capacity passed to every {@link Instances} created by this class. */
    private static final int INITIAL_CAPACITY = 1000;

    /** The items that were converted into instances, in iteration order. */
    final List<Data> items = new ArrayList<Data>();

    /** Names of the numeric attributes and of the label attribute, in insertion order. */
    final Set<String> attributes = new LinkedHashSet<String>();

    /** The Weka view of {@link #items}. */
    Instances instances;

    /**
     * Creates a dataset from all non-annotation keys of the given items.
     *
     * @param items
     *            the items to convert; must not be {@code null}
     */
    public Dataset(Collection<Data> items) {
        this(items, null);
    }

    /**
     * Creates a dataset from the given items, optionally restricted to a set
     * of feature keys.
     * <p>
     * Keys starting with {@code @} are ignored, except for {@code @label},
     * which becomes the class attribute. If no {@code @label} key is selected
     * the resulting instances have no class attribute set. An empty collection
     * yields an empty dataset without attributes. Conversion stops at the
     * first {@code null} element of the collection.
     *
     * @param items
     *            the items to convert; must not be {@code null}
     * @param features
     *            the keys to use, or {@code null} to use every key of the
     *            first item
     */
    public Dataset(Collection<Data> items, Set<String> features) {
        log.debug("Creating dataset from {} items", items.size());

        // Declared as ArrayList (not List) because it is handed on unchanged to
        // the Instances constructor and to WekaUtils.createInstance.
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
        Attribute classAttribute = null;

        Iterator<Data> it = items.iterator();
        Data item = it.hasNext() ? it.next() : null;

        // The schema is taken from the first item only.
        if (item != null) {
            Keys keys = new Keys("*");
            for (String key : keys.select(item.keySet())) {

                if (features != null && !features.contains(key)) {
                    log.debug("skipping feature '{}'", key);
                    continue;
                }

                if (key.startsWith("@")) {
                    if (key.equals(LABEL_KEY)) {
                        List<String> vals = labelValues();
                        log.debug("Adding nominal attribute '{}' with values {}", key, vals);
                        Attribute nom = nominalAttribute(key, vals);
                        attributes.add(nom);
                        classAttribute = nom;
                        this.attributes.add(key);
                    }
                    continue;
                }

                Serializable value = item.get(key);
                if (value instanceof Number) {
                    log.debug("Adding new numeric attribute {}", key);
                    attributes.add(new Attribute(key));
                    this.attributes.add(key);
                    continue;
                }

                log.debug("Adding new string attribute {}", key);
                // NOTE(review): string attributes are not recorded in
                // this.attributes, so features() does not report them -
                // kept as in the original, confirm this is intended.
                attributes.add(stringAttribute(key));
            }
        }

        Instances result = new Instances("DataSet", attributes, INITIAL_CAPACITY);
        // Without a label there is no attribute to pass to setClass(..).
        if (classAttribute != null) {
            result.setClass(classAttribute);
        }

        // Convert every item, including the last one returned by the iterator.
        while (item != null) {
            result.add(WekaUtils.createInstance(attributes, item));
            this.items.add(item);
            item = it.hasNext() ? it.next() : null;
        }

        this.instances = result;
    }

    /**
     * @return the Weka instances built from the items of this dataset
     */
    public Instances instances() {
        return instances;
    }

    /**
     * @return the number of items that were converted into instances
     */
    public int size() {
        return items.size();
    }

    /**
     * @return an unmodifiable view of the names of the numeric attributes and
     *         the label attribute of this dataset
     */
    public Set<String> features() {
        return Collections.unmodifiableSet(this.attributes);
    }

    /**
     * Reads the given stream until {@code read()} returns {@code null} and
     * converts every item into a Weka instance.
     * <p>
     * The schema is derived from the first item using the keys selected by
     * {@code keys}. Unlike the constructor, numeric values are checked before
     * the {@code @label} key and no class attribute is set on the result. A
     * stream that delivers no item yields empty instances without attributes.
     *
     * @param stream
     *            the stream to read from
     * @param keys
     *            selects the keys of the first item that become attributes
     * @return the instances read from the stream, never {@code null}
     * @throws Exception
     *             if reading from the stream fails
     */
    public static Instances readInstances(Stream stream, Keys keys) throws Exception {
        log.info("Reading instances from {}", stream);
        ArrayList<Attribute> attributes = new ArrayList<Attribute>();

        Data item = stream.read();

        // An exhausted stream has no first item to derive a schema from.
        if (item != null) {
            for (String key : keys.select(item.keySet())) {

                Serializable value = item.get(key);
                if (value instanceof Number) {
                    log.info("Adding new numeric attribute {}", key);
                    attributes.add(new Attribute(key));
                    continue;
                }

                if (key.equals(LABEL_KEY)) {
                    List<String> vals = labelValues();
                    log.info("Adding nominal attribute '{}' with values {}", key, vals);
                    attributes.add(nominalAttribute(key, vals));
                    continue;
                }

                log.info("Adding new string attribute {}", key);
                attributes.add(stringAttribute(key));
            }
        }

        Instances instances = new Instances("DataSet[" + stream.getId() + "]", attributes, INITIAL_CAPACITY);

        while (item != null) {
            instances.add(WekaUtils.createInstance(attributes, item));
            item = stream.read();
        }

        log.info("Read {} instances.", instances.size());
        return instances;
    }

    /** Returns a fresh, mutable list of the values of the label attribute. */
    private static List<String> labelValues() {
        List<String> vals = new ArrayList<String>();
        vals.add("gamma");
        vals.add("proton");
        return vals;
    }

    /** Creates a nominal attribute with the given name and values. */
    private static Attribute nominalAttribute(String key, List<String> values) {
        return new Attribute(key, values, new ProtectedProperties(new Properties()));
    }

    /** Creates an attribute with the given name and a null value list, the form used here for string attributes. */
    private static Attribute stringAttribute(String key) {
        // The cast selects the (String, List, ProtectedProperties) constructor.
        return new Attribute(key, (List<String>) null, new ProtectedProperties(new Properties()));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy