// Source listing taken from a Maven repository JAR search and dependency download page.
// streams.weka.Dataset (Maven / Gradle / Ivy)
/**
*
*/
package streams.weka;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import stream.Data;
import stream.Keys;
import stream.io.Stream;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.ProtectedProperties;
/**
 * A collection of {@link Data} items together with the Weka {@link Instances}
 * object that was built from them.
 * <p>
 * The attribute schema is derived from the <em>first</em> item only: numeric
 * values become numeric attributes, the key {@code @label} becomes a nominal
 * attribute with the values {@code gamma} and {@code proton}, and everything
 * else becomes a string attribute.
 *
 * @author Christian Bockermann
 *
 */
public class Dataset {

    static final Logger log = LoggerFactory.getLogger(Dataset.class);

    /** Key that is turned into the nominal class attribute. */
    private static final String LABEL_KEY = "@label";

    /** Initial capacity handed to the {@link Instances} constructor. */
    private static final int INITIAL_CAPACITY = 1000;

    /** The items that have been converted into instances, in iteration order. */
    final List<Data> items = new ArrayList<Data>();

    /** Names of the numeric attributes and of the label attribute, in insertion order. */
    final Set<String> attributes = new LinkedHashSet<String>();

    /** The Weka instances created by the constructor. */
    Instances instances;

    /**
     * Creates a dataset from all features of the given items.
     *
     * @param items the items to convert; must not be {@code null}
     */
    public Dataset(Collection<Data> items) {
        this(items, null);
    }

    /**
     * Creates a dataset from the given items, optionally restricted to a set
     * of feature names.
     * <p>
     * The schema is derived from the first item. Keys starting with {@code @}
     * are ignored, except for {@code @label}, which becomes the class
     * attribute. An empty collection results in an empty dataset without
     * attributes. Conversion stops at the first {@code null} item.
     *
     * @param items    the items to convert; must not be {@code null}
     * @param features the keys to use as attributes, or {@code null} to use
     *                 all keys of the first item
     */
    public Dataset(Collection<Data> items, Set<String> features) {
        log.debug("Creating dataset from {} items", items.size());

        // Instances requires an ArrayList, hence the concrete type.
        ArrayList<Attribute> schema = new ArrayList<Attribute>();
        Attribute classAttribute = null;

        Iterator<Data> it = items.iterator();
        Data item = it.hasNext() ? it.next() : null;

        if (item != null) {
            Keys keys = new Keys("*");
            for (String key : keys.select(item.keySet())) {
                if (features != null && !features.contains(key)) {
                    log.debug("skipping feature '{}'", key);
                    continue;
                }

                if (key.startsWith("@")) {
                    // Special keys are dropped, only the label is kept.
                    if (key.equals(LABEL_KEY)) {
                        List<String> vals = labelValues();
                        log.debug("Adding nominal attribute '{}' with values {}", key, vals);
                        Attribute nom = nominalAttribute(key, vals);
                        schema.add(nom);
                        classAttribute = nom;
                        this.attributes.add(key);
                    }
                    continue;
                }

                Serializable value = item.get(key);
                if (value instanceof Number) {
                    log.debug("Adding new numeric attribute {}", key);
                    schema.add(new Attribute(key));
                    this.attributes.add(key);
                    continue;
                }

                log.debug("Adding new string attribute {}", key);
                // NOTE(review): string attributes are part of the Weka schema but are
                // not recorded in this.attributes (and so not reported by features());
                // kept as in the original - confirm this is intended.
                schema.add(stringAttribute(key));
            }
        }

        Instances instances = new Instances("DataSet", schema, INITIAL_CAPACITY);
        // Only set a class attribute if the first item actually carried a label.
        if (classAttribute != null) {
            instances.setClass(classAttribute);
        }

        // Convert every item, including the last one; stop at the first null item.
        while (item != null) {
            Instance instance = WekaUtils.createInstance(schema, item);
            instances.add(instance);
            this.items.add(item);
            item = it.hasNext() ? it.next() : null;
        }

        this.instances = instances;
    }

    /**
     * @return the Weka instances created from the items of this dataset
     */
    public Instances instances() {
        return instances;
    }

    /**
     * @return the number of items that were converted into instances
     */
    public int size() {
        return items.size();
    }

    /**
     * @return an unmodifiable view of the recorded attribute names (numeric
     *         attributes and the label attribute)
     */
    public Set<String> features() {
        return Collections.unmodifiableSet(this.attributes);
    }

    /**
     * Reads all items from the given stream and converts them into Weka
     * instances. The schema is derived from the keys of the first item that
     * are selected by {@code keys}. Reading stops as soon as
     * {@code stream.read()} returns {@code null}; a stream that yields no
     * item at all results in an empty {@link Instances} object without
     * attributes.
     *
     * @param stream the stream to read the items from
     * @param keys   selects the keys of the first item that become attributes
     * @return the instances created from the items of the stream
     * @throws Exception if reading from the stream fails
     */
    public static Instances readInstances(Stream stream, Keys keys) throws Exception {
        log.info("Reading instances from {}", stream);

        ArrayList<Attribute> schema = new ArrayList<Attribute>();
        Data item = stream.read();

        if (item != null) {
            for (String key : keys.select(item.keySet())) {
                Serializable value = item.get(key);
                if (value instanceof Number) {
                    log.info("Adding new numeric attribute {}", key);
                    schema.add(new Attribute(key));
                    continue;
                }

                if (key.equals(LABEL_KEY)) {
                    List<String> vals = labelValues();
                    log.info("Adding nominal attribute '{}' with values {}", key, vals);
                    schema.add(nominalAttribute(key, vals));
                    continue;
                }

                log.info("Adding new string attribute {}", key);
                schema.add(stringAttribute(key));
            }
        }

        Instances instances = new Instances("DataSet[" + stream.getId() + "]", schema, INITIAL_CAPACITY);
        while (item != null) {
            Instance instance = WekaUtils.createInstance(schema, item);
            instances.add(instance);
            item = stream.read();
        }

        log.info("Read {} instances.", instances.size());
        return instances;
    }

    /** Returns a fresh list holding the two nominal values of the label attribute. */
    private static List<String> labelValues() {
        List<String> vals = new ArrayList<String>();
        vals.add("gamma");
        vals.add("proton");
        return vals;
    }

    /** Creates a nominal attribute with the given name and values. */
    private static Attribute nominalAttribute(String key, List<String> vals) {
        return new Attribute(key, vals, new ProtectedProperties(new Properties()));
    }

    /** Creates a string attribute with the given name (a null value list denotes a string attribute). */
    private static Attribute stringAttribute(String key) {
        return new Attribute(key, (List<String>) null, new ProtectedProperties(new Properties()));
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy