![JAR search and dependency download from the Maven repository](/logo.png)
com.github.TKnudsen.ComplexDataObject.model.tools.WekaConversion Maven / Gradle / Ivy
package com.github.TKnudsen.ComplexDataObject.model.tools;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import com.github.TKnudsen.ComplexDataObject.data.complexDataObject.ComplexDataContainer;
import com.github.TKnudsen.ComplexDataObject.data.complexDataObject.ComplexDataObject;
import com.github.TKnudsen.ComplexDataObject.data.features.AbstractFeatureVector;
import com.github.TKnudsen.ComplexDataObject.data.features.Feature;
import com.github.TKnudsen.ComplexDataObject.data.features.FeatureContainer;
import com.github.TKnudsen.ComplexDataObject.data.features.FeatureType;
import com.github.TKnudsen.ComplexDataObject.data.features.FeatureVectorContainerTools;
import com.github.TKnudsen.ComplexDataObject.data.features.mixedData.MixedDataFeatureVector;
import com.github.TKnudsen.ComplexDataObject.data.features.numericalData.NumericalFeatureVector;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
/**
*
* Title: WekaConversion
*
*
*
* Description: helper tools that ease the use of WEKA data structures, i.e., Instances and Instance objects.
*
*
*
* Copyright: Copyright (c) 2016-2017
*
*
* @author Juergen Bernard
* @version 1.06
*/
public class WekaConversion {
/**
 * Converts a {@link ComplexDataContainer} into a WEKA {@link Instances} data set.
 * <p>
 * Numeric and boolean container attributes are mapped to numeric WEKA
 * attributes (booleans are stored as 1/0); every other attribute is mapped to a
 * WEKA string attribute. If the container does not declare an attribute called
 * "Name" or "Description", string attributes with these names are added in
 * front. Every instance receives the name and the description of its
 * {@link ComplexDataObject}.
 *
 * @param container
 *            the container to be converted
 * @return the WEKA data set, or <code>null</code> if the container is <code>null</code>
 */
public static Instances getInstances(ComplexDataContainer container) {
	if (container == null)
		return null;

	// the Instances constructor expects an ArrayList, hence the concrete type
	ArrayList<Attribute> attrs = new ArrayList<>();
	Map<String, Attribute> attributeMap = new HashMap<>();

	// make sure that name and description of the objects can always be stored
	for (String metaName : new String[] { "Name", "Description" }) {
		if (!container.getAttributeNames().contains(metaName)) {
			Attribute a = new Attribute(metaName, (List<String>) null);
			attrs.add(a);
			attributeMap.put(metaName, a);
		}
	}

	for (String attName : container.getAttributeNames()) {
		Attribute a;
		if (container.isNumeric(attName) || container.isBoolean(attName))
			a = new Attribute(attName); // numeric attribute
		else
			a = new Attribute(attName, (List<String>) null); // string attribute
		attrs.add(a);
		attributeMap.put(attName, a);
	}

	int dims = attrs.size();
	Instances instances = new Instances("ComplexDataContainer " + container.toString(), attrs, container.size());

	// create instance objects
	for (ComplexDataObject cdo : container) {
		// per the WEKA documentation a new DenseInstance starts with all values missing
		Instance instance = new DenseInstance(dims);

		Iterator<String> attNames = cdo.iterator();
		while (attNames.hasNext()) {
			String attName = attNames.next();
			Attribute attribute = attributeMap.get(attName);
			Object value = cdo.getAttribute(attName);

			if (value == null)
				instance.setMissing(attribute);
			else if (container.isNumeric(attName))
				instance.setValue(attribute, ((Number) value).doubleValue());
			else if (container.isBoolean(attName))
				instance.setValue(attribute, ((Boolean) value) ? 1.0 : 0.0);
			else
				instance.setValue(attribute, value.toString());
		}

		// a null String cannot be stored as a WEKA value; mark it as missing instead
		Attribute nameAttribute = attributeMap.get("Name");
		String name = cdo.getName();
		if (name != null)
			instance.setValue(nameAttribute, name);
		else
			instance.setMissing(nameAttribute);

		Attribute descriptionAttribute = attributeMap.get("Description");
		String description = cdo.getDescription();
		if (description != null)
			instance.setValue(descriptionAttribute, description);
		else
			instance.setMissing(descriptionAttribute);

		instances.add(instance);
	}

	return instances;
}
/**
*
* @param fvs
* @return
* @deprecated use
*/
public static , FV extends AbstractFeatureVector> Instances getInstances(FeatureContainer featureContainer) {
List attrs = new ArrayList(featureContainer.getFeatureNames().size());
for (String featureName : featureContainer.getFeatureNames()) {
Attribute a = null;
if (featureContainer.isNumeric(featureName))
a = new Attribute(featureName);
else
a = new Attribute(featureName, (List) null);
attrs.add(a);
}
Instances instances = new Instances("asdf", (ArrayList) attrs, featureContainer.size());
addInstances(featureContainer, instances);
return instances;
}
/**
*
* @param featureContainer
* @param stringToNominal
* @return
*/
public static , FV extends AbstractFeatureVector> Instances getInstances(FeatureContainer featureContainer, boolean stringToNominal) {
List attributes = createAttributes(FeatureVectorContainerTools.getObjectList(featureContainer), stringToNominal);
// List attrs = new
// ArrayList(featureContainer.getFeatureNames().size());
// for (String featureName : featureContainer.getFeatureNames()) {
// Attribute a = null;
// if (featureContainer.isNumeric(featureName))
// a = new Attribute(featureName);
// else
// a = new Attribute(featureName, (List) null);
// attrs.add(a);
// }
Instances instances = new Instances("asdf", (ArrayList) attributes, featureContainer.size());
addInstances(featureContainer, instances);
return instances;
}
/**
*
* @param fvs
* @param stringToNominal
* decides whether string values are represented as nominal values (with a concrete alphabet of observations)
* @return
*/
public static >> Instances getInstances(List fvs, boolean stringToNominal) {
return getInstances(fvs, stringToNominal, null);
}
public static >> Instances getInstances(List fvs, boolean stringToNominal, Map> featureAlphabet) {
if (fvs == null)
return null;
if (fvs.size() == 0)
return null;
List attrs;
if (featureAlphabet == null)
attrs = createAttributes(fvs, stringToNominal);
else
attrs = createAttributes(fvs, featureAlphabet);
Instances data = new Instances(fvs.get(0).getClass().getName(), (ArrayList) attrs, fvs.size());
addInstances(fvs, data);
return data;
}
/**
* creates a list of WEKA attributes for a given list of FVs.
*
* @param fvs
* @param stringToNominal
* @return
*/
private static >> List createAttributes(List fvs, boolean stringToNominal) {
if (fvs == null)
return null;
if (fvs.size() == 0)
return new ArrayList<>();
int length = fvs.get(0).getDimensions();
List attributes = new ArrayList(length);
for (int i = 0; i < length; i++) {
Attribute a = null;
if (fvs.get(0).getFeature(i).getFeatureType().equals(FeatureType.DOUBLE))
a = new Attribute(i + 1 + "");
else if (fvs.get(0).getFeature(i).getFeatureType().equals(FeatureType.BOOLEAN))
a = new Attribute(i + 1 + "");
else if (!stringToNominal)
a = new Attribute(i + 1 + "", (List) null);
else {
// collect alphabet
SortedSet alphabet = new TreeSet<>();
for (FV fv : fvs)
if (fv.getDimensions() == length)
if (fv.getFeature(i) != null && fv.getFeature(i).getFeatureValue() != null)
alphabet.add(fv.getFeature(i).getFeatureValue().toString());
else {
Feature feature = fv.getFeature(fvs.get(0).getFeature(i).getFeatureName());
if (feature != null && feature.getFeatureValue() != null)
alphabet.add(feature.getFeatureValue().toString());
}
a = new Attribute(i + 1 + "", new ArrayList<>(alphabet));
}
attributes.add(a);
}
return attributes;
}
private static >> List createAttributes(List fvs, Map> featureAlphabet) {
if (fvs == null)
return null;
if (fvs.size() == 0)
return new ArrayList<>();
int length = fvs.get(0).getDimensions();
List attributes = new ArrayList(length);
for (int i = 0; i < length; i++) {
Attribute a = null;
if (fvs.get(0).getFeature(i).getFeatureType().equals(FeatureType.DOUBLE))
a = new Attribute(i + 1 + "");
else if (fvs.get(0).getFeature(i).getFeatureType().equals(FeatureType.BOOLEAN))
a = new Attribute(i + 1 + "");
else if (!featureAlphabet.containsKey(fvs.get(0).getFeature(i).getFeatureName()))
a = new Attribute(i + 1 + "", (List) null);
else {
a = new Attribute(i + 1 + "", new ArrayList<>(featureAlphabet.get(fvs.get(0).getFeature(i).getFeatureName())));
}
attributes.add(a);
}
return attributes;
}
public static >> void addInstances(List fvs, Instances data) {
if (fvs == null || fvs.size() == 0)
return;
int dim = fvs.get(0).getVectorRepresentation().size();
for (FV fv : fvs) {
int length = fv.getVectorRepresentation().size();
if (dim != length)
throw new IllegalArgumentException("List of input FV has different features.");
data.add(new DenseInstance(length));
Instance ins = data.get(data.size() - 1);
fillInstanceByIndex(ins, fv, length);
}
}
public static >> void addInstances(FeatureContainer featureContainer, Instances instances) {
if (featureContainer == null || featureContainer.size() == 0)
return;
for (FV fv : featureContainer) {
instances.add(new DenseInstance(fv.getDimensions()));
Instance ins = instances.get(instances.size() - 1);
fillInstanceByIndex(ins, fv, fv.getDimensions());
}
}
/**
 * Appends one WEKA instance per mixed-data feature vector to the given data
 * set. The size of each instance is the size of the vector representation of
 * its feature vector. Does nothing if mfvs is <code>null</code>.
 *
 * @param mfvs
 *            the mixed-data feature vectors to be added
 * @param data
 *            the data set that receives the new instances
 */
public static void addMixedInstances(List<MixedDataFeatureVector> mfvs, Instances data) {
	// same tolerance for missing input as the addInstances methods
	if (mfvs == null)
		return;

	for (MixedDataFeatureVector mfv : mfvs) {
		int length = mfv.getVectorRepresentation().size();
		data.add(new DenseInstance(length));
		// per the WEKA documentation add() stores a copy, so the stored instance is fetched and filled
		Instance ins = data.get(data.size() - 1);
		fillInstanceByIndex(ins, mfv, length);
	}
}
/**
*
* @param fvs
* @param classAttribute
* attribute in the features with the class information. Note: the Weka class attribute will be 'class', though.
* @return
*/
public static Instances getLabeledInstancesNumerical(List fvs, String classAttribute) {
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy