All downloads are free. The search and download functionality uses the official Maven repository.

com.github.TKnudsen.ComplexDataObject.model.tools.WekaConversion Maven / Gradle / Ivy

Go to download

A library that models real-world objects in Java, referred to as ComplexDataObjects. Other features: IO and preprocessing of ComplexDataObjects.

There is a newer version: 0.2.13
Show newest version
package com.github.TKnudsen.ComplexDataObject.model.tools;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import com.github.TKnudsen.ComplexDataObject.data.complexDataObject.ComplexDataContainer;
import com.github.TKnudsen.ComplexDataObject.data.complexDataObject.ComplexDataObject;
import com.github.TKnudsen.ComplexDataObject.data.features.AbstractFeatureVector;
import com.github.TKnudsen.ComplexDataObject.data.features.Feature;
import com.github.TKnudsen.ComplexDataObject.data.features.FeatureContainer;
import com.github.TKnudsen.ComplexDataObject.data.features.FeatureType;
import com.github.TKnudsen.ComplexDataObject.data.features.mixedData.MixedDataFeatureVector;
import com.github.TKnudsen.ComplexDataObject.data.features.numericalData.NumericalFeatureVector;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

/**
 * 

* Title: WekaConversion *

* *

* Description: helper tools that ease the use of WEKA data structures, i.e., * Instances and Instance objects. *

* *

* Copyright: Copyright (c) 2016 *

* * @author Juergen Bernard * @version 1.04 */ public class WekaConversion { public static Instances getInstances(ComplexDataContainer container) { if (container == null) return null; List attrs = new ArrayList(); Map attributeMap = new HashMap<>(); int dims = container.getAttributeNames().size(); if (!container.getAttributeNames().contains("Name")) { dims++; Attribute a = new Attribute("Name", (List) null); attrs.add(a); attributeMap.put("Name", a); } if (!container.getAttributeNames().contains("Description")) { dims++; Attribute a = new Attribute("Description", (List) null); attrs.add(a); attributeMap.put("Description", a); } for (Iterator iterator = container.getAttributeNames().iterator(); iterator.hasNext();) { String string = iterator.next(); Attribute a = null; if (container.isNumeric(string)) a = new Attribute(string); else if (container.isBoolean(string)) a = new Attribute(string); else a = new Attribute(string, (List) null); attrs.add(a); attributeMap.put(string, a); } Instances instances = new Instances("ComplexDataContainer " + container.toString(), (ArrayList) attrs, container.size()); // create instance objects for (ComplexDataObject cdo : container) { Instance instance = new DenseInstance(dims); Iterator attNames = cdo.iterator(); while (attNames.hasNext()) { String attName = attNames.next(); Attribute attribute = attributeMap.get(attName); Object value = cdo.getAttribute(attName); if (container.isNumeric(attName)) { if (value != null) instance.setValue(attribute, ((Number) value).doubleValue()); } else if (container.isBoolean(attName)) { if (value != null) { Integer i = ((Boolean) value).booleanValue() ? 
1 : 0; instance.setValue(attribute, ((Number) i).doubleValue()); } } else if (value != null) instance.setValue(attribute, value.toString()); } Attribute nameAttribute = attributeMap.get("Name"); instance.setValue(nameAttribute, cdo.getName()); Attribute descripptionAttribute = attributeMap.get("Description"); instance.setValue(descripptionAttribute, cdo.getDescription()); instances.add(instance); } return instances; } /** * * @param fvs * @return * @deprecated use */ public static >> Instances getInstances(List fvs) { int length = fvs.get(0).getDimensions(); List attrs = new ArrayList(length); for (int i = 0; i < length; i++) { Attribute a = null; if (fvs.get(0).getFeature(i).getFeatureType().equals(FeatureType.DOUBLE)) a = new Attribute(i + 1 + ""); else a = new Attribute(i + 1 + "", (List) null); attrs.add(a); } Instances data = new Instances(fvs.get(0).getClass().getName(), (ArrayList) attrs, fvs.size()); addInstances(fvs, data); return data; } /** * * @param fvs * @return * @deprecated use */ public static , FV extends AbstractFeatureVector> Instances getInstances(FeatureContainer featureContainer) { // FeatureContainer featureContainer = new // FeatureContainer<>(fvs); List attrs = new ArrayList(featureContainer.getFeatureNames().size()); for (String featureName : featureContainer.getFeatureNames()) { Attribute a = null; if (featureContainer.isNumeric(featureName)) a = new Attribute(featureName); else a = new Attribute(featureName, (List) null); attrs.add(a); } Instances instances = new Instances("asdf", (ArrayList) attrs, featureContainer.size()); addInstances(featureContainer, instances); return instances; } /** * * @param fvs * @param stringToNominal * decides whether string values are represented as nominal * values (with a concrete alphabet of observations) * @return */ public static >> Instances getInstances(List fvs, boolean stringToNominal) { // TODO System.out.println("WekaConversion.getInstances: unimplemented method. 
there is a difference between strings and nominals in WEKA which has to be considered in future"); System.exit(-1); int length = fvs.get(0).getDimensions(); List attrs = new ArrayList(length); for (int i = 0; i < length; i++) { Attribute a = null; if (fvs.get(0).getFeature(i).getFeatureType().equals(FeatureType.DOUBLE)) a = new Attribute(i + 1 + ""); else a = new Attribute(i + 1 + "", (List) null); attrs.add(a); } Instances data = new Instances(fvs.get(0).getClass().getName(), (ArrayList) attrs, fvs.size()); addInstances(fvs, data); return data; } public static >> void addInstances(List fvs, Instances data) { if (fvs == null || fvs.size() == 0) return; int dim = fvs.get(0).getVectorRepresentation().size(); for (FV fv : fvs) { int length = fv.getVectorRepresentation().size(); if (dim != length) throw new IllegalArgumentException("List of input FV has different features."); data.add(new DenseInstance(length)); Instance ins = data.get(data.size() - 1); List> vectorRepresentation = fv.getVectorRepresentation(); for (int i = 0; i < length; i++) { if (vectorRepresentation.get(i).getFeatureType() == FeatureType.DOUBLE) ins.setValue(i, (Double) vectorRepresentation.get(i).getFeatureValue()); else if (vectorRepresentation.get(i).getFeatureType() == FeatureType.STRING) { String str = vectorRepresentation.get(i).getFeatureValue().toString(); try { ins.setValue(i, str.toString()); } catch (Exception e) { ins.setValue(i, ""); } } else if (vectorRepresentation.get(i).getFeatureType() == FeatureType.BOOLEAN) { Boolean b = (Boolean) vectorRepresentation.get(i).getFeatureValue(); ins.setValue(i, b.toString()); } else { System.out.println(""); } // TODO check whether WEKA automatically maps string to nominal. 
} } } public static >> void addInstances(FeatureContainer featureContainer, Instances instances) { if (featureContainer == null || featureContainer.size() == 0) return; for (FV fv : featureContainer) { instances.add(new DenseInstance(fv.getDimensions())); Instance ins = instances.get(instances.size() - 1); for (String featureName : featureContainer.getFeatureNames()) { Feature feature = fv.getFeature(featureName); Attribute attribute = instances.attribute(featureName); if (feature.getFeatureType() == FeatureType.DOUBLE) { Number n = (Number) feature.getFeatureValue(); ins.setValue(attribute, n.doubleValue()); } else if (feature.getFeatureType() == FeatureType.STRING) { String string = feature.getFeatureValue().toString(); try { ins.setValue(attribute, string); } catch (Exception e) { ins.setValue(attribute, ""); } } else if (feature.getFeatureType() == FeatureType.BOOLEAN) { Boolean b = (Boolean) feature.getFeatureValue(); ins.setValue(attribute, b.toString()); } else { System.out.println(""); } // TODO check whether WEKA automatically maps string to nominal. 
} } } public static void addMixedInstances(List mfvs, Instances data) { for (MixedDataFeatureVector mfv : mfvs) { int length = mfv.getVectorRepresentation().size(); data.add(new DenseInstance(length)); Instance ins = data.get(data.size() - 1); for (int i = 0; i < length; i++) { if (mfv.getVectorRepresentation().get(i).getFeatureType() == FeatureType.DOUBLE) ins.setValue(i, (Double) mfv.getVectorRepresentation().get(i).getFeatureValue()); else if (mfv.getVectorRepresentation().get(i).getFeatureType() == FeatureType.STRING) { String str = (String) mfv.getVectorRepresentation().get(i).getFeatureValue(); ins.setValue(i, str); } } } } public static Instances getLabeledInstancesNumerical(List fvs, String classAttribute) { List labels = new ArrayList<>(); for (int i = 0; i < fvs.size(); i++) if (fvs.get(i).getAttribute(classAttribute) instanceof String) labels.add((String) fvs.get(i).getAttribute(classAttribute)); else labels.add(fvs.get(i).getAttribute(classAttribute).toString()); Instances insances = getInstances(fvs); return addLabelsToInstances(insances, labels); } /** * * @param fvs * @param classAttribute * @return */ public static >> Instances getLabeledInstances(List fvs, String classAttribute) { return getLabeledInstances(fvs, null, classAttribute); } /** * creates instances with weights for a given List of FVs. 
* * @param fvs * @param weights * @param classAttribute * @return */ public static >> Instances getLabeledInstances(List fvs, List weights, String classAttribute) { List labels = new ArrayList<>(); for (int i = 0; i < fvs.size(); i++) if (fvs.get(i).getAttribute(classAttribute) instanceof String) labels.add((String) fvs.get(i).getAttribute(classAttribute)); else labels.add(fvs.get(i).getAttribute(classAttribute).toString()); Instances insances = getInstances(fvs); if (weights != null && weights.size() == insances.size()) addWeightsToInstances(insances, weights); return addLabelsToInstances(insances, labels); } /** * Uses WEKAs ability to assign weights to Instances. * * @param instances * @param weights * @return */ private static Instances addWeightsToInstances(Instances instances, List weights) { if (instances == null || weights == null) return instances; if (instances.size() != weights.size()) throw new IllegalArgumentException(); for (int i = 0; i < weights.size(); i++) { double w = weights.get(i); if (Double.isNaN(w)) w = 0; instances.instance(i).setWeight(w); } return instances; } public static Instances getLabeledInstances(List fvs, List labels) { Instances insances = getInstances(fvs); return addLabelsToInstances(insances, labels); } public static Instances getNumericLabeledMixInstances(List mfvs, List numLabels) { Instances insances = getInstances(mfvs); return addNumericLabelsToInstances(insances, numLabels); } private static Instances addNumericLabelsToInstances(Instances insances, List numLabels) { Attribute classAtt = new Attribute("num"); insances.insertAttributeAt(classAtt, insances.numAttributes()); insances.setClassIndex(insances.numAttributes() - 1); for (int i = 0; i < numLabels.size(); i++) insances.instance(i).setValue(insances.numAttributes() - 1, numLabels.get(i)); return insances; } public static Instances addLabelAttributeToInstance(Instances instances, List labels) { List distinctLabels = distinctListCreator(labels); Attribute classAtt = 
new Attribute("class", distinctLabels); instances.insertAttributeAt(classAtt, instances.numAttributes()); instances.setClass(classAtt); instances.setClassIndex(instances.numAttributes() - 1); return instances; } public static Instances addNumericLabelAttributeToInstance(Instances insances) { Attribute classAtt = new Attribute("num"); insances.insertAttributeAt(classAtt, insances.numAttributes()); insances.setClass(classAtt); insances.setClassIndex(insances.numAttributes() - 1); return insances; } private static Instances addLabelsToInstances(Instances instances, List labels) { Instances inst2 = addLabelAttributeToInstance(instances, labels); for (int i = 0; i < labels.size(); i++) inst2.instance(i).setClassValue(labels.get(i)); return inst2; } /** * Improve this piece of code! * * @param list * @return */ public static List distinctListCreator(List list) { List distinctList = new ArrayList(); if (list == null) return distinctList; for (String str : list) if (!distinctList.contains(str)) distinctList.add(str); return distinctList; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy