All downloads are free. Search and download functionality uses the official Maven repository.

com.yahoo.labs.samoa.instances.SamoaToWekaInstanceConverter Maven / Gradle / Ivy

Go to download

Massive On-line Analysis (MOA) is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. It is related to the WEKA project and is also written in Java, but scales to more demanding problems.

There is a newer version: 2024.07.0
Show newest version
/*
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * 	        http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific
 * language governing permissions and limitations under the
 * License.  
 */
package com.yahoo.labs.samoa.instances;

import java.util.ArrayList;
import java.io.Serializable;

/**
 * Converts SAMOA {@link Instance} and {@link Instances} objects into their
 * {@code weka.core} counterparts.
 *
 * <p>The converter keeps the most recently built WEKA header in
 * {@link #wekaInstanceInformation} and attaches it to every instance it
 * converts, so a single converter is meant to be used with a single dataset
 * structure.
 *
 * @author abifet
 */
public class SamoaToWekaInstanceConverter implements Serializable {

    /**
     * The cached WEKA header. Built lazily by {@link #wekaInstance(Instance)}
     * and replaced on every call to {@link #wekaInstances(Instances)}.
     */
    protected weka.core.Instances wekaInstanceInformation;

    /**
     * Converts a single SAMOA instance into a WEKA instance.
     *
     * <p>An {@link InstanceImpl} backed by {@link SparseInstanceData} becomes a
     * {@code weka.core.SparseInstance}; any other instance becomes a
     * {@code weka.core.DenseInstance} built from {@code inst.toDoubleArray()}.
     * If no header has been cached yet, one is built from {@code inst.dataset()};
     * otherwise the cached header is reused without consulting the dataset of
     * {@code inst}. The class value is copied only when the instance has exactly
     * one output attribute.
     *
     * @param inst the SAMOA instance to convert
     * @return the equivalent WEKA instance, attached to the cached header
     */
    public weka.core.Instance wekaInstance(Instance inst) {
        weka.core.Instance wekaInstance;
        // Check the concrete type before casting so that other Instance
        // implementations take the dense path instead of failing with a
        // ClassCastException.
        if (inst instanceof InstanceImpl
                && ((InstanceImpl) inst).instanceData instanceof SparseInstanceData) {
            SparseInstanceData sparseData = (SparseInstanceData) ((InstanceImpl) inst).instanceData;
            wekaInstance = new weka.core.SparseInstance(inst.weight(), sparseData.getAttributeValues(),
                    sparseData.getIndexValues(), sparseData.getNumberAttributes());
        } else {
            wekaInstance = new weka.core.DenseInstance(inst.weight(), inst.toDoubleArray());
        }

        if (this.wekaInstanceInformation == null) {
            this.wekaInstanceInformation = this.wekaInstancesInformation(inst.dataset());
        }
        wekaInstance.setDataset(this.wekaInstanceInformation);
        if (inst.numOutputAttributes() == 1) {
            wekaInstance.setClassValue(inst.classValue());
        }

        return wekaInstance;
    }

    /**
     * Converts a SAMOA dataset, header and instances, into a WEKA dataset.
     *
     * <p>A fresh header is built and stored as the cached header of this
     * converter before the instances are converted, so the returned dataset and
     * {@link #wekaInstanceInformation} are the same object afterwards.
     *
     * @param instances the SAMOA dataset to convert
     * @return a WEKA dataset containing one converted instance per input instance
     */
    public weka.core.Instances wekaInstances(Instances instances) {
        weka.core.Instances wekaInstances = wekaInstancesInformation(instances);
        // We assume that we have only one WekaInstanceInformation for SamoaToWekaInstanceConverter
        this.wekaInstanceInformation = wekaInstances;
        int total = instances.numInstances();
        for (int i = 0; i < total; i++) {
            wekaInstances.add(wekaInstance(instances.instance(i)));
        }
        return wekaInstances;
    }

    /**
     * Builds an empty WEKA dataset (header only) describing the attributes of
     * the given SAMOA dataset. This method does not touch the cached header.
     *
     * @param instances the SAMOA dataset whose structure is converted
     * @return an empty WEKA dataset with the same relation name and attributes
     */
    public weka.core.Instances wekaInstancesInformation(Instances instances) {
        int numAttributes = instances.numAttributes();
        ArrayList<weka.core.Attribute> attInfo = new ArrayList<weka.core.Attribute>(numAttributes);
        for (int i = 0; i < numAttributes; i++) {
            attInfo.add(wekaAttribute(i, instances.attribute(i)));
        }

        weka.core.Instances wekaInstances =
                new weka.core.Instances(instances.getRelationName(), attInfo, 0);
        int numOutputs = instances.instanceInformation.numOutputAttributes();
        if (numOutputs == 1) {
            wekaInstances.setClassIndex(instances.classIndex());
        } else {
            // Assign a classIndex to a MultiLabel instance for compatibility reasons.
            // NOTE(review): the index is derived from the number of output
            // attributes, not from their positions; this presumably relies on the
            // outputs being the leading attributes - confirm.
            wekaInstances.setClassIndex(numOutputs - 1);
        }
        return wekaInstances;
    }

    /**
     * Converts a SAMOA attribute into a WEKA attribute.
     *
     * <p>Nominal attributes keep their list of values; every other attribute is
     * created with the name-and-index constructor of {@code weka.core.Attribute}.
     *
     * @param index the position of the attribute in the dataset
     * @param attribute the SAMOA attribute to convert
     * @return the equivalent WEKA attribute
     */
    protected weka.core.Attribute wekaAttribute(int index, Attribute attribute) {
        if (attribute.isNominal()) {
            return new weka.core.Attribute(attribute.name(), attribute.getAttributeValues(), index);
        }
        return new weka.core.Attribute(attribute.name(), index);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy