
moa.classifiers.rules.multilabel.core.LearningLiteralRegression


Massive On-line Analysis (MOA) is an environment for massive data mining. MOA provides a framework for data stream mining and includes tools for evaluation and a collection of machine learning algorithms. It is related to the WEKA project and is also written in Java, but it scales to more demanding problems.

package moa.classifiers.rules.multilabel.core;

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

import moa.classifiers.MultiLabelLearner;
import moa.classifiers.rules.core.NumericRulePredicate;
import moa.classifiers.rules.core.Utils;
import moa.classifiers.rules.multilabel.attributeclassobservers.AttributeStatisticsObserver;
import moa.classifiers.rules.multilabel.attributeclassobservers.NominalStatisticsObserver;
import moa.classifiers.rules.multilabel.attributeclassobservers.NumericStatisticsObserver;
import moa.classifiers.rules.multilabel.core.splitcriteria.MultiLabelSplitCriterion;
import moa.classifiers.rules.multilabel.functions.AMRulesFunction;
import moa.classifiers.rules.multilabel.instancetransformers.InstanceOutputAttributesSelector;
import moa.classifiers.rules.multilabel.instancetransformers.InstanceTransformer;
import moa.core.AutoExpandVector;
import moa.core.DoubleVector;
import moa.core.ObjectRepository;
import moa.tasks.TaskMonitor;

import com.yahoo.labs.samoa.instances.Instance;
import com.yahoo.labs.samoa.instances.InstanceInformation;
import com.yahoo.labs.samoa.instances.InstancesHeader;
import com.yahoo.labs.samoa.instances.MultiLabelInstance;
import com.yahoo.labs.samoa.instances.Prediction;

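/**
 * Learning literal for multi-target regression rules (AMRules). It accumulates
 * per-output statistics (weight, sum, sum of squares), keeps one statistics
 * observer per input attribute, and uses the Hoeffding bound to decide when
 * the rule should expand with a new split predicate.
 */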
public class LearningLiteralRegression extends LearningLiteral {

	private static final long serialVersionUID = 1L;

	double [] varianceShift; //for proper computation of variance

	public LearningLiteralRegression() {
		super();
	}

	public LearningLiteralRegression(int [] outputsToLearn) {
		super(outputsToLearn);
	}



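	// Prediction errors are computed on z-score normalized values, so outputs with
	// different scales contribute comparably to the rule's error estimate.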
	protected double [] getNormalizedErrors(Prediction prediction, Instance instance) {
		double [] errors= new double[outputsToLearn.length];

		for (int i=0; i<outputsToLearn.length; i++){
			//normalize both the prediction and the true value before taking the difference
			double predY=normalizeOutputValue(i, prediction.getVote(i, 0));
			double trueY=normalizeOutputValue(i, instance.valueOutputAttribute(outputsToLearn[i]));
			errors[i]=Math.abs(predY-trueY);
		}
		return errors;
	}

	private double normalizeOutputValue(int outputAttributeIndex, double value) {
		//literalStatistics[i] stores {weight, sum, sum of squares} for output i
		double count = this.literalStatistics[outputAttributeIndex].getValue(0);
		double sum = this.literalStatistics[outputAttributeIndex].getValue(1);
		double squaredSum = this.literalStatistics[outputAttributeIndex].getValue(2);
		double meanY = sum / count;
		double sdY = Math.sqrt((squaredSum - sum * sum / count) / (count - 1));
		double normalizedY = 0.0;
		if (sdY > 0.0000001) {
			normalizedY = (value - meanY) / (sdY);
		}
		return normalizedY;
	}


	@Override
	protected void prepareForUseImpl(TaskMonitor monitor,
			ObjectRepository repository) {

	}

	@Override
	public boolean tryToExpand(double splitConfidence, double tieThreshold) {
		boolean shouldSplit=false;
		//find the best split per attribute and rank the results
		AttributeExpansionSuggestion[] bestSplitSuggestions	= this.getBestSplitSuggestions(splitCriterion);

		double sumMerit=0;
		meritPerInput= new double[attributesMask.length];
		for (int i=0; i<bestSplitSuggestions.length; i++){
			double merit=bestSplitSuggestions[i].getMerit();
			if(merit>0){
				meritPerInput[bestSplitSuggestions[i].predicate.getAttributeIndex()]=merit;
				sumMerit+=merit;
			}
		}

		//merit==0 means the split does not have enough examples in its smallest branch
		if(sumMerit==0)
			meritPerInput=null; //this indicates that no merit should be considered (e.g. for feature ranking)

		Arrays.sort(bestSplitSuggestions);

		//disable attributes that are not relevant
		int []oldInputs=inputsToLearn.clone();
		inputsToLearn=inputSelector.getNextInputIndices(bestSplitSuggestions);
		Arrays.sort(this.inputsToLearn);
		for (int i=0; i<oldInputs.length; i++){
			if(attributesMask[oldInputs[i]]){
				//stop tracking statistics for inputs that were deselected
				if(Arrays.binarySearch(this.inputsToLearn, oldInputs[i])<0)
					this.attributeObservers.set(oldInputs[i], null);
			}
		}

		// If fewer than two split suggestions were returned, use the single one (if it has positive merit)
		if (bestSplitSuggestions.length < 2) {
			shouldSplit = ((bestSplitSuggestions.length > 0) && (bestSplitSuggestions[0].merit > 0));
			if (shouldSplit)
				bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1];
		} // Otherwise, consider which of the splits proposed may be worth trying
		else {
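			// Hoeffding bound: with probability 1 - splitConfidence, a sample mean of n
			// observations of a variable with range R is within
			// epsilon = sqrt(R^2 * ln(1/splitConfidence) / (2n)) of the true mean; here it
			// bounds the uncertainty of the observed merit difference after weightSeen examples.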
			double hoeffdingBound = computeHoeffdingBound(splitCriterion.getRangeOfMerit(this.literalStatistics), splitConfidence, weightSeen);
			// Determine the two top-ranked splitting suggestions
			bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1];
			AttributeExpansionSuggestion secondBestSuggestion
					= bestSplitSuggestions[bestSplitSuggestions.length - 2];
			//expand if the best split clearly outperforms the runner-up, or if the bound is tighter than the tie threshold
			if ((bestSuggestion.merit - secondBestSuggestion.merit > hoeffdingBound) || (hoeffdingBound < tieThreshold)) {
				shouldSplit = true;
			}
		}
		if(shouldSplit)
		{
			//check which branch is better and update bestSuggestion (in AMRules the splits are binary)
			DoubleVector[][] resultingStatistics=bestSuggestion.getResultingNodeStatistics();
			//if the negated (right) branch is better, negate the predicate's condition
			double [] branchMerits=splitCriterion.getBranchesSplitMerits(resultingStatistics);
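			//branchMerits[0] is the merit of the branch that satisfies the predicate; branchMerits[1] that of its complement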
			DoubleVector[] newLiteralStatistics;
			if(branchMerits[1]>branchMerits[0]){
				bestSuggestion.getPredicate().negateCondition();
				newLiteralStatistics=getBranchStatistics(resultingStatistics,1);
			}else{
				newLiteralStatistics=getBranchStatistics(resultingStatistics,0);
			}
			//select the outputs the expanded rule will keep learning
			int [] newOutputs=outputSelector.getNextOutputIndices(newLiteralStatistics,literalStatistics, outputsToLearn);
			Arrays.sort(newOutputs); //must be ordered for the later index-correspondence algorithm to work


			//set other branch (only used if default rule expands)
			otherBranchLearningLiteral=new LearningLiteralRegression();
			otherBranchLearningLiteral.instanceHeader=instanceHeader;
			otherBranchLearningLiteral.learner=(MultiLabelLearner)learner.copy();
			otherBranchLearningLiteral.instanceTransformer=(InstanceTransformer)this.instanceTransformer;

			//keep a rule learning on the complement set of newOutputs


			//Set expanding branch
			//if the learner is an AMRulesFunction and the number of output attributes changes, start learning a new predictor
			//should we do the same for input attributes (attributesMask)? It would have an impact on RandomAMRules
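			//resetWithMemory restarts the function's learning while keeping its current model as the starting point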
			if(learner instanceof AMRulesFunction){ //Reset learning
				if(newOutputs.length != outputsToLearn.length){
					//other outputs
					int [] otherOutputs=Utils.complementSet(outputsToLearn,newOutputs);
					int [] indices;
					if(otherOutputs.length>0){
						otherOutputsLearningLiteral=new LearningLiteralRegression(otherOutputs);
						MultiLabelLearner otherOutputsLearner=(MultiLabelLearner)learner.copy();
						indices=Utils.getIndexCorrespondence(outputsToLearn,otherOutputs);
						((AMRulesFunction) otherOutputsLearner).selectOutputsToLearn(indices);
						((AMRulesFunction) otherOutputsLearner).resetWithMemory();
						otherOutputsLearningLiteral.learner=otherOutputsLearner;
						otherOutputsLearningLiteral.instanceHeader=instanceHeader;
						otherOutputsLearningLiteral.instanceTransformer=new InstanceOutputAttributesSelector(instanceHeader,otherOutputs);
					}
					//expanded
					indices=Utils.getIndexCorrespondence(outputsToLearn,newOutputs);
					((AMRulesFunction) learner).selectOutputsToLearn(indices);
				}

				((AMRulesFunction) learner).resetWithMemory();
			}
			//just reset learning
			else{
				//other outputs //TODO JD: test for general learners (other than AMRules functions)
				if(newOutputs.length != outputsToLearn.length){
					int [] otherOutputs=Utils.complementSet(outputsToLearn,newOutputs);
					if(otherOutputs.length>0){
						otherOutputsLearningLiteral=new LearningLiteralRegression();
						MultiLabelLearner otherOutputsLearner=(MultiLabelLearner)learner.copy();
						otherOutputsLearner.resetLearning();
						otherOutputsLearningLiteral.learner=otherOutputsLearner;
						otherOutputsLearningLiteral.instanceHeader=instanceHeader;
						otherOutputsLearningLiteral.instanceTransformer=new InstanceOutputAttributesSelector(instanceHeader,otherOutputs);
					}
				}
				//expanded
				learner.resetLearning();
			}
			expandedLearningLiteral=new LearningLiteralRegression(newOutputs);
			expandedLearningLiteral.learner=(MultiLabelLearner)this.learner.copy();	
			expandedLearningLiteral.instanceHeader=instanceHeader;
			expandedLearningLiteral.instanceTransformer=new InstanceOutputAttributesSelector(instanceHeader,newOutputs);
		}
		return shouldSplit;
	}


	private DoubleVector[] getBranchStatistics(DoubleVector[][] resultingStatistics, int indexBranch) {
		DoubleVector[] selBranchStats=new DoubleVector[resultingStatistics.length];
		for(int i=0; i<resultingStatistics.length; i++)
			selBranchStats[i]=resultingStatistics[i][indexBranch];
		return selBranchStats;
	}

	protected AttributeExpansionSuggestion[] getBestSplitSuggestions(MultiLabelSplitCriterion criterion) {
		List<AttributeExpansionSuggestion> bestSuggestions = new LinkedList<AttributeExpansionSuggestion>();
		for (int i = 0; i < this.inputsToLearn.length; i++) {
			if(attributesMask[inputsToLearn[i]]){ //should always be true (check trainOnInstance()); remove?
				AttributeStatisticsObserver obs = this.attributeObservers.get(inputsToLearn[i]);
				if (obs != null) {
					AttributeExpansionSuggestion bestSuggestion = obs.getBestEvaluatedSplitSuggestion(criterion, literalStatistics, inputsToLearn[i]);

					if (bestSuggestion == null) {
						//ALL attributes must have a best suggestion. Adding dummy suggestion with minimal merit.
						bestSuggestion=new AttributeExpansionSuggestion(new NumericRulePredicate(inputsToLearn[i],0,true),null,-Double.MAX_VALUE);
					}
					bestSuggestions.add(bestSuggestion);
				}
			}
		}
		return bestSuggestions.toArray(new AttributeExpansionSuggestion[bestSuggestions.size()]);
	}

	@Override
	public void trainOnInstance(MultiLabelInstance instance)  {
		int numInputs=0;
		if (attributesMask==null)
			numInputs=initializeAttibutesMask(instance);

		//learn for all output attributes if not specified at construction time
		int numOutputs=instance.numberOutputTargets();

		if(!hasStarted)
		{
			if(this.learner.isRandomizable())
				this.learner.setRandomSeed(this.randomGenerator.nextInt());
			if(outputsToLearn==null)
			{
				outputsToLearn=new int[numOutputs];
				for (int i=0; i<numOutputs; i++)
					outputsToLearn[i]=i;
			}
			//literalStatistics[i] accumulates {weight, sum, sum of squares} for output i
			literalStatistics= new DoubleVector[outputsToLearn.length];
			varianceShift= new double[outputsToLearn.length];
			for (int i=0; i<outputsToLearn.length; i++){
				literalStatistics[i]= new DoubleVector(new double[3]);
				//shift targets by the first observed value so variance is computed in a numerically stable way
				varianceShift[i]=instance.valueOutputAttribute(outputsToLearn[i]);
			}
			instanceHeader= new InstancesHeader(instance.dataset());
			instanceTransformer= new InstanceOutputAttributesSelector(instanceHeader, outputsToLearn);
			hasStarted=true;
		}

		//update the per-output statistics with this example
		double weight=instance.weight();
		DoubleVector[] exampleStatistics= new DoubleVector[outputsToLearn.length];
		for (int i=0; i<outputsToLearn.length; i++){
			double target=instance.valueOutputAttribute(outputsToLearn[i])-varianceShift[i];
			exampleStatistics[i]= new DoubleVector(new double[]{weight, weight*target, weight*target*target});
			literalStatistics[i].addValues(exampleStatistics[i]);
		}

		//update the per-attribute statistics observers (created lazily for each masked input)
		if (this.attributeObservers==null)
			this.attributeObservers= new AutoExpandVector<AttributeStatisticsObserver>();
		for (int i=0; i<attributesMask.length; i++){
			if (attributesMask[i]){
				AttributeStatisticsObserver obs=this.attributeObservers.get(i);
				if (obs==null){
					if (instance.inputAttribute(i).isNumeric())
						obs=(NumericStatisticsObserver)numericStatisticsObserver.copy();
					else if (instance.inputAttribute(i).isNominal())
						obs=(NominalStatisticsObserver)nominalStatisticsObserver.copy();
					this.attributeObservers.set(i, obs);
				}
				if (obs!=null)
					obs.observeAttribute(instance.valueInputAttribute(i), exampleStatistics);
			}
		}

		//train the inner predictor on the instance restricted to the outputs being learned
		learner.trainOnInstance((MultiLabelInstance)instanceTransformer.sourceInstanceToTarget(instance));
		weightSeen+=weight;
	}
}
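
For orientation, the sketch below shows how an AMRules learner built on top of classes like this one is typically driven from user code. It is a minimal, hypothetical example, not part of the file above: it assumes MOA's standard stream API, uses the single-target AMRulesRegressor variant, and reads from a placeholder ARFF file name ("data.arff"). The loop follows the usual prequential pattern: predict first, then train on each instance.

import com.yahoo.labs.samoa.instances.Instance;
import moa.classifiers.rules.AMRulesRegressor;
import moa.streams.ArffFileStream;

public class AMRulesUsageSketch {
	public static void main(String[] args) {
		//"data.arff" is a placeholder; -1 selects the last attribute as the target
		ArffFileStream stream = new ArffFileStream("data.arff", -1);
		stream.prepareForUse();

		AMRulesRegressor learner = new AMRulesRegressor();
		learner.setModelContext(stream.getHeader());
		learner.prepareForUse();

		double sumAbsError = 0;
		long count = 0;
		while (stream.hasMoreInstances()) {
			Instance inst = stream.nextInstance().getData();
			double[] votes = learner.getVotesForInstance(inst); //predict first...
			double predicted = (votes.length > 0) ? votes[0] : 0.0;
			sumAbsError += Math.abs(predicted - inst.classValue());
			count++;
			learner.trainOnInstance(inst); //...then train (prequential evaluation)
		}
		System.out.println("MAE: " + (sumAbsError / count));
	}
}

The multi-target variant of AMRules is driven analogously through MOA's multi-target interfaces, with LearningLiteralRegression doing the per-rule statistics and expansion work shown in the file above.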