All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.classifiers.trees.j48.TreeConverter Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2000-2009 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package weka.classifiers.trees.j48;

import java.util.ArrayList;
import java.util.List;

import marytts.cart.CART;
import marytts.cart.DecisionNode;
import marytts.cart.LeafNode;
import marytts.cart.Node;
import marytts.cart.StringPredictionTree;
import marytts.features.FeatureDefinition;
import weka.core.Instances;

/**
 * Static helpers that convert a WEKA {@link C45PruneableClassifierTree} into MARY CART structures.
 * <p>
 * The converters read the tree's members {@code m_toSelectModel}, {@code m_isLeaf}, {@code m_localModel} and
 * {@code m_sons} directly; presumably this is why the class is declared inside the WEKA package
 * {@code weka.classifiers.trees.j48} (assumption -- confirm against the WEKA member visibilities).
 * <p>
 * Only binary trees whose decision nodes test a byte-valued feature with a {@code " = "} question can be converted;
 * everything else is rejected with an exception.
 */
public class TreeConverter {

	/**
	 * Not referenced anywhere in this class, and the class does not declare {@code Serializable}. Kept unchanged.
	 */
	private static final long serialVersionUID = 1L;

	/**
	 * Converts the WEKA-style classifier tree into a MARY {@link StringPredictionTree}. The FeatureDefinition and the
	 * Instances should conform with respect to the possible attributes and values.
	 * <p>
	 * Leaves become {@code IntAndFloatArrayLeafNode}s holding every class index together with its probability; the class
	 * index is mapped back to its string value through the array of class values handed to the resulting tree.
	 *
	 * @param c45Tree
	 *            the WEKA tree to convert; its model selection must be a {@link BinC45ModelSelection}
	 * @param aFeatDef
	 *            a FeatureDefinition storing possible attributes and values as they are used within MARY.
	 * @param inst
	 *            a container for storing WEKA instances. Also describing possible attributes and values.
	 * @return a StringPredictionTree whose root is the converted tree
	 * @throws IllegalArgumentException
	 *             if the tree was not built with a binary model selection
	 * @throws IllegalStateException
	 *             if an inner node's local model is not a {@link BinC45Split}
	 * @throws RuntimeException
	 *             if an inner node's question does not start with {@code " = "} or tests a non-byte feature
	 */
	public static StringPredictionTree c45toStringPredictionTree(C45PruneableClassifierTree c45Tree, FeatureDefinition aFeatDef,
			Instances inst) {

		requireBinaryTree(c45Tree);

		// build the node structure recursively, starting at the WEKA root
		Node rootNode = toStringPredictionTreeNode(c45Tree, null, -1, aFeatDef, inst);
		rootNode.setIsRoot(true);

		// the class values provide the mapping from class index to predicted string
		return new StringPredictionTree(rootNode, aFeatDef, classValues(inst));
	}

	/**
	 * Converts the WEKA-style classifier tree into a MARY {@link CART}. The FeatureDefinition and the Instances should
	 * conform with respect to the possible attributes and values.
	 * <p>
	 * Leaves become {@code StringAndFloatLeafNode}s. Each class with a probability greater than zero is stored as the
	 * feature-definition value index of the class attribute, together with that probability.
	 *
	 * @param c45Tree
	 *            the WEKA tree to convert; its model selection must be a {@link BinC45ModelSelection}
	 * @param aFeatDef
	 *            a FeatureDefinition storing possible attributes and values as they are used within MARY.
	 * @param inst
	 *            a container for storing WEKA instances. Also describing possible attributes and values.
	 * @return a CART with StringAndFloatLeafNode leaf nodes
	 * @throws IllegalArgumentException
	 *             if the tree was not built with a binary model selection
	 * @throws IllegalStateException
	 *             if an inner node's local model is not a {@link BinC45Split}
	 * @throws RuntimeException
	 *             if an inner node's question does not start with {@code " = "} or tests a non-byte feature
	 */
	public static CART c45toStringCART(C45PruneableClassifierTree c45Tree, FeatureDefinition aFeatDef, Instances inst) {

		requireBinaryTree(c45Tree);

		// feature id is the index of the name of the class feature
		int fid = aFeatDef.getFeatureIndex(inst.classAttribute().name());

		// mapping between WEKA class indices and their string values
		String[] targetVals = classValues(inst);

		// build the node structure recursively, starting at the WEKA root
		Node rootNode = toStringTreeNode(c45Tree, null, -1, aFeatDef, inst, fid, targetVals);
		rootNode.setIsRoot(true);

		return new CART(rootNode, aFeatDef);
	}

	/**
	 * Rejects trees that were not grown with a binary model selection.
	 *
	 * @param c45Tree
	 *            the tree to check
	 * @throws IllegalArgumentException
	 *             if {@code c45Tree.m_toSelectModel} is not a {@link BinC45ModelSelection}
	 */
	private static void requireBinaryTree(C45PruneableClassifierTree c45Tree) {
		if (!(c45Tree.m_toSelectModel instanceof BinC45ModelSelection))
			throw new IllegalArgumentException("Can only convert binary trees.");
	}

	/**
	 * Collects the values of the class attribute, indexed by their WEKA class number.
	 *
	 * @param inst
	 *            the instances whose class attribute is read
	 * @return an array with one entry per possible class value
	 */
	private static String[] classValues(Instances inst) {
		// get the number of values possible to be predicted
		int numVals = inst.classAttribute().numValues();

		String[] targetVals = new String[numVals];

		// make a mapping between indices and values
		for (int symNr = 0; symNr < numVals; symNr++) {
			targetVals[symNr] = inst.classAttribute().value(symNr);
		}
		return targetVals;
	}

	/**
	 * Creates the (still childless) MARY decision node corresponding to an inner node of the WEKA tree.
	 *
	 * @param c45Tree
	 *            the inner WEKA node
	 * @param fd
	 *            the MARY feature definition
	 * @param inst
	 *            the WEKA instances used to render the split's left and right side
	 * @return a binary byte decision node asking whether the split attribute equals the split value
	 * @throws IllegalStateException
	 *             if the node's local model is not a {@link BinC45Split}
	 * @throws RuntimeException
	 *             if the question does not start with {@code " = "} or the attribute is not a byte feature
	 */
	private static DecisionNode newBinaryByteDecisionNode(C45PruneableClassifierTree c45Tree, FeatureDefinition fd,
			Instances inst) {
		// TODO: perform test if attributes are nominal/numerical... (both in instances and feature def)
		// we preliminarily assume binary split with nominal attribute

		// the left side of the condition contains the name of the attribute
		String attName = c45Tree.m_localModel.leftSide(inst);

		// the right side for branch 0 has the form " = <value>"
		String rightSide = c45Tree.m_localModel.rightSide(0, inst);

		if (!(c45Tree.m_localModel instanceof BinC45Split))
			throw new IllegalStateException("Cannot convert non-binary WEKA tree to wagon format");

		if (!rightSide.startsWith(" = "))
			throw new RuntimeException("Weka question in binary tree does not start with \" = \"");

		// strip the leading " = " to obtain the attribute value
		String attVal = rightSide.substring(3);

		// TODO: also handle other than byte features...
		if (!fd.isByteFeature(attName))
			throw new RuntimeException("Can not handle non-byte features");

		return new DecisionNode.BinaryByteDecisionNode(attName, attVal, fd);
	}

	/**
	 * Recursively converts a WEKA (sub)tree into MARY nodes for a {@link StringPredictionTree}.
	 *
	 * @param c45Tree
	 *            the WEKA (sub)tree to convert
	 * @param aMother
	 *            the already converted parent node, or null for the root; not used by this method
	 * @param aNodeIndex
	 *            the position below the parent, or -1 for the root; not used by this method
	 * @param fd
	 *            the MARY feature definition
	 * @param inst
	 *            the WEKA instances describing attributes and values
	 * @return the converted node, with all of its descendants attached
	 */
	private static Node toStringPredictionTreeNode(C45PruneableClassifierTree c45Tree, Node aMother, int aNodeIndex,
			FeatureDefinition fd, Instances inst) {

		if (c45Tree.m_isLeaf) {
			// get the distribution at this leaf
			Distribution dist = c45Tree.m_localModel.distribution();

			// create indices corresponding to the classes...
			// they simply are the number of the class, so the array
			// contains the numbering of its fields...
			int[] data = new int[dist.numClasses()];

			// the probability distribution
			float[] probs = new float[dist.numClasses()];

			for (int classNr = 0; classNr < dist.numClasses(); classNr++) {
				data[classNr] = classNr;
				probs[classNr] = (float) dist.prob(classNr);
			}

			return new LeafNode.IntAndFloatArrayLeafNode(data, probs);
		}

		DecisionNode decisionNode = newBinaryByteDecisionNode(c45Tree, fd, inst);

		// set the recursively generated child nodes
		// TODO: be aware at this point when allowing for non-binary splits
		decisionNode.addDaughter(toStringPredictionTreeNode((C45PruneableClassifierTree) c45Tree.m_sons[0], decisionNode, 0,
				fd, inst));
		decisionNode.addDaughter(toStringPredictionTreeNode((C45PruneableClassifierTree) c45Tree.m_sons[1], decisionNode, 1,
				fd, inst));
		return decisionNode;
	}

	/**
	 * Recursively converts a WEKA (sub)tree into MARY nodes for a {@link CART} with StringAndFloatLeafNode leaves.
	 *
	 * @param c45Tree
	 *            the WEKA (sub)tree to convert
	 * @param aMother
	 *            the already converted parent node, or null for the root; not used by this method
	 * @param aNodeIndex
	 *            the position below the parent, or -1 for the root; not used by this method
	 * @param fd
	 *            the MARY feature definition
	 * @param inst
	 *            the WEKA instances describing attributes and values
	 * @param fid
	 *            the feature-definition index of the class feature
	 * @param targetVals
	 *            the class values, indexed by WEKA class number
	 * @return the converted node, with all of its descendants attached
	 */
	private static Node toStringTreeNode(C45PruneableClassifierTree c45Tree, Node aMother, int aNodeIndex, FeatureDefinition fd,
			Instances inst, int fid, String[] targetVals) {

		if (c45Tree.m_isLeaf) {
			// get the distribution at this leaf
			Distribution dist = c45Tree.m_localModel.distribution();

			// typed lists: raw lists would yield Object from get(), which cannot be stored in int[] / float[]
			List<Integer> dataList = new ArrayList<Integer>();
			List<Float> probList = new ArrayList<Float>();

			// bring everything into the feature definition world
			for (int classNr = 0; classNr < dist.numClasses(); classNr++) {

				float p = (float) dist.prob(classNr);

				// classes with zero probability are left out of the leaf
				if (p > 0f) {
					// Mapping from weka class value to feature definition index
					int fdInd = fd.getFeatureValueAsShort(fid, targetVals[classNr]);
					dataList.add(fdInd);
					probList.add(p);
				}
			}

			assert (dataList.size() == probList.size());

			// convert to arrays
			int[] data = new int[dataList.size()];
			float[] probs = new float[probList.size()];

			for (int i = 0; i < data.length; i++) {
				data[i] = dataList.get(i);
				probs[i] = probList.get(i);
			}

			return new LeafNode.StringAndFloatLeafNode(data, probs);
		}

		DecisionNode decisionNode = newBinaryByteDecisionNode(c45Tree, fd, inst);

		// set the recursively generated child nodes
		// TODO: be aware at this point when allowing for non-binary splits
		decisionNode.addDaughter(toStringTreeNode((C45PruneableClassifierTree) c45Tree.m_sons[0], decisionNode, 0, fd, inst,
				fid, targetVals));
		decisionNode.addDaughter(toStringTreeNode((C45PruneableClassifierTree) c45Tree.m_sons[1], decisionNode, 1, fd, inst,
				fid, targetVals));
		return decisionNode;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy