All downloads are free. Search and download functionality uses the official Maven repository.

org.dkpro.tc.ml.weka.writer.WekaFeatureEncoder Maven / Gradle / Ivy

/**
 * Copyright 2018
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/.
 */
package org.dkpro.tc.ml.weka.writer;

import java.util.ArrayList;
import java.util.Collection;

import org.dkpro.tc.api.exception.TextClassificationException;
import org.dkpro.tc.api.features.Feature;
import org.dkpro.tc.api.features.FeatureType;
import org.dkpro.tc.api.features.Instance;
import org.dkpro.tc.ml.weka.util.AttributeStore;

import weka.core.Attribute;
import weka.core.Utils;

/*
 * Converts the TC feature representation into the Weka representation.
 * 
 * Copyright (c) 2012, Regents of the University of Colorado 
* All rights reserved. */ public class WekaFeatureEncoder { public static AttributeStore getAttributeStore(Collection instances) throws TextClassificationException { AttributeStore attributeStore = new AttributeStore(); for (Instance instance : instances) { for (Feature feature : instance.getFeatures()) { if (!attributeStore.containsAttributeName(feature.getName())) { Attribute attribute = featureToAttribute(feature); attributeStore.addAttribute(feature.getName(), attribute); } } } return attributeStore; } public static Attribute featureToAttributeUsingFeatureDescription(String featureName, FeatureType value, String enumType) throws TextClassificationException { String name = Utils.quote(featureName); Attribute attribute; // if value is a number then create a numeric attribute if (value.equals(FeatureType.NUMERIC) || value.equals(FeatureType.BOOLEAN)) { attribute = new Attribute(name); } else if (value.equals(FeatureType.STRING)) { attribute = new Attribute(name, true); } // if value is an Enum thene create a nominal attribute else if (value.equals(FeatureType.NOMINAL)) { Class forName = null; try { forName = Class.forName(enumType); } catch (ClassNotFoundException e) { throw new TextClassificationException(e); } Object[] enumConstants = forName.getEnumConstants(); ArrayList attributeValues = new ArrayList(enumConstants.length); for (Object enumConstant : enumConstants) { attributeValues.add(enumConstant.toString()); } attribute = new Attribute(name, attributeValues); } else { attribute = new Attribute(name, (ArrayList) null); } return attribute; } public static Attribute featureToAttribute(Feature feature) throws TextClassificationException { String name = Utils.quote(feature.getName()); Object value = feature.getValue(); Attribute attribute; // if value is a number then create a numeric attribute if (value instanceof Number) { attribute = new Attribute(name); } // if value is a boolean then create a numeric attribute else if (value instanceof Boolean) { attribute 
= new Attribute(name); } // if value is an Enum thene create a nominal attribute else if (value instanceof Enum) { Object[] enumConstants = value.getClass().getEnumConstants(); ArrayList attributeValues = new ArrayList(enumConstants.length); for (Object enumConstant : enumConstants) { attributeValues.add(enumConstant.toString()); } attribute = new Attribute(name, attributeValues); } else { attribute = new Attribute(name, (ArrayList) null); } return attribute; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy