![JAR search and dependency download from the Maven repository](/logo.png)
meka.classifiers.multilabel.meta.SubsetMapper Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of meka Show documentation
Show all versions of meka Show documentation
The MEKA project provides an open source implementation of methods for multi-label classification and evaluation. It is based on the WEKA Machine Learning Toolkit. Several benchmark methods are also included, as well as the pruned sets and classifier chains methods, other methods from the scientific literature, and a wrapper to the MULAN framework.
The newest version!
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package meka.classifiers.multilabel.meta;
import meka.classifiers.multilabel.BR;
import meka.classifiers.multilabel.ProblemTransformationMethod;
import meka.core.MLUtils;
import weka.core.*;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import java.util.*;
/**
 * Maps the output of a multi-label classifier to a known label combination using the hamming distance.
 * Described in "Improved Boosting Algorithms Using Confidence-rated Predictions" by
 * Schapire, Robert E. and Singer, Yoram.
 *
 * <p>During training every label combination seen in the data is recorded together with the
 * number of times it occurs. At prediction time the base classifier's output is rounded to a
 * bit string; if that exact combination was seen in training it is returned as-is, otherwise
 * the recorded combination with the smallest hamming distance is returned (ties are resolved
 * in favour of the more frequent combination).
 *
 * @author Jesse Read
 */
public class SubsetMapper extends ProblemTransformationMethod
        implements TechnicalInformationHandler {

    /** for serialization. */
    private static final long serialVersionUID = -6587406787943635084L;

    /**
     * Label combinations observed in the training data (as bit strings), mapped to the
     * number of training instances carrying that combination.
     */
    protected HashMap<String, Integer> m_Count = new HashMap<String, Integer>();

    /**
     * Creates the mapper with {@link BR} as the base classifier.
     */
    public SubsetMapper() {
        // default classifier for GUI
        this.m_Classifier = new BR();
    }

    /**
     * Description to display in the GUI.
     *
     * @return the description
     */
    @Override
    public String globalInfo() {
        return
            "Maps the output of a multi-label classifier to a known label combination using the hamming distance. "
            + "For more information see:\n"
            + getTechnicalInformation().toString();
    }

    /**
     * Returns the class name of the default base classifier.
     *
     * @return the class name of {@code BR}
     */
    @Override
    protected String defaultClassifierString() {
        // default classifier for CLI
        return "meka.classifiers.multilabel.BR";
    }

    /**
     * Returns the bibliographic reference for the method this class is based on.
     *
     * @return the technical information about the article
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result = new TechnicalInformation(Type.ARTICLE);
        result.setValue(Field.AUTHOR, "Robert E. Schapire, Yoram Singer ");
        result.setValue(Field.TITLE, "Improved Boosting Algorithms Using Confidence-rated Predictions");
        result.setValue(Field.JOURNAL, "Machine Learning Journal");
        result.setValue(Field.YEAR, "1999");
        result.setValue(Field.VOLUME, "37");
        result.setValue(Field.NUMBER, "3");
        result.setValue(Field.PAGES, "297-336");
        return result;
    }

    /**
     * Maps a prediction vector onto a label combination recorded in {@link #m_Count}.
     *
     * <p>The vector is rounded element-wise and converted to a bit string. If that bit string
     * is a recorded combination it is returned unchanged. Otherwise all recorded combinations
     * are visited in a reproducibly shuffled order and the one with the smallest hamming
     * distance wins; among equally distant candidates the one with the highest recorded count
     * wins. If no combination has been recorded, the rounded input itself is returned.
     *
     * @param d the prediction vector, one entry per label
     * @return the selected label combination
     * @throws Exception declared for subclasses and callers; not thrown explicitly here
     */
    protected double[] nearestSubset(double d[]) throws Exception {
        String comb = MLUtils.toBitString(doubles2ints(d));

        // If combination exists
        if (m_Count.get(comb) != null) {
            return MLUtils.fromBitString(comb);
        }

        int closest_count = 0;
        int min_distance = Integer.MAX_VALUE;
        String nearest = comb;

        for (String current : shuffle(m_Count.keySet())) {
            int distance = hammingDistance(current, comb);
            if (distance < min_distance) {
                // strictly closer: new best candidate
                min_distance = distance;
                nearest = current;
                closest_count = m_Count.get(current);
            } else if (distance == min_distance) {
                // equally close: prefer the combination seen more often in training
                int count = m_Count.get(current);
                if (count > closest_count) {
                    nearest = current;
                    closest_count = count;
                }
            }
        }
        return MLUtils.fromBitString(nearest);
    }

    /**
     * Returns the given label subsets in a shuffled order. A fixed seed is used so that the
     * order (and therefore tie-breaking in {@link #nearestSubset(double[])}) is reproducible.
     *
     * @param labelSubsets the label combinations to shuffle
     * @return a new collection holding the same elements in shuffled order
     */
    private Collection<String> shuffle(Set<String> labelSubsets) {
        int seed = 1;
        List<String> result = new ArrayList<String>(labelSubsets);
        Collections.shuffle(result, new Random(seed));
        return result;
    }

    /**
     * Records how often each label combination occurs in the training data and then trains
     * the base classifier on the same data.
     *
     * @param D the training data
     * @throws Exception if the capability test or the base classifier's training fails
     */
    @Override
    public void buildClassifier(Instances D) throws Exception {
        testCapabilities(D);

        // start from a clean slate so that rebuilding does not keep combinations
        // from a previous training set
        m_Count.clear();

        for (int i = 0; i < D.numInstances(); i++) {
            String key = MLUtils.toBitString(D.instance(i), D.classIndex());
            Integer seen = m_Count.get(key);
            // tally occurrences; nearestSubset uses these counts to break distance ties
            m_Count.put(key, seen == null ? 1 : seen + 1);
        }

        m_Classifier.buildClassifier(D);
    }

    /**
     * Obtains the base classifier's prediction for the instance and maps it to the nearest
     * label combination recorded during training.
     *
     * @param TestInstance the instance to classify
     * @return the label combination selected by {@link #nearestSubset(double[])}
     * @throws Exception if the base classifier fails to produce a prediction
     */
    @Override
    public double[] distributionForInstance(Instance TestInstance) throws Exception {
        // NOTE(review): the cast requires the base classifier to be a
        // ProblemTransformationMethod; any other type fails with a ClassCastException - confirm
        // whether the cast is needed given the declared type of m_Classifier.
        double r[] = ((ProblemTransformationMethod) m_Classifier).distributionForInstance(TestInstance);
        return nearestSubset(r);
    }

    /**
     * Rounds every element of the array to the nearest integer.
     *
     * @param d the values to round
     * @return a new array of the same length holding the rounded values
     */
    private static int[] doubles2ints(double d[]) {
        int b[] = new int[d.length];
        for (int i = 0; i < d.length; i++) {
            b[i] = (int) Math.round(d[i]);
        }
        return b;
    }

    /**
     * Computes the hamming distance between two bit strings by summing the absolute
     * per-position differences. Only the first {@code min(s1.length(), s2.length())}
     * positions are compared.
     *
     * @param s1 the first bit string
     * @param s2 the second bit string
     * @return the distance over the compared positions
     */
    private static int hammingDistance(String s1, String s2) {
        int dist = 0;
        int len = Math.min(s1.length(), s2.length());
        for (int i = 0; i < len; i++) {
            dist += Math.abs(MLUtils.char2int(s1.charAt(i)) - MLUtils.char2int(s2.charAt(i)));
        }
        return dist;
    }

    /**
     * Returns the revision string.
     *
     * @return the revision extracted from the embedded revision keyword
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 9117 $");
    }

    /**
     * Runs the evaluation of this classifier with the given command-line options.
     *
     * @param args the command-line options
     */
    public static void main(String args[]) {
        ProblemTransformationMethod.evaluation(new SubsetMapper(), args);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy