All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.core.InstanceComparator Maven / Gradle / Ivy

/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * InstanceComparator.java
 * Copyright (C) 2005-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.Serializable;
import java.util.Collections;
import java.util.Comparator;

/**
 * A comparator for the Instance class. it can be used with or without the
 * class label. Missing values are sorted at the beginning.
* Can be used as comparator in the sorting and binary search algorithms of * Arrays and Collections. * Relational values are compared instance by instance with a nested * InstanceComparator. * * @see Instance * @author FracPete (fracpete at cs dot waikato dot ac dot nz) * @version $Revision: 14912 $ * @see java.util.Arrays * @see java.util.Collections */ public class InstanceComparator implements Comparator, Serializable, RevisionHandler { /** for serialization */ private static final long serialVersionUID = -6589278678230949683L; /** whether to include the class in the comparison */ protected boolean m_IncludeClass; /** the range of attributes to use for comparison. */ protected Range m_Range; /** * Initializes the comparator and includes the class in the comparison * and all attributes included. */ public InstanceComparator() { this(true); } /** * Initializes the comparator with all attributes included. * * @param includeClass whether to include the class in the comparison */ public InstanceComparator(boolean includeClass) { this(includeClass, "first-last", false); } /** * Initializes the comparator. * * @param includeClass whether to include the class in the comparison * @param range the attribute range string * @param invert whether to invert the matching sense of the att range */ public InstanceComparator(boolean includeClass, String range, boolean invert) { super(); m_Range = new Range(); setIncludeClass(includeClass); setRange(range); setInvert(invert); } /** * Sets whether the class should be included in the comparison. * * @param includeClass true if to include the class in the comparison */ public void setIncludeClass(boolean includeClass) { m_IncludeClass = includeClass; } /** * Returns whether the class is included in the comparison. * * @return true if the class is included */ public boolean getIncludeClass() { return m_IncludeClass; } /** * Sets the attribute range to use for comparison. 
* * @param value the attribute range */ public void setRange(String value) { m_Range.setRanges(value); } /** * Returns the attribute range to use in the comparison. * * @return the attribute range */ public String getRange() { return m_Range.getRanges(); } /** * Sets whether to invert the matching sense of the attribute range. * * @param invert true if to invert the matching sense */ public void setInvert(boolean invert) { m_Range.setInvert(invert); } /** * Returns whether the matching sense of the attribute range is inverted. * * @return true if the matching sense is inverted */ public boolean getInvert() { return m_Range.getInvert(); } /** * compares the two instances, returns -1 if o1 is smaller than o2, 0 * if equal and +1 if greater. The method assumes that both instance objects * have the same attributes, they don't have to belong to the same dataset. * * @param inst1 the first instance to compare * @param inst2 the second instance to compare * @return returns -1 if inst1 is smaller than inst2, 0 if equal and +1 * if greater */ public int compare(Instance inst1, Instance inst2) { int result; int classindex; int i; Instances data1; Instances data2; int n; InstanceComparator comp; m_Range.setUpper(inst1.numAttributes() - 1); // get class index if (inst1.classIndex() == -1) classindex = inst1.numAttributes() - 1; else classindex = inst1.classIndex(); result = 0; for (i = 0; i < inst1.numAttributes(); i++) { // in selected range? if (!m_Range.isInRange(i)) continue; // exclude class? if (!getIncludeClass() && (i == classindex)) continue; // comparing attribute values // 1. special handling if missing value (NaN) is involved: if (inst1.isMissing(i) || inst2.isMissing(i)) { if (inst1.isMissing(i) && inst2.isMissing(i)) { continue; } else { if (inst1.isMissing(i)) result = -1; else result = 1; break; } } // 2. 
regular values: else { switch (inst1.attribute(i).type()) { case Attribute.STRING: result = inst1.stringValue(i).compareTo(inst2.stringValue(i)); break; case Attribute.RELATIONAL: data1 = inst1.relationalValue(i); data2 = inst2.relationalValue(i); n = 0; comp = new InstanceComparator(); while ((n < data1.numInstances()) && (n < data2.numInstances()) && (result == 0)) { result = comp.compare(data1.instance(n), data2.instance(n)); n++; } break; default: if (inst1.value(i) == inst2.value(i)) { continue; } else { if (inst1.value(i) < inst2.value(i)) result = -1; else result = 1; break; } } } if (result != 0) break; } return result; } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 14912 $"); } /** * for testing only. takes an ARFF-filename as first argument to perform * some tests. */ public static void main(String[] args) throws Exception { Instances inst; Comparator comp; if (args.length == 0) return; // read instances inst = new Instances(new BufferedReader(new FileReader(args[0]))); inst.setClassIndex(inst.numAttributes() - 1); // compare incl. class comp = new InstanceComparator(); System.out.println("\nIncluding the class"); System.out.println("comparing 1. instance with 1.: " + comp.compare(inst.instance(0), inst.instance(0))); System.out.println("comparing 1. instance with 2.: " + comp.compare(inst.instance(0), inst.instance(1))); System.out.println("comparing 2. instance with 1.: " + comp.compare(inst.instance(1), inst.instance(0))); // compare excl. class comp = new InstanceComparator(false); System.out.println("\nExcluding the class"); System.out.println("comparing 1. instance with 1.: " + comp.compare(inst.instance(0), inst.instance(0))); System.out.println("comparing 1. instance with 2.: " + comp.compare(inst.instance(0), inst.instance(1))); System.out.println("comparing 2. 
instance with 1.: " + comp.compare(inst.instance(1), inst.instance(0))); // sort the data on all attributes Instances tmp = new Instances(inst); Collections.sort(tmp, new InstanceComparator(false)); System.out.println("\nSorted on all attributes"); System.out.println(tmp); // sort the data on 2nd attribute tmp = new Instances(inst); Collections.sort(tmp, new InstanceComparator(false, "2", false)); System.out.println("\nSorted on 2nd attribute"); System.out.println(tmp); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy