All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weka.filters.unsupervised.instance.SubsetByExpression Maven / Gradle / Ivy

Go to download

The Waikato Environment for Knowledge Analysis (WEKA), a machine learning workbench. This is the stable version. Apart from bugfixes, this version does not receive any other updates.

There is a newer version: 3.8.6
Show newest version
/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * SubsetByExpression.java
 * Copyright (C) 2008-2014 University of Waikato, Hamilton, New Zealand
 */

package weka.filters.unsupervised.instance;

import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;

import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.expressionlanguage.common.IfElseMacro;
import weka.core.expressionlanguage.common.JavaMacro;
import weka.core.expressionlanguage.common.MacroDeclarationsCompositor;
import weka.core.expressionlanguage.common.MathFunctions;
import weka.core.expressionlanguage.common.Primitives.BooleanExpression;
import weka.core.expressionlanguage.core.Node;
import weka.core.expressionlanguage.parser.Parser;
import weka.core.expressionlanguage.weka.InstancesHelper;
import weka.filters.SimpleBatchFilter;

/**
 * Filters instances according to a user-specified expression.
 * <p>
 * Examples:
 * <ul>
 * <li>extracting only mammals and birds from the 'zoo' UCI dataset:
 * {@code (CLASS is 'mammal') or (CLASS is 'bird')}</li>
 * <li>extracting only animals with at least 2 legs from the 'zoo' UCI
 * dataset: {@code (ATT14 >= 2)}</li>
 * <li>extracting only instances with non-missing 'wage-increase-second-year'
 * from the 'labor' UCI dataset: {@code not ismissing(ATT3)}</li>
 * </ul>
 * <p>
 * Valid options are:
 *
 * <pre>
 * -E &lt;expr&gt;
 *  The expression to use for filtering
 *  (default: true).
 * </pre>
 *
 * <pre>
 * -F
 *  Apply the filter to instances that arrive after the first
 *  (training) batch. The default is to not apply the filter (i.e.
 *  always return the instance)
 * </pre>
 *
 * <pre>
 * -output-debug-info
 *  If set, filter is run in debug mode and
 *  may output additional info to the console
 * </pre>
 *
 * <pre>
 * -do-not-check-capabilities
 *  If set, filter capabilities are not checked when input format is set
 *  (use with caution).
 * </pre>
 *
 * @author fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 12037 $
 */
public class SubsetByExpression extends SimpleBatchFilter {

  /** for serialization. */
  private static final long serialVersionUID = 5628686110979589602L;

  /** the expression to use for filtering. */
  protected String m_Expression = "true";

  /** Whether to filter instances after the first batch has been processed */
  protected boolean m_filterAfterFirstBatch = false;

  /**
   * Returns a string describing this filter.
   *
   * @return a description of the filter suitable for displaying in the
   *         explorer/experimenter gui
   */
  @Override
  public String globalInfo() {
    return "Filters instances according to a user-specified expression.\n\n"
      + "\n"
      + "Examples:\n"
      + "- extracting only mammals and birds from the 'zoo' UCI dataset:\n"
      + " (CLASS is 'mammal') or (CLASS is 'bird')\n"
      + "- extracting only animals with at least 2 legs from the 'zoo' UCI dataset:\n"
      + " (ATT14 >= 2)\n"
      + "- extracting only instances with non-missing 'wage-increase-second-year'\n"
      + " from the 'labor' UCI dataset:\n"
      + " not ismissing(ATT3)\n";
  }

  /**
   * SubsetByExpression may return false from input() (thus not making an
   * instance available immediately) even after the first batch has been
   * completed if the user has opted to apply the filter to instances after the
   * first batch (rather than just passing them through).
   *
   * @return always true: this filter may remove (consume) input instances
   *         after the first batch has been completed.
   */
  @Override
  public boolean mayRemoveInstanceAfterFirstBatchDone() {
    return true;
  }

  /**
   * Input an instance for filtering. Filter requires all training instances be
   * read before producing output (calling the method batchFinished() makes the
   * data available). If this instance is part of a new batch, the output queue
   * is reset and m_NewBatch is set to false.
   * <p>
   * Once the first batch is done, every incoming instance is buffered, run
   * through process(), and whatever process() returns is pushed onto the
   * output queue before the input buffer is flushed.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be collected with output(),
   *         i.e. if processing yielded at least one instance; always false
   *         while the first batch is still being collected.
   * @throws IllegalStateException if no input structure has been defined
   * @throws Exception if something goes wrong
   * @see #batchFinished()
   */
  @Override
  public boolean input(Instance instance) throws Exception {
    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }

    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    bufferInput(instance);

    int numReturnedFromParser = 0;
    if (isFirstBatchDone()) {
      // NOTE(review): presumably the input format object also carries the
      // instance buffered above, so the copy handed to process() contains
      // it -- confirm against Filter.bufferInput().
      Instances inst = new Instances(getInputFormat());
      inst = process(inst);
      numReturnedFromParser = inst.numInstances();
      for (int i = 0; i < inst.numInstances(); i++) {
        // No need to copy instance
        push(inst.instance(i), false);
      }
      flushInput();
    }

    return (numReturnedFromParser > 0);
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  @Override
  public Enumeration




© 2015 - 2024 Weber Informatics LLC | Privacy Policy