All downloads are free. Search and download functionality uses the official Maven repository.

com.arosbio.data.transform.feature_selection.FeatureSelectUtils Maven / Gradle / Ivy

Go to download

Conformal AI package, including all data IO, transformations, machine learning models and predictor classes, but excluding chemistry-dependent code.

There is a newer version: 2.0.0
Show newest version
/*
 * Copyright (C) Aros Bio AB.
 *
 * CPSign is an Open Source Software that is dual licensed to allow you to choose a license that best suits your requirements:
 *
 * 1) GPLv3 (GNU General Public License Version 3) with Additional Terms, including an attribution clause as well as a limitation to use the software for commercial purposes.
 *
 * 2) CPSign Proprietary License that allows you to use CPSign for commercial activities, such as in a revenue-generating operation or environment, or integrate CPSign in your proprietary software without worrying about disclosing the source code of your proprietary software, which is required if you choose to use the software under GPLv3 license. See arosbio.com/cpsign/commercial-license for details.
 */
package com.arosbio.data.transform.feature_selection;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.commons.math3.stat.descriptive.moment.Mean;
import org.apache.commons.math3.stat.descriptive.rank.Median;

import com.arosbio.commons.CollectionUtils;

public class FeatureSelectUtils {
	
	/**
	 * Sorts and returns the smallest values, but keeping n in the list. 
	 * The returned indices are thus the once that should be removed by the {@link FeatureSelector}
	 * @param values the values for all features
	 * @param n the number to return
	 * @return the {@code n} indices with the smallest values
	 */
	public static List getSmallestKeepingN(List values, int n){
		if (n >= values.size()) {
			// Return empty list - we cannot remove any more if we should keep n
			return new ArrayList<>();
		} else if (n == 0) {
			// remove ALL
			return extractIndicesAndSort(values);
		}
		
		// Sort descending 
		Collections.sort(values, Comparator.reverseOrder());
		
		// take from n->last index (the smallest values)
		List smallest = extractIndicesAndSort(values.subList(n, values.size()));
		
		// Check that there's still no 0's in the part that should be kept
		if (values.get(n-1).value < 1e-10) {
			// there's still 0os in the N we should keep,
			smallest.addAll(getSmallerThanThreshold(values.subList(0, n), 1e-10));
			Collections.sort(smallest);
		}
		
		return smallest;
	}
	
	public static List getSmallerThanThreshold(List values, double threshold) {
		List filteredIndices = new ArrayList<>();
		for (CollectionUtils.IndexedValue iv : values) {
			if (iv.value <= threshold) {
				filteredIndices.add(iv.index);
			}
		}
		Collections.sort(filteredIndices);
		
		return filteredIndices;
	}
	
	public static List getSmallerThanMean(List values){
		Mean mean = new Mean();
		
		for (CollectionUtils.IndexedValue iv : values) {
			mean.increment(iv.value);
		}
		return getSmallerThanThreshold(values, mean.getResult());
	}
	
	public static List getSmallerThanMedian(List values){
		Median median = new Median();
		
		double[] valArray = new double[values.size()];
		for (int i=0; i extractIndicesAndSort(List values){
		List res = new ArrayList<>();
		for (CollectionUtils.IndexedValue v : values) {
			res.add(v.index);
		}
		Collections.sort(res);
		return res;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy