All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.data.measure.CategoricalMeasure Maven / Gradle / Ivy

There is a newer version: 4.2.0
Show newest version
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see .
 */

package smile.data.measure;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.IntStream;

import smile.data.type.DataType;
import smile.data.type.DataTypes;

/**
 * Categorical data can be stored into groups or categories with the aid of
 * names or labels. Also known as qualitative data, each element of a
 * categorical data can be placed in only one category according to
 * its qualities, where each of the categories is mutually exclusive.
 * 

* Categorical data may be subdivided into nominal data and ordinal data. *

* Both integer and string variables can be made into categorical measure, * but a categorical measure's levels will always be string values. * * @author Haifeng Li */ public abstract class CategoricalMeasure implements Measure { /** * The valid values. */ final int[] values; /** * The levels of measurement. */ final String[] levels; /** * Map an integer to string. */ final Map value2level; /** * Map a string to an integer level. */ final Map level2value; /** * The flag if the values are of standard factor [0, 1, 2, ..., k). */ final boolean factor; /** * Constructor. * @param levels the levels of discrete values. */ public CategoricalMeasure(String... levels) { this(IntStream.range(0, levels.length).toArray(), levels); } /** * Constructor. * @param levels the levels of discrete values. */ public CategoricalMeasure(List levels) { this(levels.toArray(new String[0])); } /** * Constructor. * @param values the valid values. */ public CategoricalMeasure(int[] values) { this(values, Arrays.stream(values).mapToObj(Integer::toString).toArray(String[]::new)); } /** * Constructor. * @param values the valid values. * @param levels the levels of discrete values. */ public CategoricalMeasure(int[] values, String[] levels) { if (values.length != levels.length) { throw new IllegalArgumentException("The size of values and levels don't match"); } this.values = values; this.levels = levels; this.value2level = new HashMap<>(); this.level2value = new HashMap<>(); if (levels.length <= Byte.MAX_VALUE + 1) { for (int i = 0; i < values.length; i++) { value2level.put(values[i], levels[i]); level2value.put(levels[i], (byte) values[i]); } } else if (levels.length <= Short.MAX_VALUE + 1) { for (int i = 0; i < values.length; i++) { value2level.put(values[i], levels[i]); level2value.put(levels[i], (short) values[i]); } } else { for (int i = 0; i < values.length; i++) { value2level.put(values[i], levels[i]); level2value.put(levels[i], values[i]); } } boolean factor = true; for (int i = 0; i < values.length; i++) { if (values[i] != i) { factor = false; break; } } this.factor = factor; } /** * Returns the ordinal values of an enum. * @param clazz the Class of an enum. * @return the ordinal values of an enum. */ static int[] values(Class> clazz) { return Arrays.stream(clazz.getEnumConstants()) .mapToInt(Enum::ordinal) .toArray(); } /** * Returns the string values of an enum. * @param clazz the Class of an enum. * @return the string values of an enum. */ static String[] levels(Class> clazz) { return Arrays.stream(clazz.getEnumConstants()) .map(Object::toString) .toArray(String[]::new); } /** * Returns the number of levels. * @return the number of levels. */ public int size() { return levels.length; } /** * Returns the valid value set. * @return the valid value set. */ public int[] values() { return values; } /** * Returns the levels. * @return the levels. */ public String[] levels() { return levels; } /** * Returns the level string representation. * @param value the level value. * @return the level string representation. */ public String level(int value) { return value2level.get(value); } /** * Returns the factor value (in range [0, size)) of level. * @param value the level value. * @return the factor value. */ public int factor(int value) { if (factor) return value; for (int j = 0; j < values.length; j++) { if (values[j] == value) return j; } throw new IllegalArgumentException("Invalid level: " + value); } /** * Returns the data type that is suitable for this measure scale. * @return the data type that is suitable for this measure scale. */ public DataType type() { if (levels.length <= Byte.MAX_VALUE + 1) { return DataTypes.ByteType; } else if (levels.length <= Short.MAX_VALUE + 1) { return DataTypes.ShortType; } else { return DataTypes.IntegerType; } } /** * Returns the string value of a level. * @param value the level value. * @return the string value of a level. */ public String toString(int value) { return level(value); } @Override public String toString(Object o) { return level(((Number) o).intValue()); } @Override public Number valueOf(String s) { return level2value.get(s); } @Override public boolean equals(Object o) { if (o instanceof CategoricalMeasure measure) { return Arrays.equals(levels, measure.levels) && Arrays.equals(values, measure.values); } return false; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy