/*
 * zhao.algorithmMagic.algorithm.classificationAlgorithm.UDFDistanceClassification
 * Artifact: algorithmStar-java (Maven / Gradle / Ivy).
 * Note: a newer version of this artifact exists (1.42).
 */
package zhao.algorithmMagic.algorithm.classificationAlgorithm;

import zhao.algorithmMagic.algorithm.OperationAlgorithm;
import zhao.algorithmMagic.algorithm.OperationAlgorithmManager;
import zhao.algorithmMagic.exception.TargetNotRealizedException;
import zhao.algorithmMagic.operands.matrix.DoubleMatrix;
import zhao.algorithmMagic.operands.matrix.IntegerMatrix;
import zhao.algorithmMagic.operands.vector.DoubleVector;
import zhao.algorithmMagic.operands.vector.IntegerVector;
import zhao.algorithmMagic.utils.ASClass;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * UDFDistance分类计算组件，在其中提供了分类函数，您可以采用任意一种距离计算组件进行距离的计算。
 * 
 * UDFDistance classification calculation component, which provides classification function. You can use any distance calculation component to calculate the distance.
 *
 * @author zhao
 */
public class UDFDistanceClassification extends DistanceClassification implements SampleClassification {

    protected UDFDistanceClassification(String name) {
        super(name);
    }

    /**
     * 获取到该算法的类对象。
     * 

     * Get the class object of the algorithm.
     *
     * @param Name 该算法的名称
     * @return 算法类对象
     * @throws TargetNotRealizedException 当您传入的算法名称对应的组件不能被成功提取的时候会抛出异常
     *                                    

     *                                    An exception will be thrown when the component corresponding to the algorithm name you passed in cannot be successfully extracted
     */
    public static UDFDistanceClassification getInstance(String Name) {
        if (OperationAlgorithmManager.containsAlgorithmName(Name)) {
            OperationAlgorithm operationAlgorithm = OperationAlgorithmManager.getInstance().get(Name);
            if (operationAlgorithm instanceof UDFDistanceClassification) {
                return ASClass.transform(operationAlgorithm);
            } else {
                throw new TargetNotRealizedException("您提取的[" + Name + "]算法被找到了，但是它不属于 UDFDistanceClassification 类型，请您为这个算法重新定义一个名称。\n" +
                        "The [" + Name + "] algorithm you ParameterCombination has been found, but it does not belong to the UDFDistanceClassification type. Please redefine a name for this algorithm.");
            }
        } else {
            UDFDistanceClassification UDFDistanceClassification = new UDFDistanceClassification(Name);
            OperationAlgorithmManager.getInstance().register(UDFDistanceClassification);
            return UDFDistanceClassification;
        }
    }

    /**
     * 计算一个矩阵中所有行或列的数据类别，并将计算之后的数据类别样本返回出去。
     * 

     * Calculate the data categories of all rows or columns in a matrix, and return the calculated data category samples.
     *
     * @param data           需要被计算的特征数据组成的矩阵。
     *                       

     *                       Matrix composed of characteristic data to be calculated.
     * @param categorySample 本次类别计算的类别样本，用于区别各类数据样本，其中的key就是类别，value就是数据特征向量序列，例如：
     *                       {"person", [1,2,3,4,5]},{"insect", [3, 2, 3, 4, 5]}
     *                       

     *                       The category sample of this category calculation is used to distinguish various data samples. The key is the category, and the value is the data feature vector sequence, for example:
     *                       {"person", [1,2,3,4,5]},{"insect", [3, 2, 3, 4, 5]}
     * @return 按照指定类别进行分类的数据，例如：
     * {"person", ["tom的特征向量", "zhao的特征向量"], "insect", ["蜘蛛的特征向量", "蜗牛的特征向量"]}
     * 

     * Data classified according to the specified category, for example:
     * {"person", ["Tom's feature vector", "Zhao's feature vector"], "insert", ["spider's feature vector", "snail's feature vector"]}
     */
    @Override
    public HashMap> classification(double[][] data, Map categorySample) {
        Set stringSet = categorySample.keySet();
        HashMap> hashMap = new HashMap<>(categorySample.size() + 16);
        // 开始进行分类，迭代每一个data中的元素，并将其存储与类别样本中的数据进行比较
        for (double[] datum : data) {
            String MIN_KEY = null;
            double[] MIN_VALUE = null;
            double MIN = Double.MAX_VALUE;
            for (String key : stringSet) {
                // 获取到最小距离的key
                double[] doubles = categorySample.get(key);
                if (doubles != null) {
                    double trueDistance = distanceAlgorithm.getTrueDistance(datum, doubles);
                    if (trueDistance < MIN) {
                        MIN_KEY = key;
                        MIN_VALUE = datum;
                        MIN = trueDistance;
                    }
                }
            }
            // 将最小的序列添加到类别对应的value中
            if (MIN_KEY != null) {
                ArrayList doubleVectors = hashMap.get(MIN_KEY);
                if (doubleVectors == null) {
                    doubleVectors = new ArrayList<>();
                    doubleVectors.add(DoubleVector.parse(MIN_VALUE));
                    hashMap.put(MIN_KEY, doubleVectors);
                } else {
                    doubleVectors.add(DoubleVector.parse(MIN_VALUE));
                }
            }
        }
        return hashMap;
    }

    /**
     * 计算一个矩阵中所有行或列的数据类别，并将计算之后的数据类别样本返回出去。
     * 

     * Calculate the data categories of all rows or columns in a matrix, and return the calculated data category samples.
     *
     * @param data           需要被计算的特征数据组成的矩阵。
     *                       

     *                       Matrix composed of characteristic data to be calculated.
     * @param categorySample 本次类别计算的类别样本，用于区别各类数据样本，其中的key就是类别，value就是数据特征向量序列，例如：
     *                       {"person", [1,2,3,4,5]},{"insect", [3, 2, 3, 4, 5]}
     *                       

     *                       The category sample of this category calculation is used to distinguish various data samples. The key is the category, and the value is the data feature vector sequence, for example:
     *                       {"person", [1,2,3,4,5]},{"insect", [3, 2, 3, 4, 5]}
     * @return 按照指定类别进行分类的数据，例如：
     * {"person", ["tom的特征向量", "zhao的特征向量"], "insect", ["蜘蛛的特征向量", "蜗牛的特征向量"]}
     * 

     * Data classified according to the specified category, for example:
     * {"person", ["Tom's feature vector", "Zhao's feature vector"], "insert", ["spider's feature vector", "snail's feature vector"]}
     */
    @Override
    public HashMap> classification(int[][] data, Map categorySample) {
        Set stringSet = categorySample.keySet();
        HashMap> hashMap = new HashMap<>(categorySample.size() + 16);
        // 开始进行分类，迭代每一个data中的元素，并将其存储与类别样本中的数据进行比较
        for (int[] datum : data) {
            String MIN_KEY = null;
            int[] MIN_VALUE = null;
            double MIN = Double.MAX_VALUE;
            // 获取到最小距离的key
            for (String key : stringSet) {
                int[] doubles = categorySample.get(key);
                if (doubles != null) {
                    double trueDistance = distanceAlgorithm.getTrueDistance(datum, doubles);
                    if (trueDistance < MIN) {
                        MIN_KEY = key;
                        MIN_VALUE = datum;
                        MIN = trueDistance;
                    }
                }
            }
            // 将最小的序列添加到类别对应的value中
            if (MIN_KEY != null) {
                ArrayList doubleVectors = hashMap.get(MIN_KEY);
                if (doubleVectors == null) {
                    doubleVectors = new ArrayList<>();
                    doubleVectors.add(IntegerVector.parse(MIN_VALUE));
                    hashMap.put(MIN_KEY, doubleVectors);
                } else {
                    doubleVectors.add(IntegerVector.parse(MIN_VALUE));
                }
            }
        }
        return hashMap;
    }

    /**
     * 计算一个矩阵中所有行或列的数据类别，并将计算之后的数据类别样本返回出去。
     * 

     * Calculate the data categories of all rows or columns in a matrix, and return the calculated data category samples.
     *
     * @param data           需要被计算的特征数据组成的矩阵。
     *                       

     *                       Matrix composed of characteristic data to be calculated.
     * @param categorySample 本次类别计算的类别样本，用于区别各类数据样本，其中的key就是类别，value就是数据特征向量序列，例如：
     *                       {"person", [1,2,3,4,5]},{"insect", [3, 2, 3, 4, 5]}
     *                       

     *                       The category sample of this category calculation is used to distinguish various data samples. The key is the category, and the value is the data feature vector sequence, for example:
     *                       {"person", [1,2,3,4,5]},{"insect", [3, 2, 3, 4, 5]}
     * @return 按照指定类别进行分类的数据，例如：
     * {"person", ["tom的特征向量", "zhao的特征向量"], "insect", ["蜘蛛的特征向量", "蜗牛的特征向量"]}
     * 

     * Data classified according to the specified category, for example:
     * {"person", ["Tom's feature vector", "Zhao's feature vector"], "insert", ["spider's feature vector", "snail's feature vector"]}
     */
    @Override
    public HashMap> classification(DoubleMatrix data, Map categorySample) {
        return classification(data.toArrays(), categorySample);
    }

    /**
     * 计算一个矩阵中所有行或列的数据类别，并将计算之后的数据类别样本返回出去。
     * 

     * Calculate the data categories of all rows or columns in a matrix, and return the calculated data category samples.
     *
     * @param data           需要被计算的特征数据组成的矩阵。
     *                       

     *                       Matrix composed of characteristic data to be calculated.
     * @param categorySample 本次类别计算的类别样本，用于区别各类数据样本，其中的key就是类别，value就是数据特征向量序列，例如：
     *                       {"person", [1,2,3,4,5]},{"insect", [3, 2, 3, 4, 5]}
     *                       

     *                       The category sample of this category calculation is used to distinguish various data samples. The key is the category, and the value is the data feature vector sequence, for example:
     *                       {"person", [1,2,3,4,5]},{"insect", [3, 2, 3, 4, 5]}
     * @return 按照指定类别进行分类的数据，例如：
     * {"person", ["tom的特征向量", "zhao的特征向量"], "insect", ["蜘蛛的特征向量", "蜗牛的特征向量"]}
     * 
     * Data classified according to the specified category, for example:
     * {"person", ["Tom's feature vector", "Zhao's feature vector"], "insert", ["spider's feature vector", "snail's feature vector"]}
     */
    @Override
    public HashMap> classification(IntegerMatrix data, Map categorySample) {
        return classification(data.toArrays(), categorySample);
    }
}