/*
 * Source listing of zhao.algorithmMagic.algorithm.classificationAlgorithm.KnnClassification
 * from the algorithmStar-java artifact (published for Maven / Gradle / Ivy).
 * Note: a newer version of this artifact is available (1.42).
 */
package zhao.algorithmMagic.algorithm.classificationAlgorithm;

import zhao.algorithmMagic.algorithm.OperationAlgorithm;
import zhao.algorithmMagic.algorithm.OperationAlgorithmManager;
import zhao.algorithmMagic.exception.OperatorOperationException;
import zhao.algorithmMagic.exception.TargetNotRealizedException;
import zhao.algorithmMagic.operands.matrix.ColumnDoubleMatrix;
import zhao.algorithmMagic.operands.matrix.ColumnIntegerMatrix;
import zhao.algorithmMagic.operands.matrix.DoubleMatrix;
import zhao.algorithmMagic.operands.matrix.IntegerMatrix;
import zhao.algorithmMagic.operands.vector.ColumnDoubleVector;
import zhao.algorithmMagic.operands.vector.ColumnIntegerVector;
import zhao.algorithmMagic.operands.vector.DoubleVector;
import zhao.algorithmMagic.operands.vector.IntegerVector;
import zhao.algorithmMagic.utils.ASClass;

import java.util.ArrayList;
import java.util.HashMap;

/**
 * KNN 近邻算法，通过与自己指定的数据样本进行距离计算而决定处自己的类别！在这里的k 代表的是在数据样本组中的索引最近距离，例如 k=7 代表当前未知类型的数据样本与 前后各7个数据样本进行度量计算，并推断类型。
 * 
 * KNN nearest neighbor algorithm determines its own category by calculating the distance from the data sample you specify! Here, k represents the nearest index distance in the data sample group. For example, k=7 represents the current unknown type of data sample and 7 data samples before and after the measurement calculation, and infers the type.
 *
 * @author zhao
 */
public class KnnClassification extends DistanceClassification implements NoSampleClassification {

    private final static String UnknownCategory = "?";
    protected int k = 5;

    protected KnnClassification(String name) {
        super(name);
    }

    /**
     * 获取到该算法的类对象。
     * 

     * Get the class object of the algorithm.
     *
     * @param Name 该算法的名称
     * @return 算法类对象
     * @throws TargetNotRealizedException 当您传入的算法名称对应的组件不能被成功提取的时候会抛出异常
     *                                    

     *                                    An exception will be thrown when the component corresponding to the algorithm name you passed in cannot be successfully extracted
     */
    public static KnnClassification getInstance(String Name) {
        if (OperationAlgorithmManager.containsAlgorithmName(Name)) {
            OperationAlgorithm operationAlgorithm = OperationAlgorithmManager.getInstance().get(Name);
            if (operationAlgorithm instanceof KnnClassification) {
                return ASClass.transform(operationAlgorithm);
            } else {
                throw new TargetNotRealizedException("您提取的[" + Name + "]算法被找到了，但是它不属于 KnnClassification 类型，请您为这个算法重新定义一个名称。\n" +
                        "The [" + Name + "] algorithm you ParameterCombination has been found, but it does not belong to the KnnClassification type. Please redefine a name for this algorithm.");
            }
        } else {
            KnnClassification KnnClassification = new KnnClassification(Name);
            OperationAlgorithmManager.getInstance().register(KnnClassification);
            return KnnClassification;
        }
    }

    /**
     * @return KNN 算法的搜索范围参数。该参数代表了待分类样本中的搜索范围，范围越大，结果数值越精确
     * 

     * The search scope parameter of the algorithm. This parameter represents the search range in the samples to be classified. The larger the range, the more accurate the result value
     */
    public int k() {
        return k;
    }

    /**
     * @param k 设置的新的算法搜索参数。如果不设置，默认为5
     *          

     *          Set the new algorithm search parameters for. If not set, the default is 5
     */
    public void setK(int k) {
        this.k = k;
    }

    /**
     * 无样本的距离计算，您在此进行分类，不需要传递很多的数据样本，只需要由实现类按照自己的算法进行类别推断即可。
     * 

     * For distance calculation without samples, you can classify here. You don't need to pass a lot of data samples. You only need to infer the category by the implementation class according to its own algorithm.
     *
     * @param keys 指定的一些数据类别，按照索引与 ints 参数一一对应，其中如果为 ? 代表是未知类别
     *             

     *             Some specified data categories correspond to the ints parameter one by one according to the index, in which, if it is? Represents an unknown category
     * @param ints 指定的类别索引对应的数据特征本身，是需要分类的关键对象。
     *             

     *             The data feature corresponding to the specified category index is the key object to be classified.
     * @return 分类结果。
     * 

     * Classification results.
     */
    @Override
    public HashMap> classification(String[] keys, int[][] ints) {
        if (keys.length != ints.length)
            throw new OperatorOperationException("您传入的类别数量与待分类特征数据数量要保持一致！！！\nThe number of categories you pass in should be consistent with the number of characteristic data to be classified!!!\n" +
                    "Number of categories = " + keys.length + "\tCharacteristic number = " + ints.length);
        final HashMap> hashMap = new HashMap<>();
        for (int i = 0; i < keys.length; i++) {
            String key = keys[i];
            // 如果当前类别是已知的就不进行操作
            if (UnknownCategory.equals(key)) {
                /*
                   如果是未知的就判断周边特征与当前特征的距离
                   并获取到周边k个特征结果数值中最小的一个特征结果数值
                   并将其作为目标值
                 */
                // 获取到坐标的需要被判断的左闭右开区间
                final int leftS = i - k, rightE = i + k;
                // 开始迭代左右两边的值 找到最小的度量对应的索引
                int MIN_index = -1;
                final int[] ints1 = ints[i];
                {
                    MIN_index = min_index1(leftS, ints, rightE, keys, ints1, MIN_index);
                }
                // 找到之后最近的值开始进行结果添加
                String key1 = keys[MIN_index];
                ArrayList integerMatrices = hashMap.get(key1);
                if (integerMatrices == null) {
                    integerMatrices = new ArrayList<>(ints.length + 8);
                    integerMatrices.add(IntegerVector.parse(ints1));
                    hashMap.put(key1, integerMatrices);
                } else {
                    integerMatrices.add(IntegerVector.parse(ints1));
                }
            }
        }
        return hashMap;
    }

    /**
     * 无样本的距离计算，您在此进行分类，不需要传递很多的数据样本，只需要由实现类按照自己的算法进行类别推断即可。
     * 

     * For distance calculation without samples, you can classify here. You don't need to pass a lot of data samples. You only need to infer the category by the implementation class according to its own algorithm.
     *
     * @param keys    指定的一些数据类别，按照索引与 ints 参数一一对应，其中如果为 ? 代表是未知类别
     *                

     *                Some specified data categories correspond to the ints parameter one by one according to the index, in which, if it is? Represents an unknown category
     * @param doubles 指定的类别索引对应的数据特征本身，是需要分类的关键对象。
     *                

     *                The data feature corresponding to the specified category index is the key object to be classified.
     * @return 分类结果。
     * 

     * Classification results.
     */
    @Override
    public HashMap> classification(String[] keys, double[][] doubles) {
        if (keys.length != doubles.length)
            throw new OperatorOperationException("您传入的类别数量与待分类特征数据数量要保持一致！！！\nThe number of categories you pass in should be consistent with the number of characteristic data to be classified!!!\n" +
                    "Number of categories = " + keys.length + "\tCharacteristic number = " + doubles.length);
        final HashMap> hashMap = new HashMap<>();
        for (int i = 0; i < keys.length; i++) {
            String key = keys[i];
            // 如果当前类别是已知的就不进行操作
            if (UnknownCategory.equals(key)) {
                /*
                   如果是未知的就判断周边特征与当前特征的距离
                   并获取到周边k个特征结果数值中最小的一个特征结果数值
                   并将其作为目标值
                 */
                // 获取到坐标的需要被判断的左闭右开区间
                final int leftS = i - k, rightE = i + k;
                // 开始迭代左右两边的值 找到最小的度量对应的索引
                int MIN_index = -1;
                final double[] doubles1 = doubles[i];
                {
                    MIN_index = min_index2(leftS, doubles, rightE, keys, doubles1, MIN_index);
                }
                // 找到之后最近的值开始进行结果添加
                String key1 = keys[MIN_index];
                ArrayList integerMatrices = hashMap.get(key1);
                if (integerMatrices == null) {
                    integerMatrices = new ArrayList<>(doubles.length + 8);
                    integerMatrices.add(DoubleVector.parse(doubles1));
                    hashMap.put(key1, integerMatrices);
                } else {
                    integerMatrices.add(DoubleVector.parse(doubles1));
                }
            }
        }
        return hashMap;
    }

    private int min_index2(int leftS, double[][] doubles, int rightE, String[] keys, double[] doubles1, int MIN_index) {
        double MIN = Double.MAX_VALUE;
        // 找到左边相邻的最小距离特征索引
        for (int i1 = Math.max(leftS, 0); i1 < Math.min(doubles.length, rightE); ++i1) {
            if ("?".equals(keys[i1])) continue;
            double trueDistance = distanceAlgorithm.getTrueDistance(doubles1, doubles[i1]);
            if (MIN > trueDistance) {
                MIN_index = i1;
                MIN = trueDistance;
            }
        }
        return MIN_index;
    }

    private int min_index1(int leftS, int[][] ints, int rightE, String[] keys, int[] ints1, int MIN_index) {
        double MIN = Double.MAX_VALUE;
        // 找到左边相邻的最小距离特征索引
        for (int i1 = Math.max(leftS, 0); i1 < Math.min(ints.length, rightE); i1++) {
            if (UnknownCategory.equals(keys[i1])) continue;
            double trueDistance = distanceAlgorithm.getTrueDistance(ints1, ints[i1]);
            if (MIN > trueDistance) {
                MIN_index = i1;
                MIN = trueDistance;
            }
        }
        return MIN_index;
    }

    /**
     * 无样本的距离计算，您在此进行分类，不需要传递很多的数据样本，只需要由实现类按照自己的算法进行类别推断即可。
     * 

     * For distance calculation without samples, you can classify here. You don't need to pass a lot of data samples. You only need to infer the category by the implementation class according to its own algorithm.
     *
     * @param keys 指定的一些数据类别，按照索引与 ints 参数一一对应，其中如果为 ? 代表是未知类别
     *             

     *             Some specified data categories correspond to the ints parameter one by one according to the index, in which, if it is? Represents an unknown category
     * @param ints 指定的类别索引对应的数据特征本身，是需要分类的关键对象。
     *             

     *             The data feature corresponding to the specified category index is the key object to be classified.
     * @return 分类结果。
     * 

     * Classification results.
     */
    @Override
    public HashMap> classification(String[] keys, IntegerMatrix ints) {
        return classification(keys, ints.toArrays());
    }

    /**
     * 无样本的距离计算，您在此进行分类，不需要传递很多的数据样本，只需要由实现类按照自己的算法进行类别推断即可。
     * 

     * For distance calculation without samples, you can classify here. You don't need to pass a lot of data samples. You only need to infer the category by the implementation class according to its own algorithm.
     *
     * @param keys    指定的一些数据类别，按照索引与 ints 参数一一对应，其中如果为 ? 代表是未知类别
     *                

     *                Some specified data categories correspond to the ints parameter one by one according to the index, in which, if it is? Represents an unknown category
     * @param doubles 指定的类别索引对应的数据特征本身，是需要分类的关键对象。
     *                

     *                The data feature corresponding to the specified category index is the key object to be classified.
     * @return 分类结果。
     * 

     * Classification results.
     */
    @Override
    public HashMap> classification(String[] keys, DoubleMatrix doubles) {
        return classification(keys, doubles.toArrays());
    }

    /**
     * 无样本的距离计算，您在此进行分类，不需要传递很多的数据样本，只需要由实现类按照自己的算法进行类别推断即可。
     * 

     * For distance calculation without samples, you can classify here. You don't need to pass a lot of data samples. You only need to infer the category by the implementation class according to its own algorithm.
     *
     * @param keys                指定的一些数据类别，按照索引与 columnIntegerMatrix 参数一一对应，其中如果为 ? 代表是未知类别
     *                            

     *                            Some specified data categories correspond to the columnIntegerMatrix parameter one by one according to the index, in which, if it is? Represents an unknown category
     * @param columnIntegerMatrix 指定的类别索引对应的数据特征本身，是需要分类的关键对象。
     *                            

     *                            The data feature corresponding to the specified category index is the key object to be classified.
     * @return 分类结果。
     * 

     * Classification results.
     */
    @Override
    public HashMap> classification(String[] keys, ColumnIntegerMatrix columnIntegerMatrix) {
        int[][] ints = columnIntegerMatrix.toArrays();
        if (keys.length != ints.length)
            throw new OperatorOperationException("您传入的类别数量与待分类特征数据数量要保持一致！！！\nThe number of categories you pass in should be consistent with the number of characteristic data to be classified!!!\n" +
                    "Number of categories = " + keys.length + "\tCharacteristic number = " + ints.length);
        final HashMap> hashMap = new HashMap<>();
        String[] colFieldNames = columnIntegerMatrix.getColFieldNames();
        for (int i = 0; i < keys.length; i++) {
            String key = keys[i];
            // 如果当前类别是已知的就不进行操作
            if (UnknownCategory.equals(key)) {
                /*
                   如果是未知的就判断周边特征与当前特征的距离
                   并获取到周边k个特征结果数值中最小的一个特征结果数值
                   并将其作为目标值
                 */
                // 获取到坐标的需要被判断的左闭右开区间
                final int leftS = i - k, rightE = i + k;
                // 开始迭代左右两边的值 找到最小的度量对应的索引
                final int[] ints1 = ints[i];
                int MIN_index = min_index1(leftS, ints, rightE, keys, ints1, -1);
                // 找到之后最近的值开始进行结果添加
                String key1 = keys[MIN_index];
                ArrayList integerMatrices = hashMap.get(key1);
                if (integerMatrices == null) {
                    integerMatrices = new ArrayList<>(ints.length + 8);
                    integerMatrices.add(ColumnIntegerVector.parse(key1, colFieldNames, ints1));
                    hashMap.put(key1, integerMatrices);
                } else {
                    integerMatrices.add(ColumnIntegerVector.parse(key1, colFieldNames, ints1));
                }
            }
        }
        return hashMap;
    }

    /**
     * 无样本的距离计算，您在此进行分类，不需要传递很多的数据样本，只需要由实现类按照自己的算法进行类别推断即可。
     * 

     * For distance calculation without samples, you can classify here. You don't need to pass a lot of data samples. You only need to infer the category by the implementation class according to its own algorithm.
     *
     * @param keys               指定的一些数据类别，按照索引与 columnDoubleMatrix 参数一一对应，其中如果为 ? 代表是未知类别
     *                           

     *                           Some specified data categories correspond to the columnDoubleMatrix parameter one by one according to the index, in which, if it is? Represents an unknown category
     * @param columnDoubleMatrix 指定的类别索引对应的数据特征本身，是需要分类的关键对象。
     *                           

     *                           The data feature corresponding to the specified category index is the key object to be classified.
     * @return 分类结果。
     * 
     * Classification results.
     */
    @Override
    public HashMap> classification(String[] keys, ColumnDoubleMatrix columnDoubleMatrix) {
        double[][] doubles = columnDoubleMatrix.toArrays();
        if (keys.length != doubles.length)
            throw new OperatorOperationException("您传入的类别数量与待分类特征数据数量要保持一致！！！\nThe number of categories you pass in should be consistent with the number of characteristic data to be classified!!!\n" +
                    "Number of categories = " + keys.length + "\tCharacteristic number = " + doubles.length);
        final HashMap> hashMap = new HashMap<>();
        String[] colFieldNames = columnDoubleMatrix.getColFieldNames();
        for (int i = 0; i < keys.length; i++) {
            String key = keys[i];
            // 如果当前类别是已知的就不进行操作
            if (UnknownCategory.equals(key)) {
                /*
                   如果是未知的就判断周边特征与当前特征的距离
                   并获取到周边k个特征结果数值中最小的一个特征结果数值
                   并将其作为目标值
                 */
                // 获取到坐标的需要被判断的左闭右开区间
                final int leftS = i - k, rightE = i + k;
                // 开始迭代左右两边的值 找到最小的度量对应的索引
                int MIN_index;
                final double[] doubles1 = doubles[i];
                {
                    MIN_index = min_index2(leftS, doubles, rightE, keys, doubles1, -1);
                }
                // 找到之后最近的值开始进行结果添加
                String key1 = keys[MIN_index];
                ArrayList integerMatrices = hashMap.get(key1);
                if (integerMatrices == null) {
                    integerMatrices = new ArrayList<>(doubles.length + 8);
                    integerMatrices.add(ColumnDoubleVector.parse(key1, colFieldNames, doubles1));
                    hashMap.put(key1, integerMatrices);
                } else {
                    integerMatrices.add(ColumnDoubleVector.parse(key1, colFieldNames, doubles1));
                }
            }
        }
        return hashMap;
    }
}