All Downloads are FREE. Search and download functionalities are using the official Maven repository.

zhao.algorithmMagic.operands.matrix.ColumnIntegerMatrix Maven / Gradle / Ivy

There is a newer version: 1.42
Show newest version
package zhao.algorithmMagic.operands.matrix;

import zhao.algorithmMagic.core.ASDynamicLibrary;
import zhao.algorithmMagic.exception.OperatorOperationException;
import zhao.algorithmMagic.io.OutputComponent;
import zhao.algorithmMagic.operands.RCNOperands;
import zhao.algorithmMagic.operands.vector.IntegerVector;
import zhao.algorithmMagic.utils.ASClass;
import zhao.algorithmMagic.utils.ASIO;
import zhao.algorithmMagic.utils.ASMath;
import zhao.algorithmMagic.utils.dataContainer.IntegerAndInts;

import java.io.File;
import java.io.IOException;
import java.util.*;

/**
 * 带有字段名称设置的矩阵对象,在此对象中,每个维度的数值都有与之相对应的字段描述。
 * 

* A matrix object with field name settings. In this object, the numerical value of each dimension has its corresponding field description. * * @author zhao */ public class ColumnIntegerMatrix extends IntegerMatrix implements RCNOperands, SaveMatrix { private final String[] Field1; private final String[] Field2; private final HashMap rowIndex; private final HashMap colIndex; /** * 构造一个具有列名属性的整形矩阵 * * @param colNames 该矩阵中所对应的列名称 * @param rowNames 该矩阵中所对应的行名称 * @param ints 该矩阵中需要维护的数组 */ public ColumnIntegerMatrix(String[] colNames, String[] rowNames, int[]... ints) { super(ints); if (ints.length > 0) { int length = ints[0].length; if (rowNames != null && rowNames.length > 0) { if (rowNames.length == ints.length) { Field2 = rowNames; // 构造行索引 rowIndex = new HashMap<>(Field2.length); ASClass.extractedIndexMap(rowIndex, Field2); } else { throw new OperatorOperationException("构造字段矩阵时需要注意字段名的数量与字段数据的列数一一对应!!!ERROR => RowField.length = " + rowNames.length + "\tints.rowCount = " + ints.length); } } else { Field2 = new String[0]; rowIndex = new HashMap<>(); } if (colNames != null && colNames.length > 0) { if (colNames.length == length) { Field1 = colNames; // 构造列索引 colIndex = new HashMap<>(Field1.length); ASClass.extractedIndexMap(colIndex, Field1); } else { throw new OperatorOperationException("构造字段矩阵时需要注意字段名的数量与字段数据的列数一一对应!!!ERROR => ColField.length = " + colNames.length + "\tints.colCount = " + length); } } else { Field1 = new String[0]; colIndex = new HashMap<>(); } } else { throw new OperatorOperationException("The array of construction matrix cannot be empty"); } } /** * 构造一个矩阵,矩阵的列数量以矩阵的第一行为准! *

* Construct a matrix, the number of columns of the matrix is based on the first row of the matrix! * * @param colNames 该矩阵中所对应的列名称 * @param rowNames 该矩阵中所对应的行名称 * @param ints 该矩阵中需要维护的数组 * @return matrix object */ public static ColumnIntegerMatrix parse(String[] colNames, String[] rowNames, int[]... ints) { if (ints.length > 0) { return new ColumnIntegerMatrix(colNames, rowNames, ints); } else { throw new OperatorOperationException("The array of construction matrix cannot be empty"); } } protected static void deleteRelatedFunction(double thresholdLeft, double thresholdRight, int[][] ints, int[] mid, ArrayList res, ArrayList res_f2, String[] field2) { if (ASDynamicLibrary.isUseC()) { for (int i = 0; i < ints.length; i++) { int[] anInt = ints[i]; double num = ASMath.correlationCoefficient_C(anInt, mid, anInt.length); if (num < thresholdLeft || num > thresholdRight) { // 这个情况代表是不符合删除区间的,也就是不需要被删除的 int[] res1 = new int[anInt.length]; System.arraycopy(anInt, 0, res1, 0, anInt.length); res_f2.add(field2[i]); res.add(res1); } } } else { for (int i = 0; i < ints.length; i++) { int[] anInt = ints[i]; double num = ASMath.correlationCoefficient(anInt, mid); if (num < thresholdLeft || num > thresholdRight) { // 这个情况代表是不符合删除区间的,也就是不需要被删除的 int[] res1 = new int[anInt.length]; System.arraycopy(anInt, 0, res1, 0, anInt.length); res_f2.add(field2[i]); res.add(res1); } } } } protected static void ex(Random random1, int[][] res, String[] rowNames, int maxIndex, int i) { int i1 = random1.nextInt(maxIndex); int[] temp = res[i]; res[i] = res[i1]; res[i1] = temp; String tempS = rowNames[i]; rowNames[i] = rowNames[i1]; rowNames[i1] = tempS; } /** * @return 该矩阵中所对应的列名称 */ public String[] getColFieldNames() { return Field1.clone(); } /** * @return 该矩阵中对应的行名称 */ public String[] getRowFieldNames() { return Field2.clone(); } @Override public HashMap toHashMap() { HashMap hashMap = new HashMap<>(getRowCount() + 10); int[][] ints = toArrays(); // 开始添加数据 for (int i = 0; i < this.Field1.length; i++) { // 将当前字段的每一个元素添加到当前字段对应的数组中 int[] tempCol = new int[ints.length]; int count = -1; for (int[] anInt : ints) { tempCol[++count] = anInt[i]; } hashMap.put(this.Field1[i], IntegerVector.parse(tempCol)); } return hashMap; } @Override public String toString() { StringBuilder stringBuilder = new StringBuilder(); String[] colFieldNames = this.getColFieldNames(); if (colFieldNames.length != 0) { // 添加列字段 for (String colFieldName : colFieldNames) { stringBuilder.append(colFieldName).append('\t'); } if (this.Field2.length != 0) stringBuilder.append("rowColName"); stringBuilder.append('\n'); } // 添加行字段与行数据 String[] rowFieldNames = this.getRowFieldNames(); int[][] ints = this.toArrays(); if (rowFieldNames.length != 0) { for (int i = 0; i < ints.length; i++) { stringBuilder .append(Arrays.toString(ints[i])).append('\t') .append(rowFieldNames[i]).append('\n'); } } else { for (int[] aInt : ints) { stringBuilder.append(Arrays.toString(aInt)).append("\n"); } } return "------------IntegerMatrixStart-----------\n" + stringBuilder + "------------IntegerMatrixEnd------------\n"; } /** * 去除冗余特征维度,将当前矩阵中的每一个维度都进行方差或无向差计算,并将过于稳定的冗余特征去除。 *

* Remove redundant feature dimensions, calculate variance or undirected difference of each dimension in the current matrix, and remove redundant features that are too stable. * * @param threshold 冗余去除阈值,代表去除的百分比,这个值应是一个小于1的数值,例如设置为0.4 代表去除掉冗余程度倒序排行中,最后40% 的维度。 *

* Redundancy removal threshold, which represents the percentage of removal, should be a value less than 1. For example, set to 0.4 to remove the last 40% of the dimensions in the reverse order of redundancy. * @return 去除冗余特征维度之后的新矩阵 *

* New matrix after removing redundant feature dimensions */ @Override public ColumnIntegerMatrix featureSelection(double threshold) { if (threshold >= 1) throw Matrix.OPERATOR_OPERATION_EXCEPTION; // 计算出本次要去除的维度数量 int num = (int) (getRowCount() * threshold); if (num <= 0) { return ColumnIntegerMatrix.parse(getColFieldNames(), getRowFieldNames(), copyToNewArrays()); } else { // 计算出本次剩余的维度数量 num = getRowCount() - num; // 准备好一个排序集合,存储所有的离散值结果与数组 TreeMap treeMap = new TreeMap<>(Comparator.reverseOrder()); // 将每一个维度的向量的方差计算出来 int count = -1; for (int[] ints : this.toArrays()) { // 计算出离散值,并将离散值与当前行编号以及当前数组添加到集合中 treeMap.put(ASMath.undirectedDifference(ints), new IntegerAndInts(++count, ints)); } // 开始获取到前 num 个数组 int index = -1; // 构建列与数据的存储控件 String[] rowNames = this.Field2.length == 0 ? null : new String[num]; int[][] res = new int[num][getColCount()]; if (rowNames == null) { for (IntegerAndInts value : treeMap.values()) { System.arraycopy(value.ints, 0, res[++index], 0, value.ints.length); --num; if (num == 0) break; } } else { for (IntegerAndInts value : treeMap.values()) { System.arraycopy(value.ints, 0, res[++index], 0, value.ints.length); rowNames[index] = this.Field2[value.anInt]; --num; if (num == 0) break; } } return ColumnIntegerMatrix.parse(getColFieldNames(), rowNames, res); } } /** * 删除与目标索引维度相关的所有行维度,并返回新矩阵对象。 *

* Delete all row dimensions related to the target index dimension and return a new matrix object. * * @param index 需要被作为相关系数中心点的行编号。 *

* The row number to be used as the center point of the correlation coefficient. * @param thresholdLeft 相关系数阈值,需要被删除的相关系数阈值区间左边界。 *

* The correlation coefficient threshold is the left boundary of the correlation coefficient threshold interval to be deleted. * @param thresholdRight 相关系数阈值,需要被删除的相关系数阈值区间右边界。 *

* The correlation coefficient threshold is the right boundary of the correlation coefficient threshold interval to be deleted. * @return 进行了相关维度删除之后构造出来的新矩阵 *

* The new matrix constructed after deleting relevant dimensions */ @Override public ColumnIntegerMatrix deleteRelatedDimensions(int index, double thresholdLeft, double thresholdRight) { int rowCount = getRowCount(); if (index >= 0 && index < rowCount) { int[][] ints = toArrays(); // 获取到当前的相关系数中心序列 int[] mid = ints[index]; boolean b1 = this.Field1.length != 0; boolean b2 = this.Field2.length != 0; ArrayList res = new ArrayList<>(rowCount); ArrayList res_f2 = new ArrayList<>(b2 ? this.Field2.length : 16); // 开始进行计算 if (b1 && b2) { deleteRelatedFunction(thresholdLeft, thresholdRight, ints, mid, res, res_f2, Field2); return ColumnIntegerMatrix.parse( this.Field1.clone(), res_f2.toArray(new String[0]), res.toArray(new int[0][]) ); } else if (b2) { // 说明第 1 字段不需要添加数据 deleteRelatedFunction(thresholdLeft, thresholdRight, ints, mid, res, res_f2, Field2); return ColumnIntegerMatrix.parse( null, res_f2.toArray(new String[0]), res.toArray(new int[0][]) ); } else if (b1) { if (ASDynamicLibrary.isUseC()) { // 说明第二字段不需要加数据 for (int[] anInt : ints) { double num = ASMath.correlationCoefficient_C(anInt, mid, anInt.length); if (num < thresholdLeft || num > thresholdRight) { // 这个情况代表是不符合删除区间的,也就是不需要被删除的 int[] res1 = new int[anInt.length]; System.arraycopy(anInt, 0, res1, 0, anInt.length); res.add(res1); } } } else { // 说明第二字段不需要加数据 for (int[] anInt : ints) { double num = ASMath.correlationCoefficient(anInt, mid); if (num < thresholdLeft || num > thresholdRight) { // 这个情况代表是不符合删除区间的,也就是不需要被删除的 int[] res1 = new int[anInt.length]; System.arraycopy(anInt, 0, res1, 0, anInt.length); res.add(res1); } } } return ColumnIntegerMatrix.parse( this.Field1.clone(), null, res.toArray(new int[0][]) ); } else { // 说明都不需要字段名数据 IntegerMatrix.ex(thresholdLeft, thresholdRight, ints, mid, res); return ColumnIntegerMatrix.parse( null, null, res.toArray(new int[0][]) ); } } else { return ColumnIntegerMatrix.parse(this.Field1.clone(), this.Field2.clone(), copyToNewArrays()); } } /** * 获取到指定名称的行数组 *

* Get the row array with the specified name * * @param name 指定的行目标名称 *

* Specified row target name * @return 一个包含当前行元素的新数组,是支持修改的。 *

* A new array containing the elements of the current row supports modification. */ @Override public int[] getArrayByRowName(String name) { Integer index = rowIndex.get(name); if (index == null) { throw new OperatorOperationException("No rows found " + name); } return getArrayByRowIndex(index); } /** * 获取到指定名称的列数组 *

* Get the col array with the specified name * * @param name 指定的列目标名称 *

* Specified col target name * @return 一个包含当前列元素的新数组,是支持修改的。 *

* A new array containing the elements of the current col supports modification. */ @Override public int[] getArrayByColName(String name) { Integer index = colIndex.get(name); if (index == null) { throw new OperatorOperationException("No columns found " + name); } return getArrayByColIndex(colIndex.get(name)); } /** * 将现有矩阵的转置矩阵获取到 *

* Get the transpose of an existing matrix into * * @return 矩阵转置之后的新矩阵 *

* new matrix after matrix transpose */ @Override public ColumnIntegerMatrix transpose() { return ColumnIntegerMatrix.parse( this.Field2.clone(), this.Field1.clone(), super.transpose().toArrays() ); } /** * 将本对象中的所有数据进行洗牌打乱,随机分布数据行的排列。 *

* Shuffle all the data in this object and randomly distribute the arrangement of data rows. * * @param seed 打乱算法中所需要的随机种子。 *

* Disrupt random seeds required in the algorithm. * @return 打乱之后的对象。 *

* Objects after disruption. */ @Override public ColumnIntegerMatrix shuffle(long seed) { // 行是否无字段 if (this.Field2.length == 0) { return ColumnIntegerMatrix.parse( this.Field1.length == 0 ? null : this.Field1.clone(), null, ASMath.shuffle(this.copyToNewArrays(), seed, false)); } else { // 带着行一起迭代 String[] rowNames = this.Field2.clone(); int[][] res = this.copyToNewArrays(); // 生成随机数对象 Random random = new Random(); int maxIndex = res.length - 1; random.setSeed(seed); for (int i = 0; i < res.length; i++) { ex(random, res, rowNames, maxIndex, i); } return ColumnIntegerMatrix.parse(this.Field1.clone(), rowNames, res); } } /** * 将本对象中的所有数据进行洗牌打乱,随机分布数据行的排列。 *

* Shuffle all the data in this object and randomly distribute the arrangement of data rows. * * @param random1 打乱算法中所需要的随机种子。 *

* Disrupt random seeds required in the algorithm. * @param copy 打乱时是否需要产生一个新矩阵对象,与当前对象完全脱离关系。 *

* Whether it is necessary to generate a new matrix object when disrupting, which is completely separated from the current object. * @param length 打乱时注重的打乱次数,最终打乱会导致最多 length * 2 个元素发生位置变化。 *

* The number of disruptions that should be paid attention to when disrupting, and the final disruption will result in the position change of up to 2 elements of length *. * @return 打乱之后的对象。 *

* Objects after disruption. */ public ColumnIntegerMatrix shuffle(Random random1, boolean copy, int length) { length = Math.min(getRowCount(), length); // 行是否无字段 if (this.Field2.length == 0) { return ColumnIntegerMatrix.parse( this.Field1.length == 0 ? null : this.Field1.clone(), null, ASMath.shuffleFunction(random1, this.getRowCount(), copy ? this.copyToNewArrays() : this.toArrays(), length)); } else { // 带着行一起迭代 int[][] res; String[] rowNames; if (!copy) { rowNames = this.Field2; res = this.toArrays(); // 生成随机数对象 int maxIndex = res.length - 1; for (int i = 0; i < length; i++) { ex(random1, res, rowNames, maxIndex, i); } // 字段发生变化了,重新构建索引 ASClass.extractedIndexMap(rowIndex, Field2); return this; } else { res = this.copyToNewArrays(); rowNames = this.Field2.clone(); // 生成随机数对象 int maxIndex = res.length - 1; for (int i = 0; i < length; i++) { ex(random1, res, rowNames, maxIndex, i); } return ColumnIntegerMatrix.parse(this.Field1.clone(), rowNames, res); } } } /** * 将数据所维护的数组左移n个位置,并获取到结果数值 *

* Move the array maintained by the data to the left n positions and get the result value * * @param n 被左移的次数,该数值应取值于 [0, getRowCount] *

* The number of times it is moved to the left. The value should be [0, getRowCount] * @param copy 本次左移的作用参数,如果设置为true,代表本次位移会创建出一个新的数组,于当前数组毫无关联。 *

* If the action parameter of this left shift is set to true, it means that this shift will create a new array, which has no association with the current array. * @return 位移之后的AS操作数对象,其类型与调用者数据类型一致。 *

* The AS operand object after displacement has the same type as the caller data type. */ @Override public IntegerMatrix leftShift(int n, boolean copy) { if (copy) { return ColumnIntegerMatrix.parse( this.Field1.length == 0 ? null : this.Field1.clone(), this.Field2.length == 0 ? null : ASMath.leftShift(this.Field2.clone(), n), ASMath.leftShift(this.copyToNewArrays(), n) ); } else { if (this.Field2.length != 0) { ASMath.leftShift(this.Field2, n); // 字段发生变化了,重新构建索引 ASClass.extractedIndexMap(rowIndex, this.Field2); } ASMath.leftShift(this.toArrays(), n); return this; } } /** * 将数据所维护的数组右移n个位置,并获取到结果数值 *

* Move the array maintained by the data to the right n positions and get the result value * * @param n 被右移的次数,该数值应取值于 [0, getRowCount] *

* The number of times it is moved to the right. The value should be [0, getRowCount] * @param copy 本次右移的作用参数,如果设置为true,代表本次位移会创建出一个新的数组,于当前数组毫无关联。 *

* If the action parameter of this right shift is set to true, it means that this shift will create a new array, which has no association with the current array. * @return 位移之后的AS操作数对象,其类型与调用者数据类型一致。 *

* The AS operand object after displacement has the same type as the caller data type. */ @Override public IntegerMatrix rightShift(int n, boolean copy) { if (copy) { return ColumnIntegerMatrix.parse( this.Field1.length == 0 ? null : this.Field1.clone(), this.Field2.length == 0 ? null : ASMath.rightShift(this.Field2.clone(), n), ASMath.rightShift(this.copyToNewArrays(), n) ); } else { if (this.Field2.length != 0) { ASMath.rightShift(this.Field2, n); // 字段发生变化了,重新构建索引 ASClass.extractedIndexMap(rowIndex, this.Field2); } ASMath.rightShift(this.toArrays(), n); return this; } } /** * 将当前对象中的元素从左向右的方向进行元素索引为宗旨的反转,实现更多的效果。 *

* Invert the element index of the current object from left to right to achieve more results. * * @param isCopy 如果设置为true 代表反转操作会作用到一个新数组中,并不会更改源数组中的元素位置。反之则是直接更改源数组。 *

* If set to true, the inversion operation will be applied to a new array, and the position of the elements in the source array will not be changed. On the contrary, you can directly change the source array. * @return 被反转之后的对象,该对象的数据类型与函数调用者是一致的。 *

* The data type of the reversed object is the same as that of the function caller. */ @Override public ColumnIntegerMatrix reverseLR(boolean isCopy) { if (!isCopy) { ASMath.arrayReverse(this.getColFieldNames()); for (int[] ints : this.toArrays()) { ASMath.arrayReverse(ints); } return this; } else { int[][] ints1 = this.copyToNewArrays(); for (int[] ints : ints1) { ASMath.arrayReverse(ints); } return ColumnIntegerMatrix.parse( ASMath.arrayReverse(this.getColFieldNames().clone()), this.getRowFieldNames(), ints1 ); } } /** * 将当前对象中的元素从上向下的方向进行元素索引为宗旨的反转,实现更多的效果。 *

* Invert the element index of the current object from Above to below to achieve more results. * * @param isCopy 如果设置为true 代表反转操作会作用到一个新数组中,并不会更改源数组中的元素位置。反之则是直接更改源数组。 *

* If set to true, the inversion operation will be applied to a new array, and the position of the elements in the source array will not be changed. On the contrary, you can directly change the source array. * @return 被反转之后的对象,该对象的数据类型与函数调用者是一致的。 *

* The data type of the reversed object is the same as that of the function caller. */ @Override public ColumnIntegerMatrix reverseBT(boolean isCopy) { if (isCopy) { return ColumnIntegerMatrix.parse( this.getColFieldNames().clone(), ASMath.arrayReverse(this.getRowFieldNames().clone()), ASMath.arrayReverse(this.copyToNewArrays()) ); } else { ASMath.arrayReverse(this.toArrays()); ASMath.arrayReverse(this.getRowFieldNames()); return this; } } /** * 刷新操作数对象的所有字段 */ @Override protected void reFresh() { if (this.Field1.length != 0) { ASClass.extractedIndexMap(colIndex, this.Field1); } else colIndex.clear(); if (this.Field2.length != 0) { ASClass.extractedIndexMap(rowIndex, this.Field2); } else rowIndex.clear(); } /** * 将矩阵对象使用不同的处理方式保存到指定的路径中。 *

* Save the matrix object to the specified path using different processing methods. * * @param path 目标文件所在路径。 *

* Directory path to save. */ @Override public void save(String path) { save(path, ','); } /** * 将矩阵使用指定分隔符保存到文件系统的指定路径的文件中。 *

* Save the matrix to a file in the specified path of the file system using the specified separator. * * @param path 需要保存的目录路径。 *

* Directory path to save. * @param sep 保存时使用的分隔符。 */ @Override public void save(String path, char sep) { save(new File(path), sep); } /** * 将矩阵使用指定分隔符保存到文件系统的指定路径的文件中。 *

* Save the matrix to a file in the specified path of the file system using the specified separator. * * @param path 需要保存的目录路径。 *

* Directory path to save. * @param sep 保存时使用的分隔符。 */ @Override public void save(File path, char sep) { ASIO.writer(path, stream -> { int rowCount = -1; try { // 输出列 stream.write("colName"); for (String colName : this.Field1) { stream.write(sep); stream.write(colName); } stream.newLine(); for (int[] ints : this.toArrays()) { stream.write(this.Field2[++rowCount]); for (int aInt : ints) { stream.write(sep); stream.write(String.valueOf(aInt)); } } } catch (IOException e) { throw new OperatorOperationException("Write data exception!", e); } }); } /** * 将对象交由第三方数据输出组件进行数据的输出。 *

* Submit the object to a third-party data output component for data output. * * @param outputComponent 第三方数据输出设备对象实现。 *

* Implementation of third-party data output device objects. */ @Override public void save(OutputComponent outputComponent) { if (!outputComponent.isOpen()) { if (!outputComponent.open()) throw new OperatorOperationException("您的数据输出组件打开失败。\nYour data output component failed to open."); } outputComponent.writeMat(this); ASIO.close(outputComponent); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy