All downloads are free. The search and download functionality uses the official Maven repository.

smile.data.BinarySparseDataset Maven / Gradle / Ivy

The newest version!
/*******************************************************************************
 * Copyright (c) 2010 Haifeng Li
 *   
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *  
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package smile.data;

import java.util.Arrays;
import smile.math.Math;
import smile.math.matrix.SparseMatrix;

/**
 * Binary sparse dataset. Each item is stored as an integer array holding
 * the column indices of its nonzero elements in ascending order. Alongside
 * the items, the dataset tracks the total number of nonzero entries and the
 * number of nonzero entries per column so that it can be converted to a
 * column-compressed sparse matrix without rescanning for sizes.
 *
 * @author Haifeng Li
 */
public class BinarySparseDataset extends Dataset {

    /**
     * The dataset name used by the no-argument constructor.
     */
    private static final String DEFAULT_NAME = "Binary Sparse Dataset";
    /**
     * The initial capacity of the per-column counters when the number of
     * columns is not known up front.
     */
    private static final int INITIAL_CAPACITY = 100;

    /**
     * The number of nonzero entries.
     */
    private int n;
    /**
     * The number of columns.
     */
    private int numColumns;
    /**
     * The number of nonzero entries in each column. The array may be longer
     * than {@link #numColumns}; only the first {@code numColumns} slots are
     * meaningful.
     */
    private int[] colSize;

    /**
     * Constructor. The dataset is named "Binary Sparse Dataset".
     */
    public BinarySparseDataset() {
        this(DEFAULT_NAME);
    }
    
    /**
     * Constructor.
     * @param name the name of dataset.
     */
    public BinarySparseDataset(String name) {
        super(name);
        numColumns = 0;
        colSize = new int[INITIAL_CAPACITY];
    }

    /**
     * Constructor.
     * @param name the name of dataset.
     * @param response the attribute type of response variable.
     */
    public BinarySparseDataset(String name, Attribute response) {
        super(name, response);
        numColumns = 0;
        colSize = new int[INITIAL_CAPACITY];
    }

    /**
     * Constructor. No name is passed to the superclass, so its no-argument
     * constructor is used.
     * @param ncols the number of columns in the matrix.
     * @throws IllegalArgumentException if {@code ncols} is negative.
     */
    public BinarySparseDataset(int ncols) {
        if (ncols < 0) {
            throw new IllegalArgumentException("Invalid number of columns: " + ncols);
        }

        numColumns = ncols;
        colSize = new int[ncols];
    }

    /**
     * Returns the number of columns.
     * @return the number of columns, i.e. one more than the largest column
     * index seen so far, or the value given at construction if larger.
     */
    public int ncols() {
        return numColumns;
    }

    /**
     * Add a datum item into the dataset.
     * @param datum a datum item. The indices of nonzero elements will be sorted
     * into ascending order (the array is sorted in place). An empty index
     * array denotes a row without nonzero elements.
     * @throws IllegalArgumentException if an index is negative, duplicated,
     * or too large to be followed by a valid column count.
     */
    @Override
    public void add(Datum datum) {
        int[] x = datum.x;
        
        for (int xi : x) {
            if (xi < 0) {
                throw new IllegalArgumentException("Negative index of nonzero element: " + xi);
            }
        }
        
        Arrays.sort(x);
        for (int i = 1; i < x.length; i++) {
            if (x[i] == x[i-1]) {
                throw new IllegalArgumentException("Duplicated indices of nonzero elements: " + x[i]);
            }
        }
        
        // An empty row has no largest index and touches no column counter.
        if (x.length > 0) {
            // The array is sorted, so the last element is the largest index.
            int max = x[x.length - 1];
            if (max == Integer.MAX_VALUE) {
                // max + 1 would overflow; reject before any state is modified.
                throw new IllegalArgumentException("Index of nonzero element is too large: " + max);
            }

            if (numColumns <= max) {
                numColumns = max + 1;
                if (numColumns > colSize.length) {
                    // Grow by 50% (same value as 3 * numColumns / 2, without
                    // the intermediate overflow of the multiplication).
                    int capacity = numColumns + numColumns / 2;
                    if (capacity < 0) {
                        capacity = numColumns;
                    }
                    colSize = Arrays.copyOf(colSize, capacity);
                }
            }

            for (int xi : x) {
                colSize[xi]++;
            }

            n += x.length;
        }
        
        super.add(datum);
    }
    
    /**
     * Returns the value at entry (i, j) by binary search.
     * @param i the row index.
     * @param j the column index.
     * @return 1 if column {@code j} is among the nonzero indices of row
     * {@code i}; 0 otherwise.
     * @throws IllegalArgumentException if {@code i} is not a valid row index.
     */
    public int get(int i, int j) {
        if (i < 0 || i >= size()) {
            throw new IllegalArgumentException("Invalid index: i = " + i);
        }

        // Rows are kept sorted and duplicate-free by add(), which is what
        // binary search requires. A negative result means "not found".
        int[] x = get(i).x;
        return Arrays.binarySearch(x, j) >= 0 ? 1 : 0;
    }

    /**
     * Convert into Harwell-Boeing column-compressed sparse matrix format.
     * Every stored entry has the value 1.
     * @return the sparse matrix with one row per datum and {@link #ncols()} columns.
     */
    public SparseMatrix toSparseMatrix() {
        // filled[j] counts how many entries of column j have been written so far.
        int[] filled = new int[numColumns];

        // colIndex[j] is the offset of the first entry of column j;
        // colIndex[numColumns] ends up equal to the total number of entries.
        int[] colIndex = new int[numColumns + 1];
        for (int j = 0; j < numColumns; j++) {
            colIndex[j + 1] = colIndex[j] + colSize[j];
        }

        int nrows = size();
        int[] rowIndex = new int[n];
        double[] values = new double[n];

        // Rows are visited in ascending order, so the row indices within
        // each column are written in ascending order as well.
        for (int i = 0; i < nrows; i++) {
            for (int j : get(i).x) {
                int k = colIndex[j] + filled[j];

                rowIndex[k] = i;
                values[k] = 1;
                filled[j]++;
            }
        }

        return new SparseMatrix(nrows, numColumns, values, rowIndex, colIndex);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy