All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.data.BinarySparseDatasetImpl Maven / Gradle / Ivy

There is a newer version: 4.2.0
Show newest version
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */

package smile.data;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.stream.Stream;
import smile.math.MathEx;
import smile.math.matrix.SparseMatrix;

/**
 * Binary sparse dataset. Each item is stored as an integer array, which
 * are the indices of nonzero elements in ascending order.
 *
 * @param  the target type.
 *
 * @author Haifeng Li
 */
class BinarySparseDatasetImpl implements BinarySparseDataset {
    private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BinarySparseDatasetImpl.class);

    /**
     * The sample instances.
     */
    private final ArrayList> instances;
    /**
     * The number of nonzero entries.
     */
    private int n;
    /**
     * The number of columns.
     */
    private final int ncol;
    /**
     * The number of nonzero entries in each column.
     */
    private final int[] colSize;

    /**
     * Constructor.
     * @param data The sample instances.
     */
    public BinarySparseDatasetImpl(Collection> data) {
        this.instances = new ArrayList<>(data);

        int p = 0;
        for (var instance : instances) {
            p = Math.max(p, MathEx.max(instance.x()));
        }
        ncol = p + 1;
        colSize = new int[ncol];

        for (var instance : instances) {
            var x = instance.x();
            Arrays.sort(x);

            int prev = -1; // index of previous element
            for (int xi : x) {
                if (xi < 0) {
                    throw new IllegalArgumentException(String.format("Negative index of nonzero element: %d", xi));
                }

                if (xi == prev) {
                    logger.warn("Ignore duplicated indices: {} in {}", xi, Arrays.toString(x));
                } else {
                    colSize[xi]++;
                    n++;
                    prev = xi;
                }
            }
        }
    }

    @Override
    public int size() {
        return instances.size();
    }

    @Override
    public int length() {
        return n;
    }

    @Override
    public int ncol() {
        return ncol;
    }

    @Override
    public SampleInstance get(int i) {
        return instances.get(i);
    }

    @Override
    public Stream> stream() {
        return instances.stream();
    }

    @Override
    public Iterator> iterator() {
        return instances.iterator();
    }

    @Override
    public SparseMatrix toMatrix() {
        int[] pos = new int[ncol];
        int[] colIndex = new int[ncol + 1];
        for (int i = 0; i < ncol; i++) {
            colIndex[i + 1] = colIndex[i] + colSize[i];
        }

        int nrow = instances.size();
        int[] rowIndex = new int[n];
        double[] x = new double[n];

        for (int i = 0; i < nrow; i++) {
            for (int j : instances.get(i).x()) {
                int k = colIndex[j] + pos[j];

                rowIndex[k] = i;
                x[k] = 1;
                pos[j]++;
            }
        }

        return new SparseMatrix(nrow, ncol, x, rowIndex, colIndex);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy