All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.data.SparseDatasetImpl Maven / Gradle / Ivy

There is a newer version: 4.2.0
Show newest version
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */

package smile.data;

import java.util.*;
import java.util.stream.Stream;
import smile.util.SparseArray;

/**
 * List of Lists sparse matrix format. LIL stores one list per row,
 * where each entry stores a column index and value. Typically, these
 * entries are kept sorted by column index for faster lookup.
 * This format is good for incremental matrix construction.
 * 

* LIL is typically used to construct the matrix. Once the matrix is * constructed, it is typically converted to a format, such as Harwell-Boeing * column-compressed sparse matrix format, which is more efficient for matrix * operations. * * @author Haifeng Li */ class SparseDatasetImpl implements SparseDataset { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(SparseDatasetImpl.class); /** * The data objects. */ private final ArrayList> instances; /** * The number of nonzero entries. */ private int n; /** * The number of columns. */ private final int ncol; /** * The number of nonzero entries in each column. */ private int[] colSize; /** * Constructor. * @param data The sample instances. */ public SparseDatasetImpl(Collection> data) { this(data, 1 + data.stream().flatMapToInt(instance -> instance.x().indexStream()).max().orElse(0)); } /** * Constructor. * @param data The sample instances. * @param ncol The number of columns. */ public SparseDatasetImpl(Collection> data, int ncol) { this.instances = new ArrayList<>(data); this.ncol = ncol; colSize = new int[ncol]; for (var instance : data) { var x = instance.x(); x.sort(); // sort array index into ascending order. 
int i = -1; // index of previous element for (SparseArray.Entry e : x) { if (e.index() < 0) { throw new IllegalArgumentException(String.format("Negative index of nonzero element: %d", e.index())); } if (e.index() == i) { logger.warn("Ignore duplicated indices: {} in {}", e.index(), x); } else { if (ncol <= e.index()) { ncol = e.index() + 1; int[] newColSize = new int[3 * ncol / 2]; System.arraycopy(colSize, 0, newColSize, 0, colSize.length); colSize = newColSize; } colSize[e.index()]++; n++; i = e.index(); } } } } @Override public int size() { return instances.size(); } @Override public int nz() { return n; } @Override public int nz(int j) { return colSize[j]; } @Override public int ncol() { return ncol; } @Override public SampleInstance get(int i) { return instances.get(i); } @Override public Stream> stream() { return instances.stream(); } @Override public Iterator> iterator() { return instances.iterator(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy