org.apache.mahout.math.FileBasedMatrix Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mahout-math Show documentation
Show all versions of mahout-math Show documentation
High performance scientific and technical computing data structures and methods,
mostly based on CERN's
Colt Java API
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.DoubleBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.List;
/**
 * Provides a way to get data from a file and treat it as if it were a matrix, but avoids putting all that
 * data onto the Java heap. Instead, the file is mapped into non-heap memory as one or more
 * {@link DoubleBuffer}s and we access those instead.
 *
 * <p>The file layout is the one produced by {@link #writeMatrix(File, Matrix)}: row-major, 8 bytes per
 * element, no header. Because a single mapping cannot exceed {@link Integer#MAX_VALUE} bytes, the file is
 * mapped as a sequence of blocks that each hold a whole number of rows.
 *
 * <p>The matrix is read-only: every mutating operation throws {@link UnsupportedOperationException}.
 */
public final class FileBasedMatrix extends AbstractMatrix {
  /** Number of bytes used to store one matrix element. */
  private static final int BYTES_PER_DOUBLE = 8;

  /** Largest number of bytes a single mapped block may cover. */
  private static final long MAX_BLOCK_BYTES = Integer.MAX_VALUE;

  /** Number of rows held by every block except possibly the last one; always at least 1. */
  private final int rowsPerBlock;

  /** The mapped blocks, in row order. Empty until {@link #setData(File, boolean)} has been called. */
  private final List<DoubleBuffer> content = Lists.newArrayList();

  /**
   * Constructs an empty matrix of the given size. No data is available until
   * {@link #setData(File, boolean)} is called.
   *
   * @param rows    The number of rows in the result.
   * @param columns The number of columns in the result.
   * @throws IllegalArgumentException if a single row is too large to fit into one mapped block
   */
  public FileBasedMatrix(int rows, int columns) {
    super(rows, columns);
    // Widen before multiplying: columns * 8 overflows int once columns reaches 2^28.
    long rowBytes = (long) columns * BYTES_PER_DOUBLE;
    Preconditions.checkArgument(rowBytes <= MAX_BLOCK_BYTES,
        "Too many columns (" + columns + ") to map a single row");
    long maxRows = rowBytes > 0 ? MAX_BLOCK_BYTES / rowBytes : Integer.MAX_VALUE;
    // Clamp to at least 1 so that an empty matrix never leads to a division by zero later on.
    rowsPerBlock = (int) Math.max(1L, Math.min((long) rows, maxRows));
  }

  /**
   * Maps the contents of a file as the data of this matrix, replacing any previously mapped data.
   *
   * @param f       the file to map; its length must be exactly {@code rows * columns * 8} bytes
   * @param loadNow if true, {@link MappedByteBuffer#load()} is called on every block as it is mapped
   * @throws IllegalArgumentException if the file does not have the expected length
   * @throws IOException              if the file cannot be opened or mapped
   */
  public void setData(File f, boolean loadNow) throws IOException {
    // All byte arithmetic is done in long; the int products overflow for matrices above 2^31 elements.
    long expectedLength = (long) rows * columns * BYTES_PER_DOUBLE;
    Preconditions.checkArgument(f.length() == expectedLength, "File " + f + " is wrong length");

    long blockBytes = (long) rowsPerBlock * columns * BYTES_PER_DOUBLE;
    List<DoubleBuffer> blocks = Lists.newArrayList();

    // One stream for all blocks, always closed. A mapping stays valid after its channel is closed.
    FileInputStream in = new FileInputStream(f);
    try {
      FileChannel channel = in.getChannel();
      // When the matrix is empty expectedLength is 0 and nothing is mapped.
      for (long start = 0; start < expectedLength; start += blockBytes) {
        long size = Math.min(expectedLength - start, blockBytes);
        MappedByteBuffer buf = channel.map(FileChannel.MapMode.READ_ONLY, start, size);
        if (loadNow) {
          buf.load();
        }
        blocks.add(buf.asDoubleBuffer());
      }
    } finally {
      in.close();
    }

    // Swap in the new blocks only after every mapping succeeded, so a failure leaves old data intact.
    content.clear();
    content.addAll(blocks);
  }

  /**
   * Writes a matrix to a file in the row-major, 8-bytes-per-element layout that
   * {@link #setData(File, boolean)} expects.
   *
   * @param f the output file; it must pass {@link File#canWrite()}, which requires that it already exists
   * @param m the matrix to write
   * @throws IllegalArgumentException if {@code f} is not writable
   * @throws IOException              if writing fails
   */
  public static void writeMatrix(File f, Matrix m) throws IOException {
    Preconditions.checkArgument(f.canWrite(), "Can't write to output file");
    FileOutputStream fos = new FileOutputStream(f);
    try {
      // One reusable buffer sized for exactly one row.
      ByteBuffer buf = ByteBuffer.allocate(m.columnSize() * BYTES_PER_DOUBLE);
      for (MatrixSlice row : m) {
        buf.clear();
        for (Vector.Element element : row.vector().all()) {
          buf.putDouble(element.get());
        }
        buf.flip();
        // The backing array is written in full; it holds exactly one row.
        fos.write(buf.array());
      }
    } finally {
      fos.close();
    }
  }

  /**
   * Not supported: this matrix is read-only.
   *
   * @param column the int column to assign
   * @param other  a Vector
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public Matrix assignColumn(int column, Vector other) {
    throw new UnsupportedOperationException("Default operation");
  }

  /**
   * Not supported: this matrix is read-only.
   *
   * @param row   the int row to assign
   * @param other a Vector
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public Matrix assignRow(int row, Vector other) {
    throw new UnsupportedOperationException("Default operation");
  }

  /**
   * Return the value at the given indexes, without checking bounds. Data must have been supplied with
   * {@link #setData(File, boolean)} first; otherwise the block lookup fails.
   *
   * @param row    an int row index
   * @param column an int column index
   * @return the double at the index
   */
  @Override
  public double getQuick(int row, int column) {
    int block = row / rowsPerBlock;
    int rowInBlock = row % rowsPerBlock;
    return content.get(block).get(rowInBlock * columns + column);
  }

  /**
   * Not supported.
   *
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public Matrix like() {
    throw new UnsupportedOperationException("Default operation");
  }

  /**
   * Returns an empty matrix of the specified size. The result is a heap-based {@link DenseMatrix},
   * not a FileBasedMatrix.
   *
   * @param rows    the int number of rows
   * @param columns the int number of columns
   * @return a new DenseMatrix of the requested size
   */
  @Override
  public Matrix like(int rows, int columns) {
    return new DenseMatrix(rows, columns);
  }

  /**
   * Not supported: this matrix is read-only.
   *
   * @param row    an int row index into the receiver
   * @param column an int column index into the receiver
   * @param value  a double value to set
   * @throws UnsupportedOperationException always
   */
  @Override
  public void setQuick(int row, int column, double value) {
    throw new UnsupportedOperationException("Default operation");
  }

  /**
   * Not supported.
   *
   * @param offset an int[2] offset into the receiver
   * @param size   the int[2] size of the desired result
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public Matrix viewPart(int[] offset, int[] size) {
    throw new UnsupportedOperationException("Default operation");
  }
}