All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.tablesaw.api.Table Maven / Gradle / Ivy

There is a newer version: 0.43.1
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tech.tablesaw.api;

import static tech.tablesaw.aggregate.AggregateFunctions.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.commons.lang3.RandomUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.ints.IntComparator;
import it.unimi.dsi.fastutil.ints.IntIterable;
import it.unimi.dsi.fastutil.ints.IntIterator;
import tech.tablesaw.aggregate.AggregateFunction;
import tech.tablesaw.aggregate.SummaryFunction;
import tech.tablesaw.columns.Column;
import tech.tablesaw.filtering.Filter;
import tech.tablesaw.io.DataFrameReader;
import tech.tablesaw.io.DataFrameWriter;
import tech.tablesaw.io.html.HtmlTableWriter;
import tech.tablesaw.sorting.Sort;
import tech.tablesaw.sorting.Sort.Order;
import tech.tablesaw.store.StorageManager;
import tech.tablesaw.store.TableMetadata;
import tech.tablesaw.table.Projection;
import tech.tablesaw.table.Relation;
import tech.tablesaw.table.Rows;
import tech.tablesaw.table.ViewGroup;
import tech.tablesaw.util.BitmapBackedSelection;
import tech.tablesaw.util.IntComparatorChain;
import tech.tablesaw.util.ReversingIntComparator;
import tech.tablesaw.util.Selection;

/**
 * A table of data, consisting of some number of columns, each of which has the same number of rows.
 * All the data in a column has the same type: integer, float, category, etc., but a table may contain an arbitrary
 * number of columns of any type.
 * 

* Tables are the main data-type and primary focus of Tablesaw. */ public class Table extends Relation implements IntIterable { /** * The columns that hold the data in this table */ private final List columnList = new ArrayList<>(); /** * The name of the table */ private String name; /** * Returns a new table initialized with the given name */ private Table(String name) { this.name = name; } /** * Returns a new table initialized with data from the given TableMetadata object *

* The metadata is used by the storage module to save tables and read their data from disk */ private Table(TableMetadata metadata) { this.name = metadata.getName(); } /** * Returns a new Table initialized with the given names and columns * * @param name The name of the table * @param columns One or more columns, all of which must have either the same length or size 0 */ protected Table(String name, Column... columns) { this(name); for (Column column : columns) { this.addColumn(column); } } /** * Returns a new, empty table (without rows or columns) with the given name */ public static Table create(String tableName) { return new Table(tableName); } /** * Returns a new, empty table constructed according to the given metadata */ public static Table create(TableMetadata metadata) { return new Table(metadata); } /** * Returns a new table with the given columns and given name * * @param columns One or more columns, all of the same @code{column.size()} */ public static Table create(String tableName, Column... columns) { return new Table(tableName, columns); } /** * Returns a sort Key that can be used for simple or chained comparator sorting *

* You can extend the sort key by using .next() to fill more columns to the sort order */ private static Sort first(String columnName, Sort.Order order) { return Sort.on(columnName, order); } /** * Returns an object that can be used to sort this table in the order specified for by the given column names */ @VisibleForTesting public static Sort getSort(String... columnNames) { Sort key = null; for (String s : columnNames) { if (key == null) { key = first(s, Order.DESCEND); } else { key.next(s, Order.DESCEND); } } return key; } public static Table readTable(String tableNameAndPath) { Table t; try { t = StorageManager.readTable(tableNameAndPath); } catch (IOException e) { System.err.println("Unable to load table from Tablesaw table format"); e.printStackTrace(); return null; } return t; } public static DataFrameReader read() { return new DataFrameReader(); } public DataFrameWriter write() { return new DataFrameWriter(this); } /** * Returns an randomly generated array of ints of size N where Max is the largest possible value */ static int[] generateUniformBitmap(int N, int Max) { if (N > Max) { throw new IllegalArgumentException("Illegal arguments: N (" + N + ") greater than Max (" + Max + ")"); } int[] ans = new int[N]; if (N == Max) { for (int k = 0; k < N; ++k) ans[k] = k; return ans; } BitSet bs = new BitSet(Max); int cardinality = 0; while (cardinality < N) { int v = RandomUtils.nextInt(0, Max); if (!bs.get(v)) { bs.set(v); cardinality++; } } int pos = 0; for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { ans[pos++] = i; } return ans; } /** * Adds the given column to this table */ @Override public Table addColumn(Column... cols) { for (Column c : cols) { validateColumn(c); columnList.add(c); } return this; } /** * Throws an IllegalArgumentException if a column with the given name is already in the table */ private void validateColumn(Column newColumn) { Preconditions.checkNotNull(newColumn, "Attempted to add a null to the columns in table " + name); List stringList = new ArrayList<>(); for (String name : columnNames()) { stringList.add(name.toLowerCase()); } if (stringList.contains(newColumn.name().toLowerCase())) { String message = String.format("Cannot add column with duplicate name %s to table %s", newColumn, name); throw new IllegalArgumentException(message); } } /** * Adds the given column to this table at the given position in the column list * * @param index Zero-based index into the column list * @param column Column to be added */ public Table addColumn(int index, Column column) { validateColumn(column); columnList.add(index, column); return this; } /** * Sets the name of the table */ @Override public Table setName(String name) { this.name = name; return this; } /** * Returns the column at the given index in the column list * * @param columnIndex an integer at least 0 and less than number of columns in the table */ @Override public Column column(int columnIndex) { return columnList.get(columnIndex); } /** * Returns the number of columns in the table */ @Override public int columnCount() { return columnList.size(); } /** * Returns the number of rows in the table */ @Override public int rowCount() { int result = 0; if (!columnList.isEmpty()) { // all the columns have the same number of elements, so we can check any of them result = columnList.get(0).size(); } return result; } /** * Returns the list of columns */ @Override public List columns() { return columnList; } /** * Returns only the columns whose names are given in the input array */ public List columns(String... columnNames) { List columns = new ArrayList<>(); for (String columnName : columnNames) { columns.add(column(columnName)); } return columns; } /** * Returns the index of the column with the given name * * @throws IllegalArgumentException if the input string is not the name of any column in the table */ public int columnIndex(String columnName) { int columnIndex = -1; for (int i = 0; i < columnList.size(); i++) { if (columnList.get(i).name().equalsIgnoreCase(columnName)) { columnIndex = i; break; } } if (columnIndex == -1) { throw new IllegalArgumentException(String.format("Column %s is not present in table %s", columnName, name)); } return columnIndex; } /** * Returns the index of the given column (its position in the list of columns) *

* * @throws IllegalArgumentException if the column is not present in this table */ public int columnIndex(Column column) { int columnIndex = -1; for (int i = 0; i < columnList.size(); i++) { if (columnList.get(i).equals(column)) { columnIndex = i; break; } } if (columnIndex == -1) { throw new IllegalArgumentException( String.format("Column %s is not present in table %s", column.name(), name)); } return columnIndex; } /** * Returns the name of the table */ @Override public String name() { return name; } /** * Returns a List of the names of all the columns in this table */ public List columnNames() { List names = new ArrayList<>(columnList.size()); names.addAll(columnList.stream().map(Column::name).collect(Collectors.toList())); return names; } /** * Returns a string representation of the value at the given row and column indexes * * @param r the row index, 0 based * @param c the column index, 0 based */ @Override public String get(int r, int c) { Column column = column(c); return column.getString(r); } /** * Returns a table with the same columns as this table */ public Table fullCopy() { Table copy = new Table(name); for (Column column : columnList) { copy.addColumn(column.emptyCopy()); } IntArrayList integers = new IntArrayList(); for(int i = 0; i < rowCount(); i++) integers.add(i); Rows.copyRowsToTable(integers,this,copy); return copy; } /** * Returns a table with the same columns as this table, but no data */ public Table emptyCopy() { Table copy = new Table(name); for (Column column : columnList) { copy.addColumn(column.emptyCopy()); } return copy; } /** * Returns a table with the same columns as this table, but no data, initialized to the given row size */ public Table emptyCopy(int rowSize) { Table copy = new Table(name); for (Column column : columnList) { copy.addColumn(column.emptyCopy(rowSize)); } return copy; } /** * Splits the table into two, randomly assigning records to each according to the proportion given in * trainingProportion * * @param table1Proportion The proportion to go in the first table * @return An array two tables, with the first table having the proportion specified in the method parameter, * and the second table having the balance of the rows */ public Table[] sampleSplit(double table1Proportion) { Table[] tables = new Table[2]; int table1Count = (int) Math.round(rowCount() * table1Proportion); Selection table2Selection = new BitmapBackedSelection(); for (int i = 0; i < rowCount(); i++) { table2Selection.add(i); } Selection table1Selection = new BitmapBackedSelection(); int[] table1Records = generateUniformBitmap(table1Count, rowCount()); for (int i = 0; i < table1Records.length; i++) { table1Selection.add(table1Records[i]); } table2Selection.andNot(table1Selection); tables[0] = selectWhere(table1Selection); tables[1] = selectWhere(table2Selection); return tables; } /** * Returns a table consisting of randomly selected records from this table. The sample size is based on the * given proportion * * @param proportion The proportion to go in the sample */ public Table sample(double proportion) { int tableCount = (int) Math.round(rowCount() * proportion); Selection table1Selection = new BitmapBackedSelection(); int[] selectedRecords = generateUniformBitmap(tableCount, rowCount()); for (int selectedRecord : selectedRecords) { table1Selection.add(selectedRecord); } return selectWhere(table1Selection); } /** * Clears all the data from this table */ @Override public void clear() { columnList.forEach(Column::clear); } /** * Returns a new table containing the first {@code nrows} of data in this table */ public Table first(int nRows) { nRows = Math.min(nRows, rowCount()); Table newTable = emptyCopy(nRows); Rows.head(nRows, this, newTable); return newTable; } /** * Returns a new table containing the last {@code nrows} of data in this table */ public Table last(int nRows) { nRows = Math.min(nRows, rowCount()); Table newTable = emptyCopy(nRows); Rows.tail(nRows, this, newTable); return newTable; } /** * Returns a copy of this table sorted on the given column names, applied in order, *

* if column name starts with - then sort that column descending otherwise sort ascending */ public Table sortOn(String... columnNames) { Sort key = null; Order order; List names = new ArrayList<>(); for (String name : columnNames()) { names.add(name.toUpperCase()); } for (String columnName : columnNames) { if (names.contains(columnName.toUpperCase())) { // the column name has not been annotated with a prefix. order = Order.ASCEND; } else { // get the prefix which could be - or + String prefix = columnName.substring(0, 1); // remove - prefix so provided name matches actual column name columnName = columnName.substring(1, columnName.length()); switch (prefix) { case "+": order = Order.ASCEND; break; case "-": order = Order.DESCEND; break; default: throw new IllegalStateException("Column prefix: " + prefix + " is unknown."); } } if (key == null) { // key will be null the first time through key = first(columnName, order); } else { key.next(columnName, order); } } return sortOn(key); } /** * Returns a copy of this table sorted in the order of the given column names, in ascending order */ public Table sortAscendingOn(String... columnNames) { return this.sortOn(columnNames); } /** * Returns a copy of this table sorted on the given column names, applied in order, descending */ public Table sortDescendingOn(String... columnNames) { Sort key = getSort(columnNames); return sortOn(key); } /** */ public Table sortOn(Sort key) { Preconditions.checkArgument(!key.isEmpty()); if (key.size() == 1) { IntComparator comparator = getComparator(key); return sortOn(comparator); } IntComparatorChain chain = getChain(key); return sortOn(chain); } /** * Returns a comparator that can be used to sort the records in this table according to the given sort key */ public IntComparator getComparator(Sort key) { Iterator> entries = key.iterator(); Map.Entry sort = entries.next(); IntComparator comparator; if (sort.getValue() == Order.ASCEND) { comparator = rowComparator(sort.getKey(), false); } else { comparator = rowComparator(sort.getKey(), true); } return comparator; } /** * Returns a comparator chain for sorting according to the given key */ private IntComparatorChain getChain(Sort key) { Iterator> entries = key.iterator(); Map.Entry sort = entries.next(); IntComparator comparator; if (sort.getValue() == Order.ASCEND) { comparator = rowComparator(sort.getKey(), false); } else { comparator = rowComparator(sort.getKey(), true); } IntComparatorChain chain = new IntComparatorChain(comparator); while (entries.hasNext()) { sort = entries.next(); if (sort.getValue() == Order.ASCEND) { chain.addComparator(rowComparator(sort.getKey(), false)); } else { chain.addComparator(rowComparator(sort.getKey(), true)); } } return chain; } /** * Returns a copy of this table sorted using the given comparator */ public Table sortOn(IntComparator rowComparator) { Table newTable = emptyCopy(rowCount()); int[] newRows = rows(); IntArrays.parallelQuickSort(newRows, rowComparator); Rows.copyRowsToTable(IntArrayList.wrap(newRows), this, newTable); return newTable; } /** * Returns an array of ints of the same number of rows as the table */ @VisibleForTesting public int[] rows() { int[] rowIndexes = new int[rowCount()]; for (int i = 0; i < rowCount(); i++) { rowIndexes[i] = i; } return rowIndexes; } /** * Returns a comparator for the column matching the specified name * * @param columnName The name of the column to sort * @param reverse {@code true} if the column should be sorted in reverse */ private IntComparator rowComparator(String columnName, boolean reverse) { Column column = this.column(columnName); IntComparator rowComparator = column.rowComparator(); if (reverse) { return ReversingIntComparator.reverse(rowComparator); } else { return rowComparator; } } public Table selectWhere(Selection selection) { Table newTable = this.emptyCopy(selection.size()); Rows.copyRowsToTable(selection, this, newTable); return newTable; } public BooleanColumn selectIntoColumn(String newColumnName, Selection selection) { return new BooleanColumn(newColumnName, selection, rowCount()); } public Table selectWhere(Filter filter) { Selection map = filter.apply(this); Table newTable = this.emptyCopy(map.size()); Rows.copyRowsToTable(map, this, newTable); return newTable; } public BooleanColumn selectIntoColumn(String newColumnName, Filter filter) { return new BooleanColumn(newColumnName, filter.apply(this), rowCount()); } /** * The first stage of a split-apply-combine operation */ public ViewGroup groupBy(String... columns) { return groupBy(columns(columns).toArray(new Column[columns.length])); } /** * The first stage of a split-apply-combine operation */ public ViewGroup groupBy(Column... columns) { return new ViewGroup(this, columns); } /** * Synonymous with groupBy * The first stage of a split-apply-combine operation */ public ViewGroup splitOn(String... columns) { return groupBy(columns); } /** * Synonymous with groupBy * The first stage of a split-apply-combine operation */ public ViewGroup splitOn(Column... columns) { return groupBy(columns); } public String printHtml() { return HtmlTableWriter.write(this); } public Table structure() { Table t = new Table("Structure of " + name()); IntColumn index = new IntColumn("Index", columnCount()); CategoryColumn columnName = new CategoryColumn("Column Name", columnCount()); CategoryColumn columnType = new CategoryColumn("Column Type", columnCount()); t.addColumn(index); t.addColumn(columnName); t.addColumn(columnType); columnName.addAll(columnNames()); for (int i = 0; i < columnCount(); i++) { Column column = columnList.get(i); index.append(i); columnType.add(column.type().name()); } return t; } /** * Returns a table with the given rows selected * @param rows the rows to select * @return the table with the selected rows */ public Table selectRows(Collection rows) { Table newTable = emptyCopy(); Rows.copyRowsToTable(new IntArrayList(rows), this, newTable); return newTable; } /** * Returns a table with the given rows selected * @param start the first row to select * @param end the last row to select * @return the table with the selected rows */ public Table selectRows(int start, int end) { Table newTable = emptyCopy(); IntArrayList rowsToKeep = new IntArrayList(); for (int i = 0; i < rowCount(); i++) { if (i >= start && i <= end) { rowsToKeep.add(i); } } Rows.copyRowsToTable(rowsToKeep, this, newTable); return newTable; } /** * Returns a table with the given rows dropped * @param rows the rows to drop * @return the table with the dropped rows */ public Table dropRows(Collection rows) { Table newTable = emptyCopy(); IntArrayList rowsToKeep = new IntArrayList(); for (int i = 0; i < rowCount(); i++) { rowsToKeep.add(i); } rowsToKeep.removeAll(new IntArrayList(rows)); Rows.copyRowsToTable(rowsToKeep, this, newTable); return newTable; } /** * Returns a table with the given rows dropped * @param start the first row to drop * @param end the last row to drop * @return the table with the dropped rows */ public Table dropRows(int start, int end) { Table newTable = emptyCopy(); IntArrayList rowsToKeep = new IntArrayList(); for (int i = 0; i < rowCount(); i++) { if (i < start || i > end) { rowsToKeep.add(i); } } Rows.copyRowsToTable(rowsToKeep, this, newTable); return newTable; } /** * Returns the unique records in this table * Note: Uses a lot of memory for a sort */ public Table uniqueRecords() { Table sorted = this.sortOn(columnNames().toArray(new String[columns().size()])); Table temp = emptyCopy(); for (int row = 0; row < rowCount(); row++) { if (temp.isEmpty() || !Rows.compareRows(row, sorted, temp)) { Rows.appendRowToTable(row, sorted, temp); } } return temp; } public Projection select(String... columnName) { return new Projection(this, columnName); } /** * Removes the given columns */ @Override public Table removeColumns(Column... columns) { columnList.removeAll(Arrays.asList(columns)); return this; } /** * Removes the given column from this table and returns it * * @throws IllegalStateException if the given columnName does not match the name of a column in the table */ public Column getAndRemoveColumn(String columnName) { Column c = column(columnName); removeColumns(c); return c; } /** * Removes the given column from this table and returns it * * @throws IndexOutOfBoundsException if the given columnIndex does not match any column in the table */ public Column getAndRemoveColumn(int columnIndex) { Column c = column(columnIndex); removeColumns(c); return c; } /** * Removes the given columns from this table */ public void retainColumns(Column... columns) { List retained = Arrays.asList(columns); columnList.retainAll(retained); } public void retainColumns(String... columnNames) { columnList.retainAll(columns(columnNames)); } public SummaryFunction sum(String numericColumnName) { return new SummaryFunction(this, numericColumnName, sum); } public SummaryFunction mean(String numericColumnName) { return new SummaryFunction(this, numericColumnName, mean); } public SummaryFunction median(String numericColumnName) { return new SummaryFunction(this, numericColumnName, median); } public SummaryFunction variance(String numericColumnName) { return new SummaryFunction(this, numericColumnName, variance); } public SummaryFunction stdDev(String numericColumnName) { return new SummaryFunction(this, numericColumnName, stdDev); } public SummaryFunction count(String numericColumnName) { return new SummaryFunction(this, numericColumnName, count); } public SummaryFunction max(String numericColumnName) { return new SummaryFunction(this, numericColumnName, max); } public SummaryFunction min(String numericColumnName) { return new SummaryFunction(this, numericColumnName, min); } public void append(Table tableToAppend) { for (Column column : columnList) { Column columnToAppend = tableToAppend.column(column.name()); column.append(columnToAppend); } } public String save(String folder) { String storageFolder = ""; try { storageFolder = StorageManager.saveTable(folder, this); } catch (IOException e) { System.err.println("Unable to save table in Tablesaw format"); e.printStackTrace(); } return storageFolder; } /** * Returns the result of applying the given aggregate function to the specified column * * @param numericColumnName The name of a numeric (integer, float, etc.) column in this table * @param function An aggregation function * @return the function result * @throws IllegalArgumentException if numericColumnName doesn't name a numeric column in this table */ public double agg(String numericColumnName, AggregateFunction function) { Column column = column(numericColumnName); return function.agg(column.toDoubleArray()); } public SummaryFunction summarize(String numericColumnName, AggregateFunction function) { return new SummaryFunction(this, numericColumnName, function); } public Table countBy(CategoryColumn column) { return column.countByCategory(); } /** * Returns the first row for which the column {@code columnName} contains {@code value}, or * null if there are no matches * TODO(lwhite) This is a toy implementation badly in need of rewrite for performance. */ public int getFirst(Column column, String value) { int row = -1; for (int r : this) { if (column.getString(r).equals(value)) { row = r; break; } } return row; } @Override public IntIterator iterator() { return new IntIterator() { private int i = 0; @Override public int nextInt() { return i++; } @Override public int skip(int k) { return i + k; } @Override public boolean hasNext() { return i < rowCount(); } @Override public Integer next() { return i++; } }; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy