All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.tablesaw.api.Table Maven / Gradle / Ivy

There is a newer version: 0.43.1
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tech.tablesaw.api;

import static java.util.stream.Collectors.toList;
import static tech.tablesaw.aggregate.AggregateFunctions.countMissing;
import static tech.tablesaw.api.QuerySupport.not;
import static tech.tablesaw.selection.Selection.selectNRowsAtRandom;

import com.google.common.annotations.Beta;
import com.google.common.base.Preconditions;
import com.google.common.collect.Streams;
import com.google.common.primitives.Ints;
import io.github.classgraph.ClassGraph;
import io.github.classgraph.ScanResult;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.ints.IntComparator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.IntFunction;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import tech.tablesaw.aggregate.AggregateFunction;
import tech.tablesaw.aggregate.CrossTab;
import tech.tablesaw.aggregate.PivotTable;
import tech.tablesaw.aggregate.Summarizer;
import tech.tablesaw.columns.Column;
import tech.tablesaw.io.DataFrameReader;
import tech.tablesaw.io.DataFrameWriter;
import tech.tablesaw.io.DataReader;
import tech.tablesaw.io.DataWriter;
import tech.tablesaw.io.ReaderRegistry;
import tech.tablesaw.io.WriterRegistry;
import tech.tablesaw.joining.DataFrameJoiner;
import tech.tablesaw.selection.BitmapBackedSelection;
import tech.tablesaw.selection.Selection;
import tech.tablesaw.sorting.Sort;
import tech.tablesaw.sorting.SortUtils;
import tech.tablesaw.sorting.comparators.IntComparatorChain;
import tech.tablesaw.table.*;

/**
 * A table of data, consisting of some number of columns, each of which has the same number of rows.
 * All the data in a column has the same type: integer, float, category, etc., but a table may
 * contain an arbitrary number of columns of any type.
 *
 * 

Tables are the main data-type and primary focus of Tablesaw. */ public class Table extends Relation implements Iterable { public static final ReaderRegistry defaultReaderRegistry = new ReaderRegistry(); public static final WriterRegistry defaultWriterRegistry = new WriterRegistry(); static { autoRegisterReadersAndWriters(); } /** The columns that hold the data in this table */ private final List> columnList = new ArrayList<>(); /** The name of the table */ private String name; // standard column names for melt and cast operations public static final String MELT_VARIABLE_COLUMN_NAME = "variable"; public static final String MELT_VALUE_COLUMN_NAME = "value"; /** Returns a new table */ private Table() {} /** Returns a new table initialized with the given name */ private Table(String name) { this.name = name; } /** * Returns a new Table initialized with the given names and columns * * @param name The name of the table * @param columns One or more columns, all of which must have either the same length or size 0 */ protected Table(String name, Column... 
columns) { this(name); for (final Column column : columns) { this.addColumns(column); } } /** * Returns a new Table initialized with the given names and columns * * @param name The name of the table * @param columns One or more columns, all of which must have either the same length or size 0 */ protected Table(String name, Collection> columns) { this(name); for (final Column column : columns) { this.addColumns(column); } } private static void autoRegisterReadersAndWriters() { try (ScanResult scanResult = new ClassGraph().enableAllInfo().whitelistPackages("tech.tablesaw.io").scan()) { List classes = new ArrayList<>(); classes.addAll(scanResult.getClassesImplementing(DataWriter.class.getName()).getNames()); classes.addAll(scanResult.getClassesImplementing(DataReader.class.getName()).getNames()); for (String clazz : classes) { try { Class.forName(clazz); } catch (ClassNotFoundException e) { throw new IllegalStateException(e); } } } } /** Returns a new, empty table (without rows or columns) */ public static Table create() { return new Table(); } /** Returns a new, empty table (without rows or columns) with the given name */ public static Table create(String tableName) { return new Table(tableName); } /** * Returns a new table with the given columns * * @param columns one or more columns, all of the same @code{column.size()} */ public static Table create(Column... 
columns) { return new Table(null, columns); } /** * Returns a new table with the given columns * * @param columns one or more columns, all of the same @code{column.size()} */ public static Table create(Collection> columns) { return new Table(null, columns); } /** * Returns a new table with the given columns * * @param columns one or more columns, all of the same @code{column.size()} */ public static Table create(Stream> columns) { return new Table(null, columns.collect(Collectors.toList())); } /** * Returns a new table with the given columns and given name * * @param name the name for this table * @param columns one or more columns, all of the same @code{column.size()} */ public static Table create(String name, Column... columns) { return new Table(name, columns); } /** * Returns a new table with the given columns and given name * * @param name the name for this table * @param columns one or more columns, all of the same @code{column.size()} */ public static Table create(String name, Collection> columns) { return new Table(name, columns); } /** * Returns a new table with the given columns and given name * * @param name the name for this table * @param columns one or more columns, all of the same @code{column.size()} */ public static Table create(String name, Stream> columns) { return new Table(name, columns.collect(Collectors.toList())); } /** * Returns a sort Key that can be used for simple or chained comparator sorting * *

You can extend the sort key by using .next() to fill more columns to the sort order */ private static Sort first(String columnName, Sort.Order order) { return Sort.on(columnName, order); } /** * Returns an object that can be used to sort this table in the order specified for by the given * column names */ private static Sort getSort(String... columnNames) { Sort key = null; for (String s : columnNames) { if (key == null) { key = first(s, Sort.Order.DESCEND); } else { key.next(s, Sort.Order.DESCEND); } } return key; } public static DataFrameReader read() { return new DataFrameReader(defaultReaderRegistry); } public DataFrameWriter write() { return new DataFrameWriter(defaultWriterRegistry, this); } /** Adds the given column to this table */ @Override public Table addColumns(final Column... cols) { for (final Column c : cols) { validateColumn(c); columnList.add(c); } return this; } /** * For internal Tablesaw use only * *

Adds the given column to this table without performing duplicate-name or column size checks */ public void internalAddWithoutValidation(final Column c) { columnList.add(c); } /** * Throws an IllegalArgumentException if a column with the given name is already in the table, or * if the number of rows in the column does not match the number of rows in the table */ private void validateColumn(final Column newColumn) { Preconditions.checkNotNull( newColumn, "Attempted to add a null to the columns in table " + name); List stringList = new ArrayList<>(); for (String name : columnNames()) { stringList.add(name.toLowerCase()); } if (stringList.contains(newColumn.name().toLowerCase())) { String message = String.format( "Cannot add column with duplicate name %s to table %s", newColumn.name(), name); throw new IllegalArgumentException(message); } checkColumnSize(newColumn); } /** * Throws an IllegalArgumentException if the column size doesn't match the rowCount() for the * table */ private void checkColumnSize(Column newColumn) { if (columnCount() != 0) { Preconditions.checkArgument( newColumn.size() == rowCount(), "Column " + newColumn.name() + " does not have the same number of rows as the other columns in the table."); } } /** * Adds the given column to this table at the given position in the column list * * @param index Zero-based index into the column list * @param column Column to be added */ public Table insertColumn(int index, Column column) { validateColumn(column); columnList.add(index, column); return this; } /** * Return a new table (shallow copy) that contains all the columns in this table, in the order * given in the argument. Throw an IllegalArgument exception if the number of names given does not * match the number of columns in this table. NOTE: This does not make a copy of the columns, so * they are shared between the two tables. * * @param columnNames a column name or array of names */ public Table reorderColumns(String... 
columnNames) { Preconditions.checkArgument(columnNames.length == columnCount()); Table table = Table.create(name); for (String name : columnNames) { table.addColumns(column(name)); } return table; } /** * Replaces an existing column (by index) in this table with the given new column * * @param colIndex Zero-based index of the column to be replaced * @param newColumn Column to be added */ public Table replaceColumn(final int colIndex, final Column newColumn) { removeColumns(column(colIndex)); return insertColumn(colIndex, newColumn); } /** * Replaces an existing column (by name) in this table with the given new column * * @param columnName String name of the column to be replaced * @param newColumn Column to be added */ public Table replaceColumn(final String columnName, final Column newColumn) { int colIndex = columnIndex(columnName); return replaceColumn(colIndex, newColumn); } /** * Replaces an existing column having the same name of the given column with the given column * * @param newColumn Column to be added */ public Table replaceColumn(Column newColumn) { return replaceColumn(newColumn.name(), newColumn); } /** Sets the name of the table */ @Override public Table setName(String name) { this.name = name; return this; } /** * Returns the column at the given index in the column list * * @param columnIndex an integer at least 0 and less than number of columns in the table */ @Override public Column column(int columnIndex) { return columnList.get(columnIndex); } /** Returns the number of columns in the table */ @Override public int columnCount() { return columnList.size(); } /** Returns the number of rows in the table */ @Override public int rowCount() { int result = 0; if (!columnList.isEmpty()) { // all the columns have the same number of elements, so we can check any of them result = columnList.get(0).size(); } return result; } /** Returns the list of columns */ @Override public List> columns() { return columnList; } public Column[] columnArray() { return 
columnList.toArray(new Column[columnCount()]); } /** Returns only the columns whose names are given in the input array */ @Override public List> categoricalColumns(String... columnNames) { List> columns = new ArrayList<>(); for (String columnName : columnNames) { columns.add(categoricalColumn(columnName)); } return columns; } /** * Returns the index of the column with the given name * * @throws IllegalArgumentException if the input string is not the name of any column in the table */ @Override public int columnIndex(String columnName) { int columnIndex = -1; for (int i = 0; i < columnList.size(); i++) { if (columnList.get(i).name().equalsIgnoreCase(columnName)) { columnIndex = i; break; } } if (columnIndex == -1) { throw new IllegalArgumentException( String.format("Column %s is not present in table %s", columnName, name)); } return columnIndex; } /** * Returns the index of the given column (its position in the list of columns) * *

* * @throws IllegalArgumentException if the column is not present in this table */ public int columnIndex(Column column) { int columnIndex = -1; for (int i = 0; i < columnList.size(); i++) { if (columnList.get(i).equals(column)) { columnIndex = i; break; } } if (columnIndex == -1) { throw new IllegalArgumentException( String.format("Column %s is not present in table %s", column.name(), name)); } return columnIndex; } /** Returns the name of the table */ @Override public String name() { return name; } /** Returns a List of the names of all the columns in this table */ public List columnNames() { return columnList.stream().map(Column::name).collect(toList()); } /** Returns a table with the same columns as this table */ public Table copy() { Table copy = new Table(name); for (Column column : columnList) { copy.addColumns(column.emptyCopy(rowCount())); } int[] rows = new int[rowCount()]; for (int i = 0; i < rowCount(); i++) { rows[i] = i; } Rows.copyRowsToTable(rows, this, copy); return copy; } /** Returns a table with the same columns as this table, but no data */ public Table emptyCopy() { Table copy = new Table(name); for (Column column : columnList) { copy.addColumns(column.emptyCopy()); } return copy; } /** * Returns a table with the same columns as this table, but no data, initialized to the given row * size */ public Table emptyCopy(int rowSize) { Table copy = new Table(name); for (Column column : columnList) { copy.addColumns(column.emptyCopy(rowSize)); } return copy; } /** * Splits the table into two, randomly assigning records to each according to the proportion given * in trainingProportion * * @param table1Proportion The proportion to go in the first table * @return An array two tables, with the first table having the proportion specified in the method * parameter, and the second table having the balance of the rows */ public Table[] sampleSplit(double table1Proportion) { Table[] tables = new Table[2]; int table1Count = (int) Math.round(rowCount() * 
table1Proportion); Selection table2Selection = new BitmapBackedSelection(); for (int i = 0; i < rowCount(); i++) { table2Selection.add(i); } Selection table1Selection = new BitmapBackedSelection(); Selection table1Records = selectNRowsAtRandom(table1Count, rowCount()); for (int table1Record : table1Records) { table1Selection.add(table1Record); } table2Selection.andNot(table1Selection); tables[0] = where(table1Selection); tables[1] = where(table2Selection); return tables; } /** * Splits the table into two stratified samples, this uses the specified column to divide the * table into groups, randomly assigning records to each according to the proportion given in * trainingProportion. * * @param column the column to be used for the stratified sampling * @param table1Proportion The proportion to go in the first table * @return An array two tables, with the first table having the proportion specified in the method * parameter, and the second table having the balance of the rows */ public Table[] stratifiedSampleSplit(CategoricalColumn column, double table1Proportion) { Preconditions.checkArgument( containsColumn(column), "The categorical column must be part of the table, you can create a string column and add it to this table before sampling."); final Table first = emptyCopy(); final Table second = emptyCopy(); splitOn(column) .asTableList() .forEach( tab -> { Table[] splits = tab.sampleSplit(table1Proportion); first.append(splits[0]); second.append(splits[1]); }); return new Table[] {first, second}; } /** * Returns a table consisting of randomly selected records from this table. 
The sample size is * based on the given proportion * * @param proportion The proportion to go in the sample */ public Table sampleX(double proportion) { Preconditions.checkArgument( proportion <= 1 && proportion >= 0, "The sample proportion must be between 0 and 1"); int tableSize = (int) Math.round(rowCount() * proportion); return where(selectNRowsAtRandom(tableSize, rowCount())); } /** * Returns a table consisting of randomly selected records from this table * * @param nRows The number of rows to go in the sample */ public Table sampleN(int nRows) { Preconditions.checkArgument( nRows > 0 && nRows < rowCount(), "The number of rows sampled must be greater than 0 and less than the number of rows in the table."); return where(selectNRowsAtRandom(nRows, rowCount())); } /** Clears all the data from this table */ @Override public void clear() { columnList.forEach(Column::clear); } /** Returns a new table containing the first {@code nrows} of data in this table */ public Table first(int nRows) { int newRowCount = Math.min(nRows, rowCount()); return inRange(0, newRowCount); } /** Returns a new table containing the last {@code nrows} of data in this table */ public Table last(int nRows) { int newRowCount = Math.min(nRows, rowCount()); return inRange(rowCount() - newRowCount, rowCount()); } /** * Sorts this table into a new table on the columns indexed * *

<p>if index is negative then sort that column in descending order otherwise sort ascending. Note
   * that index 0 cannot be negated, so the first column can only be sorted ascending here.
   *
   * @param columnIndexes the column positions to sort on, applied in order
   */
  public Table sortOn(int... columnIndexes) {
    List<String> names = new ArrayList<>();
    for (int i : columnIndexes) {
      if (i >= 0) {
        names.add(columnList.get(i).name());
      } else {
        // a "-" prefix marks the column as descending for sortOn(String...)
        names.add("-" + columnList.get(-i).name());
      }
    }
    return sortOn(names.toArray(new String[names.size()]));
  }

  /**
   * Returns a copy of this table sorted on the given column names, applied in order,
   *

<p>if column name starts with - then sort that column descending otherwise sort ascending
   */
  public Table sortOn(String... columnNames) {
    return this.sortOn(Sort.create(this, columnNames));
  }

  /**
   * Returns a copy of this table sorted in the order of the given column names, in ascending order
   */
  public Table sortAscendingOn(String... columnNames) {
    return this.sortOn(columnNames);
  }

  /**
   * Returns a copy of this table sorted on the given column names, applied in order, descending
   * TODO: Provide equivalent methods naming columns by index
   */
  public Table sortDescendingOn(String... columnNames) {
    Sort key = getSort(columnNames);
    return sortOn(key);
  }

  /**
   * Returns a copy of this table sorted using the given sort key.
   *
   * @param key to sort on.
   * @return a sorted copy of this table.
   * @throws IllegalArgumentException if the key is empty
   */
  public Table sortOn(Sort key) {
    Preconditions.checkArgument(!key.isEmpty());
    if (key.size() == 1) {
      IntComparator comparator = SortUtils.getComparator(this, key);
      return sortOn(comparator);
    }
    IntComparatorChain chain = SortUtils.getChain(this, key);
    return sortOn(chain);
  }

  /** Returns a copy of this table sorted using the given comparator over row indexes */
  private Table sortOn(IntComparator rowComparator) {
    Table newTable = emptyCopy(rowCount());

    int[] newRows = rows();
    IntArrays.parallelQuickSort(newRows, rowComparator);

    Rows.copyRowsToTable(newRows, this, newTable);
    return newTable;
  }

  /** Returns a copy of this table sorted using the given comparator */
  public Table sortOn(Comparator<Row> rowComparator) {
    // NOTE(review): row1/row2 are shared, mutable cursors and the delegate sorts with
    // IntArrays.parallelQuickSort; confirm the comparator is never invoked concurrently.
    Row row1 = new Row(this);
    Row row2 = new Row(this);
    return sortOn(
        (IntComparator)
            (k1, k2) -> {
              row1.at(k1);
              row2.at(k2);
              return rowComparator.compare(row1, row2);
            });
  }

  /** Returns an array holding the row indexes 0 to rowCount() - 1, in order */
  private int[] rows() {
    int[] rowIndexes = new int[rowCount()];
    for (int i = 0; i < rowCount(); i++) {
      rowIndexes[i] = i;
    }
    return rowIndexes;
  }

  /**
   * Adds a single row to this table from sourceTable, copying every column in sourceTable
   *
   * @param rowIndex The row in sourceTable to add to this table
   * @param sourceTable A table with the same column structure as this table
   */
  public void addRow(int rowIndex, Table sourceTable) {
    for (int i = 0; i < columnCount(); i++) {
      column(i).appendObj(sourceTable.column(i).get(rowIndex));
    }
  }

  /** Appends the values of the given row to this table, matching columns by position */
  public void addRow(Row row) {
    for (int i = 0; i < row.columnCount(); i++) {
      column(i).appendObj(row.getObject(i));
    }
  }

  /** Returns a new Row over this table, positioned at the given row index */
  public Row row(int rowIndex) {
    Row row = new Row(Table.this);
    row.at(rowIndex);
    return row;
  }

  /**
   * Returns a new table containing the rows of this table with the given row numbers
   *
   * @throws IllegalArgumentException if the largest row number is not less than rowCount()
   */
  public Table rows(int... rowNumbers) {
    // valid row indexes are 0..rowCount()-1, so the bound is strict
    Preconditions.checkArgument(Ints.max(rowNumbers) < rowCount());
    return where(Selection.with(rowNumbers));
  }

  /**
   * Returns a new table containing every row of this table except those with the given row numbers
   *
   * @throws IllegalArgumentException if the largest row number is not less than rowCount()
   */
  public Table dropRows(int... rowNumbers) {
    // valid row indexes are 0..rowCount()-1, so the bound is strict
    Preconditions.checkArgument(Ints.max(rowNumbers) < rowCount());
    Selection selection = Selection.withRange(0, rowCount()).andNot(Selection.with(rowNumbers));
    return where(selection);
  }

  /**
   * Retains the first rowCount rows if rowCount positive. Retains the last rowCount rows if
   * rowCount negative.
   */
  public Table inRange(int rowCount) {
    Preconditions.checkArgument(rowCount <= rowCount());
    int rowStart = rowCount >= 0 ? 0 : rowCount() + rowCount;
    int rowEnd = rowCount >= 0 ? rowCount : rowCount();
    return where(Selection.withRange(rowStart, rowEnd));
  }

  /**
   * Returns a new table containing the rows selected by {@code Selection.withRange(rowStart,
   * rowEnd)}
   *
   * @throws IllegalArgumentException if rowEnd is greater than rowCount()
   */
  public Table inRange(int rowStart, int rowEnd) {
    Preconditions.checkArgument(rowEnd <= rowCount());
    return where(Selection.withRange(rowStart, rowEnd));
  }

  /**
   * Drops the first rowCount rows if rowCount positive. Drops the last rowCount rows if rowCount
   * negative.
   */
  public Table dropRange(int rowCount) {
    Preconditions.checkArgument(rowCount <= rowCount());
    int rowStart = rowCount >= 0 ? rowCount : 0;
    int rowEnd = rowCount >= 0 ? rowCount() : rowCount() + rowCount;
    return where(Selection.withRange(rowStart, rowEnd));
  }

  /**
   * Returns a new table containing the rows selected by {@code Selection.withoutRange(0,
   * rowCount(), rowStart, rowEnd)}
   *
   * @throws IllegalArgumentException if rowEnd is greater than rowCount()
   */
  public Table dropRange(int rowStart, int rowEnd) {
    Preconditions.checkArgument(rowEnd <= rowCount());
    return where(Selection.withoutRange(0, rowCount(), rowStart, rowEnd));
  }

  /** Returns a new table containing the rows of this table that are in the given selection */
  public Table where(Selection selection) {
    Table newTable = this.emptyCopy(selection.size());
    Rows.copyRowsToTable(selection, this, newTable);
    return newTable;
  }

  /**
   * Returns a new table containing the rows of this table selected by applying the given function
   * to this table. The function is evaluated exactly once.
   */
  public Table where(Function<Table, Selection> selection) {
    return where(selection.apply(this));
  }

  /** Returns a new table without the rows selected by applying the given function to this table */
  public Table dropWhere(Function<Table, Selection> selection) {
    return where(not(selection));
  }

  /** Returns a new table containing the rows of this table that are not in the given selection */
  public Table dropWhere(Selection selection) {
    Selection opposite = new BitmapBackedSelection();
    opposite.addRange(0, rowCount());
    opposite.andNot(selection);
    Table newTable = this.emptyCopy(opposite.size());
    Rows.copyRowsToTable(opposite, this, newTable);
    return newTable;
  }

  /**
   * Returns a pivot on this table, where: The first column contains unique values from the index
   * column1 There are n additional columns, one for each unique value in column2 The values in each
   * of the cells in these new columns are the result of applying the given AggregateFunction to the
   * data in column3, grouped by the values of column1 and column2
   */
  public Table pivot(
      CategoricalColumn<?> column1,
      CategoricalColumn<?> column2,
      NumericColumn<?> column3,
      AggregateFunction<?, ?> aggregateFunction) {
    return PivotTable.pivot(this, column1, column2, column3, aggregateFunction);
  }

  /**
   * Returns a pivot on this table, where: The first column contains unique values from the index
   * column1 There are n additional columns, one for each unique value in column2 The values in each
   * of the cells in these new columns are the result of applying the given AggregateFunction to the
   * data in column3, grouped by the values of column1 and column2
   */
  public Table pivot(
      String column1Name,
      String column2Name,
      String column3Name,
      AggregateFunction<?, ?> aggregateFunction) {
    return pivot(
        categoricalColumn(column1Name),
        categoricalColumn(column2Name),
        numberColumn(column3Name),
        aggregateFunction);
  }

  /**
   * Returns a non-overlapping and exhaustive collection of "slices" over this table. Each slice is
   * like a virtual table containing a subset of the records in this table
   *

<p>This method is intended for advanced or unusual operations on the subtables. If you want to
   * calculate summary statistics for each subtable, the summarize methods (e.g)
   *
   * <p>table.summarize(myColumn, mean, median).by(columns)
   *
   * <p>are preferred
   *
   * @param columns the names of the categorical columns whose values define the slices
   */
  public TableSliceGroup splitOn(String... columns) {
    // Resolve the names to categorical columns, then delegate to the column-based overload.
    CategoricalColumn[] splitColumns =
        categoricalColumns(columns).toArray(new CategoricalColumn[columns.length]);
    return splitOn(splitColumns);
  }

  /**
   * Returns a non-overlapping and exhaustive collection of "slices" over this table. Each slice is
   * like a virtual table containing a subset of the records in this table
   *

<p>This method is intended for advanced or unusual operations on the subtables. If you want to
   * calculate summary statistics for each subtable, the summarize methods (e.g)
   *
   * <p>table.summarize(myColumn, mean, median).by(columns)
   *
   * <p>are preferred
   */
  public TableSliceGroup splitOn(CategoricalColumn<?>... columns) {
    return StandardTableSliceGroup.create(this, columns);
  }

  /**
   * Returns a table describing this table's columns, one row per column, with the columns "Index",
   * "Column Name" and "Column Type"
   */
  @Override
  public Table structure() {
    Table t = new Table("Structure of " + name());

    IntColumn index = IntColumn.indexColumn("Index", columnCount(), 0);
    StringColumn columnName = StringColumn.create("Column Name", columnCount());
    StringColumn columnType = StringColumn.create("Column Type", columnCount());
    t.addColumns(index);
    t.addColumns(columnName);
    t.addColumns(columnType);
    for (int i = 0; i < columnCount(); i++) {
      Column<?> column = columnList.get(i);
      columnType.set(i, column.type().name());
      // read the name straight from the column rather than rebuilding columnNames() each pass
      columnName.set(i, column.name());
    }
    return t;
  }

  /** Returns the unique records in this table Note: Uses a lot of memory for a sort */
  public Table dropDuplicateRows() {

    Table sorted = this.sortOn(columnNames().toArray(new String[columns().size()]));
    Table temp = emptyCopy();

    for (int row = 0; row < rowCount(); row++) {
      if (temp.isEmpty() || !Rows.compareRows(row, sorted, temp)) {
        Rows.appendRowToTable(row, sorted, temp);
      }
    }
    return temp;
  }

  /** Returns only those records in this table that have no columns with missing values */
  public Table dropRowsWithMissingValues() {

    Selection missing = new BitmapBackedSelection();
    for (int row = 0; row < rowCount(); row++) {
      for (int col = 0; col < columnCount(); col++) {
        Column<?> c = column(col);
        if (c.isMissing(row)) {
          missing.add(row);
          // one missing cell is enough to exclude the row
          break;
        }
      }
    }
    Selection notMissing = Selection.withRange(0, rowCount());
    notMissing.andNot(missing);
    Table temp = emptyCopy(notMissing.size());
    Rows.copyRowsToTable(notMissing, this, temp);
    return temp;
  }

  /** Returns a new table with this table's name that contains the given columns */
  public Table select(Column<?>... columns) {
    return new Table(this.name, columns);
  }

  /** Returns a new table with this table's name that contains the columns with the given names */
  public Table select(String... columnNames) {
    return Table.create(this.name, columns(columnNames).toArray(new Column<?>[0]));
  }

  /** Removes the given columns */
  @Override
  public Table removeColumns(Column<?>... columns) {
    columnList.removeAll(Arrays.asList(columns));
    return this;
  }

  /** Removes every column that has at least one missing value */
  public Table removeColumnsWithMissingValues() {
    removeColumns(columnList.stream().filter(x -> x.countMissing() > 0).toArray(Column<?>[]::new));
    return this;
  }

  /** Removes all columns except for those given in the argument from this table */
  public Table retainColumns(Column<?>... columns) {
    List<Column<?>> retained = Arrays.asList(columns);
    columnList.clear();
    columnList.addAll(retained);
    return this;
  }

  /** Removes all columns except for those given in the argument from this table */
  public Table retainColumns(String... columnNames) {
    List<Column<?>> retained = columns(columnNames);
    columnList.clear();
    columnList.addAll(retained);
    return this;
  }

  /**
   * Appends the data of the given table to this table; for each column of this table, the column
   * with the same name is looked up in {@code tableToAppend}
   *
   * @return this table
   */
  @SuppressWarnings({"rawtypes", "unchecked"})
  public Table append(Table tableToAppend) {
    for (final Column column : columnList) {
      final Column columnToAppend = tableToAppend.column(column.name());
      column.append(columnToAppend);
    }
    return this;
  }

  /**
   * Appends an empty row and returns a Row object indexed to the newly added row so values can be
   * set.
   *

Intended usage: * *

for (int i = 0; ...) { Row row = table.appendRow(); row.setString("name", "Bob"); * row.setFloat("IQ", 123.4f); ...etc. } */ public Row appendRow() { for (final Column column : columnList) { column.appendMissing(); } return row(rowCount() - 1); } /** * Add all the columns of tableToConcatenate to this table Note: The columns in the result must * have unique names, when compared case insensitive Note: Both tables must have the same number * of rows * * @param tableToConcatenate The table containing the columns to be added * @return This table */ public Table concat(Table tableToConcatenate) { Preconditions.checkArgument( tableToConcatenate.rowCount() == this.rowCount(), "Both tables must have the same number of rows to concatenate them."); for (Column column : tableToConcatenate.columns()) { this.addColumns(column); } return this; } public Summarizer summarize(String columName, AggregateFunction... functions) { return summarize(column(columName), functions); } public Summarizer summarize(List columnNames, AggregateFunction... functions) { return new Summarizer(this, columnNames, functions); } public Summarizer summarize( String numericColumn1Name, String numericColumn2Name, AggregateFunction... functions) { return summarize(column(numericColumn1Name), column(numericColumn2Name), functions); } public Summarizer summarize( String col1Name, String col2Name, String col3Name, AggregateFunction... functions) { return summarize(column(col1Name), column(col2Name), column(col3Name), functions); } public Summarizer summarize( String col1Name, String col2Name, String col3Name, String col4Name, AggregateFunction... functions) { return summarize( column(col1Name), column(col2Name), column(col3Name), column(col4Name), functions); } public Summarizer summarize(Column numberColumn, AggregateFunction... function) { return new Summarizer(this, numberColumn, function); } public Summarizer summarize( Column column1, Column column2, AggregateFunction... 
function) { return new Summarizer(this, column1, column2, function); } public Summarizer summarize( Column column1, Column column2, Column column3, AggregateFunction... function) { return new Summarizer(this, column1, column2, column3, function); } public Summarizer summarize( Column column1, Column column2, Column column3, Column column4, AggregateFunction... function) { return new Summarizer(this, column1, column2, column3, column4, function); } /** * Returns a table with n by m + 1 cells. The first column contains labels, the other cells * contains the counts for every unique combination of values from the two specified columns in * this table */ public Table xTabCounts(String column1Name, String column2Name) { return CrossTab.counts(this, categoricalColumn(column1Name), categoricalColumn(column2Name)); } public Table xTabRowPercents(String column1Name, String column2Name) { return CrossTab.rowPercents(this, column1Name, column2Name); } public Table xTabColumnPercents(String column1Name, String column2Name) { return CrossTab.columnPercents(this, column1Name, column2Name); } /** * Returns a table with n by m + 1 cells. 
The first column contains labels, the other cells
   * contain the proportion for a unique combination of values from the two specified columns in
   * this table
   */
  public Table xTabTablePercents(String column1Name, String column2Name) {
    return CrossTab.tablePercents(this, column1Name, column2Name);
  }

  /**
   * Returns a table with two columns, the first contains each unique value in the argument, and
   * the second contains the proportion of observations having that value
   */
  public Table xTabPercents(String column1Name) {
    return CrossTab.percents(this, column1Name);
  }

  /**
   * Returns a table with two columns, the first contains each unique value in the argument, and
   * the second contains the number of observations of each value
   */
  public Table xTabCounts(String column1Name) {
    return CrossTab.counts(this, column1Name);
  }

  /**
   * Returns a table containing two columns, the grouping column, and a column named "Count" that
   * contains the counts for each grouping column value
   */
  public Table countBy(CategoricalColumn<?> groupingColumn) {
    return groupingColumn.countByCategory();
  }

  /**
   * Returns a table containing two columns, the grouping column, and a column named "Count" that
   * contains the counts for each grouping column value
   *
   * @param categoricalColumnName The name of a CategoricalColumn in this table
   * @return A table containing counts of rows grouped by the categorical column
   * @throws ClassCastException if the categoricalColumnName parameter is the name of a column that
   *     does not implement CategoricalColumn
   */
  public Table countBy(String categoricalColumnName) {
    CategoricalColumn<?> groupingColumn = categoricalColumn(categoricalColumnName);
    return groupingColumn.countByCategory();
  }

  /**
   * Returns a new DataFrameJoiner initialized with multiple {@code columnNames}
   *
   * @param columnNames Name of the columns to join on.
   * @return The new DataFrameJoiner
   */
  public DataFrameJoiner joinOn(String... columnNames) {
    return new DataFrameJoiner(this, columnNames);
  }

  /**
   * Returns the table produced by summarizing every column of this table (by name) with the
   * {@code countMissing} aggregate function.
   */
  public Table missingValueCounts() {
    return summarize(columnNames(), countMissing).apply();
  }

  /**
   * Returns an iterator over the rows of this table. All calls are forwarded to one {@link Row}
   * cursor that is created together with the iterator.
   */
  @Override
  public Iterator<Row> iterator() {

    return new Iterator<Row>() {

      private final Row row = new Row(Table.this);

      @Override
      public Row next() {
        return row.next();
      }

      @Override
      public boolean hasNext() {
        return row.hasNext();
      }
    };
  }

  /**
   * Iterates over rolling sets of rows. I.e. 0 to n-1, 1 to n, 2 to n+1, etc.
   *
   * @param n the number of rows to return for each iteration; must be greater than 0
   * @throws IllegalArgumentException if n is not greater than 0
   */
  public Iterator<Row[]> rollingIterator(int n) {
    return rowWindowIterator(n, 1);
  }

  /**
   * Iterates over stepped sets of rows. I.e. 0 to n-1, n to 2n-1, 2n to 3n-1, etc. Only returns
   * full sets of rows.
   *
   * @param n the number of rows to return for each iteration; must be greater than 0
   * @throws IllegalArgumentException if n is not greater than 0
   */
  public Iterator<Row[]> steppingIterator(int n) {
    return rowWindowIterator(n, n);
  }

  /**
   * Returns an iterator over windows of {@code n} consecutive rows, where the start of the window
   * advances by {@code step} rows after every call to {@code next()}.
   */
  private Iterator<Row[]> rowWindowIterator(int n, int step) {
    // A window of 0 rows would make the stepping iterator spin forever, and a negative size
    // cannot be allocated, so reject both before any iteration starts.
    Preconditions.checkArgument(n > 0, "n must be greater than 0, but was %s", n);

    return new Iterator<Row[]>() {

      private int currRow = 0;

      @Override
      public Row[] next() {
        if (!hasNext()) {
          throw new NoSuchElementException();
        }
        Row[] rows = new Row[n];
        for (int i = 0; i < n; i++) {
          rows[i] = new Row(Table.this, currRow + i);
        }
        currRow += step;
        return rows;
      }

      @Override
      public boolean hasNext() {
        // Subtracting instead of adding keeps the comparison safe from int overflow for large n
        return currRow <= rowCount() - n;
      }
    };
  }

  /** Returns a stream over the rows of this table, backed by {@link #iterator()}. */
  public Stream<Row> stream() {
    return Streams.stream(iterator());
  }

  /**
   * Streams over stepped sets of rows. I.e. 0 to n-1, n to 2n-1, 2n to 3n-1, etc. Only returns full
   * sets of rows.
   *
   * @param n the number of rows to return for each iteration; must be greater than 0
   * @throws IllegalArgumentException if n is not greater than 0
   */
  public Stream<Row[]> steppingStream(int n) {
    return Streams.stream(steppingIterator(n));
  }

  /**
   * Streams over rolling sets of rows. I.e. 0 to n-1, 1 to n, 2 to n+1, etc.
   *
   * @param n the number of rows to return for each iteration; must be greater than 0
   * @throws IllegalArgumentException if n is not greater than 0
   */
  public Stream<Row[]> rollingStream(int n) {
    return Streams.stream(rollingIterator(n));
  }

  /**
   * Transposes data in the table, switching rows for columns. For example, a table like this.
   *
   * <pre>
   *  value1 | value2 |
   * -------------------------------
   *       1 |      2 |
   *     1.1 |    2.1 |
   *     1.2 |    2.2 |
   * </pre>
   *
   * <p>Is transposed into the following
   *
   * <pre>
   *       0 |      1 |      2 |
   * -------------------------------------
   *       1 |    1.1 |    1.2 |
   *       2 |    2.1 |    2.2 |
   * </pre>
*
   * <p>Neither the column headings nor the first column receive special treatment: this is the
   * two-flag variant called with both flags switched off.
   *
   * @return transposed table
   * @see Table#transpose(boolean,boolean)
   */
  public Table transpose() {
    final boolean includeColumnHeadingsAsFirstColumn = false;
    final boolean useFirstColumnForHeadings = false;
    return transpose(includeColumnHeadingsAsFirstColumn, useFirstColumnForHeadings);
  }

  /**
   * Transposes data in the table, switching rows for columns. For example, a table like this.
   *
   * <pre>
   *  label  | value1 | value2 |
   * -------------------------------
   *  row1   |      1 |      2 |
   *  row2   |    1.1 |    2.1 |
   *  row3   |    1.2 |    2.2 |
   * </pre>
   *
   * <p>Is transposed into the following
   *
   * <pre>
   *  label  |   row1 |   row2 |   row3 |
   * -------------------------------------
   *  value1 |      1 |    1.1 |    1.2 |
   *  value2 |      2 |    2.1 |    2.2 |
   * </pre>
* * @param includeColumnHeadingsAsFirstColumn Toggle whether to include the column headings as * first column in result * @param useFirstColumnForHeadings Use the first column as the column headings in the result. * Useful if the data set already has a first column which contains a set of labels * @return The transposed table */ public Table transpose( boolean includeColumnHeadingsAsFirstColumn, boolean useFirstColumnForHeadings) { if (this.columnCount() == 0) { return this; } // Validate first int columnOffset = useFirstColumnForHeadings ? 1 : 0; ColumnType resultColumnType = validateTableHasSingleColumnType(columnOffset); Table transposed = Table.create(this.name); if (includeColumnHeadingsAsFirstColumn) { String columnName = useFirstColumnForHeadings ? this.column(0).name() : "0"; StringColumn labelColumn = StringColumn.create(columnName); for (int i = columnOffset; i < this.columnCount(); i++) { Column columnToTranspose = this.column(i); labelColumn.append(columnToTranspose.name()); } transposed.addColumns(labelColumn); } if (useFirstColumnForHeadings) { transpose(transposed, resultColumnType, row -> String.valueOf(this.get(row, 0)), 1); } else { // default column labelling return transpose( transposed, resultColumnType, row -> String.valueOf(transposed.columnCount()), 0); } return transposed; } private ColumnType validateTableHasSingleColumnType(int startingColumn) { // If all columns are of the same type ColumnType[] columnTypes = this.columnTypes(); long distinctColumnTypesCount = Arrays.stream(columnTypes).skip(startingColumn).distinct().count(); if (distinctColumnTypesCount > 1) { throw new IllegalArgumentException( "This operation currently only supports tables where value columns are of the same type"); } return columnTypes[startingColumn]; } @SuppressWarnings({"unchecked", "rawtypes"}) private Table transpose( Table transposed, ColumnType resultColumnType, IntFunction columnNameExtractor, int startingColumn) { for (int row = 0; row < this.rowCount(); 
row++) { String columnName = columnNameExtractor.apply(row); Column column = resultColumnType.create(columnName); for (int col = startingColumn; col < this.columnCount(); col++) { column.append(this.column(col), row); } transposed.addColumns(column); } return transposed; } /** * Melt implements the 'tidy' melt operation as described in these papers by Hadley Wickham. * *

   * <p>Tidy concepts: <a
   * href="https://www.jstatsoft.org/article/view/v059i10">https://www.jstatsoft.org/article/view/v059i10</a>
   *
   * <p>Cast function details: <a
   * href="https://www.jstatsoft.org/article/view/v021i12">https://www.jstatsoft.org/article/view/v021i12</a>
   *
   * <p>In short, melt turns columns into rows, but in a particular way. Used with the cast method,
   * it can help make data tidy. In a tidy dataset, every variable is a column and every observation
   * a row.
   *
   * <p>This method returns a table that contains all the data in this table, but organized such
   * that there is a set of identifier variables (columns) and a single measured variable (column).
   * For example, given a table with columns:
   *
   * <p>patient_id, gender, age, weight, temperature,
   *
   * <p>it returns a table with the columns:
   *
   * <p>patient_id, variable, value
   *
   * <p>In the new format, the strings age, weight, and temperature have become cells in the
   * measurement table, such that a single row in the source table might look like this in the
   * result table:
   *
   * <pre>
   * 1234, gender, male
   * 1234, age, 42
   * 1234, weight, 186
   * 1234, temperature, 97.4
   * </pre>
   *

This kind of structure often makes for a good intermediate format for performing subsequent * transformations. It is especially useful when combined with the {@link #cast()} operation * * @param idVariables A list of column names intended to be used as identifiers. In he example, * only patient_id would be an identifier * @param measuredVariables A list of columns intended to be used as measured variables. All * columns must have the same type * @param dropMissing drop any row where the value is missing */ @Beta public Table melt( List idVariables, List> measuredVariables, Boolean dropMissing) { Table result = Table.create(name); for (String idColName : idVariables) { result.addColumns(column(idColName).type().create(idColName)); } result.addColumns( StringColumn.create(MELT_VARIABLE_COLUMN_NAME), DoubleColumn.create(MELT_VALUE_COLUMN_NAME)); List measureColumnNames = measuredVariables.stream().map(Column::name).collect(Collectors.toList()); TableSliceGroup slices = splitOn(idVariables.toArray(new String[0])); for (TableSlice slice : slices) { for (Row row : slice) { for (String colName : measureColumnNames) { if (!dropMissing || !row.isMissing(colName)) { writeIdVariables(idVariables, result, row); result.stringColumn(MELT_VARIABLE_COLUMN_NAME).append(colName); double value = row.getNumber(colName); result.doubleColumn(MELT_VALUE_COLUMN_NAME).append(value); } } } } return result; } /** Writes one row of id variables into the result table */ private void writeIdVariables(List idVariables, Table result, Row row) { for (String id : idVariables) { Column resultColumn = result.column(id); final ColumnType columnType = resultColumn.type(); if (columnType.equals(ColumnType.STRING)) { StringColumn sc = (StringColumn) resultColumn; sc.append(row.getString(resultColumn.name())); } else if (columnType.equals(ColumnType.TEXT)) { TextColumn sc = (TextColumn) resultColumn; sc.append(row.getString(resultColumn.name())); } else if (columnType.equals(ColumnType.INTEGER)) { 
IntColumn ic = (IntColumn) resultColumn; ic.append(row.getInt(resultColumn.name())); } else if (columnType.equals(ColumnType.LONG)) { LongColumn ic = (LongColumn) resultColumn; ic.append(row.getLong(resultColumn.name())); } else if (columnType.equals(ColumnType.SHORT)) { ShortColumn ic = (ShortColumn) resultColumn; ic.append(row.getShort(resultColumn.name())); } else if (columnType.equals(ColumnType.LOCAL_DATE)) { DateColumn ic = (DateColumn) resultColumn; ic.appendInternal(row.getPackedDate(resultColumn.name())); } else if (columnType.equals(ColumnType.LOCAL_DATE_TIME)) { DateTimeColumn ic = (DateTimeColumn) resultColumn; ic.appendInternal(row.getPackedDateTime(resultColumn.name())); } else if (columnType.equals(ColumnType.LOCAL_TIME)) { TimeColumn ic = (TimeColumn) resultColumn; ic.appendInternal(row.getPackedTime(resultColumn.name())); } else if (columnType.equals(ColumnType.INSTANT)) { InstantColumn ic = (InstantColumn) resultColumn; ic.appendInternal(row.getPackedInstant(resultColumn.name())); } else if (columnType.equals(ColumnType.BOOLEAN)) { BooleanColumn ic = (BooleanColumn) resultColumn; ic.append(row.getBooleanAsByte(resultColumn.name())); } else if (columnType.equals(ColumnType.DOUBLE)) { DoubleColumn ic = (DoubleColumn) resultColumn; ic.append(row.getDouble(resultColumn.name())); } else if (columnType.equals(ColumnType.FLOAT)) { FloatColumn ic = (FloatColumn) resultColumn; ic.append(row.getFloat(resultColumn.name())); } else { throw new IllegalArgumentException("melt() does not support column type " + columnType); } } } /** * Cast implements the 'tidy' cast operation as described in these papers by Hadley Wickham: * *

   * <p>Cast takes a table in 'molten' format, such as is produced by the {@link #melt(List, List,
   * Boolean)} method, and returns a version in standard tidy format.
   *
   * <p>The molten table should have a StringColumn called "variable" and a column called "value".
   * Every unique variable name will become a column in the output table.
   *
   * <p>All other columns in this table are considered identifier variables. Each combination of
   * identifier variables specifies an observation, so there will be one row for each, with the
   * other variables added.
   *
   * <p>Variable columns are returned in an arbitrary order. Use {@link #reorderColumns(String...)}
   * if column order is important.
   *
   * <p>Tidy concepts: <a
   * href="https://www.jstatsoft.org/article/view/v059i10">https://www.jstatsoft.org/article/view/v059i10</a>
   *

Cast function details: {@see https://www.jstatsoft.org/article/view/v021i12} */ @Beta public Table cast() { StringColumn variableNames = stringColumn(MELT_VARIABLE_COLUMN_NAME); List> idColumns = columnList.stream() .filter( column -> !column.name().equals(MELT_VARIABLE_COLUMN_NAME) && !column.name().equals(MELT_VALUE_COLUMN_NAME)) .collect(toList()); Table result = Table.create(name); for (Column idColumn : idColumns) { result.addColumns(idColumn.type().create(idColumn.name())); } StringColumn uniqueVariableNames = variableNames.unique(); for (String varName : uniqueVariableNames) { result.addColumns(DoubleColumn.create(varName)); } TableSliceGroup slices = splitOn(idColumns.stream().map(Column::name).toArray(String[]::new)); for (TableSlice slice : slices) { Table sliceTable = slice.asTable(); for (Column idColumn : idColumns) { final ColumnType columnType = idColumn.type(); if (columnType.equals(ColumnType.STRING)) { StringColumn source = (StringColumn) sliceTable.column(idColumn.name()); StringColumn dest = (StringColumn) result.column(idColumn.name()); dest.append(source.get(0)); } else if (columnType.equals(ColumnType.TEXT)) { TextColumn source = (TextColumn) sliceTable.column(idColumn.name()); TextColumn dest = (TextColumn) result.column(idColumn.name()); dest.append(source.get(0)); } else if (columnType.equals(ColumnType.INTEGER)) { IntColumn source = (IntColumn) sliceTable.column(idColumn.name()); IntColumn dest = (IntColumn) result.column(idColumn.name()); dest.append(source.get(0)); } else if (columnType.equals(ColumnType.LONG)) { LongColumn source = (LongColumn) sliceTable.column(idColumn.name()); LongColumn dest = (LongColumn) result.column(idColumn.name()); dest.append(source.get(0)); } else if (columnType.equals(ColumnType.SHORT)) { ShortColumn source = (ShortColumn) sliceTable.column(idColumn.name()); ShortColumn dest = (ShortColumn) result.column(idColumn.name()); dest.append(source.get(0)); } else if (columnType.equals(ColumnType.BOOLEAN)) { 
BooleanColumn source = (BooleanColumn) sliceTable.column(idColumn.name()); BooleanColumn dest = (BooleanColumn) result.column(idColumn.name()); dest.append(source.get(0)); } else if (columnType.equals(ColumnType.LOCAL_DATE)) { DateColumn source = (DateColumn) sliceTable.column(idColumn.name()); DateColumn dest = (DateColumn) result.column(idColumn.name()); dest.append(source.get(0)); } else if (columnType.equals(ColumnType.LOCAL_DATE_TIME)) { DateTimeColumn source = (DateTimeColumn) sliceTable.column(idColumn.name()); DateTimeColumn dest = (DateTimeColumn) result.column(idColumn.name()); dest.append(source.get(0)); } else if (columnType.equals(ColumnType.INSTANT)) { InstantColumn source = (InstantColumn) sliceTable.column(idColumn.name()); InstantColumn dest = (InstantColumn) result.column(idColumn.name()); dest.append(source.get(0)); } else if (columnType.equals(ColumnType.LOCAL_TIME)) { TimeColumn source = (TimeColumn) sliceTable.column(idColumn.name()); TimeColumn dest = (TimeColumn) result.column(idColumn.name()); dest.append(source.get(0)); } } for (String varName : uniqueVariableNames) { DoubleColumn dest = (DoubleColumn) result.column(varName); Table sliceRow = sliceTable.where(sliceTable.stringColumn(MELT_VARIABLE_COLUMN_NAME).isEqualTo(varName)); if (!sliceRow.isEmpty()) { dest.append(sliceRow.doubleColumn(MELT_VALUE_COLUMN_NAME).get(0)); } else { dest.appendMissing(); } } } return result; } /** * Applies the operation in {@code doable} to every row in the table * * @deprecated use {@code stream().forEach} */ @Deprecated public void doWithRows(Consumer doable) { stream().forEach(doable); } /** * Applies the predicate to each row, and return true if any row returns true * * @deprecated use {@code stream().anyMatch} */ @Deprecated public boolean detect(Predicate predicate) { return stream().anyMatch(predicate); } /** @deprecated use steppingStream(n).forEach(rowConsumer) */ @Deprecated public void stepWithRows(Consumer rowConsumer, int n) { 
steppingStream(n).forEach(rowConsumer); } /** @deprecated use stream(2).forEach(rowConsumer) */ @Deprecated public void doWithRows(Pairs pairs) { rollingStream(2).forEach(rows -> pairs.doWithPair(rows[0], rows[1])); } /** @deprecated use stream(2).forEach(rowConsumer) */ @Deprecated public void doWithRowPairs(Consumer pairConsumer) { rollingStream(2).forEach(rows -> pairConsumer.accept(new RowPair(rows[0], rows[1]))); } /** @deprecated use stream(n).forEach(rowConsumer) */ @Deprecated public void rollWithRows(Consumer rowConsumer, int n) { rollingStream(n).forEach(rowConsumer); } @Deprecated public static class RowPair { private final Row first; private final Row second; public RowPair(Row first, Row second) { this.first = first; this.second = second; } public Row getFirst() { return first; } public Row getSecond() { return second; } } @Deprecated interface Pairs { void doWithPair(Row row1, Row row2); /** * Returns an object containing the results of applying doWithPair() to the rows in a table. * *

The default implementation throws an exception, to be used if the operation produces only * side effects */ default Object getResult() { throw new UnsupportedOperationException("This Pairs function returns no results"); } } }





// © 2015 - 2025 Weber Informatics LLC | Privacy Policy