All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.tablesaw.columns.Column Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tech.tablesaw.columns;

import static tech.tablesaw.selection.Selection.selectNRowsAtRandom;

import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.ints.IntComparator;
import java.util.*;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Predicate;
import tech.tablesaw.api.ColumnType;
import tech.tablesaw.api.StringColumn;
import tech.tablesaw.api.Table;
import tech.tablesaw.interpolation.Interpolator;
import tech.tablesaw.selection.Selection;
import tech.tablesaw.table.RollingColumn;
import tech.tablesaw.util.StringUtils;

/**
 * The general interface for columns.
 *
 * 

Columns can either exist on their own or be a part of a table. All the data in a single column * is of a particular type. */ public interface Column extends Iterable, Comparator { /** Returns the number of elements in this column, including missing values */ int size(); /** Returns a table containing a ColumnType specific summary of the data in this column */ Table summary(); /** Returns an array of objects as appropriate for my type of column */ T[] asObjectArray(); /** * Returns the count of missing values in this column. * * @return missing values as int */ int countMissing(); /** * Returns the count of unique values in this column. * * @return unique values as int */ default int countUnique() { return unique().size(); } /** * Returns the column's name. * * @return name as String */ String name(); /** * Returns this column's ColumnType * * @return {@link ColumnType} */ ColumnType type(); /** * Returns the parser used by {@link #appendCell(String)} ()}. * * @return {@link AbstractColumnParser} */ AbstractColumnParser parser(); /** * Returns a string representation of the value at the given row. * * @param row The index of the row. * @return value as String */ String getString(int row); /** Returns the value at the given zero-based index */ T get(int row); /** * Reduction with binary operator and initial value * * @param initial initial value * @param op the operator * @return the result of reducing initial value and all rows with operator */ default T reduce(T initial, BinaryOperator op) { T acc = initial; for (T t : this) { acc = op.apply(acc, t); } return acc; } /** * Reduction with binary operator * * @param op the operator * @return Optional with the result of reducing all rows with operator */ default Optional reduce(BinaryOperator op) { boolean first = true; T acc = null; for (T t : this) { if (first) { acc = t; first = false; } else { acc = op.apply(acc, t); } } return (first ? Optional.empty() : Optional.of(acc)); } /** Removes all elements TODO: Make this return this column */ void clear(); /** Sorts my values in ascending order */ void sortAscending(); /** Sorts my values in descending order */ void sortDescending(); /** * Returns true if the column has no data * * @return true if empty, false if not */ boolean isEmpty(); /** Returns an IntComparator for sorting my rows */ IntComparator rowComparator(); default String title() { return "Column: " + name() + System.lineSeparator(); } /** Returns a selection containing an index for every missing value in this column */ Selection isMissing(); /** Returns a selection containing an index for every non-missing value in this column */ Selection isNotMissing(); /** * Returns the width of a cell in this column, in bytes. * * @return width in bytes */ int byteSize(); /** * Returns the contents of the cell at rowNumber as a byte[]. * * @param rowNumber index of the row * @return content as byte[] */ byte[] asBytes(int rowNumber); /** Returns a Set containing all the unique values in this column */ Set asSet(); /** * Returns a {@link RollingColumn} with the given windowSize, which can be used for performing * calculations on rolling subsets of my data * * @param windowSize The number of elements to include in each calculation * @return a RollingColumn */ default RollingColumn rolling(final int windowSize) { return new RollingColumn(this, windowSize); } /** Returns a String representation of the value at index r, without any formatting applied */ String getUnformattedString(int r); /** Returns true if the value at rowNumber is missing */ boolean isMissing(int rowNumber); /** TODO(lwhite): Print n from the top and bottom, like a table; */ default String print() { final StringBuilder builder = new StringBuilder(); builder.append(title()); for (int i = 0; i < size(); i++) { builder.append(getString(i)); builder.append(System.lineSeparator()); } return builder.toString(); } /** Returns the width of the column in characters, for printing */ default int columnWidth() { int width = name().length(); for (int rowNum = 0; rowNum < size(); rowNum++) { width = Math.max(width, StringUtils.length(getString(rowNum))); } return width; } /** * Returns a list of all the elements in this column * *

Note, if a value in the column is missing, a {@code null} is added in it's place */ default List asList() { List results = new ArrayList<>(); for (int i = 0; i < this.size(); i++) { if (isMissing(i)) { results.add(null); } else { results.add(get(i)); } } return results; } /** * Returns {@code true} if the given object appears in this column, and false otherwise * *

TODO override in column subtypes for performance */ default boolean contains(T object) { for (int i = 0; i < this.size(); i++) { if (object != null) { if (object.equals(get(i))) { return true; } } else { if (get(i) == null) return true; } } return false; } // functional methods corresponding to those in Stream /** * Counts the number of rows satisfying predicate, but only upto the max value * * @param test the predicate * @param max the maximum number of rows to count * @return the number of rows satisfying the predicate */ default int count(Predicate test, int max) { int count = 0; for (T t : this) { if (test.test(t)) { count++; if (max > 0 && count >= max) { return count; } } } return count; } /** * Counts the number of rows satisfying predicate * * @param test the predicate * @return the number of rows satisfying the predicate */ default int count(Predicate test) { return count(test, size()); } /** * Returns true if all rows satisfy the predicate, false otherwise * * @param test the predicate * @return true if all rows satisfy the predicate, false otherwise */ default boolean allMatch(Predicate test) { return count(test.negate(), 1) == 0; } /** * Returns true if any row satisfies the predicate, false otherwise * * @param test the predicate * @return true if any rows satisfies the predicate, false otherwise */ default boolean anyMatch(Predicate test) { return count(test, 1) > 0; } /** * Returns true if no row satisfies the predicate, false otherwise * * @param test the predicate * @return true if no row satisfies the predicate, false otherwise */ default boolean noneMatch(Predicate test) { return count(test, 1) == 0; } /** * Returns the maximum row according to the provided Comparator * * @param comp * @return the maximum row */ default Optional max(Comparator comp) { boolean first = true; T o1 = null; for (T o2 : this) { if (first) { o1 = o2; first = false; } else if (comp.compare(o1, o2) < 0) { o1 = o2; } } return (first ? Optional.empty() : Optional.of(o1)); } /** * Returns the minimum value according to the provided Comparator * * @param comp the Comparator to use * @return the minimum value */ default Optional min(Comparator comp) { boolean first = true; T o1 = null; for (T o2 : this) { if (first) { o1 = o2; first = false; } else if (comp.compare(o1, o2) > 0) { o1 = o2; } } return (first ? Optional.empty() : Optional.of(o1)); } /** * Maps the function across all rows, storing the results into the provided Column. * *

The target column must have at least the same number of rows. * * @param fun function to map * @param into Column into which results are set * @return the provided Column */ default > C mapInto(Function fun, C into) { for (int i = 0; i < size(); i++) { if (isMissing(i)) { into.setMissing(i); } else { into.set(i, fun.apply(get(i))); } } return into; } /** * Maps the function across all rows, appending the results to the created Column. * *

Example: * *

   * DoubleColumn d;
   * StringColumn s = d.map(String::valueOf, StringColumn::create);
   * 
* * @param fun function to map * @param creator the creator of the Column. Its String argument will be the name of the current * column (see {@link #name()}) * @return the Column with the results */ default > C map( Function fun, Function creator) { C into = creator.apply(name()); for (int i = 0; i < size(); i++) { if (isMissing(i)) { into.appendMissing(); } else { into.append(fun.apply(get(i))); } } return into; } /** * Sets the value at index i to the missing-value indicator for this column type, and return this * column */ Column setMissing(int i); /** * Sets the value of any missing data in the column to newValue and returns the same column * * @param newValue the value to be used for all missing data in this column * @return the column updated */ default Column setMissingTo(T newValue) { for (int i = 0; i < size(); i++) { if (isMissing(i)) { set(i, newValue); } } return this; } /** * Returns a new Column of the same type with only those rows satisfying the predicate * * @param test the predicate * @return a new Column of the same type with only those rows satisfying the predicate */ default Column filter(Predicate test) { Column result = emptyCopy(); for (T t : this) { if (test.test(t)) { result.append(t); } } return result; } /** * Return a column of the same type containing just those elements whose indexes are included in * the given array */ default Column subset(int[] rows) { final Column c = this.emptyCopy(); for (final int row : rows) { c.appendObj(get(row)); } return c; } /** * Returns a new Column of the same type sorted according to the provided Comparator * * @param comp the Comparator * @return a sorted Column */ default Column sorted(Comparator comp) { List list = asList(); list.sort(comp); Column result = emptyCopy(); for (T t : list) { result.append(t); } return result; } /** * Returns a copy of the receiver with no data. The column name and type are the same. * * @return a empty copy of {@link Column} */ Column emptyCopy(); /** * Returns a deep copy of the receiver * * @return a {@link Column} */ Column copy(); /** * Returns an empty copy of the receiver, with its internal storage initialized to the given row * size. * * @param rowSize the initial row size * @return a {@link Column} */ Column emptyCopy(int rowSize); /** * Maps the function across all rows, appending the results to a new Column of the same type * * @param fun function to map * @return the Column with the results */ default Column map(Function fun) { return mapInto(fun, emptyCopy(size())); } /** * Returns a column containing the element-wise min between this column and other column * *

TODO(lwhite) Override in column subtypes for better performance */ default Column min(Column other) { Preconditions.checkArgument(size() == other.size()); Column newCol = emptyCopy(); for (int i = 0; i < this.size(); i++) { if (isMissing(i) || other.isMissing(i)) { newCol.appendMissing(); } else { T thisValue = get(i); T otherValue = other.get(i); int result = compare(thisValue, otherValue); newCol.append(result <= 0 ? thisValue : otherValue); } } return newCol; } /** * Returns a column containing the element-wise min between this column and other column * *

TODO(lwhite) Override in column subtypes for better performance */ default Column max(Column other) { Preconditions.checkArgument(size() == other.size()); Column newCol = emptyCopy(); for (int i = 0; i < this.size(); i++) { if (isMissing(i) || other.isMissing(i)) { newCol.appendMissing(); } else { T thisValue = get(i); T otherValue = other.get(i); int result = compare(thisValue, otherValue); newCol.append(result >= 0 ? thisValue : otherValue); } } return newCol; } /** * Updates this column where values matching the selection are replaced with the corresponding * value from the given column */ default Column set(Predicate condition, Column other) { for (int row = 0; row < size(); row++) { if (condition.test(get(row))) { set(row, other.get(row)); } } return this; } /** * Updates this column where values matching the selection are replaced with the corresponding * value from the given column */ default Column set(Selection condition, Column other) { for (int row : condition) { set(row, other.get(row)); } return this; } /** * Returns a column of the same type as the receiver, containing the receivers values offset -n * For example if you lead a column containing 2, 3, 4 by 1, you get a column containing 3, 4, NA. */ default Column lead(final int n) { return lag(-n); } /** * Conditionally update this column, replacing current values with newValue for all rows where the * current value matches the selection criteria */ default Column set(Selection rowSelection, T newValue) { for (int row : rowSelection) { set(row, newValue); } return this; } /** * Returns a column of the same type and size as the receiver, containing the receivers values * offset by n. * *

For example if you lag a column containing 2, 3, 4 by 1, you get a column containing NA, 2, * 3 */ Column lag(int n); /** * Add one element to the bottom of this column and set its value to the parsed value of the given * String. Parsing is type-specific */ Column appendCell(String stringValue); /** * Add one element to the bottom of this column and set its value to the parsed value of the given * String, as performed by the given parser */ Column appendCell(String stringValue, AbstractColumnParser parser); /** Sets the value at index row to the given value and return this column */ Column set(int row, T value); /** * Sets the value at row to the parsed value of the given String using the given parser and * returns this column */ @SuppressWarnings("unchecked") default Column set(int row, String stringValue, AbstractColumnParser parser) { AbstractColumnParser typedParser = (AbstractColumnParser) parser; return set(row, typedParser.parse(stringValue)); } /** Sets the value at row to the value at sourceRow in the given column and return this column */ Column set(int row, Column sourceColumn, int sourceRow); /** Appends value to the bottom of this column and return this column */ Column append(T value); /** Appends all the values in the argument to the bottom of this column and return this column */ Column append(Column column); /** * Appends the value at the given row in the given column to the bottom of this column and return * this column */ Column append(Column column, int row); /** Appends the given value to the bottom of this column and return this column */ Column appendObj(Object value); /** Appends a missing value appropriate to the column */ Column appendMissing(); /** Returns an int suitable as a hash for the value in this column at the given index */ int valueHash(int rowNumber); /** Returns true if the value in this column at rowNumber1 is equal to the value at rowNumber2 */ boolean equals(int rowNumber1, int rowNumber2); /** Returns a new column containing the subset referenced by the {@link Selection} */ Column where(Selection selection); /** Returns a copy of this column with the missing values removed */ Column removeMissing(); /** * Returns a column of the same type containing only the unique values * * @return a {@link Column} */ Column unique(); /** Returns a column of the same type containing the first {@code numRows} of this column. */ default Column first(final int numRows) { int newRowCount = Math.min(numRows, size()); return inRange(0, newRowCount); } /** Returns a column of the same type containing the last {@code numRows} of this column. */ default Column last(final int numRows) { int newRowCount = Math.min(numRows, size()); return inRange(size() - newRowCount, size()); } /** * Sets the columns name to the given string * * @param name The new name MUST be unique for any table containing this column * @return this Column to allow method chaining */ Column setName(String name); /** * Sets the parser used by {@link #appendCell(String)} * * @param parser a column parser that converts text input to the column data type * @return this Column to allow method chaining */ Column setParser(AbstractColumnParser parser); /** * Returns a column containing the rows in this column beginning with start inclusive, and ending * with end exclusive */ default Column inRange(int start, int end) { Preconditions.checkArgument(start < end); Preconditions.checkArgument(end <= size()); return where(Selection.withRange(start, end)); } /** * Returns a column containing a random sample of the values in this column * * @param n the number of values to select * @return A column of the same type as the receiver */ default Column sampleN(int n) { Preconditions.checkArgument( n > 0 && n < size(), "The number of rows sampled must be greater than 0 and less than the number of rows in the table."); return where(selectNRowsAtRandom(n, size())); } /** * Returns a table consisting of randomly selected values from this column. The sample size is * based on the given proportion of the total number of cells in this column * * @param proportion The proportion to go in the sample */ default Column sampleX(double proportion) { Preconditions.checkArgument( proportion <= 1 && proportion >= 0, "The sample proportion must be between 0 and 1"); int tableSize = (int) Math.round(size() * proportion); return where(selectNRowsAtRandom(tableSize, size())); } /** * Provides the ability to create a new column with missing cells filled based off the value of * nearby cells. */ default Interpolator interpolate() { return new Interpolator<>(this); } /** * Returns a StringColumn consisting of the (unformatted) String representation of this column * values * * @return a {@link StringColumn} built using the column {@link #getUnformattedString} method */ StringColumn asStringColumn(); /** * Returns the index of the first occurrence of {@code o} in the column or -1 if the element is * not in the column. */ int indexOf(Object o); }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy