All Downloads are FREE. Search and download functionality uses the official Maven repository.

tech.tablesaw.table.ViewGroup Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tech.tablesaw.table;

import static tech.tablesaw.aggregate.AggregateFunctions.*;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;

import tech.tablesaw.aggregate.AggregateFunction;
import tech.tablesaw.aggregate.NumericSummaryTable;
import tech.tablesaw.api.CategoryColumn;
import tech.tablesaw.api.DoubleColumn;
import tech.tablesaw.api.Table;
import tech.tablesaw.columns.Column;
import tech.tablesaw.util.BitmapBackedSelection;
import tech.tablesaw.util.Selection;

/**
 * A group of tables formed by performing splitting operations on an original table.
 * <p>
 * The original table is sorted on the split columns, then cut into one {@link TemporaryView} per run of
 * consecutive rows that share the same values in those columns. Iterating over a ViewGroup yields those
 * sub-tables in the order in which they were created.
 */
public class ViewGroup implements Iterable<TemporaryView> {

    // Separator placed between the per-column values that make up a sub-table's name.
    // NOTE(review): a cell value that itself contains this separator would presumably confuse
    // splitGroupingColumn(), which splits the name on it again; confirm whether that can occur.
    private static final String SPLIT_STRING = "~~~";
    private static final Splitter SPLITTER = Splitter.on(SPLIT_STRING);

    // Name of the temporary column that holds the combined group key in a summary table
    private static final String GROUP_COLUMN_NAME = "Group";

    // the original table, sorted on the split columns
    private final Table sortedOriginal;

    // one view per distinct group, in the order the groups appear in sortedOriginal
    private final List<TemporaryView> subTables = new ArrayList<>();

    // the name(s) of the column(s) we're splitting the table on
    private final String[] splitColumnNames;

    /**
     * Sorts the given table on the given columns and splits it into one sub-table per group.
     *
     * @param original the table to split
     * @param columns  the columns whose values define the groups
     */
    public ViewGroup(Table original, Column... columns) {
        splitColumnNames = new String[columns.length];
        for (int i = 0; i < columns.length; i++) {
            splitColumnNames[i] = columns[i].name();
        }
        this.sortedOriginal = original.sortOn(splitColumnNames);
        splitOn(splitColumnNames);
    }

    /**
     * Creates a ViewGroup by splitting the given table on the columns with the given names.
     *
     * @param original     the table to split
     * @param columnsNames the names of the columns whose values define the groups
     * @return the new ViewGroup
     */
    public static ViewGroup create(Table original, String... columnsNames) {
        List<Column> columns = original.columns(columnsNames);
        return new ViewGroup(original, columns.toArray(new Column[columns.size()]));
    }

    /**
     * Splits the sortedOriginal table into sub-tables, grouping on the columns whose names are given in
     * splitColumnNames.
     * <p>
     * Because the table is already sorted on those columns, a group is a run of consecutive rows with the
     * same key; a new sub-table is started whenever the key of the current row differs from the previous key.
     */
    private void splitOn(String... columnNames) {

        List<Column> columns = sortedOriginal.columns(columnNames);
        int byteSize = getByteSize(columns);

        // The key columns and their positions do not change while we scan, so look them up once
        // instead of once per row.
        final int keyCount = columnNames.length;
        final Column[] keyColumns = new Column[keyCount];
        final int[] keyColumnIndexes = new int[keyCount];
        for (int col = 0; col < keyCount; col++) {
            keyColumns[col] = sortedOriginal.column(columnNames[col]);
            keyColumnIndexes[col] = sortedOriginal.columnIndex(keyColumns[col]);
        }

        byte[] currentKey = null;         // byte key of the group being collected; null before the first row
        String currentStringKey = null;   // printable key of that group; becomes the sub-table's name
        Selection selection = new BitmapBackedSelection();

        final int rowCount = sortedOriginal.rowCount();
        for (int row = 0; row < rowCount; row++) {

            // Build both forms of this row's key: the bytes are compared to detect group changes,
            // the string is used to name the resulting view.
            ByteBuffer byteBuffer = ByteBuffer.allocate(byteSize);
            StringBuilder stringKey = new StringBuilder();
            for (int col = 0; col < keyCount; col++) {
                if (col > 0) {
                    stringKey.append(SPLIT_STRING);
                }
                stringKey.append(sortedOriginal.get(row, keyColumnIndexes[col]));
                byteBuffer.put(keyColumns[col].asBytes(row));
            }
            byte[] newKey = byteBuffer.array();

            boolean startsNewGroup = currentKey == null || !Arrays.equals(newKey, currentKey);
            if (startsNewGroup) {
                if (currentKey != null) {
                    // close the group collected so far before starting the next one
                    addViewToSubTables(createView(currentStringKey, selection));
                    selection = new BitmapBackedSelection();
                }
                currentKey = newKey;
                currentStringKey = stringKey.toString();
            }
            selection.add(row);
        }

        // the last group is still open when the scan ends
        if (!selection.isEmpty()) {
            addViewToSubTables(createView(currentStringKey, selection));
        }
    }

    /**
     * Returns a view over the rows of sortedOriginal in the given selection, named with the given group key.
     */
    private TemporaryView createView(String name, Selection selection) {
        TemporaryView view = new TemporaryView(sortedOriginal, selection);
        view.setName(name);
        return view;
    }

    /**
     * Returns the sum of the byteSize() of the given columns, which is the buffer capacity needed to hold
     * one row's combined key.
     */
    private int getByteSize(List<Column> columns) {
        int byteSize = 0;
        for (Column c : columns) {
            byteSize += c.byteSize();
        }
        return byteSize;
    }

    private void addViewToSubTables(TemporaryView view) {
        subTables.add(view);
    }

    /**
     * Returns the sub-tables of this group. The returned list is the internal list, not a copy.
     */
    public List<TemporaryView> getSubTables() {
        return subTables;
    }

    /**
     * Returns the sub-table at the given position.
     *
     * @param i index into the list of sub-tables
     */
    public TemporaryView get(int i) {
        return subTables.get(i);
    }

    @VisibleForTesting
    public Table getSortedOriginal() {
        return sortedOriginal;
    }

    /**
     * Returns the number of sub-tables in this group.
     */
    public int size() {
        return subTables.size();
    }

    /**
     * For a subtable that is grouped by the values in more than one column, split the grouping column into separate
     * cols and return the revised view.
     * <p>
     * The new columns are empty copies of the original split columns, filled from the pieces of each row's
     * "Group" value and inserted at the front of the table; the "Group" column is then removed.
     *
     * @param groupTable the summary table containing a "Group" column; it is modified in place
     * @return the same table instance, for chaining
     */
    private NumericSummaryTable splitGroupingColumn(NumericSummaryTable groupTable) {

        List<Column> newColumns = new ArrayList<>();
        for (Column column : sortedOriginal.columns(splitColumnNames)) {
            newColumns.add(column.emptyCopy());
        }

        // iterate through the rows in the table and split each of the grouping columns into multiple columns
        CategoryColumn groupColumn = groupTable.categoryColumn(GROUP_COLUMN_NAME);
        final int rowCount = groupTable.rowCount();
        for (int row = 0; row < rowCount; row++) {
            List<String> strings = SPLITTER.splitToList(groupColumn.get(row));
            for (int col = 0; col < newColumns.size(); col++) {
                newColumns.get(col).appendCell(strings.get(col));
            }
        }

        for (int col = 0; col < newColumns.size(); col++) {
            groupTable.addColumn(col, newColumns.get(col));
        }
        groupTable.removeColumns(GROUP_COLUMN_NAME);
        return groupTable;
    }

    // Convenience methods: each one summarizes the named column, per sub-table, with the statically
    // imported aggregate function of the same name.

    /** Summarizes the named column per group using the {@code first} aggregate function. */
    public NumericSummaryTable first(String columnName) {
        return agg(columnName, first);
    }

    /** Summarizes the named column per group using the {@code last} aggregate function. */
    public NumericSummaryTable last(String columnName) {
        return agg(columnName, last);
    }

    /** Summarizes the named column per group using the {@code count} aggregate function. */
    public NumericSummaryTable count(String columnName) {
        return agg(columnName, count);
    }

    /** Summarizes the named column per group using the {@code mean} aggregate function. */
    public NumericSummaryTable mean(String columnName) {
        return agg(columnName, mean);
    }

    /** Summarizes the named column per group using the {@code sum} aggregate function. */
    public NumericSummaryTable sum(String columnName) {
        return agg(columnName, sum);
    }

    /** Summarizes the named column per group using the {@code median} aggregate function. */
    public NumericSummaryTable median(String columnName) {
        return agg(columnName, median);
    }

    /** Summarizes the named column per group using the {@code quartile1} aggregate function. */
    public NumericSummaryTable quartile1(String columnName) {
        return agg(columnName, quartile1);
    }

    /** Summarizes the named column per group using the {@code quartile3} aggregate function. */
    public NumericSummaryTable quartile3(String columnName) {
        return agg(columnName, quartile3);
    }

    /** Summarizes the named column per group using the {@code percentile90} aggregate function. */
    public NumericSummaryTable percentile90(String columnName) {
        return agg(columnName, percentile90);
    }

    /** Summarizes the named column per group using the {@code percentile95} aggregate function. */
    public NumericSummaryTable percentile95(String columnName) {
        return agg(columnName, percentile95);
    }

    /** Summarizes the named column per group using the {@code percentile99} aggregate function. */
    public NumericSummaryTable percentile99(String columnName) {
        return agg(columnName, percentile99);
    }

    /** Summarizes the named column per group using the {@code range} aggregate function. */
    public NumericSummaryTable range(String columnName) {
        return agg(columnName, range);
    }

    /** Summarizes the named column per group using the {@code min} aggregate function. */
    public NumericSummaryTable min(String columnName) {
        return agg(columnName, min);
    }

    /** Summarizes the named column per group using the {@code max} aggregate function. */
    public NumericSummaryTable max(String columnName) {
        return agg(columnName, max);
    }

    /** Summarizes the named column per group using the {@code product} aggregate function. */
    public NumericSummaryTable product(String columnName) {
        return agg(columnName, product);
    }

    /** Summarizes the named column per group using the {@code geometricMean} aggregate function. */
    public NumericSummaryTable geometricMean(String columnName) {
        return agg(columnName, geometricMean);
    }

    /** Summarizes the named column per group using the {@code populationVariance} aggregate function. */
    public NumericSummaryTable populationVariance(String columnName) {
        return agg(columnName, populationVariance);
    }

    /** Summarizes the named column per group using the {@code quadraticMean} aggregate function. */
    public NumericSummaryTable quadraticMean(String columnName) {
        return agg(columnName, quadraticMean);
    }

    /** Summarizes the named column per group using the {@code kurtosis} aggregate function. */
    public NumericSummaryTable kurtosis(String columnName) {
        return agg(columnName, kurtosis);
    }

    /** Summarizes the named column per group using the {@code skewness} aggregate function. */
    public NumericSummaryTable skewness(String columnName) {
        return agg(columnName, skewness);
    }

    /** Summarizes the named column per group using the {@code sumOfSquares} aggregate function. */
    public NumericSummaryTable sumOfSquares(String columnName) {
        return agg(columnName, sumOfSquares);
    }

    /** Summarizes the named column per group using the {@code sumOfLogs} aggregate function. */
    public NumericSummaryTable sumOfLogs(String columnName) {
        return agg(columnName, sumOfLogs);
    }

    /** Summarizes the named column per group using the {@code variance} aggregate function. */
    public NumericSummaryTable variance(String columnName) {
        return agg(columnName, variance);
    }

    /** Summarizes the named column per group using the {@code stdDev} aggregate function. */
    public NumericSummaryTable stdDev(String columnName) {
        return agg(columnName, stdDev);
    }

    /**
     * Applies the given aggregation to the given column.
     * The apply and combine steps of a split-apply-combine.
     */
    public NumericSummaryTable agg(String colName1, AggregateFunction func1) {
        return agg(ImmutableMap.of(colName1, func1));
    }

    /**
     * Applies the given two aggregations to their respective columns.
     * The apply and combine steps of a split-apply-combine.
     */
    public NumericSummaryTable agg(
            String colName1, AggregateFunction func1,
            String colName2, AggregateFunction func2) {
        return agg(ImmutableMap.of(
                colName1, func1,
                colName2, func2));
    }

    /**
     * Applies the given three aggregations to their respective columns.
     * The apply and combine steps of a split-apply-combine.
     */
    public NumericSummaryTable agg(
            String colName1, AggregateFunction func1,
            String colName2, AggregateFunction func2,
            String colName3, AggregateFunction func3) {
        return agg(ImmutableMap.of(
                colName1, func1,
                colName2, func2,
                colName3, func3));
    }

    /**
     * Applies the given four aggregations to their respective columns.
     * The apply and combine steps of a split-apply-combine.
     */
    public NumericSummaryTable agg(
            String colName1, AggregateFunction func1,
            String colName2, AggregateFunction func2,
            String colName3, AggregateFunction func3,
            String colName4, AggregateFunction func4) {
        return agg(ImmutableMap.of(
                colName1, func1,
                colName2, func2,
                colName3, func3,
                colName4, func4));
    }

    /**
     * Applies the given aggregations to the given columns.
     * The apply and combine steps of a split-apply-combine.
     * <p>
     * The result has one row per sub-table: the split columns come first, followed by one result column
     * per map entry, named {@code "<function name> [<column name>]"}.
     *
     * @param functions map from column name to aggregation to apply on that column
     * @return the summary table
     * @throws IllegalArgumentException if this group contains no sub-tables
     */
    public NumericSummaryTable agg(Map<String, AggregateFunction> functions) {
        Preconditions.checkArgument(!subTables.isEmpty(), "Cannot aggregate a ViewGroup with no sub-tables");
        NumericSummaryTable groupTable = NumericSummaryTable.create(sortedOriginal.name() + " summary");
        CategoryColumn groupColumn = new CategoryColumn(GROUP_COLUMN_NAME, subTables.size());
        groupTable.addColumn(groupColumn);

        // Fill the group column exactly once, independent of how many aggregations are requested, so it
        // always has one row per sub-table and stays aligned with every result column.
        for (TemporaryView subTable : subTables) {
            groupColumn.add(subTable.name());
        }

        for (Map.Entry<String, AggregateFunction> entry : functions.entrySet()) {
            String columnName = entry.getKey();
            AggregateFunction function = entry.getValue();

            String colName = aggregateColumnName(columnName, function.functionName());
            DoubleColumn resultColumn = new DoubleColumn(colName, subTables.size());
            for (TemporaryView subTable : subTables) {
                resultColumn.append(subTable.reduce(columnName, function));
            }
            groupTable.addColumn(resultColumn);
        }
        return splitGroupingColumn(groupTable);
    }

    /**
     * Returns an iterator over the sub-tables of this group.
     *
     * @return an Iterator.
     */
    @Override
    public Iterator<TemporaryView> iterator() {
        return subTables.iterator();
    }

    /**
     * Returns the name of a result column in the form {@code "<functionName> [<columnName>]"}.
     */
    private String aggregateColumnName(String columnName, String functionName) {
        return String.format("%s [%s]", functionName, columnName);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy