All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.tablesaw.api.BooleanColumn Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tech.tablesaw.api;

import static com.google.common.base.Preconditions.checkArgument;

import it.unimi.dsi.fastutil.booleans.BooleanIterable;
import it.unimi.dsi.fastutil.booleans.BooleanIterator;
import it.unimi.dsi.fastutil.booleans.BooleanOpenHashSet;
import it.unimi.dsi.fastutil.booleans.BooleanSet;
import it.unimi.dsi.fastutil.bytes.Byte2IntMap;
import it.unimi.dsi.fastutil.bytes.Byte2IntOpenHashMap;
import it.unimi.dsi.fastutil.bytes.ByteArrayList;
import it.unimi.dsi.fastutil.bytes.ByteComparators;
import it.unimi.dsi.fastutil.bytes.ByteIterator;
import it.unimi.dsi.fastutil.bytes.ByteListIterator;
import it.unimi.dsi.fastutil.bytes.ByteOpenHashSet;
import it.unimi.dsi.fastutil.bytes.ByteSet;
import it.unimi.dsi.fastutil.ints.IntComparator;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.function.BiPredicate;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Stream;
import tech.tablesaw.columns.AbstractColumn;
import tech.tablesaw.columns.AbstractColumnParser;
import tech.tablesaw.columns.Column;
import tech.tablesaw.columns.booleans.BooleanColumnType;
import tech.tablesaw.columns.booleans.BooleanColumnUtils;
import tech.tablesaw.columns.booleans.BooleanFillers;
import tech.tablesaw.columns.booleans.BooleanFilters;
import tech.tablesaw.columns.booleans.BooleanFormatter;
import tech.tablesaw.columns.booleans.BooleanMapUtils;
import tech.tablesaw.filtering.BooleanFilterSpec;
import tech.tablesaw.filtering.predicates.BytePredicate;
import tech.tablesaw.selection.BitmapBackedSelection;
import tech.tablesaw.selection.Selection;

/** A column that contains boolean values */
public class BooleanColumn extends AbstractColumn
    implements BooleanFilterSpec,
        BooleanMapUtils,
        CategoricalColumn,
        BooleanFillers,
        BooleanFilters {

  /** The data held by this column */
  protected ByteArrayList data;

  /** An IntComparator. The ints are row indexes */
  private final IntComparator comparator =
      (r1, r2) -> {
        byte f1 = getByte(r1);
        byte f2 = getByte(r2);
        return Byte.compare(f1, f2);
      };

  /** The print formatter for this column, if any */
  private BooleanFormatter formatter = new BooleanFormatter("true", "false", "");

  /**
   * Constructs a BooleanColumn with the given name and values.
   *
   * @param name The column name
   * @param values The values: 0 is false, 1 is true, Byte.MIN_VALUE is the missing value indicator
   */
  private BooleanColumn(String name, ByteArrayList values) {
    super(BooleanColumnType.instance(), name, BooleanColumnType.DEFAULT_PARSER);
    data = values;
  }

  /** Returns {@code true} if b is the missing value indicator for this column type */
  public static boolean valueIsMissing(byte b) {
    return BooleanColumnType.valueIsMissing(b);
  }

  /** {@inheritDoc} */
  @Override
  public boolean isMissing(int rowNumber) {
    return valueIsMissing(getByte(rowNumber));
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn setMissing(int i) {
    set(i, BooleanColumnType.missingValueIndicator());
    return this;
  }

  /**
   * Returns a new Boolean column of the given size. Elements indexed by the selection are set to
   * true
   *
   * @param name The column name
   * @param hits The true values
   * @param columnSize The column size
   * @return A new BooleanColumn
   */
  public static BooleanColumn create(String name, Selection hits, int columnSize) {
    BooleanColumn column = create(name, columnSize);
    checkArgument(
        (hits.size() <= columnSize),
        "Cannot have more true values than total values in a boolean column");

    for (int hit : hits) {
      column.set(hit, true);
    }
    column.set(column.isMissing(), false);
    return column;
  }

  /**
   * Returns a new, empty Boolean column with the given name.
   *
   * @param name The column name
   * @return A new BooleanColumn
   */
  public static BooleanColumn create(String name) {
    return new BooleanColumn(name, new ByteArrayList(DEFAULT_ARRAY_SIZE));
  }

  /**
   * Returns a new Boolean column of the given size. All elements are false
   *
   * @param name The column name
   * @param initialSize The column size
   * @return A new BooleanColumn
   */
  public static BooleanColumn create(String name, int initialSize) {
    BooleanColumn column = new BooleanColumn(name, new ByteArrayList(initialSize));
    for (int i = 0; i < initialSize; i++) {
      column.appendMissing();
    }
    return column;
  }

  /** Returns a new Boolean column with the given name and values */
  public static BooleanColumn create(String name, boolean... values) {
    BooleanColumn column = create(name, values.length);
    int r = 0;
    for (boolean b : values) {
      column.set(r, b);
      r++;
    }
    return column;
  }

  /** Returns a new Boolean column with the given name and values */
  public static BooleanColumn create(String name, Collection values) {
    BooleanColumn column = create(name);
    for (Boolean b : values) {
      column.append(b);
    }
    return column;
  }

  /** Returns a new Boolean column with the given name and values */
  public static BooleanColumn create(String name, Boolean[] values) {
    BooleanColumn column = create(name);
    for (Boolean val : values) {
      column.append(val);
    }
    return column;
  }

  /** Returns a new Boolean column with the given name and values */
  public static BooleanColumn create(String name, Stream values) {
    BooleanColumn column = create(name);
    values.forEach(column::append);
    return column;
  }

  /** Sets the print formatter for this column */
  public void setPrintFormatter(BooleanFormatter formatter) {
    this.formatter = formatter;
  }

  /** Returns the print formatter for this column */
  public BooleanFormatter getPrintFormatter() {
    return formatter;
  }

  /** {@inheritDoc} */
  @Override
  public int size() {
    return data.size();
  }

  /** {@inheritDoc} */
  @Override
  public Table summary() {
    Byte2IntMap counts = new Byte2IntOpenHashMap(3);
    counts.put(BooleanColumnType.BYTE_FALSE, 0);
    counts.put(BooleanColumnType.BYTE_TRUE, 0);

    for (byte next : data) {
      counts.put(next, counts.get(next) + 1);
    }

    Table table = Table.create(name());

    StringColumn label = StringColumn.create("Value");
    DoubleColumn countColumn = DoubleColumn.create("Count");
    table.addColumns(label);
    table.addColumns(countColumn);

    for (Map.Entry entry : counts.byte2IntEntrySet()) {
      label.append(entry.getKey() == 1 ? "true" : "false");
      countColumn.append(entry.getValue());
    }
    return table;
  }

  /** Returns the count of missing values in this column */
  @Override
  public int countMissing() {
    int count = 0;
    for (int i = 0; i < size(); i++) {
      if (valueIsMissing(getByte(i))) {
        count++;
      }
    }
    return count;
  }

  /** {@inheritDoc} */
  @Override
  public int countUnique() {
    ByteSet count = new ByteOpenHashSet(3);
    for (byte next : data) {
      count.add(next);
    }
    return count.size();
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn unique() {
    ByteSet count = new ByteOpenHashSet(3);
    for (byte next : data) {
      count.add(next);
    }
    ByteArrayList list = new ByteArrayList(count);
    return new BooleanColumn(name() + " Unique values", list);
  }

  /** Appends b to the end of this column and returns this column */
  public BooleanColumn append(boolean b) {
    if (b) {
      data.add(BooleanColumnType.BYTE_TRUE);
    } else {
      data.add(BooleanColumnType.BYTE_FALSE);
    }
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn append(Boolean b) {
    if (b == null) {
      appendMissing();
    } else if (b) {
      data.add(BooleanColumnType.BYTE_TRUE);
    } else {
      data.add(BooleanColumnType.BYTE_FALSE);
    }
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn appendObj(Object obj) {
    if (obj == null) {
      return appendMissing();
    }
    if (!(obj instanceof Boolean)) {
      throw new IllegalArgumentException(
          "Cannot append " + obj.getClass().getName() + " to BooleanColumn");
    }
    return append((Boolean) obj);
  }

  /** Appends b to the end of this column and returns this column */
  public BooleanColumn append(byte b) {
    data.add(b);
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn appendMissing() {
    append(BooleanColumnType.MISSING_VALUE);
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public int valueHash(int rowNumber) {
    return getByte(rowNumber);
  }

  /** {@inheritDoc} */
  @Override
  public boolean equals(int rowNumber1, int rowNumber2) {
    return getByte(rowNumber1) == getByte(rowNumber2);
  }

  /** {@inheritDoc} */
  @Override
  public String getString(int row) {
    return formatter.format(get(row));
  }

  /** {@inheritDoc} */
  @Override
  public String getUnformattedString(int row) {
    Boolean b = get(row);
    if (b == null) {
      return "";
    }
    return String.valueOf(b);
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn emptyCopy() {
    BooleanColumn empty = create(name());
    empty.setPrintFormatter(getPrintFormatter());
    return empty;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn emptyCopy(int rowSize) {
    return create(name(), rowSize);
  }

  /** {@inheritDoc} */
  @Override
  public void clear() {
    data.clear();
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn copy() {
    return new BooleanColumn(name(), data.clone());
  }

  /** {@inheritDoc} */
  @Override
  public void sortAscending() {
    data.sort(ByteComparators.NATURAL_COMPARATOR);
  }

  /** {@inheritDoc} */
  @Override
  public void sortDescending() {
    data.sort(ByteComparators.OPPOSITE_COMPARATOR);
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn appendCell(String object) {
    return append(parser().parseByte(object));
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn appendCell(String object, AbstractColumnParser parser) {
    return append(parser.parseByte(object));
  }

  /**
   * Returns the value in row i as a Boolean
   *
   * @param i the row number
   * @return A Boolean object (may be null)
   */
  @Override
  public Boolean get(int i) {
    byte b = data.getByte(i);
    if (b == BooleanColumnType.BYTE_TRUE) {
      return Boolean.TRUE;
    }
    if (b == BooleanColumnType.BYTE_FALSE) {
      return Boolean.FALSE;
    }
    return null;
  }

  /**
   * Returns the value in row i as a byte (0, 1, or Byte.MIN_VALUE representing missing data)
   *
   * @param i the row number
   */
  public byte getByte(int i) {
    return data.getByte(i);
  }

  /** {@inheritDoc} */
  @Override
  public boolean isEmpty() {
    return data.isEmpty();
  }

  /** Returns the number of {@code true} elements in this column */
  public int countTrue() {
    int count = 0;
    for (byte b : data) {
      if (b == BooleanColumnType.BYTE_TRUE) {
        count++;
      }
    }
    return count;
  }

  /** Returns the number of {@code false} elements in this column */
  public int countFalse() {
    int count = 0;
    for (byte b : data) {
      if (b == BooleanColumnType.BYTE_FALSE) {
        count++;
      }
    }
    return count;
  }

  /** Returns the proportion of non-missing row elements that contain true */
  public double proportionTrue() {
    return (double) countTrue() / (size() - countMissing());
  }

  /** Returns the proportion of non-missing row elements that contain true */
  public double proportionFalse() {
    return 1.0 - proportionTrue();
  }

  /** Returns true if the column contains any true values, and false otherwise */
  public boolean any() {
    return countTrue() > 0;
  }

  /**
   * Returns true if the column contains only true values, and false otherwise. If there are any
   * missing values it returns false.
   */
  public boolean all() {
    return countTrue() == size();
  }

  /** Returns true if the column contains no true values, and false otherwise */
  public boolean none() {
    return countTrue() == 0;
  }

  /** {@inheritDoc} */
  @Override
  public Selection isFalse() {
    Selection results = new BitmapBackedSelection();
    int i = 0;
    for (byte next : data) {
      if (next == BooleanColumnType.BYTE_FALSE) {
        results.add(i);
      }
      i++;
    }
    return results;
  }

  /** {@inheritDoc} */
  @Override
  public Selection isTrue() {
    Selection results = new BitmapBackedSelection();
    int i = 0;
    for (byte next : data) {
      if (next == BooleanColumnType.BYTE_TRUE) {
        results.add(i);
      }
      i++;
    }
    return results;
  }

  /** {@inheritDoc} */
  @Override
  public Selection isEqualTo(BooleanColumn other) {
    Selection results = new BitmapBackedSelection();
    int i = 0;
    ByteIterator booleanIterator = other.byteIterator();
    for (byte next : data) {
      if (next == booleanIterator.nextByte()) {
        results.add(i);
      }
      i++;
    }
    return results;
  }

  /** Returns a ByteArrayList containing 0 (false), 1 (true) or Byte.MIN_VALUE (missing) */
  public ByteArrayList data() {
    return data;
  }

  /** Sets the value at i to b, and returns this column */
  public BooleanColumn set(int i, boolean b) {
    if (b) {
      data.set(i, BooleanColumnType.BYTE_TRUE);
    } else {
      data.set(i, BooleanColumnType.BYTE_FALSE);
    }
    return this;
  }

  /** Sets the value at i to b, and returns this column */
  private BooleanColumn set(int i, byte b) {
    data.set(i, b);
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn set(int i, Boolean val) {
    return val == null ? setMissing(i) : set(i, val.booleanValue());
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn set(int row, String stringValue, AbstractColumnParser parser) {
    return set(row, parser.parseByte(stringValue));
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn lead(int n) {
    BooleanColumn column = lag(-n);
    column.setName(name() + " lead(" + n + ")");
    return column;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn lag(int n) {
    int srcPos = n >= 0 ? 0 : -n;
    byte[] dest = new byte[size()];
    int destPos = Math.max(n, 0);
    int length = n >= 0 ? size() - n : size() + n;

    for (int i = 0; i < size(); i++) {
      dest[i] = BooleanColumnType.MISSING_VALUE;
    }

    System.arraycopy(data.toByteArray(), srcPos, dest, destPos, length);

    BooleanColumn copy = emptyCopy(size());
    copy.data = new ByteArrayList(dest);
    copy.setName(name() + " lag(" + n + ")");
    return copy;
  }

  /**
   * Conditionally update this column, replacing current values with newValue for all rows where the
   * current value matches the selection criteria
   */
  public BooleanColumn set(Selection rowSelection, boolean newValue) {
    for (int row : rowSelection) {
      set(row, newValue);
    }
    return this;
  }

  /**
   * Conditionally update this column, replacing current values with newValue for all rows where the
   * current value matches the selection criteria.
   *
   * @param rowSelection the rows to be updated
   * @param newValue a byte representation of boolean values. The only valid arguments are 0, 1, and
   *     {@link BooleanColumnType#missingValueIndicator()}
   */
  public BooleanColumn set(Selection rowSelection, byte newValue) {
    for (int row : rowSelection) {
      set(row, newValue);
    }
    return this;
  }

  /**
   * Returns the value at row as a double, with true values encoded as 1.0 and false values as 0.0
   */
  public double getDouble(int row) {
    return getByte(row);
  }

  /**
   * Returns all the values in this column as an array of doubles, with true values encoded as 1.0
   * and false values as 0.0
   */
  public double[] asDoubleArray() {
    double[] doubles = new double[data.size()];
    for (int i = 0; i < size(); i++) {
      doubles[i] = data.getByte(i);
    }
    return doubles;
  }

  /** {@inheritDoc} */
  @Override
  public IntComparator rowComparator() {
    return comparator;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn append(Column column) {
    checkArgument(
        column.type() == this.type(),
        "Column '%s' has type %s, but column '%s' has type %s.",
        name(),
        type(),
        column.name(),
        column.type());
    BooleanColumn col = (BooleanColumn) column;
    final int size = col.size();
    for (int i = 0; i < size; i++) {
      append(col.getByte(i));
    }
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public Column append(Column column, int row) {
    checkArgument(
        column.type() == this.type(),
        "Column '%s' has type %s, but column '%s' has type %s.",
        name(),
        type(),
        column.name(),
        column.type());
    BooleanColumn col = (BooleanColumn) column;
    append(col.getByte(row));
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public Column set(int row, Column column, int sourceRow) {
    checkArgument(
        column.type() == this.type(),
        "Column '%s' has type %s, but column '%s' has type %s.",
        name(),
        type(),
        column.name(),
        column.type());
    BooleanColumn col = (BooleanColumn) column;
    set(row, col.getByte(sourceRow));
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public Selection asSelection() {
    Selection selection = new BitmapBackedSelection();
    for (int i = 0; i < size(); i++) {
      byte value = getByte(i);
      if (value == 1) {
        selection.add(i);
      }
    }
    return selection;
  }

  /** {@inheritDoc} */
  @Override
  public Selection isMissing() {
    return eval(BooleanColumnUtils.isMissing);
  }

  /** {@inheritDoc} */
  @Override
  public Selection isNotMissing() {
    return eval(BooleanColumnUtils.isNotMissing);
  }

  /** {@inheritDoc} */
  @Override
  public Iterator iterator() {
    return new BooleanColumnIterator(this.byteIterator());
  }

  /** Returns a ByteIterator for this column */
  public ByteIterator byteIterator() {
    return data.iterator();
  }

  /** Returns the values in this column as a BooleanSet instance */
  public BooleanSet asSet() {
    BooleanSet set = new BooleanOpenHashSet(3);
    BooleanColumn unique = unique();
    for (int i = 0; i < unique.size(); i++) {
      set.add((boolean) unique.get(i));
    }
    return set;
  }

  /** Returns true if the column contains at least one value like {@code aBoolean} */
  public boolean contains(boolean aBoolean) {
    if (aBoolean) {
      return data().contains(BooleanColumnType.BYTE_TRUE);
    }
    return data().contains(BooleanColumnType.BYTE_FALSE);
  }

  /** {@inheritDoc} */
  @Override
  public int byteSize() {
    return type().byteSize();
  }

  /** {@inheritDoc} */
  @Override
  public byte[] asBytes(int row) {
    byte[] result = new byte[byteSize()];
    result[0] = (get(row) ? BooleanColumnType.BYTE_TRUE : BooleanColumnType.BYTE_FALSE);
    return result;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn where(Selection selection) {
    return subset(selection.toArray());
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn removeMissing() {
    BooleanColumn noMissing = emptyCopy();
    ByteListIterator iterator = byteListIterator();
    while (iterator.hasNext()) {
      byte b = iterator.nextByte();
      if (!valueIsMissing(b)) {
        noMissing.append(b);
      }
    }
    return noMissing;
  }

  /** Returns a Selection of the elements that return true when the predicate is evaluated */
  public Selection eval(BytePredicate predicate) {
    Selection selection = new BitmapBackedSelection();
    for (int idx = 0; idx < data.size(); idx++) {
      byte next = data.getByte(idx);
      if (predicate.test(next)) {
        selection.add(idx);
      }
    }
    return selection;
  }

  /** Returns a Selection of the elements that return true when the predicate is evaluated */
  public Selection eval(Predicate predicate) {
    Selection selection = new BitmapBackedSelection();
    for (int idx = 0; idx < data.size(); idx++) {
      if (predicate.test(get(idx))) {
        selection.add(idx);
      }
    }
    return selection;
  }

  /**
   * Returns a Selection of the elements that return true when the predicate is evaluated with the
   * given Boolean argument
   */
  public Selection eval(BiPredicate predicate, Boolean valueToCompare) {
    Selection selection = new BitmapBackedSelection();
    for (int idx = 0; idx < data.size(); idx++) {
      if (predicate.test(get(idx), valueToCompare)) {
        selection.add(idx);
      }
    }
    return selection;
  }

  /** Returns a byteListIterator, which allows iteration by byte (value) and int (index) */
  private ByteListIterator byteListIterator() {
    return data.iterator();
  }

  /**
   * Returns a DoubleColumn containing the elements in this column, with true as 1.0 and false as
   * 0.0.
   */
  public DoubleColumn asDoubleColumn() {
    DoubleColumn numberColumn = DoubleColumn.create(this.name(), size());
    ByteArrayList data = data();
    for (int i = 0; i < size(); i++) {
      numberColumn.set(i, data.getByte(i));
    }
    return numberColumn;
  }

  /** {@inheritDoc} */
  @Override
  public int compare(Boolean o1, Boolean o2) {
    return Boolean.compare(o1, o2);
  }

  private static class BooleanColumnIterator implements Iterator {

    private final ByteIterator iterator;

    BooleanColumnIterator(ByteIterator iterator) {
      this.iterator = iterator;
    }

    /**
     * Returns {@code true} if the iteration has more elements. (In other words, returns {@code
     * true} if {@link #next()} would return an element rather than throwing an exception.)
     *
     * @return {@code true} if the iteration has more elements
     */
    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    /**
     * Returns the next element in the iteration.
     *
     * @return the next element in the iteration
     * @throws java.util.NoSuchElementException if the iteration has no more elements
     */
    @Override
    public Boolean next() {
      byte b = iterator.nextByte();
      if (b == (byte) 0) {
        return false;
      }
      if (b == (byte) 1) {
        return true;
      }
      return null;
    }
  }

  // fillWith methods

  /** {@inheritDoc} */
  @Override
  public BooleanColumn fillWith(BooleanIterator iterator) {
    for (int r = 0; r < size(); r++) {
      if (!iterator.hasNext()) {
        break;
      }
      set(r, iterator.nextBoolean());
    }
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn fillWith(BooleanIterable iterable) {
    BooleanIterator iterator = iterable.iterator();
    for (int r = 0; r < size(); r++) {
      if (!iterator.hasNext()) {
        iterator = iterable.iterator();
        if (!iterator.hasNext()) {
          break;
        }
      }
      set(r, iterator.nextBoolean());
    }
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public BooleanColumn fillWith(Supplier supplier) {
    for (int r = 0; r < size(); r++) {
      try {
        set(r, supplier.get());
      } catch (Exception e) {
        break;
      }
    }
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public Boolean[] asObjectArray() {
    final Boolean[] output = new Boolean[data.size()];
    for (int i = 0; i < data.size(); i++) {
      output[i] = get(i);
    }
    return output;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy