All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datavec.dataframe.api.BooleanColumn Maven / Gradle / Ivy

Go to download

High-performance Java Dataframe with integrated columnar storage (fork of tablesaw)

There is a newer version: 0.9.1
Show newest version
package org.datavec.dataframe.api;

import org.datavec.dataframe.columns.AbstractColumn;
import org.datavec.dataframe.columns.Column;
import org.datavec.dataframe.filtering.BooleanPredicate;
import org.datavec.dataframe.io.TypeUtils;
import org.datavec.dataframe.mapping.BooleanMapUtils;
import org.datavec.dataframe.store.ColumnMetadata;
import org.datavec.dataframe.util.BitmapBackedSelection;
import org.datavec.dataframe.util.Selection;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import it.unimi.dsi.fastutil.booleans.BooleanOpenHashSet;
import it.unimi.dsi.fastutil.booleans.BooleanSet;
import it.unimi.dsi.fastutil.bytes.Byte2IntMap;
import it.unimi.dsi.fastutil.bytes.Byte2IntOpenHashMap;
import it.unimi.dsi.fastutil.bytes.ByteArrayList;
import it.unimi.dsi.fastutil.bytes.ByteArrays;
import it.unimi.dsi.fastutil.bytes.ByteComparator;
import it.unimi.dsi.fastutil.bytes.ByteIterator;
import it.unimi.dsi.fastutil.bytes.ByteOpenHashSet;
import it.unimi.dsi.fastutil.bytes.ByteSet;
import it.unimi.dsi.fastutil.ints.IntComparator;
import it.unimi.dsi.fastutil.ints.IntIterator;

import java.util.Iterator;
import java.util.Map;

import static org.datavec.dataframe.columns.BooleanColumnUtils.isMissing;
import static org.datavec.dataframe.columns.BooleanColumnUtils.isNotMissing;

/**
 * A column in a base table that contains float values
 */
public class BooleanColumn extends AbstractColumn implements BooleanMapUtils {

  public static final byte MISSING_VALUE = Byte.MIN_VALUE;

  private static final int BYTE_SIZE = 1;

  private static int DEFAULT_ARRAY_SIZE = 128;

  private ByteArrayList data;

  public static BooleanColumn create(String name) {
    return new BooleanColumn(name);
  }

  public static BooleanColumn create(String name, int rowSize) {
    return new BooleanColumn(name, rowSize);
  }

  public static BooleanColumn create(String name, Selection selection, int rowSize) {
    return new BooleanColumn(name, selection, rowSize);
  }

  public BooleanColumn(ColumnMetadata metadata) {
    super(metadata);
    data = new ByteArrayList(DEFAULT_ARRAY_SIZE);
  }

  private BooleanColumn(String name) {
    super(name);
    data = new ByteArrayList(DEFAULT_ARRAY_SIZE);
  }

  public BooleanColumn(String name, int initialSize) {
    super(name);
    data = new ByteArrayList(initialSize);
  }

  private BooleanColumn(String name, ByteArrayList values) {
    super(name);
    data = values;
  }

  public BooleanColumn(String name, Selection hits, int columnSize) {
    super(name);
    if (columnSize == 0) {
      return;
    }
    ByteArrayList data = new ByteArrayList(columnSize);

    for (int i = 0; i < columnSize; i++) {
      data.add((byte) 0);
    }

    IntIterator intIterator = hits.iterator();
    while (intIterator.hasNext()) {
      byte b = (byte) 1;
      int i = intIterator.next();
      data.set(i, b);
    }
    this.data = data;
  }

  public int size() {
    return data.size();
  }

  @Override
  public Table summary() {

    Byte2IntMap counts = new Byte2IntOpenHashMap(3);
    counts.put((byte) 0, 0);
    counts.put((byte) 1, 0);

    for (byte next : data) {
      counts.put(next, counts.get(next) + 1);
    }

    Table table = Table.create(name());

    BooleanColumn booleanColumn = BooleanColumn.create("Value");
    IntColumn countColumn = IntColumn.create("Count");
    table.addColumn(booleanColumn);
    table.addColumn(countColumn);

    for (Map.Entry entry : counts.entrySet()) {
      booleanColumn.add(entry.getKey());
      countColumn.add(entry.getValue());
    }
    return table;
  }

  /**
   * Returns the count of missing values in this column
   */
  @Override
  public int countMissing() {
    int count = 0;
    for (int i = 0; i < size(); i++) {
      if (getByte(i) == MISSING_VALUE) {
        count++;
      }
    }
    return count;
  }

  @Override
  public int countUnique() {
    ByteSet count = new ByteOpenHashSet(3);
    for (byte next : data) {
      count.add(next);
    }
    return count.size();
  }

  @Override
  public BooleanColumn unique() {
    ByteSet count = new ByteOpenHashSet(3);
    for (byte next : data) {
      count.add(next);
    }
    ByteArrayList list = new ByteArrayList(count);
    return new BooleanColumn(name() + " Unique values", list);
  }

  @Override
  public ColumnType type() {
    return ColumnType.BOOLEAN;
  }

  public void add(boolean b) {
    if (b) {
      data.add((byte) 1);
    } else {
      data.add((byte) 0);
    }
  }

  public void add(byte b) {
    data.add(b);
  }

  @Override
  public String getString(int row) {
    return String.valueOf(get(row));
  }

  @Override
  public BooleanColumn emptyCopy() {
    BooleanColumn column = BooleanColumn.create(name());
    column.setComment(comment());
    return column;
  }

  @Override
  public BooleanColumn emptyCopy(int rowSize) {
    BooleanColumn column = BooleanColumn.create(name(), rowSize);
    column.setComment(comment());
    return column;
  }

  @Override
  public void clear() {
    data.clear();
  }

  @Override
  public BooleanColumn copy() {
    BooleanColumn column = BooleanColumn.create(name(), data);
    column.setComment(comment());
    return column;
  }

  @Override
  public void sortAscending() {
    ByteArrays.mergeSort(data.elements());
  }

  @Override
  public void sortDescending() {
    ByteArrays.mergeSort(data.elements(), reverseByteComparator);
  }

  ByteComparator reverseByteComparator = new ByteComparator() {

    @Override
    public int compare(Byte o1, Byte o2) {
      return Byte.compare(o2, o1);
    }

    @Override
    public int compare(byte o1, byte o2) {
      return Byte.compare(o2, o1);
    }
  };

  public static boolean convert(String stringValue) {
    if (Strings.isNullOrEmpty(stringValue) || TypeUtils.MISSING_INDICATORS.contains(stringValue)) {
      return (boolean) ColumnType.BOOLEAN.getMissingValue();
    } else if (TypeUtils.TRUE_STRINGS.contains(stringValue)) {
      return true;
    } else if (TypeUtils.FALSE_STRINGS.contains(stringValue)) {
      return false;
    } else {
      throw new IllegalArgumentException("Attempting to convert non-boolean value " +
          stringValue + " to Boolean");
    }
  }

  public void addCell(String object) {
    try {
      add(convert(object));
    } catch (NullPointerException e) {
      throw new RuntimeException(name() + ": "
          + String.valueOf(object) + ": "
          + e.getMessage());
    }
  }

  /**
   * Returns the value in row i as a Boolean
   * @param i the row number
   * @return  A Boolean object (may be null)
   */
  public Boolean get(int i) {
    byte b = data.getByte(i);
    if (b == 1) {
      return Boolean.TRUE;
    }
    if (b == 0) {
      return Boolean.FALSE;
    }
    return null;
  }

  /**
   * Returns the value in row i as a byte (0, 1, or Byte.MIN_VALUE representing missing data)
   * @param i the row number
   */
  public byte getByte(int i) {
    return data.getByte(i);
  }

  @Override
  public boolean isEmpty() {
    return data.isEmpty();
  }

  public static BooleanColumn create(String fileName, ByteArrayList bools) {
    BooleanColumn booleanColumn = new BooleanColumn(fileName, bools.size());
    booleanColumn.data.addAll(bools);
    return booleanColumn;
  }

  public int countTrue() {
    int count = 0;
    for (byte b : data) {
      if (b == 1) {
        count++;
      }
    }
    return count;
  }

  public int countFalse() {
    int count = 0;
    for (byte b : data) {
      if (b == 0) {
        count++;
      }
    }
    return count;
  }

  public Selection isFalse() {
    Selection results = new BitmapBackedSelection();
    int i = 0;
    for (byte next : data) {
      if (next == 0) {
        results.add(i);
      }
      i++;
    }
    return results;
  }

  public Selection isTrue() {
    Selection results = new BitmapBackedSelection();
    int i = 0;
    for (byte next : data) {
      if (next == 1) {
        results.add(i);
      }
      i++;
    }
    return results;
  }

  public Selection isEqualTo(BooleanColumn other) {
    Selection results = new BitmapBackedSelection();
    int i = 0;
    ByteIterator booleanIterator = other.byteIterator();
    for (byte next : data) {
      if (next == booleanIterator.nextByte()) {
        results.add(i);
      }
      i++;
    }
    return results;
  }

  /**
   * Returns a ByteArrayList containing 0 (false), 1 (true) or Byte.MIN_VALUE (missing)
   */
  public ByteArrayList data() {
    return data;
  }

  public void set(int i, boolean b) {
    data.set(i, b ? (byte) 1 : (byte) 0);
  }

  @Override
  public IntComparator rowComparator() {
    return comparator;
  }

  @Override
  public void append(Column column) {
    Preconditions.checkArgument(column.type() == this.type());
    BooleanColumn booleanColumn = (BooleanColumn) column;
    for (int i = 0; i < booleanColumn.size(); i++) {
      add(booleanColumn.get(i));
    }
  }

  IntComparator comparator = new IntComparator() {

    @Override
    public int compare(Integer r1, Integer r2) {
      return compare((int) r1, (int) r2);
    }

    @Override
    public int compare(int r1, int r2) {
      boolean f1 = get(r1);
      boolean f2 = get(r2);
      return Boolean.compare(f1, f2);
    }
  };

  // TODO(lwhite): this won't scale
  public String print() {
    StringBuilder builder = new StringBuilder();
    builder.append(title());
    for (byte next : data) {
      if (next == (byte) 0) {
        builder.append(String.valueOf(false));
      } else if (next == (byte) 1) {
        builder.append(String.valueOf(true));
      } else {
        builder.append(String.valueOf("NA"));
      }
      builder.append('\n');
    }
    return builder.toString();
  }

  @Override
  public Selection isMissing() {  //TODO
    return select(isMissing);
  }

  @Override
  public Selection isNotMissing() { //TODO
    return select(isNotMissing);
  }

  public Iterator iterator() {
    return new BooleanColumnIterator(this.byteIterator());
  }

  public ByteIterator byteIterator() {
    return data.iterator();
  }

  @Override
  public String toString() {
    return "Boolean column: " + name();
  }

  public BooleanSet asSet() {
    BooleanSet set = new BooleanOpenHashSet(3);
    BooleanColumn unique = unique();
    for (int i = 0; i < unique.size(); i++) {
      set.add(unique.get(i));
    }
    return set;
  }

  public boolean contains(boolean aBoolean) {
    return data().contains(aBoolean);
  }

  @Override
  public int byteSize() {
    return BYTE_SIZE;
  }

  @Override
  public byte[] asBytes(int row) {
    byte[] result = new byte[1];
    result[0] = (byte) (get(row) ? 1 : 0);
    return result;
  }

  public Selection select(BooleanPredicate predicate) {
    Selection selection = new BitmapBackedSelection();
    for (int idx = 0; idx < data.size(); idx++) {
      byte next = data.getByte(idx);
      if (predicate.test(next)) {
        selection.add(idx);
      }
    }
    return selection;
  }

  public int[] toIntArray() {
    int[] output = new int[data.size()];
    for (int i = 0; i < data.size(); i++) {
      output[i] = data.getByte(i);
    }
    return output;
  }

  public IntColumn toIntColumn() {
    IntColumn intColumn = IntColumn.create(this.name() + ": ints", size());
    ByteArrayList data = data();
    for (int i = 0; i < size(); i++) {
      intColumn.add(data.getByte(i));
    }
    return intColumn;
  }

  static class BooleanColumnIterator implements Iterator {

    final ByteIterator iterator;

    public BooleanColumnIterator(ByteIterator iterator) {
      this.iterator = iterator;
    }

    /**
     * Returns {@code true} if the iteration has more elements.
     * (In other words, returns {@code true} if {@link #next} would
     * return an element rather than throwing an exception.)
     *
     * @return {@code true} if the iteration has more elements
     */
    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    /**
     * Returns the next element in the iteration.
     *
     * @return the next element in the iteration
     * @throws java.util.NoSuchElementException if the iteration has no more elements
     */
    @Override
    public Boolean next() {
      byte b = iterator.next();
      if (b == (byte) 0) {
        return false;
      }
      if (b == (byte) 1) {
        return true;
      }
      return null;
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy