All downloads are free. The search and download features use the official Maven repository.

tech.tablesaw.api.InstantColumn Maven / Gradle / Ivy

The newest version!
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package tech.tablesaw.api;

import static tech.tablesaw.columns.temporal.TemporalPredicates.isMissing;
import static tech.tablesaw.columns.temporal.TemporalPredicates.isNotMissing;

import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.ints.IntComparator;
import it.unimi.dsi.fastutil.longs.LongArrayList;
import it.unimi.dsi.fastutil.longs.LongArrays;
import it.unimi.dsi.fastutil.longs.LongComparators;
import it.unimi.dsi.fastutil.longs.LongIterator;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.stream.Stream;
import tech.tablesaw.columns.AbstractColumn;
import tech.tablesaw.columns.AbstractColumnParser;
import tech.tablesaw.columns.Column;
import tech.tablesaw.columns.instant.InstantColumnFormatter;
import tech.tablesaw.columns.instant.InstantColumnType;
import tech.tablesaw.columns.instant.InstantMapFunctions;
import tech.tablesaw.columns.instant.PackedInstant;
import tech.tablesaw.columns.temporal.TemporalFillers;
import tech.tablesaw.columns.temporal.TemporalFilters;
import tech.tablesaw.selection.Selection;

/**
 * A column that contains long-integer encoded (packed) instant values. An instant is a unique point
 * of time on the timeline. The instants held by Instant column have millisecond precision, unlike
 * instances of {@link java.time.Instant}, which have nanosecond precision
 */
public class InstantColumn extends AbstractColumn
    implements InstantMapFunctions,
        TemporalFillers,
        TemporalFilters,
        CategoricalColumn {

  protected LongArrayList data;

  /** {@inheritDoc} */
  @Override
  public int valueHash(int rowNumber) {
    return Long.hashCode(getLongInternal(rowNumber));
  }

  /** {@inheritDoc} */
  @Override
  public boolean equals(int rowNumber1, int rowNumber2) {
    return getLongInternal(rowNumber1) == getLongInternal(rowNumber2);
  }

  private final IntComparator comparator =
      (r1, r2) -> {
        long f1 = getPackedDateTime(r1);
        long f2 = getPackedDateTime(r2);
        return Long.compare(f1, f2);
      };

  private InstantColumnFormatter printFormatter = new InstantColumnFormatter();

  private InstantColumn(String name, LongArrayList data) {
    super(InstantColumnType.instance(), name, InstantColumnType.DEFAULT_PARSER);
    this.data = data;
  }

  public static InstantColumn create(String name) {
    return new InstantColumn(name, new LongArrayList(DEFAULT_ARRAY_SIZE));
  }

  /**
   * For internal Tablesaw use only Returns a new column with the given name and data
   *
   * @param name The column name
   * @param data An array of longs representing Instant values in Tablesaw internal format
   */
  public static InstantColumn createInternal(String name, long[] data) {
    return new InstantColumn(name, new LongArrayList(data));
  }

  public static InstantColumn create(String name, int initialSize) {
    InstantColumn column = new InstantColumn(name, new LongArrayList(initialSize));
    for (int i = 0; i < initialSize; i++) {
      column.appendMissing();
    }
    return column;
  }

  public static InstantColumn create(String name, Collection data) {
    InstantColumn column = new InstantColumn(name, new LongArrayList(data.size()));
    for (Instant date : data) {
      column.append(date);
    }
    return column;
  }

  public static InstantColumn create(String name, Instant... data) {
    InstantColumn column = new InstantColumn(name, new LongArrayList(data.length));
    for (Instant date : data) {
      column.append(date);
    }
    return column;
  }

  public static InstantColumn create(String name, Stream stream) {
    InstantColumn column = create(name);
    stream.forEach(column::append);
    return column;
  }

  public static boolean valueIsMissing(long value) {
    return InstantColumnType.valueIsMissing(value);
  }

  /** {@inheritDoc} */
  @Override
  public boolean isMissing(int rowNumber) {
    return valueIsMissing(getLongInternal(rowNumber));
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn plus(long amountToAdd, ChronoUnit unit) {
    InstantColumn newColumn = emptyCopy();
    newColumn.setName(temporalColumnName(this, amountToAdd, unit));
    InstantColumn column1 = this;

    for (int r = 0; r < column1.size(); r++) {
      long packedDateTime = column1.getLongInternal(r);
      if (packedDateTime == InstantColumnType.missingValueIndicator()) {
        newColumn.appendMissing();
      } else {
        newColumn.appendInternal(PackedInstant.plus(packedDateTime, amountToAdd, unit));
      }
    }
    return newColumn;
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn subset(final int[] rows) {
    final InstantColumn c = this.emptyCopy();
    for (final int row : rows) {
      c.appendInternal(getLongInternal(row));
    }
    return c;
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn removeMissing() {
    InstantColumn noMissing = emptyCopy();
    LongIterator iterator = longIterator();
    while (iterator.hasNext()) {
      long i = iterator.nextLong();
      if (!valueIsMissing(i)) {
        noMissing.appendInternal(i);
      }
    }
    return noMissing;
  }

  /** {@inheritDoc} */
  @Override
  public boolean contains(Instant dateTime) {
    long dt = PackedInstant.pack(dateTime);
    return data.contains(dt);
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn setMissing(int i) {
    return set(i, InstantColumnType.missingValueIndicator());
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn where(Selection selection) {
    return subset(selection.toArray());
  }

  /**
   * Sets the print formatter to the argument. The print formatter is used in pretty-printing and
   * optionally for writing to text files like CSVs
   */
  public void setPrintFormatter(InstantColumnFormatter formatter) {
    Preconditions.checkNotNull(formatter);
    this.printFormatter = formatter;
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn lag(int n) {
    int srcPos = n >= 0 ? 0 : -n;
    long[] dest = new long[size()];
    int destPos = Math.max(n, 0);
    int length = n >= 0 ? size() - n : size() + n;

    for (int i = 0; i < size(); i++) {
      dest[i] = InstantColumnType.missingValueIndicator();
    }

    System.arraycopy(data.toLongArray(), srcPos, dest, destPos, length);

    InstantColumn copy = emptyCopy(size());
    copy.data = new LongArrayList(dest);
    copy.setName(name() + " lag(" + n + ")");
    return copy;
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn appendCell(String stringValue) {
    return appendInternal(PackedInstant.pack(parser().parse(stringValue)));
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn appendCell(String stringValue, AbstractColumnParser parser) {
    return appendObj(parser.parse(stringValue));
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn append(Instant dateTime) {
    if (dateTime != null) {
      final long dt = PackedInstant.pack(dateTime);
      appendInternal(dt);
    } else {
      appendInternal(InstantColumnType.missingValueIndicator());
    }
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn appendObj(Object obj) {
    if (obj == null) {
      return appendMissing();
    }
    if (obj instanceof Instant) {
      return append((Instant) obj);
    }
    if (obj instanceof Timestamp) {
      Timestamp timestamp = (Timestamp) obj;
      return append(timestamp.toInstant());
    }
    throw new IllegalArgumentException(
        "Cannot append " + obj.getClass().getName() + " to DateTimeColumn");
  }

  /** {@inheritDoc} */
  @Override
  public int size() {
    return data.size();
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn appendInternal(long dateTime) {
    data.add(dateTime);
    return this;
  }

  /** {@inheritDoc} */
  @Override
  public String getString(int row) {
    return printFormatter.format(getPackedDateTime(row));
  }

  /** {@inheritDoc} */
  @Override
  public String getUnformattedString(int row) {
    return PackedInstant.toString(getPackedDateTime(row));
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn emptyCopy() {
    InstantColumn empty = create(name());
    empty.printFormatter = printFormatter;
    return empty;
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn emptyCopy(int rowSize) {
    InstantColumn column = create(name(), rowSize);
    column.setPrintFormatter(printFormatter);
    return column;
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn copy() {
    InstantColumn column = emptyCopy(data.size());
    column.data = data.clone();
    column.printFormatter = this.printFormatter;
    return column;
  }

  /** {@inheritDoc} */
  @Override
  public void clear() {
    data.clear();
  }

  /** {@inheritDoc} */
  @Override
  public void sortAscending() {
    data.sort(LongComparators.NATURAL_COMPARATOR);
  }

  /** {@inheritDoc} */
  @Override
  public void sortDescending() {
    data.sort(LongComparators.OPPOSITE_COMPARATOR);
  }

  /** {@inheritDoc} */
  @Override
  public Table summary() {
    Table table = Table.create("Column: " + name());
    StringColumn measure = StringColumn.create("Measure");
    StringColumn value = StringColumn.create("Value");
    table.addColumns(measure);
    table.addColumns(value);

    measure.append("Count");
    value.append(String.valueOf(size()));

    measure.append("Missing");
    value.append(String.valueOf(countMissing()));

    measure.append("Earliest");
    value.append(String.valueOf(min()));

    measure.append("Latest");
    value.append(String.valueOf(max()));

    return table;
  }

  /** {@inheritDoc} */
  @Override
  public int countUnique() {
    LongSet ints = new LongOpenHashSet(data.size());
    for (long i : data) {
      ints.add(i);
    }
    return ints.size();
  }

  /** {@inheritDoc} */
  @Override
  public InstantColumn unique() {
    LongSet ints = new LongOpenHashSet(data.size());
    for (long i : data) {
      ints.add(i);
    }
    InstantColumn column = emptyCopy(ints.size());
    column.setName(name() + " Unique values");
    column.data = LongArrayList.wrap(ints.toLongArray());
    return column;
  }

  /** {@inheritDoc} */
  @Override
  public boolean isEmpty() {
    return data.isEmpty();
  }

  /** {@inheritDoc} */
  @Override
  public long getLongInternal(int index) {
    return data.getLong(index);
  }

  // TODO: Name?
  protected long getPackedDateTime(int index) {
    return getLongInternal(index);
  }

  /** {@inheritDoc} */
  @Override
  public Instant get(int index) {
    return PackedInstant.asInstant(getPackedDateTime(index));
  }

  /** {@inheritDoc} */
  @Override
  public IntComparator rowComparator() {
    return comparator;
  }

  /**
   * Conditionally update this column, replacing current values with newValue for all rows where the
   * current value matches the selection criteria
   *
   * 

Example: myColumn.set(myColumn.valueIsMissing(), Instant.now()); // no more missing values */ @Override public InstantColumn set(Selection rowSelection, Instant newValue) { for (int row : rowSelection) { set(row, newValue); } return this; } /** Returns the count of missing values in this column */ @Override public int countMissing() { int count = 0; for (int i = 0; i < size(); i++) { if (getPackedDateTime(i) == InstantColumnType.missingValueIndicator()) { count++; } } return count; } /** * Returns an array where each entry is the difference, measured in seconds, between the Instant * and midnight, January 1, 1970 UTC. * *

If a value is missing, InstantColumnType.missingValueIndicator() is used */ public long[] asEpochSecondArray() { return asEpochSecondArray(ZoneOffset.UTC); } /** * Returns the seconds from epoch for each value as an array based on the given offset * *

If a value is missing, InstantColumnType.missingValueIndicator() is used */ public long[] asEpochSecondArray(ZoneOffset offset) { long[] output = new long[data.size()]; for (int i = 0; i < data.size(); i++) { Instant instant = PackedInstant.asInstant(data.getLong(i)); if (instant == null) { output[i] = InstantColumnType.missingValueIndicator(); } else { output[i] = instant.getEpochSecond(); } } return output; } /** * Returns an array where each entry is the difference, measured in milliseconds, between the * Instant and midnight, January 1, 1970 UTC. * *

If a missing value is encountered, InstantColumnType.missingValueIndicator() is inserted in * the array */ public long[] asEpochMillisArray() { return asEpochMillisArray(ZoneOffset.UTC); } /** * Returns an array where each entry is the difference, measured in milliseconds, between the * Instant and midnight, January 1, 1970 UTC. * *

If a missing value is encountered, InstantColumnType.missingValueIndicator() is inserted in * the array */ public long[] asEpochMillisArray(ZoneOffset offset) { long[] output = new long[data.size()]; for (int i = 0; i < data.size(); i++) { Instant instant = PackedInstant.asInstant(data.getLong(i)); if (instant == null) { output[i] = InstantColumnType.missingValueIndicator(); } else { output[i] = instant.toEpochMilli(); } } return output; } /** * Returns a DateTimeColumn where each element is a representation of the associated Instant * translated using UTC as the timezone */ public DateTimeColumn asLocalDateTimeColumn() { return asLocalDateTimeColumn(ZoneOffset.UTC); } /** * Returns a DateTimeColumn where each element is a representation of the associated Instant * translated using the argument as the timezone */ public DateTimeColumn asLocalDateTimeColumn(ZoneId zone) { LocalDateTime[] output = new LocalDateTime[data.size()]; for (int i = 0; i < data.size(); i++) { Instant instant = PackedInstant.asInstant(data.getLong(i)); if (instant == null) { output[i] = null; } else { output[i] = LocalDateTime.ofInstant(instant, zone); } } return DateTimeColumn.create(name(), output); } /** {@inheritDoc} */ @Override public InstantColumn append(Column column) { Preconditions.checkArgument( column.type() == this.type(), "Column '%s' has type %s, but column '%s' has type %s.", name(), type(), column.name(), column.type()); InstantColumn dateTimeColumn = (InstantColumn) column; final int size = dateTimeColumn.size(); for (int i = 0; i < size; i++) { append(dateTimeColumn.get(i)); } return this; } /** {@inheritDoc} */ @Override public InstantColumn append(Column column, int row) { Preconditions.checkArgument( column.type() == this.type(), "Column '%s' has type %s, but column '%s' has type %s.", name(), type(), column.name(), column.type()); return appendInternal(((InstantColumn) column).getLongInternal(row)); } /** {@inheritDoc} */ @Override public InstantColumn set(int row, 
Column column, int sourceRow) { Preconditions.checkArgument( column.type() == this.type(), "Column '%s' has type %s, but column '%s' has type %s.", name(), type(), column.name(), column.type()); return set(row, ((InstantColumn) column).getLongInternal(sourceRow)); } /** Returns the largest instant value in the column */ public Instant max() { if (isEmpty()) { return null; } long max = Long.MIN_VALUE; boolean allMissing = true; for (long aData : data) { if (InstantColumnType.missingValueIndicator() != aData) { max = Math.max(max, aData); allMissing = false; } } if (allMissing) { return null; } return PackedInstant.asInstant(max); } /** {@inheritDoc} */ @Override public InstantColumn appendMissing() { appendInternal(InstantColumnType.missingValueIndicator()); return this; } /** {@inheritDoc} */ @Override public Instant min() { if (isEmpty()) { return null; } long min = Long.MAX_VALUE; boolean allMissing = true; for (long aData : data) { if (InstantColumnType.missingValueIndicator() != aData) { min = Math.min(min, aData); allMissing = false; } } if (allMissing) { return null; } return PackedInstant.asInstant(min); } public InstantColumn set(int index, long value) { data.set(index, value); return this; } /** {@inheritDoc} */ @Override public InstantColumn set(int index, Instant value) { return value == null ? setMissing(index) : set(index, PackedInstant.pack(value)); } /** * Returns the largest ("top") n values in the column * * @param n The maximum number of records to return. 
The actual number will be smaller if n is * greater than the number of observations in the column * @return A list, possibly empty, of the largest observations */ public List top(int n) { List top = new ArrayList<>(); long[] values = data.toLongArray(); LongArrays.parallelQuickSort(values, LongComparators.OPPOSITE_COMPARATOR); for (int i = 0; i < n && i < values.length; i++) { top.add(PackedInstant.asInstant(values[i])); } return top; } /** * Returns the smallest ("bottom") n values in the column * * @param n The maximum number of records to return. The actual number will be smaller if n is * greater than the number of observations in the column * @return A list, possibly empty, of the smallest n observations */ public List bottom(int n) { List bottom = new ArrayList<>(); long[] values = data.toLongArray(); LongArrays.parallelQuickSort(values); for (int i = 0; i < n && i < values.length; i++) { bottom.add(PackedInstant.asInstant(values[i])); } return bottom; } /** Returns an iterator over the long representations of the instants in this column */ public LongIterator longIterator() { return data.iterator(); } /** {@inheritDoc} */ @Override public Set asSet() { Set times = new HashSet<>(); InstantColumn unique = unique(); for (Instant Instant : unique) { times.add(Instant); } return times; } /** {@inheritDoc} */ @Override public int byteSize() { return type().byteSize(); } /** Returns the contents of the cell at rowNumber as a byte[] */ @Override public byte[] asBytes(int rowNumber) { return ByteBuffer.allocate(byteSize()).putLong(getPackedDateTime(rowNumber)).array(); } public double getDouble(int i) { return getPackedDateTime(i); } public double[] asDoubleArray() { double[] doubles = new double[size()]; long[] millis = asEpochSecondArray(); for (int i = 0; i < millis.length; i++) { doubles[i] = millis[i]; } return doubles; } public DoubleColumn asDoubleColumn() { return DoubleColumn.create(name(), asEpochSecondArray()); } /** * Returns an iterator over elements of 
type {@code T}. * * @return an Iterator. */ @Override public Iterator iterator() { return new Iterator() { final LongIterator longIterator = longIterator(); @Override public boolean hasNext() { return longIterator.hasNext(); } @Override public Instant next() { return PackedInstant.asInstant(longIterator.nextLong()); } }; } // fillWith methods private InstantColumn fillWith( int count, Iterator iterator, Consumer acceptor) { for (int r = 0; r < count; r++) { if (!iterator.hasNext()) { break; } acceptor.accept(iterator.next()); } return this; } /** {@inheritDoc} */ @Override public InstantColumn fillWith(Iterator iterator) { int[] r = new int[1]; fillWith(size(), iterator, date -> set(r[0]++, date)); return this; } private InstantColumn fillWith( int count, Iterable iterable, Consumer acceptor) { Iterator iterator = iterable.iterator(); for (int r = 0; r < count; r++) { if (!iterator.hasNext()) { iterator = iterable.iterator(); if (!iterator.hasNext()) { break; } } acceptor.accept(iterator.next()); } return this; } /** {@inheritDoc} */ @Override public InstantColumn fillWith(Iterable iterable) { int[] r = new int[1]; fillWith(size(), iterable, date -> set(r[0]++, date)); return this; } private InstantColumn fillWith( int count, Supplier supplier, Consumer acceptor) { for (int r = 0; r < count; r++) { try { acceptor.accept(supplier.get()); } catch (Exception e) { break; } } return this; } /** {@inheritDoc} */ @Override public InstantColumn fillWith(Supplier supplier) { int[] r = new int[1]; fillWith(size(), supplier, date -> set(r[0]++, date)); return this; } /** {@inheritDoc} */ @Override public Instant[] asObjectArray() { final Instant[] output = new Instant[data.size()]; for (int i = 0; i < data.size(); i++) { output[i] = get(i); } return output; } /** {@inheritDoc} */ @Override public int compare(Instant o1, Instant o2) { return o1.compareTo(o2); } /** {@inheritDoc} */ @Override public Selection isMissing() { return eval(isMissing); } /** {@inheritDoc} */ 
@Override public Selection isNotMissing() { return eval(isNotMissing); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy