// tech.tablesaw.api.DateColumn Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package tech.tablesaw.api;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.ints.IntComparator;
import it.unimi.dsi.fastutil.ints.IntComparators;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.nio.ByteBuffer;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.stream.Stream;
import javax.annotation.Nonnull;
import tech.tablesaw.columns.AbstractColumn;
import tech.tablesaw.columns.AbstractColumnParser;
import tech.tablesaw.columns.Column;
import tech.tablesaw.columns.dates.DateColumnFormatter;
import tech.tablesaw.columns.dates.DateColumnType;
import tech.tablesaw.columns.dates.DateFillers;
import tech.tablesaw.columns.dates.DateFilters;
import tech.tablesaw.columns.dates.DateMapFunctions;
import tech.tablesaw.columns.dates.PackedLocalDate;
import tech.tablesaw.selection.Selection;
/** A column that contains int-encoded local date values */
public class DateColumn extends AbstractColumn
implements DateFilters,
DateFillers,
DateMapFunctions,
CategoricalColumn {
/** The data held in this column in its integer encoding form. See {@link PackedLocalDate} */
protected IntArrayList data;
/** A comparator for the encoded dates. Note that the ints compared are the column indexes */
private final IntComparator comparator =
(r1, r2) -> {
final int f1 = getIntInternal(r1);
int f2 = getIntInternal(r2);
return Integer.compare(f1, f2);
};
/** The {@link tech.tablesaw.columns.ColumnFormatter} for formatting output from this column */
private DateColumnFormatter printFormatter = new DateColumnFormatter();
/** Creates a new DateColumn with the given name. The column is completely empty. */
public static DateColumn create(final String name) {
return new DateColumn(name, new IntArrayList(DEFAULT_ARRAY_SIZE));
}
/**
* Creates a new DateColumn with the given name and integer-encoded data. See {@link
* PackedLocalDate} for details of the encoding
*/
public static DateColumn createInternal(String name, int[] data) {
return new DateColumn(name, new IntArrayList(data));
}
/**
* Creates a new DateColumn with the given name. The column contains {@code initialSize} missing
* values.
*/
public static DateColumn create(final String name, final int initialSize) {
DateColumn column = new DateColumn(name, new IntArrayList(initialSize));
for (int i = 0; i < initialSize; i++) {
column.appendMissing();
}
return column;
}
/** Creates a new DateColumn with the given name and data */
public static DateColumn create(String name, Collection data) {
DateColumn column = new DateColumn(name, new IntArrayList(data.size()));
for (LocalDate date : data) {
column.append(date);
}
return column;
}
/** Creates a new DateColumn with the given name and data */
public static DateColumn create(String name, LocalDate... data) {
DateColumn column = new DateColumn(name, new IntArrayList(data.length));
for (LocalDate date : data) {
column.append(date);
}
return column;
}
/** Creates a new DateColumn with the given name and data */
public static DateColumn create(String name, Stream stream) {
DateColumn column = create(name);
stream.forEach(column::append);
return column;
}
/**
* Creates a new DateColumn with the given name and integer-encoded data. See {@link
* PackedLocalDate} for the details of the encoding scheme
*/
private DateColumn(String name, IntArrayList data) {
super(DateColumnType.instance(), name, DateColumnType.DEFAULT_PARSER);
this.data = data;
}
/** {@inheritDoc} */
@Override
public int size() {
return data.size();
}
/** {@inheritDoc} */
@Override
public DateColumn subset(final int[] rows) {
final DateColumn c = this.emptyCopy();
for (final int row : rows) {
c.appendInternal(getIntInternal(row));
}
return c;
}
public DateColumn appendInternal(int f) {
data.add(f);
return this;
}
public DateColumn set(int index, int value) {
data.set(index, value);
return this;
}
/** {@inheritDoc} */
@Override
public DateColumn set(int index, LocalDate value) {
return value == null ? setMissing(index) : set(index, PackedLocalDate.pack(value));
}
/**
* Creates and sets a printFormatter based-on the given DateTimeFormatter. Missing values will be
* printed as the given missingValueString. Non missing values will be handled by the
* dateTimeFormatter
*/
public void setPrintFormatter(DateTimeFormatter dateTimeFormatter, String missingValueString) {
Preconditions.checkNotNull(dateTimeFormatter);
Preconditions.checkNotNull(missingValueString);
this.printFormatter = new DateColumnFormatter(dateTimeFormatter, missingValueString);
}
/**
* Creates and sets a printFormatter based-on the given DateTimeFormatter. Missing values will be
* printed as empty strings. Non missing values will be handled by the dateTimeFormatter
*/
public void setPrintFormatter(DateTimeFormatter dateTimeFormatter) {
Preconditions.checkNotNull(dateTimeFormatter);
this.printFormatter = new DateColumnFormatter(dateTimeFormatter);
}
/** Sets the print formatter to the argument */
public void setPrintFormatter(@Nonnull DateColumnFormatter dateColumnFormatter) {
Preconditions.checkNotNull(dateColumnFormatter);
this.printFormatter = dateColumnFormatter;
}
/** {@inheritDoc} */
@Override
public String getString(int row) {
return printFormatter.format(getPackedDate(row));
}
/** {@inheritDoc} */
@Override
public String getUnformattedString(int row) {
return PackedLocalDate.toDateString(getPackedDate(row));
}
/** {@inheritDoc} */
@Override
public DateColumn emptyCopy() {
DateColumn empty = create(name());
empty.printFormatter = printFormatter;
return empty;
}
/** {@inheritDoc} */
@Override
public DateColumn emptyCopy(int rowSize) {
DateColumn copy = create(name(), rowSize);
copy.printFormatter = printFormatter;
return copy;
}
/** {@inheritDoc} */
@Override
public DateColumn copy() {
DateColumn copy = emptyCopy(data.size());
copy.data = data.clone();
copy.printFormatter = printFormatter;
return copy;
}
/** {@inheritDoc} */
@Override
public void clear() {
data.clear();
}
/** {@inheritDoc} */
@Override
public DateColumn lead(int n) {
DateColumn column = lag(-n);
column.setName(name() + " lead(" + n + ")");
return column;
}
/** {@inheritDoc} */
@Override
public DateColumn lag(int n) {
int srcPos = n >= 0 ? 0 : -n;
int[] dest = new int[size()];
int destPos = Math.max(n, 0);
int length = n >= 0 ? size() - n : size() + n;
for (int i = 0; i < size(); i++) {
dest[i] = DateColumnType.missingValueIndicator();
}
System.arraycopy(data.toIntArray(), srcPos, dest, destPos, length);
DateColumn copy = emptyCopy(size());
copy.data = new IntArrayList(dest);
copy.setName(name() + " lag(" + n + ")");
return copy;
}
/** {@inheritDoc} */
@Override
public void sortAscending() {
data.sort(IntComparators.NATURAL_COMPARATOR);
}
/** {@inheritDoc} */
@Override
public void sortDescending() {
data.sort(IntComparators.OPPOSITE_COMPARATOR);
}
/** {@inheritDoc} */
@Override
public int countUnique() {
IntSet ints = new IntOpenHashSet(size());
for (int i = 0; i < size(); i++) {
ints.add(data.getInt(i));
}
return ints.size();
}
/** {@inheritDoc} */
@Override
public DateColumn unique() {
IntSet ints = new IntOpenHashSet(data.size());
for (int i = 0; i < size(); i++) {
ints.add(data.getInt(i));
}
DateColumn copy = emptyCopy(ints.size());
copy.setName(name() + " Unique values");
copy.data = IntArrayList.wrap(ints.toIntArray());
return copy;
}
/** {@inheritDoc} */
@Override
public DateColumn append(final Column column) {
checkArgument(
column.type() == this.type(),
"Column '%s' has type %s, but column '%s' has type %s.",
name(),
type(),
column.name(),
column.type());
DateColumn dateColumn = (DateColumn) column;
final int size = dateColumn.size();
for (int i = 0; i < size; i++) {
appendInternal(dateColumn.getPackedDate(i));
}
return this;
}
/** {@inheritDoc} */
@Override
public DateColumn append(Column column, int row) {
checkArgument(
column.type() == this.type(),
"Column '%s' has type %s, but column '%s' has type %s.",
name(),
type(),
column.name(),
column.type());
return appendInternal(((DateColumn) column).getIntInternal(row));
}
/** {@inheritDoc} */
@Override
public DateColumn set(int row, Column column, int sourceRow) {
checkArgument(
column.type() == this.type(),
"Column '%s' has type %s, but column '%s' has type %s.",
name(),
type(),
column.name(),
column.type());
return set(row, ((DateColumn) column).getIntInternal(sourceRow));
}
/** {@inheritDoc} */
@Override
public LocalDate max() {
if (isEmpty()) {
return null;
}
Integer max = null;
for (int aData : data) {
if (DateColumnType.missingValueIndicator() != aData) {
if (max == null) {
max = aData;
} else {
max = (max > aData) ? max : aData;
}
}
}
if (max == null) {
return null;
}
return PackedLocalDate.asLocalDate(max);
}
/** {@inheritDoc} */
@Override
public LocalDate min() {
if (isEmpty()) {
return null;
}
Integer min = null;
for (int aData : data) {
if (DateColumnType.missingValueIndicator() != aData) {
if (min == null) {
min = aData;
} else {
min = (min < aData) ? min : aData;
}
}
}
if (min == null) {
return null;
}
return PackedLocalDate.asLocalDate(min);
}
/**
* Conditionally update this column, replacing current values with newValue for all rows where the
* current value matches the selection criteria
*
* Example: myColumn.set(myColumn.valueIsMissing(), LocalDate.now()); // no more missing values
*/
@Override
public DateColumn set(Selection rowSelection, LocalDate newValue) {
int packed = PackedLocalDate.pack(newValue);
for (int row : rowSelection) {
set(row, packed);
}
return this;
}
/** {@inheritDoc} */
@Override
public DateColumn appendMissing() {
appendInternal(DateColumnType.missingValueIndicator());
return this;
}
/** {@inheritDoc} */
@Override
public LocalDate get(int index) {
return PackedLocalDate.asLocalDate(getPackedDate(index));
}
/** {@inheritDoc} */
@Override
public boolean isEmpty() {
return data.isEmpty();
}
/** {@inheritDoc} */
@Override
public IntComparator rowComparator() {
return comparator;
}
/** {@inheritDoc} */
@Override
public DateColumn append(LocalDate value) {
return this.appendInternal(PackedLocalDate.pack(value));
}
/** {@inheritDoc} */
@Override
public DateColumn appendObj(Object obj) {
if (obj == null) {
return appendMissing();
}
if (obj instanceof java.sql.Date) {
return append(((java.sql.Date) obj).toLocalDate());
}
if (obj instanceof LocalDate) {
return append((LocalDate) obj);
}
throw new IllegalArgumentException(
"Cannot append " + obj.getClass().getName() + " to DateColumn");
}
/** {@inheritDoc} */
@Override
public DateColumn appendCell(String string) {
return appendInternal(PackedLocalDate.pack(parser().parse(string)));
}
/** {@inheritDoc} */
@Override
public DateColumn appendCell(String string, AbstractColumnParser> parser) {
return appendObj(parser.parse(string));
}
/** {@inheritDoc} */
@Override
public int getIntInternal(int index) {
return data.getInt(index);
}
protected int getPackedDate(int index) {
return getIntInternal(index);
}
/**
* Returns a table of dates and the number of observations of those dates
*
* @return the summary table
*/
@Override
public Table summary() {
Table table = Table.create("Column: " + name());
StringColumn measure = StringColumn.create("Measure");
StringColumn value = StringColumn.create("Value");
table.addColumns(measure);
table.addColumns(value);
measure.append("Count");
value.append(String.valueOf(size()));
measure.append("Missing");
value.append(String.valueOf(countMissing()));
measure.append("Earliest");
value.append(String.valueOf(min()));
measure.append("Latest");
value.append(String.valueOf(max()));
return table;
}
public static boolean valueIsMissing(int i) {
return DateColumnType.valueIsMissing(i);
}
/** Returns the count of missing values in this column */
@Override
public int countMissing() {
int count = 0;
for (int i = 0; i < size(); i++) {
if (getPackedDate(i) == DateColumnType.missingValueIndicator()) {
count++;
}
}
return count;
}
/**
* Returns the largest ("top") n values in the column
*
* @param n The maximum number of records to return. The actual number will be smaller if n is
* greater than the number of observations in the column
* @return A list, possibly empty, of the largest observations
*/
public List top(int n) {
List top = new ArrayList<>();
int[] values = data.toIntArray();
IntArrays.parallelQuickSort(values, IntComparators.OPPOSITE_COMPARATOR);
for (int i = 0; i < n && i < values.length; i++) {
top.add(PackedLocalDate.asLocalDate(values[i]));
}
return top;
}
/**
* Returns the smallest ("bottom") n values in the column
*
* @param n The maximum number of records to return. The actual number will be smaller if n is
* greater than the number of observations in the column
* @return A list, possibly empty, of the smallest n observations
*/
public List bottom(int n) {
List bottom = new ArrayList<>();
int[] values = data.toIntArray();
IntArrays.parallelQuickSort(values);
for (int i = 0; i < n && i < values.length; i++) {
bottom.add(PackedLocalDate.asLocalDate(values[i]));
}
return bottom;
}
/** {@inheritDoc} */
@Override
public IntIterator intIterator() {
return data.iterator();
}
/** {@inheritDoc} */
@Override
public DateColumn removeMissing() {
DateColumn noMissing = emptyCopy();
IntIterator iterator = intIterator();
while (iterator.hasNext()) {
int i = iterator.nextInt();
if (!valueIsMissing(i)) {
noMissing.appendInternal(i);
}
}
return noMissing;
}
/** {@inheritDoc} */
@Override
public List asList() {
List dates = new ArrayList<>(size());
for (LocalDate localDate : this) {
dates.add(localDate);
}
return dates;
}
/** {@inheritDoc} */
@Override
public DateColumn where(Selection selection) {
return subset(selection.toArray());
}
/** {@inheritDoc} */
public Set asSet() {
Set dates = new HashSet<>();
DateColumn unique = unique();
for (LocalDate d : unique) {
dates.add(d);
}
return dates;
}
/** {@inheritDoc} */
@Override
public boolean contains(LocalDate localDate) {
int date = PackedLocalDate.pack(localDate);
return data.contains(date);
}
/** {@inheritDoc} */
@Override
public DateColumn setMissing(int i) {
return set(i, DateColumnType.missingValueIndicator());
}
public double[] asDoubleArray() {
double[] doubles = new double[size()];
for (int i = 0; i < size(); i++) {
doubles[i] = data.getInt(i);
}
return doubles;
}
public DoubleColumn asDoubleColumn() {
return DoubleColumn.create(name(), asDoubleArray());
}
/** {@inheritDoc} */
@Override
public boolean isMissing(int rowNumber) {
return valueIsMissing(getIntInternal(rowNumber));
}
public double getDouble(int i) {
return getIntInternal(i);
}
/** {@inheritDoc} */
@Override
public int byteSize() {
return type().byteSize();
}
/** {@inheritDoc} */
@Override
public int valueHash(int rowNumber) {
return getIntInternal(rowNumber);
}
/** {@inheritDoc} */
@Override
public boolean equals(int rowNumber1, int rowNumber2) {
return getIntInternal(rowNumber1) == getIntInternal(rowNumber2);
}
/**
* Returns the contents of the cell at rowNumber as a byte[]
*
* @param rowNumber the number of the row as int
*/
@Override
public byte[] asBytes(int rowNumber) {
return ByteBuffer.allocate(byteSize()).putInt(getPackedDate(rowNumber)).array();
}
/**
* Returns an iterator over elements of type {@code T}.
*
* @return an Iterator.
*/
@Override
public Iterator iterator() {
return new Iterator() {
final IntIterator intIterator = intIterator();
@Override
public boolean hasNext() {
return intIterator.hasNext();
}
@Override
public LocalDate next() {
return PackedLocalDate.asLocalDate(intIterator.nextInt());
}
};
}
// fillWith methods
private DateColumn fillWith(
int count, Iterator iterator, Consumer acceptor) {
for (int r = 0; r < count; r++) {
if (!iterator.hasNext()) {
break;
}
acceptor.accept(iterator.next());
}
return this;
}
/** {@inheritDoc} */
@Override
public DateColumn fillWith(Iterator iterator) {
int[] r = new int[1];
fillWith(size(), iterator, date -> set(r[0]++, date));
return this;
}
private DateColumn fillWith(
int count, Iterable iterable, Consumer acceptor) {
Iterator iterator = iterable.iterator();
for (int r = 0; r < count; r++) {
if (!iterator.hasNext()) {
iterator = iterable.iterator();
if (!iterator.hasNext()) {
break;
}
}
acceptor.accept(iterator.next());
}
return this;
}
/** {@inheritDoc} */
@Override
public DateColumn fillWith(Iterable iterable) {
int[] r = new int[1];
fillWith(size(), iterable, date -> set(r[0]++, date));
return this;
}
private DateColumn fillWith(
int count, Supplier supplier, Consumer acceptor) {
for (int r = 0; r < count; r++) {
try {
acceptor.accept(supplier.get());
} catch (Exception e) {
break;
}
}
return this;
}
/** {@inheritDoc} */
@Override
public DateColumn fillWith(Supplier supplier) {
int[] r = new int[1];
fillWith(size(), supplier, date -> set(r[0]++, date));
return this;
}
/** {@inheritDoc} */
@Override
public LocalDate[] asObjectArray() {
final LocalDate[] output = new LocalDate[data.size()];
for (int i = 0; i < data.size(); i++) {
output[i] = get(i);
}
return output;
}
/** {@inheritDoc} */
@Override
public int compare(LocalDate o1, LocalDate o2) {
return o1.compareTo(o2);
}
}