
org.numenta.nupic.encoders.DateEncoder Maven / Gradle / Ivy
/* ---------------------------------------------------------------------
* Numenta Platform for Intelligent Computing (NuPIC)
* Copyright (C) 2014, Numenta, Inc. Unless you have an agreement
* with Numenta, Inc., for a separate license for this software code, the
* following terms and conditions apply:
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses.
*
* http://numenta.org/licenses/
* ---------------------------------------------------------------------
*/
package org.numenta.nupic.encoders;
import gnu.trove.list.TDoubleList;
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TDoubleArrayList;
import gnu.trove.list.array.TIntArrayList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.joda.time.Interval;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.numenta.nupic.util.Tuple;
/**
* DOCUMENTATION TAKEN DIRECTLY FROM THE PYTHON VERSION:
*
* A date encoder encodes a date according to encoding parameters specified in its constructor.
*
* The input to a date encoder is a datetime.datetime object. The output is
* the concatenation of several sub-encodings, each of which encodes a different
* aspect of the date. Which sub-encodings are present, and details of those
* sub-encodings, are specified in the DateEncoder constructor.
*
* Each parameter describes one attribute to encode. By default, the attribute
* is not encoded.
*
* season (season of the year; units = day):
* (int) width of attribute; default radius = 91.5 days (1 season)
* (tuple) season[0] = width; season[1] = radius
*
* dayOfWeek (monday = 0; units = day)
* (int) width of attribute; default radius = 1 day
* (tuple) dayOfWeek[0] = width; dayOfWeek[1] = radius
*
* weekend (boolean: 0, 1)
* (int) width of attribute
*
* holiday (boolean: 0, 1)
* (int) width of attribute
*
* timeOfday (midnight = 0; units = hour)
* (int) width of attribute: default radius = 4 hours
* (tuple) timeOfDay[0] = width; timeOfDay[1] = radius
*
* customDays TODO: what is it?
*
* forced (default True) : if True, skip checks for parameters' settings; see {@code ScalarEncoders} for details
*
* @author utensil
*
* TODO Improve the document:
*
* - improve wording on unspecified attributes: "Each parameter describes one extra attribute(other than the datetime
* object itself) to encode. By default, the unspecified attributes are not encoded."
* - refer to DateEncoder::Builder, which where these parameters are defined.
* - explain customDays here and at Python version
*/
public class DateEncoder extends Encoder {
protected int width;
//See DateEncoder.Builder for default values.
protected Tuple season;
protected ScalarEncoder seasonEncoder;
protected Tuple dayOfWeek;
protected ScalarEncoder dayOfWeekEncoder;
protected Tuple weekend;
protected ScalarEncoder weekendEncoder;
protected Tuple customDays;
protected ScalarEncoder customDaysEncoder;
protected Tuple holiday;
protected ScalarEncoder holidayEncoder;
protected Tuple timeOfDay;
protected ScalarEncoder timeOfDayEncoder;
protected List customDaysList = new ArrayList<>();
// Currently the only holiday we know about is December 25
// holidays is a list of holidays that occur on a fixed date every year
protected List holidaysList = Arrays.asList(new Tuple(12, 25));
//////////////// Convenience DateTime Formats ////////////////////
public static DateTimeFormatter FULL_DATE_TIME_ZONE = DateTimeFormat.forPattern("YYYY/MM/dd HH:mm:ss.SSSz");
public static DateTimeFormatter FULL_DATE_TIME = DateTimeFormat.forPattern("YYYY/MM/dd HH:mm:ss.SSS");
public static DateTimeFormatter RELAXED_DATE_TIME = DateTimeFormat.forPattern("YYYY/MM/dd HH:mm:ss");
public static DateTimeFormatter LOOSE_DATE_TIME = DateTimeFormat.forPattern("MM/dd/YY HH:mm");
public static DateTimeFormatter FULL_DATE = DateTimeFormat.forPattern("YYYY/MM/dd");
public static DateTimeFormatter FULL_TIME_ZONE = DateTimeFormat.forPattern("HH:mm:ss.SSSz");
public static DateTimeFormatter FULL_TIME_MILLIS = DateTimeFormat.forPattern("HH:mm:ss.SSS");
public static DateTimeFormatter FULL_TIME_SECS = DateTimeFormat.forPattern("HH:mm:ss");
public static DateTimeFormatter FULL_TIME_MINS = DateTimeFormat.forPattern("HH:mm");
protected DateTimeFormatter customFormatter;
/**
* Constructs a new {@code DateEncoder}
*
* Package private to encourage construction using the Builder Pattern
* but still allow inheritance.
*/
DateEncoder() {
}
/**
* Returns a builder for building DateEncoder.
* This builder may be reused to produce multiple builders
*
* @return a {@code DateEncoder.Builder}
*/
public static DateEncoder.Builder builder() {
return new DateEncoder.Builder();
}
/**
* Init the {@code DateEncoder} with parameters
*/
@SuppressWarnings("unchecked")
public void init() {
width = 0;
// Because most of the ScalarEncoder fields have less than 21 bits(recommended in
// ScalarEncoder.checkReasonableSettings), so for now we set forced to be true to
// override.
// TODO figure out how to remove this
setForced(true);
// Note: The order of adding encoders matters, must be in the following
// season, dayOfWeek, weekend, customDays, holiday, timeOfDay
if(isValidEncoderPropertyTuple(season)) {
seasonEncoder = ScalarEncoder.builder()
.w((int) season.get(0))
.radius((double) season.get(1))
.minVal(0)
.maxVal(366)
.periodic(true)
.name("season")
.forced(this.isForced())
.build();
addChildEncoder(seasonEncoder);
}
if(isValidEncoderPropertyTuple(dayOfWeek)) {
dayOfWeekEncoder = ScalarEncoder.builder()
.w((int) dayOfWeek.get(0))
.radius((double) dayOfWeek.get(1))
.minVal(0)
.maxVal(7)
.periodic(true)
.name("day of week")
.forced(this.isForced())
.build();
addChildEncoder(dayOfWeekEncoder);
}
if(isValidEncoderPropertyTuple(weekend)) {
weekendEncoder = ScalarEncoder.builder()
.w((int) weekend.get(0))
.radius((double) weekend.get(1))
.minVal(0)
.maxVal(1)
.periodic(false)
.name("weekend")
.forced(this.isForced())
.build();
addChildEncoder(weekendEncoder);
}
if(isValidEncoderPropertyTuple(customDays)) {
List days = (List) customDays.get(1);
StringBuilder customDayEncoderName = new StringBuilder();
if(days.size() == 1) {
customDayEncoderName.append(days.get(0));
} else {
for(String day : days) {
customDayEncoderName.append(day).append(" ");
}
}
customDaysEncoder = ScalarEncoder.builder()
.w((int) customDays.get(0))
.radius(1)
.minVal(0)
.maxVal(1)
.periodic(false)
.name(customDayEncoderName.toString())
.forced(this.isForced())
.build();
//customDaysEncoder is special in naming
addEncoder("customdays", customDaysEncoder);
addCustomDays(days);
}
if(isValidEncoderPropertyTuple(holiday)) {
holidayEncoder = ScalarEncoder.builder()
.w((int) holiday.get(0))
.radius((double) holiday.get(1))
.minVal(0)
.maxVal(1)
.periodic(false)
.name("holiday")
.forced(this.isForced())
.build();
addChildEncoder(holidayEncoder);
}
if(isValidEncoderPropertyTuple(timeOfDay)) {
timeOfDayEncoder = ScalarEncoder.builder()
.w((int) timeOfDay.get(0))
.radius((double) timeOfDay.get(1))
.minVal(0)
.maxVal(24)
.periodic(true)
.name("time of day")
.forced(this.isForced())
.build();
addChildEncoder(timeOfDayEncoder);
}
}
private boolean isValidEncoderPropertyTuple(Tuple encoderPropertyTuple) {
return encoderPropertyTuple != null && (int)encoderPropertyTuple.get(0) != 0;
}
// Adapted from MultiEncoder
@SuppressWarnings({ "rawtypes", "unchecked" })
public void addEncoder(String name, Encoder child) {
super.addEncoder(this, name, child, width);
for (Object d : child.getDescription()) {
Tuple dT = (Tuple) d;
description.add(new Tuple(dT.get(0), (int)dT.get(1) + getWidth()));
}
width += child.getWidth();
}
protected void addChildEncoder(ScalarEncoder encoder) {
this.addEncoder(encoder.getName(), encoder);
}
protected void addCustomDays(List daysList) {
for(String dayStr : daysList)
{
switch (dayStr.toLowerCase())
{
case "mon":
case "monday":
customDaysList.add(0);
break;
case "tue":
case "tuesday":
customDaysList.add(1);
break;
case "wed":
case "wednesday":
customDaysList.add(2);
break;
case "thu":
case "thursday":
customDaysList.add(3);
break;
case "fri":
case "friday":
customDaysList.add(4);
break;
case "sat":
case "saturday":
customDaysList.add(5);
break;
case "sun":
case "sunday":
customDaysList.add(6);
break;
default:
throw new IllegalArgumentException(
String.format("Unable to understand %s as a day of week", dayStr)
);
}
}
}
public Tuple getSeason() {
return season;
}
public void setSeason(Tuple season) {
this.season = season;
}
public Tuple getDayOfWeek() {
return dayOfWeek;
}
public void setDayOfWeek(Tuple dayOfWeek) {
this.dayOfWeek = dayOfWeek;
}
public Tuple getWeekend() {
return weekend;
}
public void setWeekend(Tuple weekend) {
this.weekend = weekend;
}
public Tuple getCustomDays() {
return customDays;
}
public void setCustomDays(Tuple customDays) {
this.customDays = customDays;
}
public Tuple getHoliday() {
return holiday;
}
public void setHoliday(Tuple holiday) {
this.holiday = holiday;
}
public Tuple getTimeOfDay() {
return timeOfDay;
}
public void setTimeOfDay(Tuple timeOfDay) {
this.timeOfDay = timeOfDay;
}
/**
* {@inheritDoc}
*/
@Override
public int getWidth() {
return width;
}
/**
* {@inheritDoc}
*/
@Override
public int getN() {
return width;
}
/**
* {@inheritDoc}
*/
@Override
public int getW() {
return width;
}
/**
* {@inheritDoc}
*/
@Override
public boolean isDelta() {
return false;
}
/**
* Sets the custom formatter for the format field.
* @param formatter
*/
public void setCustomFormat(DateTimeFormatter formatter) {
this.customFormatter = formatter;
}
/**
* Convenience method to employ the configured {@link DateTimeFormatter}
* to return a {@link DateTime}
*
* This method assumes that a custom formatter has been configured
* and set on this object. see {@link #setCustomFormat(DateTimeFormatter)}
*
* @param dateTimeStr
* @return
*/
public DateTime parse(String dateTimeStr) {
return customFormatter.parseDateTime(dateTimeStr);
}
/**
* Convenience method to parse a date string into a date
* before delegating to {@link #encodeIntoArray(DateTime, int[])}
*
* This method assumes that a custom formatter has been configured
* and set on this object. see {@link #setCustomFormat(DateTimeFormatter)}
*
* @param dateStr the date string to parse
* @return the binary encoded date
* @throws NullPointerException if the custom formatter is not previously
* configured.
*/
public int[] parseEncode(String dateStr) {
int[] output = new int[getN()];
this.encodeIntoArray(customFormatter.parseDateTime(dateStr), output);
return output;
}
/**
* {@inheritDoc}
*/
// Adapted from MultiEncoder
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void encodeIntoArray(DateTime inputData, int[] output) {
if(inputData == null) {
throw new IllegalArgumentException("DateEncoder requires a valid Date object but got null");
}
// Get the scalar values for each sub-field
TDoubleList scalars = getScalars(inputData);
int fieldCounter = 0;
for (EncoderTuple t : getEncoders(this)) {
Encoder encoder = t.getEncoder();
int offset = t.getOffset();
int[] tempArray = new int[encoder.getWidth()];
encoder.encodeIntoArray(scalars.get(fieldCounter), tempArray);
System.arraycopy(tempArray, 0, output, offset, tempArray.length);
++fieldCounter;
}
}
/**
* Returns the input in the same format as is returned by topDownCompute().
* For most encoder types, this is the same as the input data.
* For instance, for scalar and category types, this corresponds to the numeric
* and string values, respectively, from the inputs. For datetime encoders, this
* returns the list of scalars for each of the sub-fields (timeOfDay, dayOfWeek, etc.)
*
* This method is essentially the same as getScalars() except that it returns
* strings
* @param inputData The input data in the format it is received from the data source
*
* @return A list of values, in the same format and in the same order as they
* are returned by topDownCompute.
*
* @return list of encoded values in String form
*/
public List getEncodedValues(Date inputData) {
List values = new ArrayList<>();
List encodedValues = getEncodedValues(inputData);
for (String v : encodedValues) {
values.add(String.valueOf(v));
}
return values;
}
/**
* Returns an {@link TDoubleList} containing the sub-field scalar value(s) for
* each sub-field of the inputData. To get the associated field names for each of
* the scalar values, call getScalarNames().
*
* @param inputData the input value, in this case a date object
* @return a list of one input double
*/
public TDoubleList getScalars(DateTime inputData) {
if(inputData == null) {
throw new IllegalArgumentException("DateEncoder requires a valid Date object but got null");
}
TDoubleList values = new TDoubleArrayList();
//Get the scalar values for each sub-field
double timeOfDay = inputData.getHourOfDay() + inputData.getMinuteOfHour() / 60.0;
// The day of week was 1 based, so convert to 0 based
int dayOfWeek = inputData.getDayOfWeek() - 1; // + timeOfDay / 24.0
if(seasonEncoder != null) {
// The day of year was 1 based, so convert to 0 based
double dayOfYear = inputData.getDayOfYear() - 1;
values.add(dayOfYear);
}
if(dayOfWeekEncoder != null) {
values.add(dayOfWeek);
}
if(weekendEncoder != null) {
//saturday, sunday or friday evening
boolean isWeekend = dayOfWeek == 6 || dayOfWeek == 5 ||
(dayOfWeek == 4 && timeOfDay > 18);
int weekend = isWeekend ? 1 : 0;
values.add(weekend);
}
if(customDaysEncoder != null) {
boolean isCustomDays = customDaysList.contains(dayOfWeek);
int customDay = isCustomDays ? 1 : 0;
values.add(customDay);
}
if(holidayEncoder != null) {
// A "continuous" binary value. = 1 on the holiday itself and smooth ramp
// 0->1 on the day before the holiday and 1->0 on the day after the holiday.
double holidayness = 0;
for(Tuple h : holidaysList) {
//hdate is midnight on the holiday
DateTime hdate = new DateTime(inputData.getYear(), (int)h.get(0), (int)h.get(1), 0, 0, 0);
if(inputData.isAfter(hdate)) {
Duration diff = new Interval(hdate, inputData).toDuration();
long days = diff.getStandardDays();
if(days == 0) {
//return 1 on the holiday itself
holidayness = 1;
break;
} else if(days == 1) {
//ramp smoothly from 1 -> 0 on the next day
holidayness = 1.0 - ((diff.getStandardSeconds() - 86400.0 * days) / 86400.0);
break;
}
} else {
//TODO This is not the same as when date.isAfter(hdate), why?
Duration diff = new Interval(inputData, hdate).toDuration();
long days = diff.getStandardDays();
if(days == 0) {
//ramp smoothly from 0 -> 1 on the previous day
holidayness = 1.0 - ((diff.getStandardSeconds() - 86400.0 * days) / 86400.0);
//TODO Why no break?
}
}
}
values.add(holidayness);
}
if(timeOfDayEncoder != null) {
values.add(timeOfDay);
}
return values;
}
/**
* {@inheritDoc}
*/
@Override
// TODO Why can getBucketValues return null for some encoders, e.g. MultiEncoder
public List getBucketValues(Class returnType) {
return null;
}
/**
* Returns an array containing the sub-field bucket indices for
* each sub-field of the inputData. To get the associated field names for each of
* the buckets, call getScalarNames().
* @param input The data from the source. This is typically a object with members.
*
* @return array of bucket indices
*/
public int[] getBucketIndices(DateTime input) {
TDoubleList scalars = getScalars(input);
TIntList l = new TIntArrayList();
List encoders = getEncoders(this);
if(encoders != null && encoders.size() > 0) {
int i = 0;
for(EncoderTuple t : encoders) {
l.addAll(t.getEncoder().getBucketIndices(scalars.get(i)));
++i;
}
}else{
throw new IllegalStateException("Should be implemented in base classes that are not " +
"containers for other encoders");
}
return l.toArray();
}
/**
* {@inheritDoc}
*/
@SuppressWarnings("rawtypes")
@Override
public void setLearning(boolean learningEnabled) {
for (EncoderTuple t : getEncoders(this)) {
Encoder encoder = t.getEncoder();
encoder.setLearningEnabled(learningEnabled);
}
}
/**
* Returns a {@link Encoder.Builder} for constructing {@link DateEncoder}s
*
* The base class architecture is put together in such a way where boilerplate
* initialization can be kept to a minimum for implementing subclasses.
* Hopefully! :-)
*
* @see ScalarEncoder.Builder#setStuff(int)
*/
public static class Builder extends Encoder.Builder {
// Ignore leap year differences -- assume 366 days in a year
// Radius = 91.5 days = length of season
// Value is number of days since beginning of year (0 - 355)
protected Tuple season = new Tuple(0, 91.5);
// Value is day of week (floating point)
// Radius is 1 day
protected Tuple dayOfWeek = new Tuple(0, 1.0);
// Binary value.
protected Tuple weekend = new Tuple(0, 1.0);
// Custom days encoder, first argument in tuple is width
// second is either a single day of the week or a list of the days
// you want encoded as ones.
protected Tuple customDays = new Tuple(0, new ArrayList());
// A "continuous" binary value. = 1 on the holiday itself and smooth ramp
// 0->1 on the day before the holiday and 1->0 on the day after the holiday.
protected Tuple holiday = new Tuple(0, 1.0);
// Value is time of day in hours
// Radius = 4 hours, e.g. morning, afternoon, evening, early night,
// late night, etc.
protected Tuple timeOfDay = new Tuple(0, 4.0);
protected DateTimeFormatter customFormatter;
private Builder() {}
@Override
public DateEncoder build() {
//Must be instantiated so that super class can initialize
//boilerplate variables.
encoder = new DateEncoder();
//Call super class here
super.build();
////////////////////////////////////////////////////////
// Implementing classes would do setting of specific //
// vars here together with any sanity checking //
////////////////////////////////////////////////////////
DateEncoder e = ((DateEncoder)encoder);
e.setSeason(this.season);
e.setDayOfWeek(this.dayOfWeek);
e.setWeekend(this.weekend);
e.setHoliday(this.holiday);
e.setTimeOfDay(this.timeOfDay);
e.setCustomDays(this.customDays);
e.setCustomFormat(this.customFormatter);
((DateEncoder)encoder).init();
return (DateEncoder)encoder;
}
/**
* Set how many bits are used to encode season
*/
public DateEncoder.Builder season(int season, double radius) {
this.season = new Tuple(season, radius);
return this;
}
/**
* Set how many bits are used to encode season
*/
public DateEncoder.Builder season(int season) {
return this.season(season, (double) this.season.get(1));
}
/**
* Set how many bits are used to encode dayOfWeek
*/
public DateEncoder.Builder dayOfWeek(int dayOfWeek, double radius) {
this.dayOfWeek = new Tuple(dayOfWeek, radius);
return this;
}
/**
* Set how many bits are used to encode dayOfWeek
*/
public DateEncoder.Builder dayOfWeek(int dayOfWeek) {
return this.dayOfWeek(dayOfWeek, (double) this.dayOfWeek.get(1));
}
/**
* Set how many bits are used to encode weekend
*/
public DateEncoder.Builder weekend(int weekend, double radius) {
this.weekend = new Tuple(weekend, radius);
return this;
}
/**
* Set how many bits are used to encode weekend
*/
public DateEncoder.Builder weekend(int weekend) {
return this.weekend(weekend, (double) this.weekend.get(1));
}
/**
* Set how many bits are used to encode customDays
*/
public DateEncoder.Builder customDays(int customDays, List customDaysList) {
this.customDays = new Tuple(customDays, customDaysList);
return this;
}
/**
* Set how many bits are used to encode customDays
*/
@SuppressWarnings("unchecked")
public DateEncoder.Builder customDays(int customDays) {
return this.customDays(customDays, (ArrayList) this.customDays.get(1));
}
/**
* Set how many bits are used to encode holiday
*/
public DateEncoder.Builder holiday(int holiday, double radius) {
this.holiday = new Tuple(holiday, radius);
return this;
}
/**
* Set how many bits are used to encode holiday
*/
public DateEncoder.Builder holiday(int holiday) {
return this.holiday(holiday, (double) this.holiday.get(1));
}
/**
* Set how many bits are used to encode timeOfDay
*/
public DateEncoder.Builder timeOfDay(int timeOfDay, double radius) {
this.timeOfDay = new Tuple(timeOfDay, radius);
return this;
}
/**
* Set how many bits are used to encode timeOfDay
*/
public DateEncoder.Builder timeOfDay(int timeOfDay) {
return this.timeOfDay(timeOfDay, (double) this.timeOfDay.get(1));
}
/**
* Set the name of the encoder
*/
public DateEncoder.Builder name(String name) {
this.name = name;
return this;
}
/**
* Creates the pattern used to parse the date field.
* @param pattern
* @return
*/
public DateEncoder.Builder formatPattern(String pattern) {
this.customFormatter = DateTimeFormat.forPattern(pattern);
return this;
}
/**
* Sets the {@link DateTimeFormatte} on this builder.
* @param formatter
* @return
*/
public DateEncoder.Builder formatter(DateTimeFormatter formatter) {
this.customFormatter = formatter;
return this;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy