// org.datacleaner.beans.DateAndTimeAnalyzerColumnDelegate (Maven / Gradle / Ivy)
/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.beans;
import java.util.Date;
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
import org.apache.commons.math.stat.descriptive.StatisticalSummary;
import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import org.datacleaner.api.InputRow;
import org.datacleaner.storage.RowAnnotation;
import org.datacleaner.storage.RowAnnotationFactory;
import org.joda.time.LocalDate;
import org.joda.time.LocalTime;
/**
 * Helper class for the Date/time Analyzer. This class collects all the
 * statistics for a single column. The Date/time Analyzer then consists of a
 * number of these delegates.
 */
final class DateAndTimeAnalyzerColumnDelegate {

    private final RowAnnotationFactory _annotationFactory;

    // One annotation per "interesting" group of rows: rows with a null value,
    // and rows that currently hold the highest/lowest date and time of day.
    private final RowAnnotation _nullAnnotation;
    private final RowAnnotation _maxDateAnnotation;
    private final RowAnnotation _minDateAnnotation;
    private final RowAnnotation _maxTimeAnnotation;
    private final RowAnnotation _minTimeAnnotation;

    // Either a DescriptiveStatistics or a SummaryStatistics instance,
    // depending on the constructor flag. Fed with epoch-millisecond values.
    private final StatisticalSummary _statistics;

    // Only written inside the synchronized run(...) method; volatile so that
    // the unsynchronized getters observe the most recent write.
    private volatile int _numRows;
    private volatile LocalDate _minDate;
    private volatile LocalDate _maxDate;
    private volatile LocalTime _minTime;
    private volatile LocalTime _maxTime;

    /**
     * Creates a delegate for a single column.
     *
     * @param descriptiveStatistics
     *            if true a {@link DescriptiveStatistics} is used, which
     *            enables median, percentiles, kurtosis and skewness; if
     *            false a {@link SummaryStatistics} is used and those
     *            measures are reported as null
     * @param annotationFactory
     *            factory used to create, fill and reset the row annotations
     *            of this delegate
     */
    public DateAndTimeAnalyzerColumnDelegate(boolean descriptiveStatistics, RowAnnotationFactory annotationFactory) {
        _annotationFactory = annotationFactory;
        _nullAnnotation = _annotationFactory.createAnnotation();
        _maxDateAnnotation = _annotationFactory.createAnnotation();
        _minDateAnnotation = _annotationFactory.createAnnotation();
        _maxTimeAnnotation = _annotationFactory.createAnnotation();
        _minTimeAnnotation = _annotationFactory.createAnnotation();
        _numRows = 0;
        if (descriptiveStatistics) {
            _statistics = new DescriptiveStatistics();
        } else {
            _statistics = new SummaryStatistics();
        }
    }

    /**
     * Registers a single value of the column.
     *
     * Null values are only counted and annotated. Non-null values are added
     * to the statistics (once per occurrence) and compared against the
     * current minimum/maximum date and time of day.
     *
     * @param value
     *            the date value of the row, may be null
     * @param row
     *            the row that the value originates from
     * @param distinctCount
     *            the number of occurrences that the row represents
     */
    public synchronized void run(final Date value, final InputRow row, final int distinctCount) {
        _numRows += distinctCount;

        if (value == null) {
            _annotationFactory.annotate(row, distinctCount, _nullAnnotation);
            return;
        }

        addToStatistics(value.getTime(), distinctCount);

        final LocalDate localDate = new LocalDate(value);
        final LocalTime localTime = new LocalTime(value);

        updateBoundaries(localDate, localTime);
        annotateBoundaries(row, distinctCount, localDate, localTime);
    }

    /**
     * Adds the timestamp to the statistics once per occurrence. The concrete
     * statistics type is resolved once, outside of the loop.
     */
    private void addToStatistics(final long timestamp, final int distinctCount) {
        if (_statistics instanceof DescriptiveStatistics) {
            final DescriptiveStatistics descriptive = (DescriptiveStatistics) _statistics;
            for (int i = 0; i < distinctCount; i++) {
                descriptive.addValue(timestamp);
            }
        } else {
            final SummaryStatistics summary = (SummaryStatistics) _statistics;
            for (int i = 0; i < distinctCount; i++) {
                summary.addValue(timestamp);
            }
        }
    }

    /**
     * Moves the min/max date and time boundaries if the given value exceeds
     * them. Whenever a boundary moves, the rows annotated for the previous
     * boundary value are no longer relevant, so that annotation is reset.
     */
    private void updateBoundaries(final LocalDate localDate, final LocalTime localTime) {
        if (_minDate == null) {
            // first non-null value: it is both minimum and maximum
            _minDate = localDate;
            _maxDate = localDate;
            _minTime = localTime;
            _maxTime = localTime;
            return;
        }

        if (localDate.isAfter(_maxDate)) {
            _maxDate = localDate;
            _annotationFactory.reset(_maxDateAnnotation);
        } else if (localDate.isBefore(_minDate)) {
            _minDate = localDate;
            _annotationFactory.reset(_minDateAnnotation);
        }

        if (localTime.isAfter(_maxTime)) {
            _maxTime = localTime;
            _annotationFactory.reset(_maxTimeAnnotation);
        } else if (localTime.isBefore(_minTime)) {
            _minTime = localTime;
            _annotationFactory.reset(_minTimeAnnotation);
        }
    }

    /**
     * Annotates the row for every boundary (max/min date, max/min time) that
     * its value is equal to. A row may match several boundaries at once.
     */
    private void annotateBoundaries(final InputRow row, final int distinctCount, final LocalDate localDate,
            final LocalTime localTime) {
        if (localDate.isEqual(_maxDate)) {
            _annotationFactory.annotate(row, distinctCount, _maxDateAnnotation);
        }
        if (localDate.isEqual(_minDate)) {
            _annotationFactory.annotate(row, distinctCount, _minDateAnnotation);
        }
        if (localTime.isEqual(_maxTime)) {
            _annotationFactory.annotate(row, distinctCount, _maxTimeAnnotation);
        }
        if (localTime.isEqual(_minTime)) {
            _annotationFactory.annotate(row, distinctCount, _minTimeAnnotation);
        }
    }

    /**
     * Converts an epoch-millisecond statistic to a Date, truncating any
     * fraction.
     *
     * @return the date, or null if the statistic is NaN
     */
    private static Date toDate(final double timestamp) {
        if (Double.isNaN(timestamp)) {
            return null;
        }
        return new Date((long) timestamp);
    }

    /**
     * @return the value as a Number, or null if the value is NaN
     */
    private static Number toNumber(final double value) {
        if (Double.isNaN(value)) {
            return null;
        }
        return Double.valueOf(value);
    }

    /**
     * @return the statistics as {@link DescriptiveStatistics}, or null if
     *         this delegate was created without descriptive statistics
     */
    private DescriptiveStatistics getDescriptiveStatistics() {
        if (_statistics instanceof DescriptiveStatistics) {
            return (DescriptiveStatistics) _statistics;
        }
        return null;
    }

    /**
     * Shared implementation of the median and percentile getters.
     *
     * @param percentile
     *            the requested percentile, as passed on to
     *            {@link DescriptiveStatistics#getPercentile(double)}
     * @return the percentile as a date, or null if descriptive statistics
     *         are not enabled or the result is NaN
     */
    private Date getPercentile(final double percentile) {
        final DescriptiveStatistics descriptive = getDescriptiveStatistics();
        if (descriptive == null) {
            return null;
        }
        return toDate(descriptive.getPercentile(percentile));
    }

    /**
     * @return the mean of all registered non-null values, or null if the
     *         mean is NaN
     */
    public Date getMean() {
        return toDate(_statistics.getMean());
    }

    /**
     * @return the median (50th percentile), or null if descriptive
     *         statistics are not enabled or the result is NaN
     */
    public Date getMedian() {
        return getPercentile(50.0);
    }

    /**
     * @return the 25th percentile, or null if descriptive statistics are not
     *         enabled or the result is NaN
     */
    public Date getPercentile25() {
        return getPercentile(25.0);
    }

    /**
     * @return the 75th percentile, or null if descriptive statistics are not
     *         enabled or the result is NaN
     */
    public Date getPercentile75() {
        return getPercentile(75.0);
    }

    /**
     * @return the kurtosis of the registered timestamps, or null if
     *         descriptive statistics are not enabled or the result is NaN
     */
    public Number getKurtosis() {
        final DescriptiveStatistics descriptive = getDescriptiveStatistics();
        if (descriptive == null) {
            return null;
        }
        return toNumber(descriptive.getKurtosis());
    }

    /**
     * @return the skewness of the registered timestamps, or null if
     *         descriptive statistics are not enabled or the result is NaN
     */
    public Number getSkewness() {
        final DescriptiveStatistics descriptive = getDescriptiveStatistics();
        if (descriptive == null) {
            return null;
        }
        return toNumber(descriptive.getSkewness());
    }

    /**
     * @return the highest date seen, or null if no non-null value has been
     *         registered
     */
    public LocalDate getMaxDate() {
        return _maxDate;
    }

    /**
     * @return the highest time of day seen, or null if no non-null value has
     *         been registered
     */
    public LocalTime getMaxTime() {
        return _maxTime;
    }

    /**
     * @return the lowest date seen, or null if no non-null value has been
     *         registered
     */
    public LocalDate getMinDate() {
        return _minDate;
    }

    /**
     * @return the lowest time of day seen, or null if no non-null value has
     *         been registered
     */
    public LocalTime getMinTime() {
        return _minTime;
    }

    /**
     * @return the sum of the distinct counts of all registered rows,
     *         including rows with null values
     */
    public int getNumRows() {
        return _numRows;
    }

    /**
     * @return the annotation of rows that had a null value
     */
    public RowAnnotation getNullAnnotation() {
        return _nullAnnotation;
    }

    /**
     * @return the annotation of rows that hold the highest date
     */
    public RowAnnotation getMaxDateAnnotation() {
        return _maxDateAnnotation;
    }

    /**
     * @return the annotation of rows that hold the lowest date
     */
    public RowAnnotation getMinDateAnnotation() {
        return _minDateAnnotation;
    }

    /**
     * @return the annotation of rows that hold the highest time of day
     */
    public RowAnnotation getMaxTimeAnnotation() {
        return _maxTimeAnnotation;
    }

    /**
     * @return the annotation of rows that hold the lowest time of day
     */
    public RowAnnotation getMinTimeAnnotation() {
        return _minTimeAnnotation;
    }

    /**
     * @return the row count of the null annotation
     */
    public int getNumNull() {
        return _nullAnnotation.getRowCount();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy