All downloads are free. The search and download functionality uses the official Maven repository.

org.datacleaner.beans.dategap.DateGapAnalyzer Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Free Software Foundation, Inc.
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.beans.dategap;

import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;

import javax.inject.Named;

import org.datacleaner.api.Analyzer;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.components.categories.DateAndTimeCategory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Named("Date gap analyzer")
@Description("Analyze the periodic gaps between FROM and TO dates.")
@Categorized(DateAndTimeCategory.class)
public class DateGapAnalyzer implements Analyzer {

    private static final Logger logger = LoggerFactory.getLogger(DateGapAnalyzer.class);
    private final Map timelines = new HashMap<>();
    @Configured(order = 1)
    InputColumn fromColumn;
    @Configured(order = 2)
    InputColumn toColumn;
    @Configured(order = 3, required = false)
    @Description("Optional column to group timelines by, if the table contains multiple timelines")
    InputColumn groupColumn;
    @Configured(order = 4, required = false, value = "Count intersecting from and to dates as overlaps")
    Boolean singleDateOverlaps = false;
    @Configured(order = 5, value = "Fault tolerant switch from/to dates", required = false)
    @Description("Turn on/off automatic switching of FROM and TO dates, if FROM has a higher value than TO.")
    boolean faultTolerantDateSwitch = true;

    public DateGapAnalyzer() {
    }

    public DateGapAnalyzer(final InputColumn fromColumn, final InputColumn toColumn,
            final InputColumn groupColumn) {
        this.fromColumn = fromColumn;
        this.toColumn = toColumn;
        this.groupColumn = groupColumn;
    }

    @Override
    public void run(final InputRow row, final int distinctCount) {
        final Date from = row.getValue(fromColumn);
        final Date to = row.getValue(toColumn);

        if (from != null && to != null) {
            String groupName = null;
            if (groupColumn != null) {
                groupName = row.getValue(groupColumn);
            }

            if (faultTolerantDateSwitch && from.compareTo(to) > 0) {
                logger.debug("Switching around from and to, because {} is higher than {} (row: {})",
                        new Object[] { from, to, row });
                put(groupName, new TimeInterval(to, from));
            } else {
                put(groupName, new TimeInterval(from, to));
            }
        } else {
            logger.debug("Encountered row where from column or to column was null, ignoring");
        }
    }

    protected void put(final String groupName, final TimeInterval interval) {
        TimeLine timeline = timelines.get(groupName);
        if (timeline == null) {
            timeline = new TimeLine();
            timelines.put(groupName, timeline);
        }
        timeline.addInterval(interval);
    }

    @Override
    public DateGapAnalyzerResult getResult() {
        boolean includeSingleTimeInstanceIntervals = false;
        if (singleDateOverlaps != null) {
            includeSingleTimeInstanceIntervals = singleDateOverlaps.booleanValue();
        }
        final Map completeIntervals = new HashMap<>();
        final Map> gaps = new HashMap<>();
        final Map> overlaps = new HashMap<>();
        final Set groupNames = timelines.keySet();
        for (final String name : groupNames) {
            final TimeLine timeline = timelines.get(name);
            final SortedSet timelineGaps = timeline.getTimeGapIntervals();
            final SortedSet timelineOverlaps =
                    timeline.getOverlappingIntervals(includeSingleTimeInstanceIntervals);

            completeIntervals.put(name, new TimeInterval(timeline.getFrom(), timeline.getTo()));
            gaps.put(name, timelineGaps);
            overlaps.put(name, timelineOverlaps);
        }

        final String groupColumnName = groupColumn == null ? null : groupColumn.getName();
        return new DateGapAnalyzerResult(fromColumn.getName(), toColumn.getName(), groupColumnName, completeIntervals,
                gaps, overlaps);
    }

    public void setFromColumn(final InputColumn fromColumn) {
        this.fromColumn = fromColumn;
    }

    public void setGroupColumn(final InputColumn groupColumn) {
        this.groupColumn = groupColumn;
    }

    public void setSingleDateOverlaps(final Boolean singleDateOverlaps) {
        this.singleDateOverlaps = singleDateOverlaps;
    }

    public void setToColumn(final InputColumn toColumn) {
        this.toColumn = toColumn;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy