All downloads are free. Search and download functionality uses the official Maven repository.

org.datacleaner.beans.valuedist.WeekdayDistributionAnalyzer Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.beans.valuedist;

import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import javax.inject.Named;

import org.datacleaner.api.Analyzer;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Concurrent;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Distributed;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.components.categories.DateAndTimeCategory;
import org.datacleaner.result.Crosstab;
import org.datacleaner.result.CrosstabDimension;
import org.datacleaner.result.CrosstabNavigator;
import org.datacleaner.result.CrosstabResult;

@Named("Weekday distribution")
@Description("Finds the distribution of weekdays from Date values.")
@Concurrent(true)
@Categorized(DateAndTimeCategory.class)
@Distributed(reducer=WeekdayDistributionResultReducer.class)
public class WeekdayDistributionAnalyzer implements Analyzer {

	private final Map, Map> distributionMap;

	@Configured
	InputColumn[] dateColumns;

	public WeekdayDistributionAnalyzer() {
		distributionMap = new HashMap, Map>();
	}

	@Initialize
	public void init() {
		for (InputColumn col : dateColumns) {
			Map countMap = new HashMap(7);
			for (int i = Calendar.SUNDAY; i <= Calendar.SATURDAY; i++) {
				// put a count of 0 for each day of the week
				countMap.put(i, new AtomicInteger(0));
			}
			distributionMap.put(col, countMap);
		}
	}

	@Override
	public void run(InputRow row, int distinctCount) {
		for (InputColumn col : dateColumns) {
			Date value = row.getValue(col);
			if (value != null) {
				Calendar c = Calendar.getInstance();
				c.setTime(value);
				int dayOfWeek = c.get(Calendar.DAY_OF_WEEK);
				Map countMap = distributionMap.get(col);
				AtomicInteger count = countMap.get(dayOfWeek);
				count.addAndGet(distinctCount);
			}
		}
	}

	@Override
	public CrosstabResult getResult() {
		CrosstabDimension columnDimension = new CrosstabDimension("Column");
		CrosstabDimension weekdayDimension = new CrosstabDimension("Weekday");
		weekdayDimension.addCategory("Sunday").addCategory("Monday").addCategory("Tuesday").addCategory("Wednesday")
				.addCategory("Thursday").addCategory("Friday").addCategory("Saturday");
		Crosstab crosstab = new Crosstab(Integer.class, columnDimension, weekdayDimension);
		for (InputColumn col : dateColumns) {
			columnDimension.addCategory(col.getName());
			CrosstabNavigator nav = crosstab.where(columnDimension, col.getName());
			Map countMap = distributionMap.get(col);
			nav.where(weekdayDimension, "Sunday").put(countMap.get(Calendar.SUNDAY).get());
			nav.where(weekdayDimension, "Monday").put(countMap.get(Calendar.MONDAY).get());
			nav.where(weekdayDimension, "Tuesday").put(countMap.get(Calendar.TUESDAY).get());
			nav.where(weekdayDimension, "Wednesday").put(countMap.get(Calendar.WEDNESDAY).get());
			nav.where(weekdayDimension, "Thursday").put(countMap.get(Calendar.THURSDAY).get());
			nav.where(weekdayDimension, "Friday").put(countMap.get(Calendar.FRIDAY).get());
			nav.where(weekdayDimension, "Saturday").put(countMap.get(Calendar.SATURDAY).get());
		}

		return new CrosstabResult(crosstab);
	}

	// used only for unittesting
	public void setDateColumns(InputColumn[] dateColumns) {
		this.dateColumns = dateColumns;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy