// Many resources are needed to download a project. Please understand that we have to cover our server costs.
// Thank you in advance. The project price is only $1.
// You can buy this project and download or modify it as often as you want.
/**
* DataCleaner (community edition)
* Copyright (C) 2014 Free Software Foundation, Inc.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.user;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.collections.MapUtils;
import org.datacleaner.api.Analyzer;
import org.datacleaner.api.InputColumn;
import org.datacleaner.beans.BooleanAnalyzer;
import org.datacleaner.beans.DateAndTimeAnalyzer;
import org.datacleaner.beans.NumberAnalyzer;
import org.datacleaner.beans.StringAnalyzer;
import org.datacleaner.beans.stringpattern.PatternFinderAnalyzer;
import org.datacleaner.beans.valuedist.ValueDistributionAnalyzer;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.AnalyzerComponentBuilder;
import org.datacleaner.util.ReflectionUtils;
/**
* Defines the strategy and rules for doing quick analysis.
*
* @see QuickAnalysisActionListener
*/
public class QuickAnalysisStrategy implements Serializable {
private static final long serialVersionUID = 1L;
private static final String USER_PREFERENCES_NAMESPACE = "datacleaner.quickanalysis.strategy";
private final int columnsPerAnalyzer;
private final boolean includeValueDistribution;
private final boolean includePatternFinder;
public QuickAnalysisStrategy() {
this(5, false, false);
}
public QuickAnalysisStrategy(final int columnsPerAnalyzer, final boolean includeValueDistribution,
final boolean includePatternFinder) {
this.columnsPerAnalyzer = columnsPerAnalyzer;
this.includeValueDistribution = includeValueDistribution;
this.includePatternFinder = includePatternFinder;
}
/**
* Saves a {@link QuickAnalysisStrategy} to a {@link UserPreferences}
* object.
*
* @param strategy
* @param userPreferences
*/
public static void saveToUserPreferences(final QuickAnalysisStrategy strategy,
final UserPreferences userPreferences) {
final Map properties = userPreferences.getAdditionalProperties();
properties.put(USER_PREFERENCES_NAMESPACE + ".columnsPerAnalyzer", "" + strategy.columnsPerAnalyzer);
properties
.put(USER_PREFERENCES_NAMESPACE + ".includeValueDistribution", "" + strategy.includeValueDistribution);
properties.put(USER_PREFERENCES_NAMESPACE + ".includePatternFinder", "" + strategy.includePatternFinder);
}
/**
* Loads {@link QuickAnalysisStrategy} from a {@link UserPreferences}
* object.
*
* @param userPreferences
* @return
*/
public static QuickAnalysisStrategy loadFromUserPreferences(final UserPreferences userPreferences) {
final Map properties = userPreferences.getAdditionalProperties();
final int columnsPerAnalyzer =
MapUtils.getIntValue(properties, USER_PREFERENCES_NAMESPACE + ".columnsPerAnalyzer", 5);
final boolean includeValueDistribution =
MapUtils.getBooleanValue(properties, USER_PREFERENCES_NAMESPACE + ".includeValueDistribution", false);
final boolean includePatternFinder =
MapUtils.getBooleanValue(properties, USER_PREFERENCES_NAMESPACE + ".includePatternFinder", false);
return new QuickAnalysisStrategy(columnsPerAnalyzer, includeValueDistribution, includePatternFinder);
}
public boolean isIncludePatternFinder() {
return includePatternFinder;
}
public boolean isIncludeValueDistribution() {
return includeValueDistribution;
}
public int getColumnsPerAnalyzer() {
return columnsPerAnalyzer;
}
public void configureAnalysisJobBuilder(final AnalysisJobBuilder ajb) {
final List> booleanColumns = new ArrayList<>();
final List> stringColumns = new ArrayList<>();
final List> numberColumns = new ArrayList<>();
final List> dateTimeColumns = new ArrayList<>();
for (final InputColumn> inputColumn : ajb.getSourceColumns()) {
final Class> dataType = inputColumn.getDataType();
if (ReflectionUtils.isBoolean(dataType)) {
booleanColumns.add(inputColumn);
} else if (ReflectionUtils.isNumber(dataType)) {
numberColumns.add(inputColumn);
} else if (ReflectionUtils.isDate(dataType)) {
dateTimeColumns.add(inputColumn);
} else if (ReflectionUtils.isString(dataType)) {
stringColumns.add(inputColumn);
}
}
if (!booleanColumns.isEmpty()) {
// boolean analyzer contains combination matrices, so all columns
// are added to a single analyzer job.
ajb.addAnalyzer(BooleanAnalyzer.class).addInputColumns(booleanColumns);
}
if (!numberColumns.isEmpty()) {
createAnalyzers(ajb, NumberAnalyzer.class, numberColumns);
}
if (!dateTimeColumns.isEmpty()) {
createAnalyzers(ajb, DateAndTimeAnalyzer.class, dateTimeColumns);
}
if (!stringColumns.isEmpty()) {
createAnalyzers(ajb, StringAnalyzer.class, stringColumns);
}
}
/**
* Registers analyzers and up to 4 columns per analyzer. This restriction is
* to ensure that results will be nicely readable. A table might contain
* hundreds of columns.
*
* @param ajb
* @param analyzerClass
* @param columns
*/
private void createAnalyzers(final AnalysisJobBuilder ajb, final Class extends Analyzer>> analyzerClass,
final List> columns) {
final int columnsPerAnalyzer = getColumnsPerAnalyzer();
AnalyzerComponentBuilder> analyzerJobBuilder = ajb.addAnalyzer(analyzerClass);
int columnCount = 0;
for (final InputColumn> inputColumn : columns) {
if (columnCount == columnsPerAnalyzer) {
analyzerJobBuilder = ajb.addAnalyzer(analyzerClass);
columnCount = 0;
}
analyzerJobBuilder.addInputColumn(inputColumn);
if (isIncludeValueDistribution()) {
ajb.addAnalyzer(ValueDistributionAnalyzer.class).addInputColumn(inputColumn);
}
if (inputColumn.getDataType() == String.class && isIncludePatternFinder()) {
ajb.addAnalyzer(PatternFinderAnalyzer.class).addInputColumn(inputColumn);
}
columnCount++;
}
}
}