All downloads are free. Search and download functionality uses the official Maven repository.

org.datacleaner.beans.BooleanAnalyzerCombinationMetric Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.beans;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.datacleaner.api.ParameterizableMetric;
import org.datacleaner.components.convert.ConvertToBooleanTransformer;
import org.datacleaner.result.Crosstab;
import org.datacleaner.result.CrosstabDimension;
import org.datacleaner.result.CrosstabNavigator;

/**
 * Metric implementation for the "Combination count" metric in the
 * {@link BooleanAnalyzerResult}.
 */
final class BooleanAnalyzerCombinationMetric implements ParameterizableMetric {

    /**
     * Cell value that marks a column as {@code true} within a combination row.
     * The comparison is an exact {@link Byte} equality, so a cell holding any
     * other {@link Number} type (even with value 1) is read as {@code false}.
     * NOTE(review): presumably the analyzer that builds the crosstab writes
     * Byte cells - confirm against BooleanAnalyzer.
     */
    private static final Byte TRUE_MARKER = Byte.valueOf((byte) 1);

    private final Crosstab<Number> _valueCombinationCrosstab;

    /**
     * Creates the metric.
     *
     * @param valueCombinationCrosstab
     *            the crosstab to read combination frequencies from; may be
     *            null, in which case the metric always yields 0 and offers
     *            no parameter suggestions
     */
    public BooleanAnalyzerCombinationMetric(Crosstab<Number> valueCombinationCrosstab) {
        _valueCombinationCrosstab = valueCombinationCrosstab;
    }

    /**
     * Looks up the frequency of a value combination.
     *
     * The parameter is first tried as the name of a category in the measure
     * dimension. If no such category exists, it is parsed as a
     * comma-separated list of true/false tokens (case-insensitive, surrounding
     * whitespace ignored) and matched against the combination stored in each
     * measure category.
     *
     * @param parameter
     *            a measure category name or a comma-separated list of
     *            true/false tokens
     * @return the value of the frequency column for the matching measure
     *         category, or 0 if there is no crosstab, the parameter is null
     *         or unparseable, the number of tokens does not match the number
     *         of value columns, or no combination matches
     */
    @Override
    public Number getValue(final String parameter) {
        if (_valueCombinationCrosstab == null || parameter == null) {
            // a null parameter can never identify a combination; treat it like
            // any other unparseable parameter instead of failing on split()
            return 0;
        }

        final CrosstabDimension measureDimension = _valueCombinationCrosstab
                .getDimension(BooleanAnalyzer.DIMENSION_MEASURE);
        if (measureDimension.containsCategory(parameter)) {
            return _valueCombinationCrosstab.where(measureDimension, parameter)
                    .where(BooleanAnalyzer.DIMENSION_COLUMN, BooleanAnalyzer.VALUE_COMBINATION_COLUMN_FREQUENCY).get();
        }

        final boolean[] requested = parseCombination(parameter);
        if (requested == null) {
            // not parseable as a list of booleans
            return 0;
        }

        final List<String> valueColumns = getValueColumns();
        if (requested.length != valueColumns.size()) {
            // the number of columns should match the number of booleans in the
            // parameter
            return 0;
        }

        for (final String category : measureDimension.getCategories()) {
            final CrosstabNavigator<Number> nav = _valueCombinationCrosstab.where(BooleanAnalyzer.DIMENSION_MEASURE,
                    category);
            if (Arrays.equals(readCombination(nav, valueColumns), requested)) {
                return nav.where(BooleanAnalyzer.DIMENSION_COLUMN,
                        BooleanAnalyzer.VALUE_COMBINATION_COLUMN_FREQUENCY).get();
            }
        }

        return 0;
    }

    /**
     * Builds the list of suggested parameters: the "most frequent" and "least
     * frequent" measure names, followed by one comma-separated true/false
     * string for every category of the measure dimension.
     *
     * @return the suggestions, or an empty list if there is no crosstab
     */
    @Override
    public Collection<String> getParameterSuggestions() {
        if (_valueCombinationCrosstab == null) {
            return Collections.emptyList();
        }

        final List<String> suggestions = new ArrayList<String>();
        suggestions.add(BooleanAnalyzer.MEASURE_MOST_FREQUENT);
        suggestions.add(BooleanAnalyzer.MEASURE_LEAST_FREQUENT);

        // create suggestions for each combination of true and false
        final List<String> valueColumns = getValueColumns();
        final List<String> measureCategories = _valueCombinationCrosstab
                .getDimension(BooleanAnalyzer.DIMENSION_MEASURE).getCategories();
        for (final String category : measureCategories) {
            final CrosstabNavigator<Number> nav = _valueCombinationCrosstab
                    .where(BooleanAnalyzer.DIMENSION_MEASURE, category);
            final boolean[] combination = readCombination(nav, valueColumns);

            final StringBuilder sb = new StringBuilder();
            for (int i = 0; i < combination.length; i++) {
                if (i > 0) {
                    sb.append(',');
                }
                sb.append(combination[i] ? "true" : "false");
            }
            suggestions.add(sb.toString());
        }

        return suggestions;
    }

    /**
     * Parses a comma-separated list of true/false tokens.
     *
     * @return the parsed booleans, or null if any token is neither "true" nor
     *         "false" (ignoring case and surrounding whitespace)
     */
    private static boolean[] parseCombination(final String parameter) {
        final String[] tokens = parameter.split(",");
        final boolean[] bools = new boolean[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            final String token = tokens[i].trim();
            if (!("true".equalsIgnoreCase(token) || "false".equalsIgnoreCase(token))) {
                return null;
            }
            bools[i] = ConvertToBooleanTransformer.transformValue(token);
        }
        return bools;
    }

    /**
     * Collects the categories of the column dimension, in their original
     * order, leaving out the frequency column.
     */
    private List<String> getValueColumns() {
        final List<String> columnCategories = _valueCombinationCrosstab
                .getDimension(BooleanAnalyzer.DIMENSION_COLUMN).getCategories();
        final List<String> valueColumns = new ArrayList<String>(columnCategories.size());
        for (final String column : columnCategories) {
            if (!BooleanAnalyzer.VALUE_COMBINATION_COLUMN_FREQUENCY.equals(column)) {
                valueColumns.add(column);
            }
        }
        return valueColumns;
    }

    /**
     * Reads the true/false combination stored under the given navigator, one
     * entry per value column.
     */
    private static boolean[] readCombination(final CrosstabNavigator<Number> nav, final List<String> valueColumns) {
        final boolean[] combination = new boolean[valueColumns.size()];
        for (int i = 0; i < combination.length; i++) {
            final Number number = nav.where(BooleanAnalyzer.DIMENSION_COLUMN, valueColumns.get(i)).get();
            combination[i] = TRUE_MARKER.equals(number);
        }
        return combination;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy