All downloads are free. Search and download functionality uses the official Maven repository.

org.datacleaner.result.AbstractValueCountingAnalyzerResult Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.result;

import java.util.Collection;
import java.util.List;
import java.util.TreeSet;

import org.apache.metamodel.util.CollectionUtils;
import org.apache.metamodel.util.Func;
import org.datacleaner.api.Metric;
import org.datacleaner.util.LabelUtils;

/**
 * An abstract implementation of {@link ValueCountingAnalyzerResult} which
 * implements the most important metric: The value count.
 */
public abstract class AbstractValueCountingAnalyzerResult implements ValueCountingAnalyzerResult {

    private static final long serialVersionUID = 1L;

    /**
     * Exposes the "Value count" metric. The returned metric is a live view on
     * this result: every call on it delegates back to this instance.
     *
     * @return a metric that can be parameterized with a value name
     */
    @Metric(value = "Value count", supportsInClause = true)
    public final QueryParameterizableMetric getValueCount() {
        return new QueryParameterizableMetric() {

            /**
             * Suggests the names of all value frequencies of the enclosing
             * result, excluding <code>null</code> and the special
             * {@link LabelUtils#NULL_LABEL} and
             * {@link LabelUtils#UNEXPECTED_LABEL} names.
             */
            @Override
            public Collection<String> getParameterSuggestions() {
                final Collection<ValueFrequency> valueCounts = AbstractValueCountingAnalyzerResult.this
                        .getValueCounts();
                final List<String> result = CollectionUtils.map(valueCounts, new Func<ValueFrequency, String>() {
                    @Override
                    public String eval(ValueFrequency vc) {
                        return vc.getName();
                    }
                });
                // List.remove(Object) removes at most one occurrence per call
                result.remove(null);
                result.remove(LabelUtils.NULL_LABEL);
                result.remove(LabelUtils.UNEXPECTED_LABEL);
                return result;
            }

            @Override
            public int getTotalCount() {
                return AbstractValueCountingAnalyzerResult.this.getTotalCount();
            }

            /**
             * Returns the count registered for the given instance, or 0 when
             * the lookup yields <code>null</code>.
             */
            @Override
            public int getInstanceCount(String instance) {
                final Integer count = getCount(instance);
                if (count == null) {
                    return 0;
                }
                return count;
            }
        };
    }

    /**
     * Builds a collection of value frequencies in which composite entries are
     * replaced by their children, as long as the total number of entries stays
     * within the preferred maximum.
     *
     * @param preferredMaximum
     *            the preferred maximum number of entries in the returned
     *            collection
     * @return a sorted copy of the value counts with composites exploded where
     *         that fits, or the original value counts (unmodified, not
     *         copied) when they already exceed the preferred maximum
     */
    @Override
    public Collection<ValueFrequency> getReducedValueFrequencies(final int preferredMaximum) {
        final Collection<ValueFrequency> original = getValueCounts();

        // work on a copy so that 'original' can be iterated while 'result' is
        // being modified
        final Collection<ValueFrequency> result = new TreeSet<ValueFrequency>(original);

        if (original.size() <= preferredMaximum) {
            // check if any composite value freq's can be exploded
            for (ValueFrequency valueFrequency : original) {
                if (valueFrequency.isComposite()) {
                    final List<ValueFrequency> children = valueFrequency.getChildren();
                    if (children != null) {
                        // "- 1" accounts for the composite entry that the
                        // children would replace
                        if (result.size() - 1 + children.size() <= preferredMaximum) {
                            // replace with children
                            result.remove(valueFrequency);
                            result.addAll(children);
                        }
                    }
                }
            }
            return result;
        } else {
            return original;
        }
    }

    /**
     * Returns a short textual summary: the name of this result followed by (at
     * most) its first 4 value counts.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("Value distribution for: ");
        sb.append(getName());
        appendToString(sb, this, 4);
        return sb.toString();
    }

    /**
     * Appends a string representation of a result's value counts, limited to a
     * maximum amount of entries. Each entry is written on its own line as
     * <code> - name: count</code>. Once <code>maxEntries</code> entries have
     * been written, an ellipsis line is appended and the remaining entries (if
     * any) are skipped. Note that the ellipsis is written even when the last
     * appended entry was also the last available one.
     * 
     * @param sb
     *            the StringBuilder to append to
     * @param groupResult
     *            the result whose value counts should be appended
     * @param maxEntries
     *            the maximum number of entries to append; 0 appends nothing, a
     *            negative value effectively disables the limit
     */
    protected void appendToString(StringBuilder sb, ValueCountingAnalyzerResult groupResult, int maxEntries) {
        if (maxEntries != 0) {
            final Collection<ValueFrequency> valueCounts = groupResult.getValueCounts();
            for (ValueFrequency valueCount : valueCounts) {
                sb.append("\n - ");
                sb.append(valueCount.getName());
                sb.append(": ");
                sb.append(valueCount.getCount());

                maxEntries--;
                if (maxEntries == 0) {
                    sb.append("\n ...");
                    break;
                }
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy