All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.pig.pen.util.MetricEvaluation Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pig.pen.util;

import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.LOFilter;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.util.IdentityHashSet;

//Evaluates various metrics
public class MetricEvaluation {
    public static float getRealness(LogicalOperator op,
            Map exampleData, boolean overallRealness) {
        // StringBuffer str = new StringBuffer();
        int noTuples = 0;
        int noSynthetic = 0;
        for (Map.Entry e : exampleData.entrySet()) {
            // if(e.getKey() instanceof LORead) continue;
            if (e.getKey().getAlias() == null)
                continue;
            DataBag bag;
            if (overallRealness) {
                bag = exampleData.get(e.getKey());
            } else {
                bag = exampleData.get(op);
            }
            noTuples += bag.size();
            for (Iterator it = bag.iterator(); it.hasNext();) {
                if (((ExampleTuple) it.next()).synthetic)
                    noSynthetic++;
            }
            if (!overallRealness)
                break;

        }

        if (noTuples == 0) {
            if (noSynthetic == 0)
                return 0.0f;
            else
                return 100.0f;
        }
        return 100 * (1 - ((float) noSynthetic / (float) noTuples));

    }

    public static float getConciseness(
            LogicalOperator op,
            Map exampleData,
            Map>> OperatorToEqClasses,
            boolean overallConciseness) {
        DataBag bag = exampleData.get(op);

        int noEqCl = OperatorToEqClasses.get(op).size();
        long noTuples = bag.size();

        float conciseness = 100 * ((float) noEqCl / (float) noTuples);
        if (!overallConciseness) {

            return ((conciseness > 100.0) ? 100.0f : conciseness);
        } else {

            noEqCl = 0;
            noTuples = 0;
            conciseness = 0;
            int noOperators = 0;

            for (Map.Entry>> e : OperatorToEqClasses
                    .entrySet()) {
                if (e.getKey().getAlias() == null)
                    continue;
                noOperators++; // we need to keep a track of these and not use
                               // OperatorToEqClasses.size() as LORead shouldn't
                               // be considered a operator
                bag = exampleData.get(e.getKey());

                noTuples = bag.size();
                noEqCl = e.getValue().size();
                float concise = 100 * ((float) noEqCl / (float) noTuples);
                concise = (concise > 100) ? 100 : concise;
                conciseness += concise;
            }
            conciseness /= (float) noOperators;

            return conciseness;
        }

    }

    public static float getCompleteness(
            LogicalOperator op,
            Map exampleData,
            Map>> OperatorToEqClasses,
            boolean overallCompleteness) {

        int noClasses = 0;
        int noCoveredClasses = 0;
        int noOperators = 0;
        Map coveredClasses;
        float completeness = 0;
        if (!overallCompleteness) {
            Collection> eqClasses = OperatorToEqClasses
                    .get(op);
            DataBag bag;

            if (op instanceof LOFilter)
                bag = exampleData.get(((LOFilter) op).getInput());
            else
                bag = exampleData.get(op);
            coveredClasses = getCompletenessLogic(bag, eqClasses);
            noClasses = eqClasses.size();
            for (Map.Entry e : coveredClasses.entrySet()) {
                if (e.getValue()) {
                    noCoveredClasses++;
                }
            }

            return 100 * ((float) noCoveredClasses) / (float) noClasses;
        } else {
            for (Map.Entry>> e : OperatorToEqClasses
                    .entrySet()) {
                noCoveredClasses = 0;
                noClasses = 0;

                // if(e.getKey() instanceof LORead) continue; //We don't
                // consider LORead a operator.
                if (e.getKey().getAlias() == null)
                    continue; // we want to consider join a single operator
                noOperators++;
                Collection> eqClasses = e.getValue();
                LogicalOperator lop = e.getKey();
                DataBag bag;
                if (lop instanceof LOFilter)
                    bag = exampleData.get(((LOFilter) lop).getInput());
                else
                    bag = exampleData.get(lop);
                coveredClasses = getCompletenessLogic(bag, eqClasses);
                noClasses += eqClasses.size();
                for (Map.Entry e_result : coveredClasses
                        .entrySet()) {
                    if (e_result.getValue()) {
                        noCoveredClasses++;
                    }
                }
                completeness += 100 * ((float) noCoveredClasses / (float) noClasses);
            }
            completeness /= (float) noOperators;

            return completeness;
        }

    }

    private static Map getCompletenessLogic(DataBag bag,
            Collection> eqClasses) {
        Map coveredClasses = new HashMap();

        for (Iterator it = bag.iterator(); it.hasNext();) {
            Tuple t = it.next();
            int classId = 0;
            for (IdentityHashSet eqClass : eqClasses) {

                if (eqClass.contains(t) || eqClass.size() == 0) {
                    coveredClasses.put(classId, true);
                }
                classId++;
            }
        }

        return coveredClasses;

    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy