// org.apache.pig.pen.util.MetricEvaluation Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.pen.util;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.LOFilter;
import org.apache.pig.impl.logicalLayer.LogicalOperator;
import org.apache.pig.impl.util.IdentityHashSet;
//Evaluates various metrics
public class MetricEvaluation {
public static float getRealness(LogicalOperator op,
Map exampleData, boolean overallRealness) {
// StringBuffer str = new StringBuffer();
int noTuples = 0;
int noSynthetic = 0;
for (Map.Entry e : exampleData.entrySet()) {
// if(e.getKey() instanceof LORead) continue;
if (e.getKey().getAlias() == null)
continue;
DataBag bag;
if (overallRealness) {
bag = exampleData.get(e.getKey());
} else {
bag = exampleData.get(op);
}
noTuples += bag.size();
for (Iterator it = bag.iterator(); it.hasNext();) {
if (((ExampleTuple) it.next()).synthetic)
noSynthetic++;
}
if (!overallRealness)
break;
}
if (noTuples == 0) {
if (noSynthetic == 0)
return 0.0f;
else
return 100.0f;
}
return 100 * (1 - ((float) noSynthetic / (float) noTuples));
}
public static float getConciseness(
LogicalOperator op,
Map exampleData,
Map>> OperatorToEqClasses,
boolean overallConciseness) {
DataBag bag = exampleData.get(op);
int noEqCl = OperatorToEqClasses.get(op).size();
long noTuples = bag.size();
float conciseness = 100 * ((float) noEqCl / (float) noTuples);
if (!overallConciseness) {
return ((conciseness > 100.0) ? 100.0f : conciseness);
} else {
noEqCl = 0;
noTuples = 0;
conciseness = 0;
int noOperators = 0;
for (Map.Entry>> e : OperatorToEqClasses
.entrySet()) {
if (e.getKey().getAlias() == null)
continue;
noOperators++; // we need to keep a track of these and not use
// OperatorToEqClasses.size() as LORead shouldn't
// be considered a operator
bag = exampleData.get(e.getKey());
noTuples = bag.size();
noEqCl = e.getValue().size();
float concise = 100 * ((float) noEqCl / (float) noTuples);
concise = (concise > 100) ? 100 : concise;
conciseness += concise;
}
conciseness /= (float) noOperators;
return conciseness;
}
}
public static float getCompleteness(
LogicalOperator op,
Map exampleData,
Map>> OperatorToEqClasses,
boolean overallCompleteness) {
int noClasses = 0;
int noCoveredClasses = 0;
int noOperators = 0;
Map coveredClasses;
float completeness = 0;
if (!overallCompleteness) {
Collection> eqClasses = OperatorToEqClasses
.get(op);
DataBag bag;
if (op instanceof LOFilter)
bag = exampleData.get(((LOFilter) op).getInput());
else
bag = exampleData.get(op);
coveredClasses = getCompletenessLogic(bag, eqClasses);
noClasses = eqClasses.size();
for (Map.Entry e : coveredClasses.entrySet()) {
if (e.getValue()) {
noCoveredClasses++;
}
}
return 100 * ((float) noCoveredClasses) / (float) noClasses;
} else {
for (Map.Entry>> e : OperatorToEqClasses
.entrySet()) {
noCoveredClasses = 0;
noClasses = 0;
// if(e.getKey() instanceof LORead) continue; //We don't
// consider LORead a operator.
if (e.getKey().getAlias() == null)
continue; // we want to consider join a single operator
noOperators++;
Collection> eqClasses = e.getValue();
LogicalOperator lop = e.getKey();
DataBag bag;
if (lop instanceof LOFilter)
bag = exampleData.get(((LOFilter) lop).getInput());
else
bag = exampleData.get(lop);
coveredClasses = getCompletenessLogic(bag, eqClasses);
noClasses += eqClasses.size();
for (Map.Entry e_result : coveredClasses
.entrySet()) {
if (e_result.getValue()) {
noCoveredClasses++;
}
}
completeness += 100 * ((float) noCoveredClasses / (float) noClasses);
}
completeness /= (float) noOperators;
return completeness;
}
}
private static Map getCompletenessLogic(DataBag bag,
Collection> eqClasses) {
Map coveredClasses = new HashMap();
for (Iterator it = bag.iterator(); it.hasNext();) {
Tuple t = it.next();
int classId = 0;
for (IdentityHashSet eqClass : eqClasses) {
if (eqClass.contains(t) || eqClass.size() == 0) {
coveredClasses.put(classId, true);
}
classId++;
}
}
return coveredClasses;
}
}