org.metafacture.statistics.CooccurrenceMetricCalculator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of metafacture-statistics Show documentation
Show all versions of metafacture-statistics Show documentation
Modules for computing statistics
/*
* Copyright 2013, 2014 Deutsche Nationalbibliothek
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.metafacture.statistics;
import java.util.ArrayList;
import java.util.List;
import org.metafacture.framework.FluxCommand;
import org.metafacture.framework.annotations.Description;
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.objects.Triple;
/**
* Calculates values for various co-occurrence metrics. The expected inputs are
* triples containing as subject the var name and as object the count. Marginal
* counts must appear first, joint counts second. Marginal counts must be
* written as 1:A, Joint counts as 2:A&B
*
* @author Markus Michael Geipel
*/
@Description("Calculates values for various cooccurrence metrics. The expected inputs are triples containing as subject the var name and as object the count. "
+ "Marginal counts must appear first, joint counts second. Marinal counts must be written as 1:A, Joint counts as 2:A&B")
@In(Triple.class)
@Out(Triple.class)
@FluxCommand("calculate-metrics")
public final class CooccurrenceMetricCalculator extends AbstractCountProcessor {
/**
* implementation of the different metrics
*/
enum Metric {
X2 {
@Override
double calculate(final int countA, final int countB, final int countAandB, final int total) {
final double o12 = countA - countAandB;
final double o21 = countB - countAandB;
final double o22 = total - countAandB;
final double d = (countAandB * o22) - (o12 * o21);
final double x2 = total * Math.pow(d, 2)
/ ((countAandB + o12) * (countAandB + o21) * (o12 + o22) * (o21 + o22));
return x2 * Math.signum(d);
}
},
F {
@Override
double calculate(final int countA, final int countB, final int countAandB, final int total) {
final double pa = (double) countA / total;
final double pb = (double) countB / total;
final double pab = (double) countAandB / total;
final double precission = pab / pa;
final double recall = pab / pb;
return 2 * precission * recall / (precission + recall);
}
},
PRECISSION {
@Override
double calculate(final int countA, final int countB, final int countAandB, final int total) {
final double pa = (double) countA / total;
final double pab = (double) countAandB / total;
return pab / pa;
}
},
RECALL {
@Override
double calculate(final int countA, final int countB, final int countAandB, final int total) {
final double pb = (double) countB / total;
final double pab = (double) countAandB / total;
return pab / pb;
}
},
JACCARD {
@Override
double calculate(final int countA, final int countB, final int countAandB, final int total) {
return countAandB / (double)(countA + countB - countAandB);
}
};
abstract double calculate(final int countA, final int countB, final int countAandB, final int total);
}
private static final int MIN_COUNT = 5;
private final List metrics = new ArrayList();
public CooccurrenceMetricCalculator(final String allMetrics) {
final String[] metrics = allMetrics.split("\\s*,\\s*");
setMinCount(MIN_COUNT);
for (String metric : metrics) {
this.metrics.add(Metric.valueOf(metric));
}
}
public CooccurrenceMetricCalculator(final Metric... metrics) {
setMinCount(MIN_COUNT);
for (Metric metric : metrics) {
this.metrics.add(metric);
}
}
@Override
protected void processCount(final String varA, final String varB, final int countA, final int countB,
final int countAandB) {
for (Metric metric : metrics) {
final double value = metric.calculate(countA, countB, countAandB, getTotal());
getReceiver().process(new Triple(varA + "&" + varB, metric.toString(), String.valueOf(value)));
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy