com.davidbracewell.apollo.affinity.AssociationMeasures Maven / Gradle / Ivy
/*
* (c) 2005 David B. Bracewell
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.davidbracewell.apollo.affinity;
import com.davidbracewell.apollo.distribution.NormalDistribution;
import com.davidbracewell.guava.common.base.Preconditions;
import com.davidbracewell.guava.common.math.DoubleMath;
import com.davidbracewell.guava.common.primitives.Doubles;
import lombok.NonNull;
import org.apache.commons.math3.distribution.ChiSquaredDistribution;
import org.apache.commons.math3.distribution.TDistribution;
/**
* Common measures to determine the association, or dependence, of variables in a contingency table.
*
* @author David B. Bracewell
*/
public enum AssociationMeasures implements ContingencyTableCalculator {
/**
* Measures based on Mikolov et. al's "Distributed Representations of Words and Phrases and their Compositionality"
*/
Mikolov {
@Override
public double calculate(ContingencyTable table) {
Preconditions.checkArgument(table.rowCount() == table.columnCount() && table.rowCount() == 2,
"Only supports 2x2 contingency tables.");
double cooc = table.get(0, 0);
double w1Count = table.get(0, 1);
double w2Count = table.get(1, 0);
double minCount = Math.min(w1Count, w2Count);
double score = (cooc - minCount) / (w1Count * w2Count);
if (Double.isFinite(score)) {
return score;
}
return Double.MAX_VALUE;
}
},
/**
* Mutual Information
*/
MI {
@Override
public double calculate(@NonNull ContingencyTable table) {
double sum = 0d;
for (int row = 0; row < table.rowCount(); row++) {
for (int col = 0; col < table.columnCount(); col++) {
sum += table.get(row, col) / table.getSum() * DoubleMath.log2(
table.get(row, col) / table.getExpected(row, col));
}
}
return Doubles.isFinite(sum) ? sum : 0d;
}
},
/**
* Pointwise Mutual Information
*/
PMI {
@Override
public double calculate(@NonNull ContingencyTable table) {
Preconditions.checkArgument(table.rowCount() == table.columnCount() && table.rowCount() == 2,
"Only supports 2x2 contingency tables.");
return DoubleMath.log2(table.get(0, 0)) - DoubleMath.log2(table.getExpected(0, 0));
}
},
/**
* Odds Ratio
*/
ODDS_RATIO {
@Override
public double calculate(@NonNull ContingencyTable table) {
Preconditions.checkArgument(table.rowCount() == table.columnCount() && table.rowCount() == 2,
"Only supports 2x2 contingency tables.");
double v1 = table.get(0, 0) / table.get(0, 1);
double v2 = table.get(1, 0) / table.get(1, 1);
return v1 / v2;
}
@Override
public double pValue(@NonNull ContingencyTable table) {
NormalDistribution distribution = new NormalDistribution(0, 1);
return 1.0 - distribution.cumulativeProbability(Math.log(calculate(table)));
}
},
/**
* T-Score, which is a standard score with mean of 50
* and standard deviation of 10
*/
T_SCORE {
@Override
public double calculate(@NonNull ContingencyTable table) {
Preconditions.checkArgument(table.rowCount() == table.columnCount() && table.rowCount() == 2,
"Only supports 2x2 contingency tables.");
return (table.get(0, 0) - table.getExpected(0, 0)) / Math.sqrt(table.get(0, 0));
}
@Override
public double pValue(@NonNull ContingencyTable table) {
TDistribution distribution = new TDistribution(table.degreesOfFreedom());
return 1.0 - distribution.cumulativeProbability(calculate(table));
}
},
/**
* Normalized
* Pointwise Mutual Information
*/
NPMI {
@Override
public double calculate(@NonNull ContingencyTable table) {
Preconditions.checkArgument(table.rowCount() == table.columnCount() && table.rowCount() == 2,
"Only supports 2x2 contingency tables.");
if (table.get(0, 0) == 0) {
return -1;
}
return DoubleMath.log2(table.get(0, 0) / table.getExpected(0, 0)) /
-DoubleMath.log2(table.get(0, 0) / table.getSum());
}
},
/**
* Approximation to the Poisson Stirling likelihood.
*/
POISSON_STIRLING {
@Override
public double calculate(@NonNull ContingencyTable table) {
Preconditions.checkArgument(table.rowCount() == table.columnCount() && table.rowCount() == 2,
"Only supports 2x2 contingency tables.");
return table.get(0, 0) * (Math.log(table.get(0, 0) / table.getExpected(0, 0)) - 1);
}
},
/**
* x2 score
*/
CHI_SQUARE {
@Override
public double calculate(@NonNull ContingencyTable table) {
double sumSq = 0d;
for (int row = 0; row < table.rowCount(); row++) {
for (int col = 0; col < table.columnCount(); col++) {
double expected = table.getExpected(row, col);
sumSq += Math.pow(table.get(row, col) - expected, 2) / expected;
}
}
return Doubles.isFinite(sumSq) ? sumSq : 0d;
}
@Override
public double pValue(@NonNull ContingencyTable table) {
ChiSquaredDistribution distribution = new ChiSquaredDistribution(table.degreesOfFreedom());
return 1.0 - distribution.cumulativeProbability(calculate(table));
}
},
/**
* g^2 or log-likelihood
*/
G_SQUARE {
@Override
public double calculate(@NonNull ContingencyTable table) {
double sum = 0d;
for (int row = 0; row < table.rowCount(); row++) {
for (int col = 0; col < table.columnCount(); col++) {
sum += table.get(row, col) * Math.log(table.get(row, col) / table.getExpected(row, col));
}
}
return Doubles.isFinite(sum) ? 2 * sum : 0d;
}
@Override
public double pValue(@NonNull ContingencyTable table) {
ChiSquaredDistribution distribution = new ChiSquaredDistribution(table.degreesOfFreedom());
return 1.0 - distribution.cumulativeProbability(calculate(table));
}
},
/**
* Relative Risk
*/
RELATIVE_RISK {
@Override
public double calculate(@NonNull ContingencyTable table) {
Preconditions.checkArgument(table.rowCount() == table.columnCount() && table.rowCount() == 2,
"Only supports 2x2 contingency tables.");
double v1 = table.get(0, 0) / table.rowSum(0);
double v2 = table.get(1, 0) / table.rowSum(1);
return v1 / v2;
}
@Override
public double pValue(@NonNull ContingencyTable table) {
NormalDistribution distribution = new NormalDistribution(0, 1);
return 1.0 - distribution.cumulativeProbability(Math.log(calculate(table)));
}
}
}//END OF AssociationMeasures
© 2015 - 2025 Weber Informatics LLC | Privacy Policy