![JAR search and dependency download from the Maven repository](/logo.png)
eu.project.ttc.models.CrossTable Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of termsuite-core Show documentation
Show all versions of termsuite-core Show documentation
A Java UIMA-based toolbox for multilingual and efficient terminology extraction and multilingual term alignment
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package eu.project.ttc.models;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.lang3.mutable.MutableDouble;
import com.google.common.collect.Maps;
import eu.project.ttc.metrics.AssociationRate;
/**
*
* An index (often called a cross table) that helps to compute
* the association rate of two terms, i.e. how often they co-occur
* in the corpus.
*
* @author Damien Cram
*
*/
public class CrossTable {

    /**
     * The term index of the corpus.
     */
    private final TermIndex termIndex;

    /**
     * For each term x, the sum of the co-occurrence counts of all entries
     * of x's context vector (the "a + b" marginal of the cross table).
     */
    private Map<Term, MutableDouble> aPlusB;

    /**
     * For each co-term y, the sum of the co-occurrence counts of all
     * context vector entries whose co-term is y (the "a + c" marginal of
     * the cross table).
     */
    private Map<Term, MutableDouble> aPlusC;

    /**
     * The sum of the co-occurrence counts over all context vector entries
     * of all terms of the term index.
     */
    private int totalCoOccurrences;

    // Coefficients of the last call to computeRate(), kept for debugging.
    private int lastA;
    private int lastB;
    private int lastC;
    private int lastD;

    /**
     * Constructs this cross table with a term index and inits the table
     * with all co-occurrences of the term index's terms. This init process
     * is based on the terms' context vectors: terms whose context vector
     * has not been computed yet are skipped and do not contribute to the
     * table.
     *
     * @see Term#getContextVector()
     * @param termIndex
     *            The term index of the corpus
     * @throws NullPointerException
     *             if <code>termIndex</code> is <code>null</code>
     */
    public CrossTable(TermIndex termIndex) {
        this.termIndex = termIndex;
        init();
    }

    /**
     * (Re)builds both marginal maps and the grand total from the context
     * vectors of the terms of the term index.
     */
    private void init() {
        this.aPlusB = Maps.newHashMap();
        this.aPlusC = Maps.newHashMap();
        this.totalCoOccurrences = 0;

        for (Iterator<Term> it = this.termIndex.getTerms().iterator(); it.hasNext();) {
            Term term = it.next();
            if (!term.isContextVectorComputed()) {
                // No context vector: nothing to count for this term.
                continue;
            }
            ContextVector context = term.getContextVector();
            for (ContextVector.Entry entry : context.getEntries()) {
                this.totalCoOccurrences += entry.getNbCooccs();
                getScoreFromMap(this.aPlusB, term).add(entry.getNbCooccs());
                // The co-term marginal must accumulate the same quantity
                // (co-occurrence counts) as the term marginal and the total,
                // otherwise a, b, c and d do not belong to the same table.
                getScoreFromMap(this.aPlusC, entry.getCoTerm()).add(entry.getNbCooccs());
            }
        }
    }

    /**
     * Returns the accumulator bound to <code>key</code> in the given map,
     * creating it with value 0 and registering it first if it is absent.
     *
     * @param aScoreMap
     *            the map of accumulators
     * @param key
     *            the term whose accumulator is requested
     * @return the (possibly newly created) accumulator, never
     *         <code>null</code>
     */
    private MutableDouble getScoreFromMap(Map<Term, MutableDouble> aScoreMap, Term key) {
        MutableDouble score = aScoreMap.get(key);
        if (score == null) {
            score = new MutableDouble(0);
            aScoreMap.put(key, score);
        }
        return score;
    }

    /**
     * Computes coefficients a, b, c and d and computes the association
     * rate based on these coefficients.
     *
     * <ul>
     * <li>a: co-occurrence count of <code>y</code> in the context vector of
     * <code>x</code>, truncated to an int</li>
     * <li>b: (a + b) marginal of <code>x</code> minus a, or 0 if
     * <code>x</code> has no marginal</li>
     * <li>c: (a + c) marginal of <code>y</code> minus a, or 0 if
     * <code>y</code> has no marginal</li>
     * <li>d: total number of co-occurrences minus a, b and c</li>
     * </ul>
     *
     * These coefficients are made available through the not-thread safe
     * methods #getLastX().
     *
     * @see #getLastA()
     * @see #getLastB()
     * @see #getLastC()
     * @see #getLastD()
     *
     * @param rate
     *            the association rate measure
     * @param x
     *            the base term
     * @param y
     *            the co term
     * @return the association rate
     */
    public double computeRate(AssociationRate rate, Term x, Term y) {
        // A = (x & y)
        int a = (int) x.getContextVector().getNbCooccs(y);

        // B = (x & not(y))
        MutableDouble xMarginal = this.aPlusB.get(x);
        int b = xMarginal == null ? 0 : xMarginal.intValue() - a;

        // C = (not(x) & y) -- y is looked up in the co-term marginals
        MutableDouble yMarginal = this.aPlusC.get(y);
        int c = yMarginal == null ? 0 : yMarginal.intValue() - a;

        // D = (not(x) & not(y))
        int d = this.totalCoOccurrences - a - b - c;

        // for DEBUG purpose only
        lastA = a;
        lastB = b;
        lastC = c;
        lastD = d;
        // END DEBUG

        return rate.getValue(a, b, c, d);
    }

    /**
     * The number of times coTerm is a co-occurrence of term, as computed
     * by the last call to
     * {@link #computeRate(AssociationRate, Term, Term)}.
     *
     * WARNING : not thread safe !
     *
     * @return coefficient a of the last computed rate
     */
    public int getLastA() {
        return lastA;
    }

    /**
     * The number of times a co-occurrence of term is not coTerm, as
     * computed by the last call to
     * {@link #computeRate(AssociationRate, Term, Term)}.
     *
     * WARNING : not thread safe !
     *
     * @return coefficient b of the last computed rate
     */
    public int getLastB() {
        return lastB;
    }

    /**
     * The number of times coTerm is a co-occurrence of another term than
     * term, as computed by the last call to
     * {@link #computeRate(AssociationRate, Term, Term)}.
     *
     * WARNING : not thread safe !
     *
     * @return coefficient c of the last computed rate
     */
    public int getLastC() {
        return lastC;
    }

    /**
     * The number of co-occurrences that involve neither term nor coTerm,
     * as computed by the last call to
     * {@link #computeRate(AssociationRate, Term, Term)}.
     *
     * WARNING : not thread safe !
     *
     * @return coefficient d of the last computed rate
     */
    public int getLastD() {
        return lastD;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy