All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.project.ttc.models.CrossTable Maven / Gradle / Ivy

Go to download

A Java UIMA-based toolbox for multilingual and efficient terminology extraction and multilingual term alignment

There is a newer version: 3.0.10
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package eu.project.ttc.models;

import java.util.Iterator;
import java.util.Map;

import org.apache.commons.lang3.mutable.MutableDouble;

import com.google.common.collect.Maps;

import eu.project.ttc.metrics.AssociationRate;

/**
 * 
 * An index (often called cross table) that helps to compute 
 * the association rate of two terms, i.e. a measure of how often they
 * co-occur in the corpus.
 * 
 * @author Damien Cram
 *
 */
public class CrossTable {

    /**
     * The term index of the corpus.
     */
    private final TermIndex termIndex;

    /**
     * Row marginals (A+B): for each term x, the sum of the co-occurrence
     * counts of all entries of x's context vector.
     */
    private final Map<Term, MutableDouble> aPlusB = Maps.newHashMap();

    /**
     * Column marginals (A+C): for each co-term y, the sum of the co-occurrence
     * counts of all context vector entries (of any term) pointing to y.
     */
    private final Map<Term, MutableDouble> aPlusC = Maps.newHashMap();

    /**
     * Grand total (A+B+C+D): the sum of the co-occurrence counts of every
     * context vector entry visited while building the table.
     */
    private int totalCoOccurrences;

    // Coefficients of the most recent computeRate() call, kept for debugging.
    private int lastA;
    private int lastB;
    private int lastC;
    private int lastD;

    /**
     * Constructs this cross table with a term index and fills the table
     * with all co-occurrences of the term index's terms. The table is built
     * from the terms' context vectors; terms whose context vector is not
     * computed when this constructor runs are skipped and do not
     * contribute to the table.
     *
     * @see Term#getContextVector()
     * @param termIndex
     *          The term index of the corpus
     * @throws NullPointerException
     *          if <code>termIndex</code> is <code>null</code>
     */
    public CrossTable(TermIndex termIndex) {
        this.termIndex = termIndex;
        init();
    }

    /**
     * Scans the context vector of every term of the index and accumulates
     * the two marginals and the grand total of the cross table.
     */
    private void init() {
        this.totalCoOccurrences = 0;

        for (Term term : this.termIndex.getTerms()) {
            // No context vector, nothing to count for this term.
            if (!term.isContextVectorComputed()) {
                continue;
            }
            ContextVector context = term.getContextVector();
            for (ContextVector.Entry entry : context.getEntries()) {
                // The grand total and both marginals must accumulate the same
                // quantity, otherwise d = total - a - b - c is not a valid cell.
                this.totalCoOccurrences += entry.getNbCooccs();
                getScoreFromMap(this.aPlusB, term).add(entry.getNbCooccs());
                getScoreFromMap(this.aPlusC, entry.getCoTerm()).add(entry.getNbCooccs());
            }
        }
    }

    /**
     * Returns the accumulator bound to <code>key</code> in the given map,
     * registering a fresh zero-valued one first if the key is unknown.
     *
     * @param aScoreMap
     *          the marginal map to read from (and possibly insert into)
     * @param key
     *          the term whose accumulator is requested
     * @return
     *          the accumulator stored in the map for <code>key</code>, never <code>null</code>
     */
    private MutableDouble getScoreFromMap(Map<Term, MutableDouble> aScoreMap, Term key) {
        MutableDouble score = aScoreMap.get(key);
        if (score == null) {
            score = new MutableDouble(0);
            aScoreMap.put(key, score);
        }
        return score;
    }

    /**
     * Computes coefficients a, b, c and d of the cross table for the pair
     * (x, y) and computes the association rate based on these coefficients.
     *
     * These coefficients are made available through the not-thread safe methods #getLastX().
     *
     * NOTE(review): the context vector of <code>x</code> is dereferenced
     * unconditionally, so it presumably must have been computed beforehand
     * - confirm against {@link Term#getContextVector()}.
     *
     * @see #getLastA()
     * @see #getLastB()
     * @see #getLastC()
     * @see #getLastD()
     *
     * @param rate
     *          the association rate measure
     * @param x
     *          the base term
     * @param y
     *          the co term
     * @return
     *          the association rate
     */
    public double computeRate(AssociationRate rate, Term x, Term y) {

        // A = (x & y)
        int a = (int) x.getContextVector().getNbCooccs(y);

        // B = (x & not(y)) : row marginal of x minus A
        MutableDouble rowTotal = this.aPlusB.get(x);
        int b = rowTotal == null ? 0 : rowTotal.intValue() - a;

        // C = (not(x) & y) : column marginal of y minus A.
        // Must be read from aPlusC (the co-term marginals), not aPlusB.
        MutableDouble columnTotal = this.aPlusC.get(y);
        int c = columnTotal == null ? 0 : columnTotal.intValue() - a;

        // D = (not(x) & not(y)) : whatever remains of the grand total
        int d = this.totalCoOccurrences - a - b - c;

        // Kept for debugging purposes only, see #getLastX()
        lastA = a;
        lastB = b;
        lastC = c;
        lastD = d;

        return rate.getValue(a, b, c, d);
    }

    /**
     * The number of times coTerm is a co-occurrence of term
     *
     * WARNING : not thread safe !
     *
     * @return
     *          coefficient a of the last call to {@link #computeRate(AssociationRate, Term, Term)}
     */
    public int getLastA() {
        return lastA;
    }

    /**
     * The number of times a co-occurrence of term is not coTerm
     *
     * WARNING : not thread safe !
     *
     * @return
     *          coefficient b of the last call to {@link #computeRate(AssociationRate, Term, Term)}
     */
    public int getLastB() {
        return lastB;
    }

    /**
     * The number of times coTerm is a co-occurrence of another term
     * than term.
     *
     * WARNING : not thread safe !
     *
     * @return
     *          coefficient c of the last call to {@link #computeRate(AssociationRate, Term, Term)}
     */
    public int getLastC() {
        return lastC;
    }

    /**
     * The number of co-occurrences involving neither term as base term nor
     * coTerm as co-term, i.e. the total number of co-occurrences minus
     * a, b and c.
     *
     * WARNING : not thread safe !
     *
     * @return
     *          coefficient d of the last call to {@link #computeRate(AssociationRate, Term, Term)}
     */
    public int getLastD() {
        return lastD;
    }
}
    




© 2015 - 2025 Weber Informatics LLC | Privacy Policy