All Downloads are FREE. Search and download functionalities are using the official Maven repository.

fr.univnantes.termsuite.model.ContextVector Maven / Gradle / Ivy

Go to download

A Java UIMA-based toolbox for multilingual and efficient terminology extraction and multilingual term alignment

The newest version!
/*******************************************************************************
 * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/
package fr.univnantes.termsuite.model;

import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;

import org.apache.commons.lang3.mutable.MutableInt;

import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import fr.univnantes.termsuite.engines.contextualizer.AssociationRate;
import fr.univnantes.termsuite.engines.contextualizer.CrossTable;

/**
 * 
 * A sorted Term/Frequency map used as a context vector during alignment operations.
 * 
 * @author Damien Cram
 *
 */
public class ContextVector {
	/** Layout of {@link #toString()}: the owner term's lemma, then the joined entries. */
	private static final String STRING_FORMAT = "<%s> {%s}";

	/** Index of entries by co-term; every other piece of state is derived from it. */
	private final ConcurrentMap<Term, Entry> entries = Maps.newConcurrentMap();

	/** Lazily built sorted view of {@link #entries}; {@code null} means "needs rebuilding". */
	private List<Entry> sortedEntries = null;

	/** Running sum of the co-occurrence counts of all entries. */
	private int totalCooccurrences = 0;

	/** Owner term of this vector, or {@code null} when built with the default constructor. */
	private final Term term;

	/**
	 * Creates a context vector that has no owner term.
	 */
	public ContextVector() {
		this(null);
	}

	/**
	 * Creates a context vector with a back reference to its owner term.
	 * The owner term is exposed by {@link #getTerm()} and takes part in
	 * {@link #equals(Object)}, {@link #hashCode()} and {@link #toString()}.
	 *
	 * @param term
	 * 			the owner term
	 */
	public ContextVector(Term term) {
		this.term = term;
	}

	/**
	 * Registers one co-occurrence for every occurrence supplied by the iterator.
	 *
	 * @param it
	 * 			the occurrences to add; consumed until exhausted
	 */
	public void addAllCooccurrences(Iterator<? extends TermOccurrence> it) {
		while (it.hasNext()) {
			addCooccurrence(it.next());
		}
	}

	/**
	 * Registers one co-occurrence with the term of the given occurrence,
	 * creating the entry (with a zero association rate) on first sight.
	 *
	 * @param occ
	 * 			the occurrence whose term co-occurs with this vector
	 */
	public void addCooccurrence(TermOccurrence occ) {
		Term coTerm = occ.getTerm();
		// Single lookup instead of containsKey + get.
		Entry entry = entries.get(coTerm);
		if (entry == null) {
			entry = new Entry(coTerm);
			entries.put(coTerm, entry);
		}
		entry.increment();
		this.totalCooccurrences++;
		setDirty();
	}

	/**
	 * Sets the entry of a co-term, replacing any entry already stored for it.
	 * The total co-occurrence count is adjusted for the replaced entry.
	 *
	 * @param coTerm
	 * 			the co-term, must not be {@code null}
	 * @param nbCooccs
	 * 			the number of co-occurrences with {@code coTerm}
	 * @param assocRate
	 * 			the association rate with {@code coTerm}
	 * @throws NullPointerException
	 * 			if {@code coTerm} is {@code null}
	 */
	public void addEntry(Term coTerm, int nbCooccs, double assocRate) {
		Preconditions.checkNotNull(coTerm);
		// put() hands back the replaced entry, so no prior containsKey/get is needed.
		Entry previous = entries.put(coTerm, new Entry(coTerm, nbCooccs, assocRate));
		if (previous != null) {
			this.totalCooccurrences -= previous.getNbCooccs();
		}
		this.totalCooccurrences += nbCooccs;
		setDirty();
	}

	/**
	 * Removes the entry of the given co-term, if there is one, and subtracts
	 * its co-occurrences from the total.
	 *
	 * @param term
	 * 			the co-term to remove
	 */
	public void removeCoTerm(Term term) {
		Entry removed = this.entries.remove(term);
		if (removed != null) {
			this.totalCooccurrences -= removed.getNbCooccs();
			// The cached sorted list only becomes stale when something was removed.
			setDirty();
		}
	}

	/** Drops the cached sorted list so that {@link #getEntries()} rebuilds it. */
	private void setDirty() {
		this.sortedEntries = null;
	}

	/**
	 * Gives the entries of this context vector in their natural order:
	 * highest association rate first, ties broken by highest number of
	 * co-occurrences (see {@link Entry#compareTo(Entry)}).
	 * <p>
	 * The returned list is the internal cache, not a copy.
	 *
	 * @return the sorted entries, never {@code null}
	 */
	public List<Entry> getEntries() {
		// Work on a local so that a setDirty() happening meanwhile cannot make
		// this method return null.
		List<Entry> sorted = this.sortedEntries;
		if (sorted == null) {
			sorted = Lists.newArrayListWithCapacity(entries.size());
			sorted.addAll(entries.values());
			Collections.sort(sorted);
			this.sortedEntries = sorted;
		}
		return sorted;
	}

	/**
	 * Normalizes all association rates so that their sum is 1.
	 * Does nothing when the rates sum to zero.
	 */
	public void normalize() {
		double sum = 0;
		for (Entry e : entries.values()) {
			sum += e.getAssocRate();
		}
		if (sum != 0) {
			for (Entry e : entries.values()) {
				// Entry#setAssocRate invalidates the sorted cache itself.
				e.setAssocRate(e.getAssocRate() / sum);
			}
		}
	}

	/**
	 * One cell of the vector: a co-term with its number of co-occurrences and
	 * its association rate. Instances are created by the enclosing
	 * {@link ContextVector} only.
	 */
	public class Entry implements Comparable<Entry> {
		private static final String ENTRY_FORMAT = "%s: %.3f (%d)";
		private final Term coTerm;
		private final MutableInt nbCooccs;
		private double assocRate;

		private Entry(Term term) {
			this(term, 0, 0d);
		}

		private Entry(Term coTerm, int nbCoccurrences, double assocRate) {
			this.coTerm = coTerm;
			this.nbCooccs = new MutableInt(nbCoccurrences);
			this.assocRate = assocRate;
		}

		private void increment() {
			this.nbCooccs.increment();
		}

		/**
		 * Changes the association rate of this entry.
		 *
		 * @param assocRate
		 * 			the new association rate
		 */
		public void setAssocRate(double assocRate) {
			this.assocRate = assocRate;
			// The rate is the primary sort key, so the owning vector's cached
			// order is no longer trustworthy.
			ContextVector.this.setDirty();
		}

		public double getAssocRate() {
			return assocRate;
		}

		public Term getCoTerm() {
			return coTerm;
		}

		public int getNbCooccs() {
			return nbCooccs.intValue();
		}

		/**
		 * Two entries are equal when they have equal co-terms, the same number
		 * of co-occurrences and the same association rate.
		 */
		@Override
		public boolean equals(Object obj) {
			if (this == obj) {
				return true;
			}
			if (!(obj instanceof Entry)) {
				return false;
			}
			Entry other = (Entry) obj;
			// Double.compare(..) == 0 has the semantics of Double#equals without boxing.
			return Objects.equal(other.coTerm, this.coTerm)
					&& other.getNbCooccs() == this.getNbCooccs()
					&& Double.compare(other.assocRate, this.assocRate) == 0;
		}

		/**
		 * Hashes on the grouping key of the co-term only.
		 * NOTE(review): consistent with {@link #equals(Object)} only if equal
		 * terms always share a grouping key - confirm against {@code Term}.
		 */
		@Override
		public int hashCode() {
			return this.coTerm.getGroupingKey().hashCode();
		}

		/**
		 * Orders entries by descending association rate, then by descending
		 * number of co-occurrences.
		 */
		@Override
		public int compareTo(Entry o) {
			// Arguments are swapped (o first) to obtain a descending order.
			return ComparisonChain.start()
					.compare(o.assocRate, this.assocRate)
					.compare(o.nbCooccs, this.nbCooccs)
					.result();
		}

		@Override
		public String toString() {
			return String.format(ENTRY_FORMAT, coTerm.getLemma(), assocRate, nbCooccs.intValue());
		}

	}

	/**
	 * Gives the number of co-occurrences recorded for a co-term.
	 *
	 * @param term
	 * 			the co-term to look up
	 * @return the number of co-occurrences, or 0 if the co-term has no entry
	 */
	public int getNbCooccs(Term term) {
		Entry entry = entries.get(term);
		return entry == null ? 0 : entry.getNbCooccs();
	}

	/**
	 * Changes the association rate of an existing entry.
	 *
	 * @param coTerm
	 * 			the co-term whose entry is updated
	 * @param assocRate
	 * 			the new association rate
	 * @throws NullPointerException
	 * 			if this vector has no entry for {@code coTerm}
	 */
	public void setAssocRate(Term coTerm, double assocRate) {
		Entry entry = Preconditions.checkNotNull(
				entries.get(coTerm),
				"No entry for co-term %s in this context vector",
				coTerm);
		// Entry#setAssocRate invalidates the sorted cache itself.
		entry.setAssocRate(assocRate);
	}

	/**
	 * Gives the association rate recorded for a co-term.
	 *
	 * @param term
	 * 			the co-term to look up
	 * @return the association rate, or 0 if the co-term has no entry
	 */
	public double getAssocRate(Term term) {
		Entry entry = entries.get(term);
		return entry == null ? 0d : entry.getAssocRate();
	}

	/**
	 * Gives the co-terms of this vector.
	 *
	 * @return the key set of the underlying map (a view, not a copy)
	 */
	public Set<Term> terms() {
		return entries.keySet();
	}

	/** Hashes on the owner term only. */
	@Override
	public int hashCode() {
		return Objects.hashCode(this.term);
	}

	/**
	 * Two context vectors are equal when they have equal owner terms and
	 * their sorted entry lists are element-wise equal.
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (!(obj instanceof ContextVector)) {
			return false;
		}
		ContextVector other = (ContextVector) obj;
		return Objects.equal(this.term, other.term)
				&& Iterables.elementsEqual(this.getEntries(), other.getEntries());
	}

	/**
	 * @return the owner term, or {@code null} if this vector has none
	 */
	public Term getTerm() {
		return term;
	}

	@Override
	public String toString() {
		String owner = this.term == null ? "no parent term" : this.term.getLemma();
		return String.format(STRING_FORMAT, owner, Joiner.on(", ").join(getEntries()));
	}

	/**
	 * Gives the sum of the co-occurrence counts of all entries.
	 * (The method name keeps its historical spelling for compatibility.)
	 *
	 * @return the total number of co-occurrences
	 */
	public int getTotalCoccurrences() {
		return this.totalCooccurrences;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy