All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.ucla.sspace.util.IndexedCounter Maven / Gradle / Ivy

Go to download

The S-Space Package is a collection of algorithms for building Semantic Spaces as well as a highly scalable library for designing new distributional semantics algorithms. Distributional algorithms process text corpora and represent the semantics of words as high-dimensional feature vectors. This package also includes matrices, vectors, and numerous clustering algorithms. These approaches are known by many names, such as word spaces, semantic spaces, or distributed semantics, and rest upon the Distributional Hypothesis: words that appear in similar contexts have similar meanings.

The newest version!
/*
 * Copyright 2011 David Jurgens
 *
 * This file is part of the S-Space package and is covered under the terms and
 * conditions therein.
 *
 * The S-Space package is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation and distributed hereunder to you.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
 * EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
 * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
 * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
 * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
 * RIGHTS.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package edu.ucla.sspace.util;

import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;

import gnu.trove.iterator.TIntIntIterator;
import gnu.trove.map.TIntIntMap;
import gnu.trove.map.hash.TIntIntHashMap;


/** 
 * A utility for counting unique instance of an object.  Unlike an {@link
 * ObjectCounter}, this class uses an {@link Indexer} to identify object
 * equivalence and record the count.  This class is intended for use when
 * multiple {@link Counter} instances are needed to be run in parallel or to
 * keep a record of the counters without storing references to the object
 * instances in each of the {@code Counter} instances.  This results in a linear
 * space savings at the cost of some minor performance from needing to index the
 * objects.
 *
 * 

This class supports iterating over the set of instances being counted as * well as the instances and counts together. All collections views that are * returned are unmodifiable and will throw an exception if a mutating method is * called. * *

This class is not thread-safe * * @param T the type of object being counted. * * @see Indexer */ public class IndexedCounter implements Counter, java.io.Serializable { private static final long serialVersionUID = 1L; /** * A mapping from an object to the number of times it has been seen */ private final Indexer objectIndices; /** * The mapping from object indices to their counts */ private final TIntIntMap indexToCount; /** * A flag for whether to add objects that aren't mapped to indices to the * {@code objectIndices} Indexer. */ private final boolean allowNewIndices; /** * The total number of objects that have been counted */ private int sum; /** * Creates an empty {@code Counter} that uses the provided indexer to map * countable values to corresponding indices. Should an object be counted * that was not in the indexer, a new index will be added for it. */ public IndexedCounter(Indexer objectIndices) { this(objectIndices, true); } /** * Creates an empty {@code Counter} that uses the provided indexer to map * countable values to corresponding indices. Objects wth no corresponding * indices will only be counted (and added to the Indexer) if {@code * allowNewIndices} is true. * * @param allowNewIndices {@code true} if objects with new index should be * added to {@code objectIndices} */ public IndexedCounter(Indexer objectIndices, boolean allowNewIndices) { this.objectIndices = objectIndices; this.allowNewIndices = allowNewIndices; indexToCount = new TIntIntHashMap(); sum = 0; } /** * Adds the counts from the provided {@code Counter} to the current counts, * adding new elements as needed. */ public void add(Counter counter) { for (Map.Entry e : counter) count(e.getKey(), e.getValue()); } /** * Counts the object, increasing its total count by 1. */ public int count(T obj) { int objIndex = (allowNewIndices) ? 
objectIndices.index(obj) : objectIndices.find(obj); if (objIndex < 0) return 0; int curCount = indexToCount.get(objIndex); indexToCount.put(objIndex, curCount + 1); sum++; return curCount + 1; } /** * Counts the object, increasing its total count by the specified positive amount. * * @param count a positive value for the number of times the object occurred * * @throws IllegalArgumentException if {@code count} is not a positive value. */ public int count(T obj, int count) { if (count < 1) throw new IllegalArgumentException("Count must be positive: " + count); int objIndex = (allowNewIndices) ? objectIndices.index(obj) : objectIndices.find(obj); if (objIndex < 0) return 0; int curCount = indexToCount.get(objIndex); indexToCount.put(objIndex, curCount + count); sum += count; return curCount + count; } /** * {@inheritDoc} */ public void countAll(Collection c) { for (T t : c) count(t); } public boolean equals(Object o) { throw new Error(); } /** * Returns the number of times the specified object has been seen by this * counter. */ public int getCount(T obj) { int objIndex = (allowNewIndices) ? objectIndices.index(obj) : objectIndices.find(obj); return (objIndex < 0) ? 0 : indexToCount.get(objIndex); } /** * Returns the frequency of this object relative to the counts of all other * objects. This value may also be interpreted as the probability of the * instance of {@code obj} being seen in the items that have been counted. */ public double getFrequency(T obj) { double count = getCount(obj); return (sum == 0) ? 0 : count / sum; } public int hashCode() { throw new Error(); } /** * Returns a view of the items currently being counted. The returned view * is read-only; any attempts to modify this view will throw an {@link * UnsupportedOperationException}. */ public Set items() { return new ItemSet(); } /** * Returns an interator over the elements that have been counted thusfar and * their respective counts. 
*/ public Iterator> iterator() { return new EntryIter(); } /** * Returns the element that currently has the largest count. If no objects * have been counted, {@code null} is returned. Ties in counts are * arbitrarily broken. */ public T max() { if (sum == 0) return null; int maxCount = -1; int maxIndex = -1; for (TIntIntIterator it = indexToCount.iterator(); it.hasNext(); ) { it.advance(); if (it.value() > maxCount) { maxIndex = it.key(); maxCount = it.value(); } } return objectIndices.lookup(maxIndex); } /** * Returns the element that currently has the smallest count. If no objects * have been counted, {@code null} is returned. Ties in counts are * arbitrarily broken. */ public T min() { if (sum == 0) return null; int minCount = Integer.MAX_VALUE; int minIndex = -1; for (TIntIntIterator it = indexToCount.iterator(); it.hasNext(); ) { it.advance(); if (it.value() < minCount) { minIndex = it.key(); minCount = it.value(); } } return objectIndices.lookup(minIndex); } /** * {@inheritDoc} Note that this does not affect the object indices. 
*/ public void reset() { indexToCount.clear(); sum = 0; } /** * {@inheritDoc} */ public int size() { return indexToCount.size(); } /** * {@inheritDoc} */ public int sum() { return sum; } @Override public String toString() { return indexToCount.toString(); } private class ItemSet extends AbstractSet { @Override public boolean contains(Object o) { @SuppressWarnings("unchecked") T t = (T)o; int idx = objectIndices.find(t); return indexToCount.get(idx) > 0; } @Override public boolean isEmpty() { return IndexedCounter.this.size() == 0; } @Override public Iterator iterator() { return new ItemIter(); } @Override public int size() { return IndexedCounter.this.size(); } } private class ItemIter implements Iterator { private final Iterator> itemToIndex; private T next; public ItemIter() { itemToIndex = objectIndices.iterator(); next = null; advance(); } private void advance() { while (next == null && itemToIndex.hasNext()) { Map.Entry e = itemToIndex.next(); int count = indexToCount.get(e.getValue()); if (count > 0) next = e.getKey(); } } public boolean hasNext() { return next != null; } public T next() { if (next == null) throw new NoSuchElementException(); T t = next; advance(); return t; } public void remove() { throw new UnsupportedOperationException(); } } private class EntryIter implements Iterator> { private final Iterator> itemToIndex; private Map.Entry next; public EntryIter() { itemToIndex = objectIndices.iterator(); next = null; advance(); } private void advance() { next = null; while (next == null && itemToIndex.hasNext()) { Map.Entry e = itemToIndex.next(); int count = indexToCount.get(e.getValue()); if (count > 0) next = new AbstractMap.SimpleImmutableEntry( e.getKey(), count); } } public boolean hasNext() { return next != null; } public Map.Entry next() { if (next == null) throw new NoSuchElementException(); Map.Entry e = next; advance(); return e; } public void remove() { throw new UnsupportedOperationException(); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy