All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.deeplearning4j.berkeley.CounterMap Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
/*
 *
 *  * Copyright 2015 Skymind,Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 *
 */

package org.deeplearning4j.berkeley;



import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;


/**
 * Maintains counts of (key, value) pairs.  The map is structured so that for
 * every key, one can getFromOrigin a counter over values.  Example usage: keys might be
 * words with values being POS tags, and the count being the number of
 * occurences of that word/tag pair.  The sub-counters returned by
 * getCounter(word) would be count distributions over tags for that word.
 *
 * @author Dan Klein
 */
public class CounterMap implements java.io.Serializable {
    private static final long serialVersionUID = 1L;
    MapFactory mf;
    Map> counterMap;
    double defltVal = 0.0;
    private static Logger log = LoggerFactory.getLogger(CounterMap.class);

    public interface CountFunction {
        double count(V v1, V v2);
    }

    /**
     * Build a counter map by iterating pairwise over the list.
     * This assumes that the given pair wise items are
     * the same symmetrically. (The relation at i and i + 1 are the same)
     * It creates a counter map such that the pairs are:
     * count(v1,v2) and count(v2,v1) are the same
     * @param items the items to iterate over
     * @param countFunction the function to count
     * @param  the type to count
     * @return the counter map pairwise
     */
    public static  CounterMap runPairWise(final List items,final CountFunction countFunction) {
        ExecutorService exec = new ThreadPoolExecutor(Runtime.getRuntime().availableProcessors(),
                Runtime.getRuntime().availableProcessors(),
                0L, TimeUnit.MILLISECONDS,
                new LinkedBlockingQueue(), new RejectedExecutionHandler() {
            @Override
            public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
                executor.submit(r);
            }
        });

        final AtomicInteger begin = new AtomicInteger(0);
        final AtomicInteger end = new AtomicInteger(items.size() - 1);
        List> futures = new ArrayList<>();

        final CounterMap count = parallelCounterMap();
        for(int i = 0; i < items.size() / 2; i++) {
            futures.add(exec.submit(new Callable() {
                @Override
                public V call() throws Exception {
                    int begin2 = begin.incrementAndGet();
                    int end2 = end.decrementAndGet();
                    V v = items.get(begin2);
                    V v2 = items.get(end2);
                    log.trace("Processing " + "(" + begin2 + "," + end2 + ")");
                    //don't double count
                    if(count.getCount(v,v2) > 0)
                        return v;
                    double cost = countFunction.count(v,v2);
                    count.incrementCount(v,v2,cost);
                    count.incrementCount(v2,v,cost);
                    return v;
                }
            }));
        }

        int futureCount = 0;
        for(Future future : futures) {
            try {
                future.get();
                log.trace("Done with " + futureCount++);
            } catch (InterruptedException e) {
                e.printStackTrace();
            } catch (ExecutionException e) {
                e.printStackTrace();
            }
        }
        exec.shutdown();
        try {
            exec.awaitTermination(1,TimeUnit.MINUTES);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }


        return count;

    }

    /**
     * Returns a thread safe counter map
     * @return
     */
    public static  CounterMap parallelCounterMap() {
        MapFactory factory = new MapFactory() {

            private static final long serialVersionUID = 5447027920163740307L;

            @Override
            public Map buildMap() {
                return new ConcurrentHashMap<>();
            }

        };

        CounterMap totalWords = new CounterMap(factory,factory);
        return totalWords;
    }

    protected Counter ensureCounter(K key) {
        Counter valueCounter = counterMap.get(key);
        if (valueCounter == null) {
            valueCounter = buildCounter(mf);
            valueCounter.setDeflt(defltVal);
            counterMap.put(key, valueCounter);
        }
        return valueCounter;
    }

    public Collection> getCounters() {
        return counterMap.values();
    }

    /**
     * @return
     */
    protected Counter buildCounter(MapFactory mf)
    {
        return new Counter<>(mf);
    }

    /**
     * Returns the keys that have been inserted into this CounterMap.
     */
    public Set keySet() {
        return counterMap.keySet();
    }

    /**
     * Sets the count for a particular (key, value) pair.
     */
    public void setCount(K key, V value, double count) {
        Counter valueCounter = ensureCounter(key);
        valueCounter.setCount(value, count);
    }

//	public void setCount(Pair pair) {
//		
//	}

    /**
     * Increments the count for a particular (key, value) pair.
     */
    public void incrementCount(K key, V value, double count) {
        Counter valueCounter = ensureCounter(key);
        valueCounter.incrementCount(value, count);
    }

    /**
     * Gets the count of the given (key, value) entry, or zero if that entry is
     * not present.  Does not createComplex any objects.
     */
    public double getCount(K key, V value) {
        Counter valueCounter = counterMap.get(key);
        if (valueCounter == null) return defltVal;
        return valueCounter.getCount(value);
    }

    /**
     * Gets the sub-counter for the given key.  If there is none, a counter is
     * created for that key, and installed in the CounterMap.  You can, for
     * example, add to the returned empty counter directly (though you shouldn't).
     * This is so whether the key is present or not, modifying the returned
     * counter has the same effect (but don't do it).
     */
    public Counter getCounter(K key) {
        return ensureCounter(key);
    }

    public void incrementAll(Map map, double count) {
        for (Map.Entry entry : map.entrySet()) {
            incrementCount(entry.getKey(), entry.getValue(), count);
        }
    }

    public void incrementAll(CounterMap cMap) {
        for (Map.Entry> entry: cMap.counterMap.entrySet()) {
            K key = entry.getKey();
            Counter innerCounter = entry.getValue();
            for (Map.Entry innerEntry: innerCounter.entrySet()) {
                V value = innerEntry.getKey();
                incrementCount(key,value,innerEntry.getValue());
            }
        }
    }

    /**
     * Gets the total count of the given key, or zero if that key is
     * not present.  Does not createComplex any objects.
     */
    public double getCount(K key) {
        Counter valueCounter = counterMap.get(key);
        if (valueCounter == null) return 0.0;
        return valueCounter.totalCount();
    }

    /**
     * Returns the total of all counts in sub-counters.  This implementation is
     * linear; it recalculates the total each time.
     */
    public double totalCount() {
        double total = 0.0;
        for (Map.Entry> entry : counterMap.entrySet()) {
            Counter counter = entry.getValue();
            total += counter.totalCount();
        }
        return total;
    }

    /**
     * Returns the total number of (key, value) entries in the CounterMap (not
     * their total counts).
     */
    public int totalSize() {
        int total = 0;
        for (Map.Entry> entry : counterMap.entrySet()) {
            Counter counter = entry.getValue();
            total += counter.size();
        }
        return total;
    }

    /**
     * The number of keys in this CounterMap (not the number of key-value entries
     * -- use totalSize() for that)
     */
    public int size() {
        return counterMap.size();
    }

    /**
     * True if there are no entries in the CounterMap (false does not mean
     * totalCount > 0)
     */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Finds the key with maximum count.  This is a linear operation, and ties are broken arbitrarily.
     *
     * @return a key with minumum count
     */
    public Pair argMax() {
        double maxCount = Double.NEGATIVE_INFINITY;
        Pair maxKey = null;
        for (Map.Entry> entry : counterMap.entrySet()) {
            Counter counter = entry.getValue();
            V localMax = counter.argMax();
            if (counter.getCount(localMax) > maxCount || maxKey == null) {
                maxKey = new Pair<>(entry.getKey(), localMax);
                maxCount = counter.getCount(localMax);
            }
        }
        return maxKey;
    }


    public String toString(int maxValsPerKey) {
        StringBuilder sb = new StringBuilder("[\n");
        for (Map.Entry> entry : counterMap.entrySet()) {
            sb.append("  ");
            sb.append(entry.getKey());
            sb.append(" -> ");
            sb.append(entry.getValue().toString(maxValsPerKey));
            sb.append("\n");
        }
        sb.append("]");
        return sb.toString();
    }

    @Override
    public String toString() {
        return toString(20);
    }

    public String toString(Collection keyFilter) {
        StringBuilder sb = new StringBuilder("[\n");
        for (Map.Entry> entry : counterMap.entrySet()) {
            String keyString = entry.getKey().toString();
            if (keyFilter != null && !keyFilter.contains(keyString)) {
                continue;
            }
            sb.append("  ");
            sb.append(keyString);
            sb.append(" -> ");
            sb.append(entry.getValue().toString(20));
            sb.append("\n");
        }
        sb.append("]");
        return sb.toString();
    }


    public CounterMap(CounterMap cm)
    {
        this();
        incrementAll(cm);
    }

    public CounterMap() {
        this(false);
    }


    public boolean isEqualTo(CounterMap map)
    {
        boolean tmp = true;
        CounterMap bigger = map.size() > size() ? map : this;
        for (K k : bigger.keySet())
        {
            tmp &= map.getCounter(k).isEqualTo(getCounter(k));
        }
        return tmp;
    }





    public CounterMap(MapFactory> outerMF, MapFactory innerMF) {
        mf = innerMF;
        counterMap = outerMF.buildMap();
    }

    public CounterMap(boolean identityHashMap) {
        this(identityHashMap ? new MapFactory.IdentityHashMapFactory>()
                        : new MapFactory.HashMapFactory>(),
                identityHashMap ? new MapFactory.IdentityHashMapFactory()
                        : new MapFactory.HashMapFactory());
    }

    public static void main(String[] args) {
        CounterMap bigramCounterMap = new CounterMap<>();
        bigramCounterMap.incrementCount("people", "run", 1);
        bigramCounterMap.incrementCount("cats", "growl", 2);
        bigramCounterMap.incrementCount("cats", "scamper", 3);
        System.out.println(bigramCounterMap);
        System.out.println("Entries for cats: " + bigramCounterMap.getCounter("cats"));
        System.out.println("Entries for dogs: " + bigramCounterMap.getCounter("dogs"));
        System.out.println("Count of cats scamper: "
                + bigramCounterMap.getCount("cats", "scamper"));
        System.out.println("Count of snakes slither: "
                + bigramCounterMap.getCount("snakes", "slither"));
        System.out.println("Total size: " + bigramCounterMap.totalSize());
        System.out.println("Total count: " + bigramCounterMap.totalCount());
        System.out.println(bigramCounterMap);
    }

    public void normalize() {
        for (K key : keySet()) {
            getCounter(key).normalize();
        }
    }

    public void normalizeWithDiscount(double discount) {
        for (K key : keySet()) {
            Counter ctr = getCounter(key);
            double totalCount = ctr.totalCount();
            for (V value : ctr.keySet()) {
                ctr.setCount(value, (ctr.getCount(value) - discount) / totalCount);
            }
        }
    }

    /**
     * Constructs reverse CounterMap where the count of a pair (k,v)
     * is the count of (v,k) in the current CounterMap
     * @return
     */
    public CounterMap invert() {
        CounterMap invertCounterMap = new CounterMap<>();
        for (K key: this.keySet()) {
            Counter keyCounts = this.getCounter(key);
            for (V val: keyCounts.keySet()) {
                double count = keyCounts.getCount(val);
                invertCounterMap.setCount(val, key, count);
            }
        }
        return invertCounterMap;
    }

    /**
     * Scale all entries in CounterMap
     * by scaleFactor
     * @param scaleFactor
     */
    public void scale(double scaleFactor) {
        for (K key: keySet()) {
            Counter counts = getCounter(key);
            counts.scale(scaleFactor);
        }
    }

    public boolean containsKey(K key) {
        return counterMap.containsKey(key);
    }

    public Iterator> getPairIterator() {

        class PairIterator implements Iterator> {

            Iterator outerIt ;
            Iterator innerIt ;
            K curKey ;

            public PairIterator() {
                outerIt = keySet().iterator();
            }

            private boolean advance() {
                if (innerIt == null || !innerIt.hasNext()) {
                    if (!outerIt.hasNext()) {
                        return false;
                    }
                    curKey = outerIt.next();
                    innerIt = getCounter(curKey).keySet().iterator();
                }
                return true;
            }

            public boolean hasNext() {
                return advance();
            }

            public Pair next() {
                advance();
                assert curKey != null;
                return Pair.newPair(curKey, innerIt.next());
            }

            public void remove() {
                // TODO Auto-generated method stub

            }

        }
        return new PairIterator();
    }

    public Set>> getEntrySet() {
        // TODO Auto-generated method stub
        return counterMap.entrySet();
    }

    public void removeKey(K oldIndex)
    {
        counterMap.remove(oldIndex);
    }

    public void setCounter(K newIndex, Counter counter)
    {
        counterMap.put(newIndex, counter);

    }

    public void setDefault(double defltVal) {
        this.defltVal = defltVal;
        for (Counter vCounter : counterMap.values()) {
            vCounter.setDeflt(defltVal);
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy