All downloads are free. The search and download functionality uses the official Maven repository.

edu.stanford.nlp.stats.TwoDimensionalIntCounter Maven / Gradle / Ivy

Go to download

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7
Show newest version
package edu.stanford.nlp.stats;

import java.io.Serializable;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;

import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.MapFactory;
import edu.stanford.nlp.util.MutableInteger;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;


/**
 * A class representing a mapping between pairs of typed objects and int values.
 * (Copied from TwoDimensionalCounter)
 *
 * @author Teg Grenager
 * @author Angel Chang
 */
public class TwoDimensionalIntCounter implements Serializable {

  private static final long serialVersionUID = 1L;

  // the outermost Map
  private Map> map;

  // the total of all counts
  private int total;

  // the MapFactory used to make new maps to counters
  private MapFactory> outerMF;

  // the MapFactory used to make new maps in the inner counter
  private MapFactory innerMF;

  private int defaultValue = 0;

  public void defaultReturnValue(double rv) { defaultValue = (int) rv; }

  public void defaultReturnValue(int rv) { defaultValue = rv; }

  public int defaultReturnValue() { return defaultValue; }

  @Override
  public boolean equals(Object o) {
    if (o == this) return true;
    if (!(o instanceof TwoDimensionalIntCounter)) return false;

    return ((TwoDimensionalIntCounter) o).map.equals(map);
  }

  @Override
  public int hashCode() {
    return map.hashCode() + 17;
  }

  /**
   * @return the inner Counter associated with key o
   */
  public IntCounter getCounter(K1 o) {
    IntCounter c = map.get(o);
    if (c == null) {
      c = new IntCounter<>(innerMF);
      c.setDefaultReturnValue(defaultValue);
      map.put(o, c);
    }
    return c;
  }

  public Set>> entrySet(){
    return map.entrySet();
  }

  /**
   * @return total number of entries (key pairs)
   */
  public int size() {
    int result = 0;
    for (K1 o : firstKeySet()) {
      IntCounter c = map.get(o);
      result += c.size();
    }
    return result;
  }

  public boolean containsKey(K1 o1, K2 o2) {
    if (!map.containsKey(o1)) return false;
    IntCounter c = map.get(o1);
    return c.containsKey(o2);
  }

  /**
   */
  public void incrementCount(K1 o1, K2 o2) {
    incrementCount(o1, o2, 1);
  }

  /**
   */
  public void incrementCount(K1 o1, K2 o2, double count) {
    incrementCount(o1, o2, (int) count);
  }

  /**
   */
  public void incrementCount(K1 o1, K2 o2, int count) {
    IntCounter c = getCounter(o1);
    c.incrementCount(o2, count);
    total += count;
  }

  /**
   */
  public void decrementCount(K1 o1, K2 o2) {
    incrementCount(o1, o2, -1);
  }

  /**
   */
  public void decrementCount(K1 o1, K2 o2, double count) {
    incrementCount(o1, o2, -count);
  }

  /**
   */
  public void decrementCount(K1 o1, K2 o2, int count) {
    incrementCount(o1, o2, -count);
  }

  /**
   */
  public void setCount(K1 o1, K2 o2, double count) {
    setCount(o1, o2, (int) count);
  }

  /**
   */
  public void setCount(K1 o1, K2 o2, int count) {
    IntCounter c = getCounter(o1);
    int oldCount = getCount(o1, o2);
    total -= oldCount;
    c.setCount(o2, count);
    total += count;
  }

  public int remove(K1 o1, K2 o2) {
    IntCounter c = getCounter(o1);
    int oldCount = getCount(o1, o2);
    total -= oldCount;
    c.remove(o2);
    if (c.isEmpty()) {
      map.remove(o1);
    }
    return oldCount;
  }

  /**
   */
  public int getCount(K1 o1, K2 o2) {
    IntCounter c = getCounter(o1);
    if (c.totalCount() == 0 && !c.keySet().contains(o2)) { return defaultReturnValue(); }
    return c.getIntCount(o2);
  }

  /**
   * Takes linear time.
   *
   */
  public int totalCount() {
    return total;
  }

  /**
   */
  public int totalCount(K1 k1) {
    IntCounter c = getCounter(k1);
    return c.totalIntCount();
  }

  public IntCounter totalCounts() {
    IntCounter tc = new IntCounter<>();
    for (K1 k1:map.keySet()) {
      tc.setCount(k1, map.get(k1).totalCount());
    }
    return tc;
  }

  public Set firstKeySet() {
    return map.keySet();
  }

  /**
   * replace the counter for K1-index o by new counter c
   */
  public IntCounter setCounter(K1 o, IntCounter c) {
    IntCounter old = getCounter(o);
    total -= old.totalIntCount();
    map.put(o, c);
    total += c.totalIntCount();
    return old;
  }

  /**
   * Produces a new ConditionalCounter.
   *
   * @return a new ConditionalCounter, where order of indices is reversed
   */
  @SuppressWarnings({"unchecked"})
  public static  TwoDimensionalIntCounter reverseIndexOrder(TwoDimensionalIntCounter cc) {
    // the typing on the outerMF is violated a bit, but it'll work....
    TwoDimensionalIntCounter result = new TwoDimensionalIntCounter<>(
            (MapFactory) cc.outerMF, (MapFactory) cc.innerMF);

    for (K1 key1 : cc.firstKeySet()) {
      IntCounter c = cc.getCounter(key1);
      for (K2 key2 : c.keySet()) {
        int count = c.getIntCount(key2);
        result.setCount(key2, key1, count);
      }
    }
    return result;
  }

  /**
   * A simple String representation of this TwoDimensionalCounter, which has
   * the String representation of each key pair
   * on a separate line, followed by the count for that pair.
   * The items are tab separated, so the result is a tab-separated value (TSV)
   * file.  Iff none of the keys contain spaces, it will also be possible to
   * treat this as whitespace separated fields.
   */
  @Override
  public String toString() {
    StringBuilder buff = new StringBuilder();
    for (K1 key1 : map.keySet()) {
      IntCounter c = getCounter(key1);
      for (K2 key2 : c.keySet()) {
        double score = c.getCount(key2);
        buff.append(key1).append("\t").append(key2).append("\t").append(score).append("\n");
      }
    }
    return buff.toString();
  }

  @SuppressWarnings({"unchecked"})
  public String toMatrixString(int cellSize) {
    List firstKeys = new ArrayList<>(firstKeySet());
    List secondKeys = new ArrayList<>(secondKeySet());
    Collections.sort((List)firstKeys);
    Collections.sort((List)secondKeys);
    int[][] counts = toMatrix(firstKeys, secondKeys);
    return ArrayMath.toString(counts, firstKeys.toArray(), secondKeys.toArray(), cellSize, cellSize, new DecimalFormat(), true);
  }

  /**
   * Given an ordering of the first (row) and second (column) keys, will produce a double matrix.
   *
   */
  public int[][] toMatrix(List firstKeys, List secondKeys) {
    int[][] counts = new int[firstKeys.size()][secondKeys.size()];
    for (int i = 0; i < firstKeys.size(); i++) {
      for (int j = 0; j < secondKeys.size(); j++) {
        counts[i][j] = getCount(firstKeys.get(i), secondKeys.get(j));
      }
    }
    return counts;
  }

  @SuppressWarnings({"unchecked"})
  public String toCSVString(NumberFormat nf) {
    List firstKeys = new ArrayList<>(firstKeySet());
    List secondKeys = new ArrayList<>(secondKeySet());
    Collections.sort((List)firstKeys);
    Collections.sort((List)secondKeys);
    StringBuilder b = new StringBuilder();
    String[] headerRow = new String[secondKeys.size() + 1];
    headerRow[0] = "";
    for (int j = 0; j < secondKeys.size(); j++) {
      headerRow[j + 1] = secondKeys.get(j).toString();
    }
    b.append(StringUtils.toCSVString(headerRow)).append("\n");
    for (K1 rowLabel : firstKeys) {
      String[] row = new String[secondKeys.size() + 1];
      row[0] = rowLabel.toString();
      for (int j = 0; j < secondKeys.size(); j++) {
        K2 colLabel = secondKeys.get(j);
        row[j + 1] = nf.format(getCount(rowLabel, colLabel));
      }
      b.append(StringUtils.toCSVString(row)).append("\n");
    }
    return b.toString();
  }

  public static , CK2 extends Comparable> String toCSVString(
          TwoDimensionalIntCounter counter,
          NumberFormat nf, Comparator key1Comparator, Comparator key2Comparator) {
    List firstKeys = new ArrayList<>(counter.firstKeySet());
    List secondKeys = new ArrayList<>(counter.secondKeySet());
    Collections.sort(firstKeys, key1Comparator);
    Collections.sort(secondKeys, key2Comparator);
    StringBuilder b = new StringBuilder();
    int secondKeysSize = secondKeys.size();
    String[] headerRow = new String[secondKeysSize + 1];
    headerRow[0] = "";

    for (int j = 0; j < secondKeysSize; j++) {
      headerRow[j + 1] = secondKeys.get(j).toString();
    }
    b.append(StringUtils.toCSVString(headerRow)).append('\n');
    for (CK1 rowLabel : firstKeys) {
      String[] row = new String[secondKeysSize + 1];
      row[0] = rowLabel.toString();
      for (int j = 0; j < secondKeysSize; j++) {
        CK2 colLabel = secondKeys.get(j);
        row[j + 1] = nf.format(counter.getCount(rowLabel, colLabel));
      }
      b.append(StringUtils.toCSVString(row)).append('\n');
    }
    return b.toString();
  }

  public Set secondKeySet() {
    Set result = Generics.newHashSet();
    for (K1 k1 : firstKeySet()) {
      for (K2 k2 : getCounter(k1).keySet()) {
        result.add(k2);
      }
    }
    return result;
  }

  public boolean isEmpty() {
    return map.isEmpty();
  }

  public IntCounter> flatten() {
    IntCounter> result = new IntCounter<>();
    result.setDefaultReturnValue(defaultValue);
    for (K1 key1 : firstKeySet()) {
      IntCounter inner = getCounter(key1);
      for (K2 key2 : inner.keySet()) {
        result.setCount(new Pair<>(key1, key2), inner.getIntCount(key2));
      }
    }
    return result;
  }

  public void addAll(TwoDimensionalIntCounter c) {
    for (K1 key : c.firstKeySet()) {
      IntCounter inner = c.getCounter(key);
      IntCounter myInner = getCounter(key);
      Counters.addInPlace(myInner, inner);
      total += inner.totalIntCount();
    }
  }

  public void addAll(K1 key, IntCounter c) {
    IntCounter myInner = getCounter(key);
    Counters.addInPlace(myInner, c);
    total += c.totalIntCount();
  }

  public void subtractAll(K1 key, IntCounter c) {
    IntCounter myInner = getCounter(key);
    Counters.subtractInPlace(myInner, c);
    total -= c.totalIntCount();
  }



  public void subtractAll(TwoDimensionalIntCounter c, boolean removeKeys) {
    for (K1 key : c.firstKeySet()) {
      IntCounter inner = c.getCounter(key);
      IntCounter myInner = getCounter(key);
      Counters.subtractInPlace(myInner, inner);
      if (removeKeys) {
        Counters.retainNonZeros(myInner);
      }
      total -= inner.totalIntCount();
    }
  }

  public void removeZeroCounts() {
    Set firstKeySet = Generics.newHashSet(firstKeySet());
    for (K1 k1 : firstKeySet) {
      IntCounter c = getCounter(k1);
      Counters.retainNonZeros(c);
      if (c.isEmpty()) {
        map.remove(k1); // it's empty, get rid of it!
      }
    }
  }

  public void remove(K1 key) {
    IntCounter counter = map.get(key);
    if (counter != null) { total -= counter.totalIntCount(); }
    map.remove(key);
  }

  public void clean() {
    for (K1 key1 : Generics.newHashSet(map.keySet())) {
      IntCounter c = map.get(key1);
      for (K2 key2 : Generics.newHashSet(c.keySet())) {
        if (c.getIntCount(key2) == 0) {
          c.remove(key2);
        }
      }
      if (c.keySet().isEmpty()) {
        map.remove(key1);
      }
    }
  }

  public MapFactory> getOuterMapFactory() {
    return outerMF;
  }

  public MapFactory getInnerMapFactory() {
    return innerMF;
  }

  public TwoDimensionalIntCounter() {
    this(MapFactory.>hashMapFactory(), MapFactory.hashMapFactory());
  }

  public TwoDimensionalIntCounter(int initialCapacity) {
    this(MapFactory.>hashMapFactory(), MapFactory.hashMapFactory(), initialCapacity);
  }

  public TwoDimensionalIntCounter(MapFactory> outerFactory, MapFactory innerFactory) {
    this(outerFactory, innerFactory, 100);
  }

  public TwoDimensionalIntCounter(MapFactory> outerFactory, MapFactory innerFactory, int initialCapacity) {
    innerMF = innerFactory;
    outerMF = outerFactory;
    map = outerFactory.newMap(initialCapacity);
    total = 0;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy