is2.data.MFC Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of anna Show documentation
Show all versions of anna Show documentation
Tools for Natural Language Analysis, Generation and Machine Learning
The newest version!
package is2.data;
import is2.util.DB;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map.Entry;
/**
* Map Features, do not map long to integer
*
* @author Bernd Bohnet, 17.09.2011
*/
final public class MFC implements IEncoderPlus {
/** The features and its values */
private final HashMap> m_featureSets = new HashMap>();
/** The feature class and the number of values */
private final HashMap m_featureCounters = new HashMap();
/** The number of bits needed to encode a feature */
final HashMap m_featureBits = new HashMap();
/** Integer counter for long2int */
private int count=0;
public MFC () {}
public int size() {return count;}
/**
* Register an attribute class, if it not exists and add a possible value
* @param type
* @param type2
*/
final public int register(String a, String v) {
synchronized(m_featureCounters) {
HashMap fs = getFeatureSet().get(a);
if (fs==null) {
fs = new HashMap();
getFeatureSet().put(a, fs);
fs.put(NONE, 0);
getFeatureCounter().put(a, 1);
}
Integer i = fs.get(v);
if (i==null) {
Integer c = getFeatureCounter().get(a);
fs.put(v, c);
c++;
getFeatureCounter().put(a,c);
return c-1;
} else return i;
}
}
/**
* Calculates the number of bits needed to encode a feature
*/
public void calculateBits() {
int total=0;
for(Entry e : getFeatureCounter().entrySet() ){
int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2)));
m_featureBits.put(e.getKey(), bits);
total+=bits;
// System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1));
}
// System.out.println("total number of needed bits "+total);
}
public String toString() {
StringBuffer content = new StringBuffer();
for(Entry e : getFeatureCounter().entrySet() ){
content.append(e.getKey()+" "+e.getValue());
content.append(':');
// HashMap vs = getFeatureSet().get(e.getKey());
content.append(getFeatureBits(e.getKey()));
/*if (vs.size()<120)
for(Entry e2 : vs.entrySet()) {
content.append(e2.getKey()+" ("+e2.getValue()+") ");
}*/
content.append('\n');
}
return content.toString();
}
final public short getFeatureBits(String a) {
if(m_featureBits.get(a)==null) return 0;
return (short)m_featureBits.get(a).intValue();
}
/**
* Get the integer place holder of the string value v of the type a
*
* @param t the type
* @param v the value
* @return the integer place holder of v
*/
final public int getValue(String t, String v) {
if (m_featureSets.get(t)==null) return -1;
Integer vi = m_featureSets.get(t).get(v);
if (vi==null) return -1; //stop &&
return vi.intValue();
}
/**
* Static version of getValue
* @see getValue
*/
final public int getValueS(String a, String v) {
if (m_featureSets.get(a)==null) return -1;
Integer vi = m_featureSets.get(a).get(v);
if (vi==null) return -1; //stop &&
return vi.intValue();
}
public int hasValue(String a, String v) {
Integer vi = m_featureSets.get(a).get(v);
if (vi==null) return -1;
return vi.intValue();
}
public static String printBits(int k) {
StringBuffer s = new StringBuffer();
for(int i =0;i<31;i++) {
s.append((k&0x00000001)==1?'1':'0');
k=k>>1;
}
s.reverse();
return s.toString();
}
/**
* Maps a long to a integer value. This is very useful to save memory for sparse data long values
* @param l
* @return the integer
*/
static public int misses = 0;
static public int good = 0;
/**
* Write the data
* @param dos
* @throws IOException
*/
public void writeData(DataOutputStream dos) throws IOException {
dos.writeInt(getFeatureSet().size());
// DB.println("write"+getFeatureSet().size());
for(Entry> e : getFeatureSet().entrySet()) {
dos.writeUTF(e.getKey());
dos.writeInt(e.getValue().size());
for(Entry e2 : e.getValue().entrySet()) {
if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey());
dos.writeUTF(e2.getKey());
dos.writeInt(e2.getValue());
}
}
}
public void read(DataInputStream din) throws IOException {
int size = din.readInt();
for(int i=0; i h = new HashMap();
getFeatureSet().put(k,h);
for(int j = 0;j getFeatureCounter() {
return m_featureCounters;
}
public HashMap> getFeatureSet() {
return m_featureSets;
}
public String[] reverse(HashMap v){
String[] set = new String[v.size()];
for(Entry e : v.entrySet()) {
set[e.getValue()]=e.getKey();
}
return set;
}
}