jmaxent.Dictionary Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of heideltime Show documentation
Show all versions of heideltime Show documentation
HeidelTime is a multilingual cross-domain temporal tagger that extracts temporal expressions from documents and normalizes them according to the TIMEX3 annotation standard.
/*
Copyright (C) 2010 by
*
* Cam-Tu Nguyen
* [email protected] or [email protected]
*
* Xuan-Hieu Phan
* [email protected]
*
* College of Technology, Vietnamese University, Hanoi
* Graduate School of Information Sciences, Tohoku University
*
* JVnTextPro-v.2.0 is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published
* by the Free Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* JVnTextPro-v.2.0 is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with JVnTextPro-v.2.0; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
*/
package jmaxent;
import java.io.*;
import java.util.*;
// TODO: Auto-generated Javadoc
/**
 * Dictionary of context predicates that can be read from and written to a
 * model file, or generated from training data.
 *
 * <p>Each entry maps a context-predicate id ({@code Integer}) to an
 * {@link Element}. An element carries a total {@code count}, a {@code chosen}
 * flag and a map from label id ({@code Integer}) to a {@link CountFIdx}
 * (a count together with a feature index).
 *
 * <p>Text layout used by {@link #readDict} and {@link #writeDict}:
 * <pre>
 * &lt;number of entries&gt;
 * &lt;cp&gt;:&lt;count&gt; &lt;label&gt;:&lt;count&gt;:&lt;fidx&gt; &lt;label&gt;:&lt;count&gt;:&lt;fidx&gt; ...
 * ...
 * &lt;Option.modelSeparator&gt;
 * </pre>
 */
public class Dictionary {

    /**
     * Context-predicate id ({@code Integer}) to {@link Element}. The field is
     * kept as a raw {@code Map} so its public type is unchanged for callers.
     */
    public Map dict = null;

    /** Reference to the option object. */
    public Option option = null;

    /** Reference to the data object; {@link #generateDict()} reads its training data. */
    public Data data = null;

    /**
     * Creates an empty dictionary with no option or data object attached.
     */
    public Dictionary() {
        dict = new HashMap();
    }

    /**
     * Creates an empty dictionary bound to the given option and data objects.
     *
     * @param option the option object to reference
     * @param data the data object to reference
     */
    public Dictionary(Option option, Data data) {
        this.option = option;
        this.data = data;
        dict = new HashMap();
    }

    /**
     * Reads the dictionary section of a model file, replacing the current
     * content of {@link #dict}.
     *
     * <p>The first line holds the number of entry lines. Lines with fewer than
     * two whitespace-separated tokens are skipped silently ({@link #writeDict}
     * can emit such lines when no label of an entry has a non-negative feature
     * index). Lines containing a malformed {@code a:b} or {@code a:b:c} token
     * are reported on standard output and skipped instead of aborting the
     * whole load. After the entries, one more line (the separator written by
     * {@link #writeDict}) is consumed.
     *
     * @param fin reader positioned at the dictionary size line
     * @throws IOException if reading from {@code fin} fails
     * @throws NumberFormatException if the size line is not an integer
     */
    public void readDict(BufferedReader fin) throws IOException {
        // size() tolerates a null dict, so do the same here instead of failing.
        if (dict == null) {
            dict = new HashMap();
        } else {
            dict.clear();
        }

        // get dictionary size
        String line = fin.readLine();
        if (line == null) {
            System.out.println("No dictionary size information");
            return;
        }

        int dictSize = Integer.parseInt(line.trim());
        if (dictSize <= 0) {
            // No early return: the loop below runs zero times and the line
            // following the size is still consumed at the end of this method.
            System.out.println("Invalid dictionary size");
        }

        System.out.println("Reading dictionary ...");

        // main loop for reading dictionary content
        for (int i = 0; i < dictSize; i++) {
            line = fin.readLine();
            if (line == null) {
                System.out.println("Invalid dictionary line");
                return;
            }

            StringTokenizer strTok = new StringTokenizer(line, " \t\r\n");
            if (strTok.countTokens() < 2) {
                // entry without any label information: nothing to store
                continue;
            }

            // first token: <cp>:<count>
            int[] head = parseFields(strTok.nextToken(), 2);
            Element elem = null;
            if (head != null) {
                elem = new Element();
                elem.count = head[1];
                elem.chosen = 1;

                // remaining tokens: <label>:<count>:<fidx>
                while (strTok.hasMoreTokens()) {
                    int[] fields = parseFields(strTok.nextToken(), 3);
                    if (fields == null) {
                        elem = null;
                        break;
                    }
                    elem.lbCntFidxes.put(Integer.valueOf(fields[0]),
                            new CountFIdx(fields[1], fields[2]));
                }
            }

            if (elem == null) {
                System.out.println("Skipping malformed dictionary line: " + line);
                continue;
            }

            // insert the element to the dictionary
            dict.put(Integer.valueOf(head[0]), elem);
        }

        System.out.println("Reading dictionary (" + dict.size() + " entries) completed!");

        // consume the separator line (###...) that follows the entries
        fin.readLine();
    }

    /**
     * Splits a {@code :}-separated token and parses its leading integer fields.
     * Fields beyond {@code expected} are ignored.
     *
     * @param token the token to parse, e.g. {@code "12:3"} or {@code "4:7:19"}
     * @param expected the number of leading integer fields required
     * @return the parsed fields, or {@code null} if the token has fewer than
     *         {@code expected} fields or a field is not an integer
     */
    private static int[] parseFields(String token, int expected) {
        StringTokenizer tok = new StringTokenizer(token, ":");
        if (tok.countTokens() < expected) {
            return null;
        }
        int[] fields = new int[expected];
        try {
            for (int i = 0; i < expected; i++) {
                fields[i] = Integer.parseInt(tok.nextToken());
            }
        } catch (NumberFormatException e) {
            return null;
        }
        return fields;
    }

    /**
     * Writes the dictionary section of a model file.
     *
     * <p>Only elements whose {@code chosen} flag is non-zero are written, and
     * the size header is derived from exactly that selection so that it always
     * matches the number of entry lines that follow. Within an entry, labels
     * whose feature index is negative are omitted. The section ends with
     * {@code Option.modelSeparator}.
     *
     * @param fout the writer receiving the dictionary section
     * @throws IOException declared for interface compatibility; this method
     *         does not throw it itself
     */
    public void writeDict(PrintWriter fout) throws IOException {
        // Select the entries once so header count and written lines cannot diverge.
        List<Map.Entry<?, ?>> selected = new ArrayList<Map.Entry<?, ?>>();
        if (dict != null) {
            for (Object entryObj : dict.entrySet()) {
                Map.Entry<?, ?> entry = (Map.Entry<?, ?>) entryObj;
                Element elem = (Element) entry.getValue();
                if (elem.chosen != 0) {
                    selected.add(entry);
                }
            }
        }

        // write the dictionary size
        fout.println(selected.size());

        for (Map.Entry<?, ?> entry : selected) {
            Integer cpInt = (Integer) entry.getKey();
            Element elem = (Element) entry.getValue();

            // write the context predicate and its count
            fout.print(cpInt + ":" + elem.count);

            for (Object labelKey : elem.lbCntFidxes.keySet()) {
                CountFIdx cntFIdx = (CountFIdx) elem.lbCntFidxes.get(labelKey);
                if (cntFIdx.fidx < 0) {
                    // labels with a negative feature index are not persisted
                    continue;
                }
                fout.print(" " + labelKey + ":" + cntFIdx.count + ":" + cntFIdx.fidx);
            }

            fout.println();
        }

        // write the line ###...
        fout.println(Option.modelSeparator);
    }

    /**
     * Adds an occurrence count for a context predicate and the label it
     * supports.
     *
     * <p>If the context predicate is unknown, a new element is created. If it
     * is known, its total count is increased and the per-label count is either
     * created or increased. Newly created per-label records get feature index
     * {@code -1} (a negative index is what {@link #writeDict} skips).
     *
     * @param cp the context-predicate id
     * @param label the label id
     * @param count the amount to add
     */
    public void addDict(int cp, int label, int count) {
        if (dict == null) {
            dict = new HashMap();
        }

        Integer cpKey = Integer.valueOf(cp);
        Integer labelKey = Integer.valueOf(label);

        Element elem = (Element) dict.get(cpKey);
        if (elem == null) {
            // context predicate not found: create it with this single label
            elem = new Element();
            elem.count = count;
            elem.lbCntFidxes.put(labelKey, new CountFIdx(count, -1));
            dict.put(cpKey, elem);
            return;
        }

        // update the total count
        elem.count += count;

        CountFIdx cntFIdx = (CountFIdx) elem.lbCntFidxes.get(labelKey);
        if (cntFIdx == null) {
            // label not found for this context predicate
            elem.lbCntFidxes.put(labelKey, new CountFIdx(count, -1));
        } else {
            // label found: update the count only
            cntFIdx.count += count;
        }
    }

    /**
     * Populates the dictionary from the training data of {@link #data}: every
     * context predicate of every observation is added with the observation's
     * human label and a count of 1. Existing entries are kept and updated.
     *
     * <p>Prints a message and does nothing if no data object or no training
     * data is available.
     */
    public void generateDict() {
        if (data == null || data.trnData == null) {
            System.out.println("No data available for generating dictionary");
            return;
        }

        // scan all data observations of the training data
        for (int i = 0; i < data.trnData.size(); i++) {
            Observation obsr = (Observation) data.trnData.get(i);
            for (int j = 0; j < obsr.cps.length; j++) {
                addDict(obsr.cps[j], obsr.humanLabel, 1);
            }
        }
    }

    /**
     * Returns the number of context predicates in the dictionary.
     *
     * @return the number of entries, or 0 if {@link #dict} is {@code null}
     */
    public int size() {
        return (dict == null) ? 0 : dict.size();
    }
} // end of class Dictionary
© 2015 - 2024 Weber Informatics LLC | Privacy Policy