All downloads are free. Search and download functionality uses the official Maven repository.

relations.EventExtraction Maven / Gradle / Ivy

/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package relations;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import utils.BioSemException;
import utils.DBUtils;

import java.io.File;
import java.io.FileWriter;
import java.util.*;

/**
 * Extracts events from sentences.
 *
 * @author Chinh (created 22-08-2011)
 */
public class EventExtraction {

	/** SLF4J logger for this class. */
	private static final Logger log = LoggerFactory.getLogger(EventExtraction.class);
	
	// Sentence analyzer, created in the constructor from the two DBUtils handles.
	SenAnalyzer analyzer; // use default database
	// Taken from analyzer.simp in the constructor; init() reads sharedDic and
	// calls loadPatterns() on it.
	SenSimplifier simp;
	ChunkAnalyzer op = new ChunkAnalyzer();
	// db_sr: first constructor argument, handed to simp.loadPatterns() in init();
	// db: second constructor argument.
	DBUtils db_sr, db;
	Map rules;
	// Dictionary looked up by trigger word (dic.get(tg.word) yields a KeyData).
	// Assigned from simp.sharedDic in init(); null means init() has not run yet.
	Map dic;
	int noun_len = 12;
	int verb_len = 8;
	List usedPro = new ArrayList();
	// Secondary chunk consulted by findAnaphora() and by the "CP" rule lookups
	// when the current chunk yields nothing; may be null.
	BasicChunk cur_sub = null;
	boolean check_pro = false; // whether protein can be reused (for simple and
								// binding only)
	boolean same_role = false;
	// Read by evaluateTrg(): its trigs list supplies a parent-trigger candidate
	// when cur_sub is null and curr_verb_type == 1.
	Chunk curr_verb = null;
	Set sameRole = new HashSet();
	FileWriter writer = null;
	int curr_verb_type = 0;
	String curr_pmid;
	int curr_senID;
	// Thresholds compared against RuleSets scores in extractRegNP()/extractRegVP():
	// ecause -> lower bound for cause scores,
	// escore -> lower bound for event scores,
	// pscore -> lower bound for protein scores,
	// upperValue -> upper bound above which the protein search is skipped.
	double ecause = 0.1;
	double escore = 0.1;
	double pscore = 0.3;
	double upperValue = 0.85;
	int etype = 0;
	boolean debug = false;
	// Rule sets built by groupRules() in init(). Keys are the concatenation
	// trigger word + type index + POS tag + chunk type ("NP", "VP", "CP").
	Map rset;

	/**
	 * Creates an extractor wired to the two given database handles. Only the
	 * analyzer is constructed here; the dictionary and rule sets are loaded
	 * later by {@link #init()}.
	 * 
	 * @param sr
	 *            handle kept as {@code db_sr} and passed to the analyzer
	 * @param db
	 *            handle kept as {@code db} and passed to the analyzer
	 */
	public EventExtraction(DBUtils sr, DBUtils db) {
		SenAnalyzer senAnalyzer = new SenAnalyzer(sr, db);
		this.analyzer = senAnalyzer;
		this.simp = senAnalyzer.simp; // share the analyzer's simplifier
		this.db_sr = sr;
		this.db = db;
	}

	/**
	 * Initialises the analyzer, the shared dictionary and the grouped rule
	 * sets. Subsequent calls are no-ops: a non-null {@code dic} marks the
	 * extractor as already initialised.
	 */
	public void init() {
		if (dic != null) {
			return; // already initialised
		}
		analyzer.init();
		dic = simp.sharedDic;
		// groupRules() filters on dic, so dic must be assigned before this call
		rset = groupRules(simp.loadPatterns(db_sr));
	}

	/**
	 * Tells whether the token immediately before {@code pos} is the word
	 * "its".
	 * 
	 * @param pos
	 *            index of the token of interest
	 * @param tokens
	 *            tokens of the sentence
	 * @return {@code true} only when {@code pos > 0} and
	 *         {@code tokens[pos - 1]} equals "its"
	 */
	private boolean hasIts(int pos, String tokens[]) {
		return pos > 0 && tokens[pos - 1].equals("its");
	}

	/**
	 * Evaluates the candidate triggers of a chunk and keeps only those that can
	 * form events. Triggers failing a check are removed from
	 * {@code bs.trgList}; surviving triggers get their {@code type} and
	 * {@code keytype} assigned.
	 * <p>
	 * Two passes are made over {@code bs.trgList}:
	 * <ol>
	 * <li>in-chunk pass (PRO - TG): triggers for which
	 * {@code bs.inChunkTrg} holds, or NN triggers preceded by "its";
	 * accepted triggers are flagged {@code inchunk} and skipped by pass 2;</li>
	 * <li>trailing pass (TG - prep - PRO/EVT): all remaining triggers.</li>
	 * </ol>
	 * Both loops advance the index only when the current trigger is kept,
	 * because {@code bs.removeTrg} is called on the list being walked.
	 * 
	 * @param bs
	 *            : BasicChunk contains trgs
	 * @param preps
	 *            : preposition list (not referenced in this method's body)
	 * @param mods
	 *            : modifier words
	 * @param tokens
	 *            : list of words/tokens
	 * @param Trgs
	 *            : detected TG (used to determine trg type)
	 */
	public void evaluateTrg(BasicChunk bs, List preps, List mods,
			String[] tokens, List Trgs) {
		bs.mergeNP();
		if (bs.trgList.isEmpty()) {
			return;
		}
		int i = 0;
		Word tg;
		// triggers accepted by the in-chunk pass; pass 2 leaves them untouched
		Set checked = new HashSet();
		RuleSets rule;
		KeyData kdt;
		int idx;
		String key;
		// minimum dictionary score for triggers with keytype > 1
		double score = 0.15;
		boolean has_its;
		while (i < bs.trgList.size()) { // check in-chunk: PRO - TG
			tg = bs.trgList.get(i);
			has_its = hasIts(tg.pos, tokens);
			// NOTE(review): assumes every trigger word has a dictionary entry;
			// a missing entry would fail on the next line - confirm.
			kdt = dic.get(tg.word);
			if (kdt.keytype > 1) {
				if (kdt.score < score) {
					bs.removeTrg(tg);
					continue;
				}
				kdt = kdt.getDefault();
			}

			if (kdt != null && kdt.type != null) {
				idx = SenSimplifier.hashType.get(kdt.type);
			} else {
				// NOTE(review): writes to stdout although the class has an
				// SLF4J logger.
				System.out.println("--> BUG: " + tg.word + " --> no type");
				bs.removeTrg(tg);
				continue;
			}
			// rule key: word + type index + POS tag + chunk type
			key = tg.word + idx + tg.pos_tag + "NP";
			if (bs.inChunkTrg(tg, tokens)
					|| (has_its && tg.pos_tag.equals("NN"))) {
				rule = rset.get(key);
				if (rule == null) {
					bs.removeTrg(tg);
					continue;
				}
				boolean found = false;
				// rule is known to be non-null here (checked just above)
				if (rule != null
						&& (rule.inChunk() || rule.getFrontScore() > 0.01)
						&& !rule.isSkipped(idx)) {
					found = true;
				}
				if (!found) { // no in-chunk pattern, remove this trigger
					bs.removeTrg(tg);
					continue;
				} else {
					boolean has_type = false;
					// re-read the full entry: kdt may have been replaced by
					// its default above
					kdt = dic.get(tg.word);
					if (kdt.keytype == 1 && (!kdt.required || has_its)) {
						tg.type = kdt.type;
						has_type = true;
						tg.keytype = 1;
					} else if (kdt.keytype > 1) {
						// type is decided by the modifiers around the trigger
						List mod = getMod(bs, tg, mods);
						String evt_type = kdt.getType(mod);
						if (evt_type != null) {
							tg.type = evt_type;
							tg.keytype = kdt.keytype == 2 ? 2 : 1;
							has_type = true;
						}
					} else if (kdt.keytype == 1 && kdt.required) {
						// accepted only if a preceding trigger (or the first
						// trigger of the current verb) is listed in kdt.parent
						Word wd = findTrg(tg.pos, Trgs);
						if (wd != null && kdt.parent != null) {
							if (kdt.parent.contains(wd.word)) {
								has_type = true;
								tg.type = kdt.type;
								tg.keytype = 1;
							}
						} else if (cur_sub == null && curr_verb_type == 1
								&& kdt.parent != null) {
							if (curr_verb.trigs.size() > 0) {
								wd = curr_verb.trigs.get(0);
								if (kdt.parent.contains(wd.word)) {
									has_type = true;
									tg.type = kdt.type;
									tg.keytype = 1;
								}
							}
						}
					}
					if (!has_type) {
						bs.removeTrg(tg);
						continue;
					} else {
						tg.inchunk = true;
						checked.add(tg);
					}
				}
			}
			i++;
		}
		i = 0;
		while (i < bs.trgList.size()) { // check TG-prep-PRO/EVT
			tg = bs.trgList.get(i);
			if (checked.contains(tg)) {
				i++;
				continue;
			}
			kdt = dic.get(tg.word);
			if (kdt.keytype > 1) {
				if (kdt.score < score) {
					bs.removeTrg(tg);
					continue;
				}
				kdt = kdt.getDefault();
			}
			// NOTE(review): unlike pass 1 there is no null check on kdt /
			// kdt.type here; presumably pass 1 already removed such triggers.
			idx = SenSimplifier.hashType.get(kdt.type);
			key = tg.word + idx + tg.pos_tag + "NP";
			rule = rset.get(key);
			if (rule == null || rule.isSkipped(idx)) {
				bs.removeTrg(tg);
				continue;
			}
			String prep;
			List pros = null;
			if (tg.pos_tag.startsWith("NN")) { // find
				if (i + 1 < bs.trgList.size()) {
					Word tg2 = bs.trgList.get(i + 1);
					if (bs.isSameRole(tg, tg2, tokens)) {
						int tgpos1 = bs.getChunkPos(tg.pos);
						int tgpos2 = bs.getChunkPos(tg2.pos);
						if (tgpos2 - tgpos1 == 2) {
							// merge the three chunks tgpos1..tgpos2 into the
							// first one, then drop the two merged chunks
							// (the second remove hits the former tgpos2,
							// which has shifted down by one)
							Chunk c = bs.chunkList.get(tgpos1);
							c.merge(bs.chunkList.get(tgpos1 + 1));
							c.merge(bs.chunkList.get(tgpos2));
							bs.chunkList.remove(tgpos1 + 1);
							bs.chunkList.remove(tgpos1 + 1);
						}
					}
				}

			}
			// NOTE(review): prep is dereferenced below, so getPrep is assumed
			// to return a non-null (possibly empty) string - confirm.
			prep = getPrep(tg, bs, rule.prep);
			if (tg.pos_tag.startsWith("NN") && !prep.isEmpty()) {
				pros = findPro(bs, tg, prep, rule.dist1);
			} else if (!tg.pos_tag.startsWith("NN")) {
				pros = findPro(bs, tg, "", rule.dist1);
			}
			// type index 5: also look in front of the trigger when nothing
			// was found behind it
			if (idx == 5 && pros != null && pros.isEmpty()
					&& rule.getFrontScore() > 0.15) {
				pros = findFrontPro(bs, tg, 5);
			}
			// now determine trg type
			kdt = dic.get(tg.word);
			if (kdt.keytype > 1 || kdt.required) { // mix/share or tg requires
													// modifiers
				boolean has_type = false;
				if (pros != null && pros.size() > 0) {
					// now determine trg type for share/mix case
					List mod = getMod(bs, tg, pros, mods);
					String evt_type = kdt.getType(mod);
					if (evt_type != null) {
						tg.type = evt_type;
						tg.keytype = kdt.keytype == 2 ? 2 : 1;
						has_type = true;
					} else if (kdt.keytype == 1 && kdt.required) {
						Word wd = findTrg(tg.pos, Trgs);
						if (wd != null && kdt.parent != null) {
							if (kdt.parent.contains(wd.word)) {
								has_type = true;
								tg.type = kdt.type;
								tg.keytype = 1;
							}
						}
					}
				} else if (idx > 5) { // for regulatory event
					if (i < bs.trgList.size() - 1) {
						Word tg2 = bs.trgList.get(i + 1);
						int pos2 = bs.getChunkPos(tg2.pos);
						int pos1 = bs.getChunkPos(tg.pos);
						kdt = kdt.getDefault();
						// NOTE(review): the trailing "|| true" makes this
						// condition always true, so any following trigger
						// is enough to accept tg.
						if (kdt.child.contains(tg2.word)
								|| (pos2 - pos1) <= rule.dist1 || true) {
							tg.type = kdt.type;
							tg.keytype = 1;
							has_type = true;
						}
					}
				}
				if (!has_type) {
					bs.removeTrg(tg);
					continue;
				}
			} else { // normal trigger
				boolean has_type = false;
				if (pros != null && pros.size() > 0) {
					tg.type = kdt.type;
					tg.keytype = 1;
					has_type = true;
				} else if (idx > 5) {
					if (i < bs.trgList.size() - 1) {
						Word tg2 = bs.trgList.get(i + 1);
						int pos2 = bs.getChunkPos(tg2.pos);
						int pos1 = bs.getChunkPos(tg.pos);
						// NOTE(review): "|| true" makes this condition
						// always true (same as in the branch above).
						if (kdt.child.contains(tg2.word)
								|| (pos2 - pos1) <= rule.dist1 || true) {
							tg.type = kdt.type;
							tg.keytype = 1;
							has_type = true;
						}
					}
				}
				if (!has_type) {
					bs.removeTrg(tg);
					continue;
				}
			}
			i++;
		}
	}

	/**
	 * Looks for the single protein a trigger may refer back to.
	 * <p>
	 * When {@code bs} holds exactly one protein, that protein is returned if
	 * it precedes the trigger and lies at most {@code dist} chunks before it;
	 * {@code cur_sub} is not consulted in that case. Otherwise, when
	 * {@code cur_sub} holds exactly one protein whose chunk position is below
	 * {@code dist}, that protein is returned.
	 * 
	 * @param bs
	 *            chunk containing the trigger
	 * @param dist
	 *            maximum chunk distance
	 * @param tg
	 *            the trigger
	 * @return the referenced protein, or {@code null} if none qualifies
	 */
	private Word findAnaphora(BasicChunk bs, int dist, Word tg) {
		if (bs.proList.size() == 1) {
			Word only = bs.proList.get(0);
			boolean precedes = only.pos < tg.pos;
			if (precedes
					&& bs.getChunkPos(tg.pos) - bs.getChunkPos(only.pos) <= dist) {
				return only;
			}
			return null;
		}
		if (cur_sub == null || cur_sub.proList.size() != 1) {
			return null;
		}
		Word subjectPro = cur_sub.proList.get(0);
		return cur_sub.getChunkPos(subjectPro.pos) < dist ? subjectPro : null;
	}

	/**
	 * Builds the rule-set lookup table from the per-type pattern maps.
	 * <p>
	 * For every trigger type index, every trigger found in that type's map
	 * that is also present in {@code dic} contributes one entry per key of
	 * its {@code Rules.data} map, provided the created rule set has
	 * {@code total >= 1}. Entries are stored under the concatenation
	 * trigger + type index + data key.
	 * 
	 * @param map
	 *            pattern maps, indexed by trigger type
	 * @return map from the concatenated key to the created rule set
	 */
	public Map groupRules(Map[] map) {
		Map grouped = new HashMap();
		RuleSets factory = new RuleSets();
		for (int type = 0; type < SenSimplifier.trigger_type.length; type++) {
			Map perType = map[type];
			for (String trigger : perType.keySet()) {
				if (dic.containsKey(trigger)) { // ignore triggers unknown to dic
					Rules triggerRules = perType.get(trigger);
					triggerRules.initMap();
					for (String posChunk : triggerRules.data.keySet()) {
						RuleSets built = factory.createRule(triggerRules.data
								.get(posChunk));
						if (built.total >= 1) {
							// trg + type + pos + chunk_type
							grouped.put(trigger + type + posChunk, built);
						}
					}
				}
			}
		}
		return grouped;
	}

	/**
	 * Scans a trigger list from its end for a trigger located before
	 * {@code pos} and no more than 10 positions away from it.
	 * 
	 * @param pos
	 *            reference position
	 * @param ls
	 *            list of triggers; may be {@code null} or empty
	 * @return the first match found scanning backwards, or {@code null}
	 */
	private Word findTrg(int pos, List ls) {
		if (ls == null) {
			return null;
		}
		for (int k = ls.size() - 1; k >= 0; k--) {
			Word cand = ls.get(k);
			boolean before = cand.pos < pos;
			if (before && Math.abs(cand.pos - pos) <= 10) {
				return cand;
			}
		}
		return null;
	}

	/**
	 * Finds the first trigger of a chunk whose chunk position lies strictly
	 * between two bounds. The scan stops as soon as a trigger beyond
	 * {@code end} is met.
	 * 
	 * @param start
	 *            : lower bound (exclusive), as a chunk position
	 * @param end
	 *            : upper bound (exclusive), as a chunk position
	 * @param bs
	 *            : chunk whose trgList is searched
	 * @return the matching trigger, or {@code null} if there is none
	 */
	private Word findTrg(int start, int end, BasicChunk bs) {
		if (bs.trgList == null) {
			return null;
		}
		for (Word candidate : bs.trgList) {
			int chunkIdx = bs.getChunkPos(candidate.pos);
			if (chunkIdx > end) {
				return null; // past the range, give up
			}
			if (chunkIdx > start && chunkIdx < end) {
				return candidate;
			}
		}
		return null;
	}

	/**
	 * Extracts simple (single-theme) events for one typed trigger of a
	 * BasicChunk.
	 * <p>
	 * The rule set is looked up under word + type index + POS tag + "NP";
	 * without a rule an empty list is returned. Proteins are searched in this
	 * order: in the trigger's own chunk (with an anaphora fallback when the
	 * trigger is preceded by "its"), then behind the trigger, and for non-noun
	 * triggers finally in front of it. Each protein found yields one
	 * {@code PData}, which is also added to {@code extractedSet}; the protein
	 * is recorded in {@code bc.usedPro}.
	 * 
	 * @param tg
	 *            trigger whose {@code type} has already been set
	 * @param bc
	 *            chunk containing the trigger
	 * @param preps
	 *            preposition list (not referenced in this method's body)
	 * @return the events created; empty if no rule or no protein was found
	 */
	public List extractSimpleNP(Word tg, BasicChunk bc, List preps) {
		List list = new ArrayList();
		// captured before tg may be replaced by a clone further down
		String ev_type = tg.type;
		// There are 3 cases: in-chunk, NN with prep, and JJ/VB with/out prep
		// Check in-chunk first
		int idx = SenSimplifier.hashType.get(tg.type);
		String key = tg.word + idx + tg.pos_tag + "NP";
		RuleSets rule = rset.get(key);
		if (rule == null) {
			return list;
		}
		List pros = null;
		// in chunk
		if ((rule.inChunk() || rule.getFrontScore() > 0.01) && tg.inchunk) {
			// find pros in the same chunk with trg
			pros = getInChunkPro(bc, tg); // 11.10.2011-> Checked!
			// "its <trigger>": fall back to the single protein mentioned
			// earlier (see findAnaphora)
			if (pros.isEmpty() && hasIts(tg.pos, tokens)
					&& rule.getFrontScore() > 0.01) {
				Word pr = findAnaphora(bc, 5, tg);
				if (pr != null) {
					pros = new ArrayList();
					pros.add(pr);
				}
			}
		}
		if (pros == null || pros.isEmpty()) { // pro follows trg
			if (tg.pos_tag.startsWith("NN")) { // find
				String prep = getPrep(tg, bc, rule.prep);
				if (!prep.isEmpty()) {
					pros = findPro(bc, tg, prep, rule.dist1);
				} else {
					// no preposition: fixed search distance of 3
					pros = findPro(bc, tg, "", 3);
				}
			} else {
				pros = findPro(bc, tg, "", rule.dist1);
				if (pros.isEmpty() && rule.getFrontScore() > 0.2) { // 13.12.2011
					pros = findFrontPro(bc, tg, rule.fdist);
				}
			}
		}
		if (pros != null && pros.size() > 0) {
			Word tg2 = null;
			// SKIP <--- shared trigger, clone one trg for Genexpression
			// NOTE(review): evaluateTrg only ever assigns keytype 1 or 2, so
			// this branch looks deliberately disabled - confirm.
			if (tg.keytype == 20) {
				tg2 = tg; // keep the original for Positive_regulation
				tg = new Word(tg.word, tg.pos, tg.loc);
				tg.locs = tg2.locs;
				tg.type = "Gene_expression";
				tg.keytype = 1;
				tg2.type = "Positive_regulation"; // change type into Pos_Reg
				tg2.keytype = 1;
			}
			tg.TID = getTrgID(); // generate TrgID;
			for (Word w : pros) {
				// uses ev_type captured at the top, not the current tg.type
				PData p = new PData(tg, w, ev_type);
				p.PID = getEventID(); // Event ID
				extractedSet.add(p);
				list.add(p);
				bc.usedPro.add(w);
			}
			if (tg2 != null) {
				// put Gene_expression into extractedMap
				extractedMap.put(tg, list);
				// type index 6 is hard-coded for the second lookup
				key = tg2.word + 6 + tg2.pos_tag + "NP";
				// NOTE(review): rule is dereferenced below without a null
				// check after this lookup.
				rule = rset.get(key);
				List list1 = new ArrayList();
				tg2.TID = getTrgID();
				tg2.used = true;
				// decide whether tg2 takes the events just created as theme
				// (form_event) or the proteins themselves
				boolean form_event = true;
				if (tg2.inchunk) {
					if (rule.getEvtScore(0) < 0.5) {
						form_event = false;
					}
				} else {
					if (rule.getEvtScore(2) < 0.5) {
						form_event = false;
					}
				}
				if (form_event) {
					for (PData pdt : list) {
						PData p = new PData(tg2, pdt, tg2.type);
						p.PID = getEventID();
						extractedSet.add(p);
						list1.add(p);
					}
				} else {
					for (Word w : pros) {
						PData p = new PData(tg2, w, tg2.type);
						p.PID = getEventID(); // Event ID
						extractedSet.add(p);
						list1.add(p);
						bc.usedPro.add(w);
					}
				}
				return list1;
			}

		}
		return list;

	}

	/**
	 * Extracts regulatory events (theme plus optional cause) for one typed
	 * trigger of a BasicChunk.
	 * <p>
	 * {@code pro1} collects theme candidates and {@code pro2} cause
	 * candidates; each may hold proteins or previously extracted events. Two
	 * main paths exist: the in-chunk path (PRO - TG, taken when the trigger
	 * is flagged {@code inchunk} and the rule supports it) and the trailing
	 * path (TG - PRO), which is further split by the trigger's POS tag. When
	 * a following trigger is consumed as a nested event, {@code curr_tg} is
	 * advanced and {@code extractNP} is called for it.
	 * <p>
	 * At the end one {@code PData} is created per theme/cause combination, or
	 * per theme when there is no cause, and added to {@code extractedSet}.
	 * 
	 * @param tg
	 *            trigger whose {@code type} has already been set
	 * @param bc
	 *            chunk containing the trigger
	 * @param preps
	 *            preposition list, passed through to {@code extractNP}
	 * @return the events created; empty if no rule or no theme was found
	 */
	public List extractRegNP(Word tg, BasicChunk bc, List preps) {
		List list = new ArrayList();
		List pro1 = null, pro2 = null;
		KeyData kdt = dic.get(tg.word).getKeyData(tg.type);
		int idx = SenSimplifier.hashType.get(tg.type);
		// NOTE(review): the early return is commented out, so this check
		// currently has no effect.
		if ((kdt.score < 0.15 && idx >= 7) || (kdt.score < 0.2 && idx == 6)) {
			// return list;
		}
		// triggers that may act as nested theme / cause events of tg
		Set childSet = kdt.child;
		Set parentSet = kdt.parent;
		String key = tg.word + idx + tg.pos_tag + "NP";
		RuleSets rule = rset.get(key);
		String prep1;
		if (rule == null) {
			return list;
		}
		// NOTE(review): Math.min(x, 0) is never positive, so the distance
		// test using len below can only succeed for a distance of 0;
		// possibly Math.max was intended - confirm.
		int len = Math.min(rule.dist1, 0);
		if ((rule.inChunk() || rule.getFrontScore() > 0.01) && tg.inchunk) {
			prep1 = getPrep(tg, bc, rule.prep);
			if (rule.getInchunkCause() > 0.05) {
				// find (theme)
				// theme as pro
				if ((pro1 == null || pro1.isEmpty())
						&& rule.getInChunkEvtScore() < upperValue) {
					if (tg.pos_tag.startsWith("NN")) {
						pro1 = findPro(bc, tg, prep1, rule.dist1);
					} else {
						pro1 = findRegPro(bc, tg, bc.getChunkPos(tg.pos),
								rule.dist1);
					}
				}
				// PRO-TG - EVT
				if ((pro1 == null || pro1.isEmpty())
						&& rule.getInChunkEvtScore() > escore) {
					if (tg.pos_tag.startsWith("NN")) { // NNx -> requires prep
						if (curr_tg < bc.trgList.size() - 1) {
							Word tg2 = bc.trgList.get(curr_tg + 1);
							prep1 = getPrep(tg, bc, rule.prep);
							if (prep1 != null
									&& !prep1.isEmpty()
									&& ((childSet.contains(tg2.word) || Math
											.abs(bc.getChunkPos(tg2.pos)
													- bc.getChunkPos(tg.pos)) <= len))) {
								// consume the next trigger as nested event
								curr_tg++;
								List temp = extractNP(curr_tg, bc, preps);
								if (temp != null && !temp.isEmpty()) {
									pro1 = temp;
								}
							}
						}
					// VB/JJ do not require prep
					} else if (curr_tg < bc.trgList.size() - 1) {
						Word tg2 = bc.trgList.get(curr_tg + 1);
						if (childSet.contains(tg2.word)) {
							curr_tg++;
							List temp = extractNP(curr_tg, bc, preps);
							if (temp != null && !temp.isEmpty()) {
								pro1 = temp;
							}
						}
					}
				}
			}
			// an explicit cause protein overrides whatever theme was found:
			// the in-chunk proteins become the theme
			List pcause = findCausePro(bc, tg);
			if (pcause.size() > 0) {
				pro1 = getInChunkPro(bc, tg);
				pro2 = pcause;
			}
			// PRO-TG form
			// (float division: if both counters are 0 the ratio is NaN and
			// the comparison is false)
			if ((pro1 == null || pro1.isEmpty())
					&& rule.count[0] * 1f / (rule.count[0] + rule.count[1]) > escore) {
				pro1 = getInChunkPro(bc, tg);
				if (pro1 != null && pro1.size() > 0) {
					bc.usedPro.addAll(pro1);
				}
			// PRO-TG theme (PRO/EVT)
			} else if (pro1 != null && pro1.size() > 0 && pro2 == null) {
				// cause , assume all causes are pros
				pro2 = getInChunkPro(bc, tg);
				if (pro2 != null && pro2.size() > 0) {
					bc.usedPro.addAll(pro2);
				}
				// a cause preposition means the roles are the other way round
				if (rule.causePrep.contains(prep1)) {
					List temp = pro1;
					pro1 = pro2;
					pro2 = temp;
				}
			}
			// "its <trigger>": anaphora fallback, only when a search ran
			// (pro1 non-null) but found nothing
			if (pro1 != null && pro1.isEmpty() && hasIts(tg.pos, tokens)
					&& rule.getFrontScore() > 0.01) {
				Word pr = findAnaphora(bc, 5, tg);
				if (pr != null) {
					pro1 = new ArrayList();
					pro1.add(pr);
				}
			}
		} else { // TG - PRO
			// First attempt; find event
			prep1 = getPrep(tg, bc, rule.prep);
			if (tg.pos_tag.startsWith("NN")) {
				if (!prep1.isEmpty()) {
					if (rule.prepPair.get(prep1) != null) {
						// chunk positions of prep1 and of its paired prep2
						int pairs[] = getPrepPairs(tg.pos, prep1, bc,
								rule.prepPair.get(prep1),
								Math.max(rule.dist1, rule.dist2));
						if (pairs != null) {
							// the concatenated text of both prepositions
							// decides which argument comes first
							boolean order = false;
							String txt = bc.chunkList.get(pairs[0]).txt
									+ bc.chunkList.get(pairs[1]).txt;
							if (rule.order.contains(txt)) {
								order = true;
							}
							if (order) { // TG - cause - theme
								// find pro/event after prep2
								// check whether any trg between prep1 and prep2
								Word tg2 = findTrg(pairs[0], pairs[1], bc);
								if (tg2 != null && parentSet.contains(tg2.word)) {
									curr_tg++;
									pro2 = extractNP(curr_tg, bc, preps);
								}
								if ((pro2 == null || pro2.isEmpty())
										&& rule.getEvtCause(3) < upperValue
										&& rule.getProCause(3) > pscore) {
									pro2 = findPro(bc, pairs[0], pairs[1]
											- pairs[0] - 1);
								}
								// try to find pro first, if no pro found, then
								// find event
								if (rule.getEvtScore(3) < upperValue
										&& rule.getProScore(3) > pscore) {
									pro1 = findRegPro(bc, tg, pairs[1],
											rule.dist2);
								}
								// find event
								if (pro1 == null || pro1.isEmpty()) {
									if (curr_tg < bc.trgList.size() - 1) {
										tg2 = bc.trgList.get(curr_tg + 1);
										if (childSet.contains(tg2.word)) {
											curr_tg++;
											List temp = extractNP(curr_tg, bc,
													preps);
											if (temp != null && !temp.isEmpty()) {
												pro1 = temp;
											}
										}
									}
								}
								// no theme in this ordering: give up entirely
								if (pro1 == null || pro1.isEmpty()) {
									return list;
								}
							} else { // TG - theme - cause
								// find pro/event after prep1
								// check whether any trg between prep1 and prep2
								Word tg2 = findTrg(pairs[0], pairs[1], bc);
								if (tg2 != null && childSet.contains(tg2.word)) {
									curr_tg++;
									pro1 = extractNP(curr_tg, bc, preps);
								}
								if ((pro1 == null || pro1.isEmpty())
										&& rule.getEvtScore(3) < upperValue
										&& rule.getProScore(3) > pscore) {
									pro1 = findPro(bc, pairs[0], pairs[1]
											- pairs[0] - 1);
								}
								// Find cause
								// try to find pro first, if no pro found, then
								// find event
								if (rule.getEvtCause(3) < upperValue
										&& rule.getProCause(3) > pscore) {
									pro2 = findRegPro(bc, tg, pairs[1],
											rule.dist2);
								}
								// find event
								if (pro2 == null || pro2.isEmpty()) {
									if (curr_tg < bc.trgList.size() - 1) {
										tg2 = bc.trgList.get(curr_tg + 1);
										if (parentSet.contains(tg2.word)) {
											curr_tg++;
											List temp = extractNP(curr_tg, bc,
													preps);
											if (temp != null && !temp.isEmpty()) {
												pro2 = temp;
											}
										}
									}
								}
								// normalise "empty" to "absent" so the
								// pro2 == null tests below still apply
								if (pro2 != null && pro2.isEmpty()) {
									pro2 = null;
								}
							}
						}
					}
					// no prep pairs or cannot find pro2
					if (pro1 == null || pro1.isEmpty()) {
						// find pro as theme
						if (rule.getEvtScore(2) < upperValue
								&& rule.getProScore(2) > pscore) {
							pro1 = findPro(bc, tg, prep1, rule.dist1);
						}
						if (pro1 == null || pro1.isEmpty()) {
							if (curr_tg < bc.trgList.size() - 1) {
								Word tg2 = bc.trgList.get(curr_tg + 1);
								if (childSet.contains(tg2.word)) {
									curr_tg++;
									List temp = extractNP(curr_tg, bc, preps);
									if (temp != null && !temp.isEmpty()) {
										pro1 = temp;
									}
								}
							}
						}
					}
					List pcause = findCausePro(bc, tg);
					if (pcause.size() > 0 && pro1 != null && pro1.size() > 0) {
						pro2 = pcause;
					}
					// Check group 6: cause - tg - theme
					Word frontPrep = findFrontPrep(tg, bc);
					if (pro1 != null && pro1.size() > 0 && frontPrep != null
							&& cur_sub != null) {
						// the cause is searched in cur_sub using the "CP" rule
						key = tg.word + idx + tg.pos_tag + "CP";
						RuleSets rule2 = rset.get(key);
						if (rule2 != null
								&& rule2.frontPrep.contains(frontPrep.word)) {
							pro2 = findFrontPro(cur_sub, tg, rule2.fdist);
						}
					}
					// Check group 5: theme - tg - cause
					if (pro1 != null && pro1.size() > 0 && pro2 == null) {
						if (rule.causePrep.contains(prep1)
								&& rule.getEvtCause(5) < upperValue
								&& rule.getProCause(5) > pscore) {
							// what was found behind the trigger becomes the
							// cause, the proteins in front the theme
							List temp = findFrontPro(bc, tg, rule.fdist);
							if (temp.size() > 0) {
								pro2 = pro1;
								pro1 = temp;
							}
						}
					}

				} else { // has no prep
					// both branches are empty placeholders: a noun trigger
					// without preposition yields no event on this path
					if (rule.getBehindScore() > rule.getFrontScore()) {
						// TG - PRO
					} else {
						// PRO - TG
					}
				}
			} else { // TG is VBx or JJ
				if (rule.getBehindScore() > rule.getFrontScore()) {
					// find pro as theme
					if (rule.getEvtScore(2) < upperValue
							&& rule.getProScore(3) > pscore) {
						pro1 = findRegPro(bc, tg, bc.getChunkPos(tg.pos),
								rule.dist1);
					}
					if (pro1 == null || pro1.isEmpty()) {
						if (curr_tg < bc.trgList.size() - 1) {
							Word tg2 = bc.trgList.get(curr_tg + 1);
							if (childSet.contains(tg2.word)) {
								curr_tg++;
								List temp = extractNP(curr_tg, bc, preps);
								if (temp != null && !temp.isEmpty()) {
									pro1 = temp;
								}
							}
						}
					}
					// Check group 6: cause - tg - theme
					Word frontPrep = findFrontPrep(tg, bc);
					if (pro1 != null && pro1.size() > 0 && frontPrep != null) {
						if (rule.frontPrep.contains(frontPrep.word)) {
							List pros2 = findFrontPro(bc, tg, rule.fdist);
							if (pros2.size() > 0) {
								pro2 = pro1;
								pro1 = pros2;
							}
						}
					}
					// Check group 5: theme - tg - cause
					if (pro1 != null && pro1.size() > 0 && pro2 == null) {
						if (rule.causePrep.contains(prep1)
								&& rule.getEvtCause(5) < upperValue) {
							List temp = findFrontPro(bc, tg, rule.fdist);
							if (temp.size() > 0) {
								pro2 = pro1;
								pro1 = temp;
							}
						}
					}
				}
				// nothing found behind the trigger: look in front of it,
				// first for proteins, then for the previous trigger's events
				if ((pro1 == null || pro1.isEmpty())
						&& rule.getBehindScore() > 0.1) {
					if (rule.getEvtCause(5) < upperValue) {
						pro1 = findFrontPro(bc, tg, rule.fdist);
					}
					if ((pro1 == null || pro1.isEmpty())
							&& rule.getEvtCause(4) > ecause) {
						if (curr_tg >= 1) {
							Word tg2 = bc.trgList.get(curr_tg - 1);
							if (childSet.contains(tg2.word)
									&& extractedMap.containsKey(tg2)) {
								pro1 = extractedMap.get(tg2);
							}
						}
					}
				}
			}
		}
		// forming event pairs
		if (pro1 != null && pro1.size() > 0 && pro2 != null && pro2.size() > 0) {
			// may shrink either list, hence the sizes are re-checked below
			removeDuplicate(pro1, pro2);
		}
		if (pro1 != null && pro1.size() > 0 && pro2 != null && pro2.size() > 0) {

			// theme and cause: one event per (theme, cause) combination
			tg.TID = getTrgID();
			for (Object obj1 : pro1) {
				for (Object obj2 : pro2) {
					PData pair = new PData(tg, obj1, obj2, tg.type);
					pair.PID = getEventID();
					extractedSet.add(pair);
					list.add(pair);
				}
			}
			return list;
		} else if (pro1 != null && pro1.size() > 0) {
			// theme only
			tg.TID = getTrgID();
			for (Object obj1 : pro1) {
				PData pair = new PData(tg, obj1, tg.type);
				pair.PID = getEventID();
				extractedSet.add(pair);
				list.add(pair);
			}
			return list;
		}
		return list;
	}

	/**
	 * Extracts regulatory events (theme plus optional cause) for a trigger
	 * located in a verb chunk.
	 * <p>
	 * The rule set is looked up under word + type index + POS tag + "VP".
	 * Arguments are searched in {@code vc.subject} (in front of the trigger)
	 * and {@code vc.object} (behind it). In the inverse case (VBN trigger in
	 * a verb chunk of type 1 with {@code passive_order}, or preposition "by")
	 * the theme is taken from the subject and the cause from the object; in
	 * the normal case it is the other way round, with a fallback to the
	 * inverse layout when no theme was found in the object. For each argument
	 * proteins are tried first and events ({@code findEvent}) second,
	 * depending on the rule's scores.
	 * 
	 * @param tg
	 *            trigger whose {@code type} has already been set
	 * @param vc
	 *            verb chunk providing {@code subject}, {@code object} and
	 *            {@code verb_type}
	 * @param preps
	 *            preposition list (not referenced in this method's body)
	 * @return the events created; empty if the rule is missing or skipped,
	 *         or no theme was found
	 */
	public List extractRegVP(Word tg, VerbChunk vc, List preps) {
		List list = new ArrayList();
		List pro1 = null, pro2 = null;
		int idx = SenSimplifier.hashType.get(tg.type);
		KeyData kdt = dic.get(tg.word).getKeyData(tg.type);
		// NOTE(review): childSet and parentSet are not used in this method.
		Set childSet = kdt.child;
		Set parentSet = kdt.parent;
		String key = tg.word + idx + tg.pos_tag + "VP";
		RuleSets rule = rset.get(key);
		if (rule == null || rule.isSkipped()) {
			return list;
		}
		// find theme
		String prep1 = getPrep(tg, vc.object, rule.prep);
		// Inverse case: Theme - TG - Cause
		if ((vc.verb_type == 1 && tg.pos_tag.equals("VBN") && rule.passive_order)
				|| (prep1 != null && prep1.equals("by"))) {
			// find theme
			// NOTE(review): when this branch is entered through "by" but
			// passive_order is false, nothing is searched and the method
			// returns an empty list.
			if (rule.passive_order) { // normal case -> theme -tg - cause
				// float division: NaN (comparison false) if both counters
				// are 0
				double cause_score = rule.count[5] * 1f
						/ (rule.count[5] + rule.count[4]);
				// has cause, check group 5
				if (rule.causePrep.contains(prep1) || cause_score > ecause) {
					// find theme
					if (rule.getEvtScore(4) < upperValue) { // try to find pro
						pro1 = findFrontPro(vc.subject, tg, rule.fdist);
					}
					// no pro, find event
					if ((pro1 == null || pro1.isEmpty())
							&& rule.getEvtScore(4) > escore) {
						pro1 = findEvent(vc.subject, rule.fdist);
					}
					// has theme, find cause
					if (pro1 != null && pro1.size() > 0 && cause_score > ecause) {
						if (rule.getEvtCause(5) < upperValue) {
							pro2 = findRegPro(vc.object, tg, -1, rule.dist1);
						}
						if ((pro2 == null || pro2.isEmpty())
								&& rule.getEvtCause(5) > ecause) {
							pro2 = findEvent(vc.object, rule.dist1);
							// an expected cause event is missing: no event
							// is formed at all
							if (pro2 == null || pro2.isEmpty()) {
								return list;
							}
						}
					}
				} else { // no cause : theme - TG
					if (rule.getEvtScore(4) < upperValue) { // try to find pro
						pro1 = findFrontPro(vc.subject, tg, rule.fdist);
					}
					// no pro, find event
					if ((pro1 == null || pro1.isEmpty())
							&& rule.getEvtScore(4) > escore) {
						pro1 = findEvent(vc.subject, rule.fdist);
					}
				}
			}
		} else { // normal case: Cause - TG - Theme
			// share of cause-bearing patterns among the front (count[4],
			// count[5]) and behind (count[2], count[6]) counters; the exact
			// meaning of each index is defined in RuleSets
			double fcause = rule.count[5] * 1f
					/ (rule.count[4] + rule.count[5]);
			double bcause = rule.count[6] * 1f
					/ (rule.count[2] + rule.count[6]);

			// theme behind the trigger: protein first, then event
			if (rule.getEvtScore(2) < upperValue) {
				pro1 = findRegPro(vc.object, tg, -1, rule.dist1);
			}
			if ((pro1 == null || pro1.isEmpty())
					&& rule.getEvtScore(2) > escore) {
				pro1 = findEvent(vc.object, rule.dist1);
			}
			// cause in front of the trigger
			if (pro1 != null && pro1.size() > 0 && bcause > ecause) {
				if (rule.getEvtCause(6) < upperValue) { // try to find pro
					pro2 = findFrontPro(vc.subject, tg, rule.fdist);
				}
				if ((pro2 == null || pro2.isEmpty())
						&& rule.getEvtCause(6) > ecause) { // no pro, find event
					pro2 = findEvent(vc.subject, rule.fdist);
				}
			}
			// no theme behind the trigger: retry with the theme in front
			// and the cause behind, if such patterns are frequent enough
			if ((pro1 == null || pro1.isEmpty())
					&& (rule.count[4] + rule.count[5]) * 1f / rule.total > escore) {
				if (rule.getEvtScore(4) < upperValue) { // try to find pro
					pro1 = findFrontPro(vc.subject, tg, rule.fdist);
				}
				if ((pro1 == null || pro1.isEmpty())
						&& rule.getEvtScore(4) > escore) { // no pro, find event
					pro1 = findEvent(vc.subject, rule.fdist);
				}
				// find cause
				if (pro1 != null && pro1.size() > 0 && fcause > ecause) {
					if (rule.getEvtCause(5) < upperValue) {
						pro2 = findRegPro(vc.object, tg, -1, rule.dist1);
					}
					if ((pro2 == null || pro2.isEmpty())
							&& rule.getEvtCause(5) > ecause) {
						pro2 = findEvent(vc.object, rule.dist1);
						if (pro2 == null || pro2.isEmpty()) {
							return list;
						}
					}
				}
			}

		}
		// forming event pairs
		if (pro1 != null && pro1.size() > 0 && pro2 != null && pro2.size() > 0) {
			// theme and cause: one event per (theme, cause) combination
			tg.TID = getTrgID();
			for (Object obj1 : pro1) {
				for (Object obj2 : pro2) {
					PData pair = new PData(tg, obj1, obj2, tg.type);
					pair.PID = getEventID();
					extractedSet.add(pair);
					list.add(pair);
				}
			}
			return list;
		} else if (pro1 != null && pro1.size() > 0) {
			// theme only
			tg.TID = getTrgID();
			for (Object obj1 : pro1) {
				PData pair = new PData(tg, obj1, tg.type);
				pair.PID = getEventID();
				extractedSet.add(pair);
				list.add(pair);
			}
			return list;
		}
		return list;
	}

	/**
	 * Splits a protein list at the first "and" token found strictly between
	 * the first and the last protein.
	 * <p>
	 * A list of at most one protein is returned unchanged in slot 0 (slot 1
	 * stays empty). If no "and" is found, both slots are left empty.
	 * 
	 * @param ls
	 *            : proteins
	 * @param tokens
	 *            : tokens from sentence
	 * @return a two-element array: proteins before the "and" and proteins
	 *         after it
	 */
	private List[] splitProByAnd(List ls, String[] tokens) {
		List halves[] = new ArrayList[2];
		halves[0] = new ArrayList();
		halves[1] = new ArrayList();
		if (ls.size() <= 1) { // cannot split
			halves[0] = ls;
			return halves;
		}
		int firstPos = ls.get(0).pos;
		int lastPos = ls.get(ls.size() - 1).pos;
		int andAt = 0;
		int t = firstPos + 1;
		while (t < lastPos) {
			if (tokens[t].equals("and")) { // has break
				andAt = t;
				break;
			}
			t++;
		}
		boolean hasBreak = andAt > firstPos && andAt < lastPos;
		if (hasBreak) {
			for (Word w : ls) {
				List side = w.pos < andAt ? halves[0] : halves[1];
				side.add(w);
			}
		}
		return halves;
	}

	/**
	 * Collects compound protein pairs from a protein list.
	 * <p>
	 * For each protein, the proteins after it are scanned; the first one that
	 * is flagged {@code compound} and shares the same {@code pos} forms a
	 * pair with it. The scan for a protein stops at the first later protein
	 * with a greater {@code pos}.
	 * 
	 * @param pros
	 *            proteins to inspect
	 * @return list of two-element {@code Word} arrays, one per pair found
	 */
	private List splitProPair(List pros) {
		List pairs = new ArrayList();
		int count = pros.size();
		for (int first = 0; first < count - 1; first++) {
			Word head = pros.get(first);
			for (int second = first + 1; second < count; second++) {
				Word other = pros.get(second);
				if (other.compound && other.pos == head.pos) {
					pairs.add(new Word[] { head, other });
					break;
				}
				if (other.pos > head.pos) {
					break; // moved past head's position, no partner
				}
			}
		}
		return pairs;
	}

	/**
	 * Extracts binding events for a trigger that sits in a noun-phrase chunk.
	 * <p>
	 * The rule set stored under {@code tg.word + 5 + tg.pos_tag + "NP"} drives the
	 * search for up to two theme lists:
	 * <ol>
	 * <li>PRO - TG: proteins in the trigger's own chunk, optionally a second
	 * theme behind a preposition, or an anaphoric antecedent when the trigger is
	 * directly preceded by "its";</li>
	 * <li>TG - PRO: if no first theme was found, proteins following the trigger,
	 * using preposition pairs, splitting around "between" / "interaction of",
	 * the clause ("CP") rule set, or the front/behind scores of the rule.</li>
	 * </ol>
	 * Each created event is added to {@code extractedSet}; the proteins used are
	 * recorded in {@code bc.usedPro}.
	 * 
	 * @param tg
	 *            : binding trigger
	 * @param bc
	 *            : basic chunk that is searched for themes
	 * @param preps
	 *            : not used by this method
	 * @return list of created events; empty if there is no rule or no theme
	 */
	public List extractBindNP(Word tg, BasicChunk bc, List preps) {
		List list = new ArrayList();
		String ev_type = tg.type;
		RuleSets rule;
		String prep;
		Word frontPrep;
		// 5 is the event-type index that extractNP dispatches to this method
		String key = tg.word + 5 + tg.pos_tag + "NP";
		rule = rset.get(key);
		List pro1 = null, pro2 = null; // theme1 and theme2
		// Finding pro1 and pro2 based on patterns
		boolean inchunk = false;
		if (rule == null) {
			return list;
		}
		if ((rule.getFrontScore() > 0.01 || rule.inChunk()) && tg.inchunk) { // in chunk: PRO - TG
			// find pros in the same chunk with trg
			pro1 = getInChunkPro(bc, tg);
			if (pro1 != null && pro1.size() > 0 && rule.getInchunkCause() > 0.1) {
				prep = getPrep(tg, bc, rule.prep);
				if (!prep.isEmpty()) {
					pro2 = findPro(bc, tg, prep, rule.dist1);
				}
			}
			if (pro1 != null && pro1.size() > 0) {
				inchunk = true;
			}
			// pro1 is treated as nullable by the checks above, so guard it here
			// as well instead of dereferencing it directly
			if ((pro1 == null || pro1.isEmpty()) && hasIts(tg.pos, tokens)
					&& rule.getFrontScore() > 0.01) {
				Word pr = findAnaphora(bc, 5, tg);
				if (pr != null) {
					pro1 = new ArrayList();
					pro1.add(pr);
				}
			}
		}
		if (pro1 == null || pro1.isEmpty()) { // pro follows trg: TG-PRO
			prep = getPrep(tg, bc, rule.prep);
			// check prep -> has prep or empty
			// find pro in chunk with maximum distance
			if (tg.pos_tag.startsWith("NN")) { // tg is noun
				if (!prep.isEmpty()) {
					if (rule.prepPair.get(prep) != null) {
						int pairs[] = getPrepPairs(tg.pos, prep, bc,
								rule.prepPair.get(prep), rule.dist2);
						if (pairs != null) {
							List pros1, pros2;
							int p1 = pairs[0];
							int p2 = pairs[1];
							// theme1 lies between the two preps, theme2 after
							// the second prep
							pros1 = findPro(bc, p1, p2 - p1 - 1);
							pros2 = findPro(bc, p2, rule.dist2);
							if (!pros1.isEmpty() && !pros2.isEmpty()) {
								pro1 = pros1;
								pro2 = pros2;
							} else if (!pros1.isEmpty()) {
								pro1 = pros1;
							}
						}
					}
					if (pro1 == null || pro1.isEmpty()) {
						// failed to find prep2, or no pro for theme2
						pro1 = findPro(bc, tg, prep, rule.dist1);
						frontPrep = findFrontPrep(tg, bc);
						if (pro1.size() > 0) {
							if (prep.equals("between")
									|| (tg.word.startsWith("interaction") && prep
											.equals("of"))) {
								// hardwired since 'and' is not a prep
								List prs[] = splitProByAnd(pro1, tokens);
								if (prs != null) {
									pro1 = prs[0];
									pro2 = prs[1];
								}
							} else if (frontPrep != null) {
								if (cur_sub != null) { // try CP pattern
									key = tg.word + 5 + tg.pos_tag + "CP";
									RuleSets rule2 = rset.get(key);
									if (rule2 != null
											&& rule2.frontPrep
													.contains(frontPrep.word)) {
										String prep_2 = getPrep(tg, bc,
												rule2.prep);
										if (!prep_2.isEmpty()) {
											pro1 = findFrontPro(cur_sub, tg,
													rule2.fdist);
											pro2 = findPro(bc, tg, prep_2,
													rule2.dist1);
										}
									}
								}
							} else if (rule.count[3] * 1f / rule.total > 0.2) {
								// placeholder: splitting the protein list (by
								// 'and' / into pairs) is not implemented
							}
						}
					}
				} else { // no prep1, use front and behind score to determine theme
					if (rule.getFrontScore() < rule.getBehindScore()) {
						pro1 = findPro(bc, tg, prep, rule.dist1);
					}
					// NOTE(review): this searches behind the trigger with the
					// front distance, while the JJ/VB branch below calls
					// findFrontPro for the same condition - confirm intent
					if ((pro1 == null || pro1.isEmpty())
							&& rule.getFrontScore() > 0.1) {
						pro1 = findPro(bc, tg, prep, rule.fdist);
					}
				}

			} else { // trg is JJ or VB
				List pros2;
				pro1 = findPro(bc, tg, prep, rule.dist1);
				frontPrep = findFrontPrep(tg, bc);
				if (frontPrep != null && cur_sub != null) { // try CP pattern
					key = tg.word + 5 + tg.pos_tag + "CP";
					RuleSets rule2 = rset.get(key);
					if (rule2 != null
							&& rule2.frontPrep.contains(frontPrep.word)) {
						String prep_2 = getPrep(tg, bc, rule2.prep);
						if (!prep_2.isEmpty()) {
							pro1 = findFrontPro(cur_sub, tg, rule2.fdist);
							pro2 = findPro(bc, tg, prep_2, rule2.dist1);
						}
					}
				} else if (pro1.size() > 0 && prep != null) {
					// try to expand by adding theme2
					// NOTE(review): getPrep(Word, ...) returns "" rather than
					// null when nothing matches, so the null check looks
					// always true - confirm
					if (rule.count[5] * 1f / rule.total > 0.1) {
						pros2 = findFrontPro(bc, tg, rule.fdist);
						if (pros2.size() > 0) {
							// proteins in front become theme1, the ones behind
							// the trigger become theme2
							pro2 = pro1;
							pro1 = pros2;
						}
					}
				} else if (pro1.isEmpty() && rule.getFrontScore() > 0.1) {
					pro1 = findFrontPro(bc, tg, rule.fdist);
				}
			}
		}
		// forming pairs based on pro1 and pro2
		if (pro1 != null && pro1.size() > 0 && pro2 != null && pro2.size() > 0) {
			removeDuplicate(pro1, pro2);
		}
		if (pro1 != null && pro1.size() > 0) { // check case has_theme2 first
			bc.usedPro.addAll(pro1);
			if (pro2 != null && pro2.size() > 0) { // has pro2 list
				bc.usedPro.addAll(pro2);
				tg.TID = getTrgID();
				// one event per (theme1, theme2) combination
				for (Word pr1 : pro1) {
					for (Word pr2 : pro2) {
						PData p = new PData(tg, pr1, pr2, ev_type);
						p.PID = getEventID();
						extractedSet.add(p);
						list.add(p);
					}
				}
				return list;
			} else { // no theme 2
				// TODO: add condition in case this trg always requires theme2;
				// loop the rest for any pattern satisfies
				if (inchunk && pro1.size() == 2 && rule.getInchunkCause() > 0.2) {
					// compound case: two in-chunk proteins may form a pair
					List pair = splitProPair(pro1);
					if (pair.size() > 0) {
						tg.TID = getTrgID();
						for (Word[] pw : pair) {
							PData p = new PData(tg, pw[0], pw[1], ev_type);
							p.PID = getEventID();
							extractedSet.add(p);
							list.add(p);
						}
						return list;
					}
				}
				// one single-theme event per protein
				tg.TID = getTrgID();
				for (Word w : pro1) {
					PData p = new PData(tg, w, ev_type);
					p.PID = getEventID();
					extractedSet.add(p);
					list.add(p);
				}
				return list;
			}
		}
		return list;
	}

	/**
	 * Looks behind the trigger for the first preposition followed by a second
	 * preposition taken from {@code set}.
	 * 
	 * @param pos
	 *            : token position of the trigger
	 * @param prep1
	 *            : text of the first preposition
	 * @param bs
	 *            : basic chunk to search
	 * @param set
	 *            : allowed texts for the second preposition
	 * @param dist
	 *            : number of chunks behind the trigger to search
	 * @return : chunk indexes {prep1, prep2} if found, null otherwise.
	 */
	private int[] getPrepPairs(int pos, String prep1, BasicChunk bs,
			Set set, int dist) {
		final int trgChunk = bs.getChunkPos(pos);
		final int bound = Math.min(trgChunk + dist, bs.chunkList.size() - 1);
		// counts PP chunks passed while looking for the second prep; the counter
		// is shared by all prep1 candidates
		int passedPreps = 0;
		for (int first = trgChunk + 1; first < bound; first++) {
			Chunk head = bs.chunkList.get(first);
			if (!head.txt.equals(prep1)) {
				continue;
			}
			int second = first + 1;
			while (second < bound) {
				Chunk candidate = bs.chunkList.get(second);
				if (set.contains(candidate.txt)) {
					return new int[] { first, second };
				}
				if (candidate.type.equals("PP")) {
					passedPreps++;
				}
				if (passedPreps > 1) {
					break;
				}
				second++;
			}
		}
		return null;
	}

	/**
	 * Finds the closest preposition (a chunk whose text is in {@code prepSet})
	 * among the up to four chunks in front of the trigger's chunk.
	 * 
	 * @param tg
	 *            : trigger
	 * @param bc
	 *            : basic chunk to search
	 * @return : a new Word built from the preposition chunk, null if none
	 */
	private Word findFrontPrep(Word tg, BasicChunk bc) {
		final int trgChunk = bc.getChunkPos(tg.pos);
		final int lowest = Math.max(0, trgChunk - 4);
		// walk leftwards; the loop is skipped when the trigger is in the first
		// chunk or its chunk was not found
		for (int k = trgChunk - 1; k >= lowest; k--) {
			Chunk before = bc.chunkList.get(k);
			if (prepSet.contains(before.txt)) {
				return new Word(before.txt, before.begin, 0);
			}
		}
		return null;
	}

	/**
	 * Makes the two theme lists disjoint. If both lists are equal the second
	 * list is emptied; otherwise each element of the second list is removed
	 * once from the first list.
	 * 
	 * @param l1
	 *            : first list (modified unless the lists are equal)
	 * @param l2
	 *            : second list (cleared only if the lists are equal)
	 */
	private void removeDuplicate(List l1, List l2) {
		if (l1.equals(l2)) {
			l2.clear();
			return;
		}
		for (Object shared : l2) {
			// remove(Object) is a no-op when the element is absent
			l1.remove(shared);
		}
	}

	/**
	 * Get position of the second prep that follows a PRO list.
	 * <p>
	 * The search starts behind the protein with the largest token position. That
	 * token position is handed to {@code getPrep(int, BasicChunk, Set, int)}
	 * unchanged, because that overload maps it to a chunk index itself; passing
	 * an already converted chunk index would convert it twice.
	 * 
	 * @param pro
	 *            : list of proteins
	 * @param bs
	 *            : basic chunk to search
	 * @param set
	 *            : allowed prep texts
	 * @param limit
	 *            : upper bound passed on to the int overload
	 * @return chunk position, -1 otherwise
	 */
	private int getPrep(List pro, BasicChunk bs, Set set,
			int limit) {
		int lastPos = 0;
		for (Word w : pro) {
			lastPos = Math.max(lastPos, w.pos);
		}
		// the last protein must belong to a chunk of bs
		if (bs.getChunkPos(lastPos) < 0) {
			return -1;
		}
		return getPrep(lastPos, bs, set, limit);
	}

	/**
	 * Returns the events of the first trigger of the chunk that has not failed,
	 * has an entry in {@code extractedMap} and whose chunk index does not exceed
	 * {@code dist}.
	 * 
	 * @param bc
	 *            : basic chunk whose triggers are checked
	 * @param dist
	 *            : largest accepted chunk index
	 * @return : events of that trigger, null if there is none
	 */
	private List findEvent(BasicChunk bc, int dist) {
		for (Word trg : bc.trgList) {
			int chunkIdx = bc.getChunkPos(trg.pos);
			boolean usable = !bc.failed.contains(trg);
			if (usable && chunkIdx <= dist && extractedMap.containsKey(trg)) {
				return extractedMap.get(trg);
			}
		}
		return null;
	}

	/**
	 * Finds the first chunk behind a token position whose text is one of the
	 * given preps.
	 * 
	 * @param pos
	 *            : token position; the search starts at the chunk after the one
	 *            this position maps to
	 * @param bs
	 *            : basic chunk to search
	 * @param prep2
	 *            : allowed prep texts
	 * @param limit
	 *            : largest chunk index to inspect
	 * @return: chunk index of the prep if found / -1 otherwise
	 */
	private int getPrep(int pos, BasicChunk bs, Set prep2, int limit) {
		final int last = Math.min(bs.chunkList.size() - 1, limit);
		int cursor = bs.getChunkPos(pos) + 1;
		while (cursor <= last) {
			Chunk candidate = bs.chunkList.get(cursor);
			if (prep2.contains(candidate.txt)) {
				return cursor;
			}
			cursor++;
		}
		return -1;
	}

	/**
	 * Runs noun-phrase extraction for every trigger of a basic chunk, once per
	 * chunk.
	 * 
	 * @param bs
	 *            : basic chunk to process
	 * @param preps
	 *            : passed through to the per-trigger extraction
	 */
	public void extractNP(BasicChunk bs, List preps) {
		// curr_tg is the index of the trigger being processed in the current
		// NP; it is reset together with the same-role set for every new NP
		curr_tg = 0;
		sameRole.clear();
		if (!bs.extracted) {
			for (; curr_tg < bs.trgList.size(); curr_tg++) {
				extractNP(curr_tg, bs, preps);
			}
			bs.extracted = true;
		}
	}

	/**
	 * Extracts events for the trigger at the given index of a basic chunk and
	 * dispatches on the trigger's event-type index: below 5 simple, 5 binding,
	 * above 5 regulation.
	 * <p>
	 * A non-empty result is stored in {@code extractedMap} and the trigger is
	 * marked as used; an empty result marks the trigger as failed in the chunk.
	 * 
	 * @param idx
	 *            : index into bs.trgList
	 * @param bs
	 *            : basic chunk
	 * @param preps
	 *            : passed through to the extraction methods
	 * @return : extracted events; null if idx is past the last trigger or the
	 *         trigger was already used
	 */
	public List extractNP(int idx, BasicChunk bs, List preps) {
		if (idx >= bs.trgList.size()) {
			return null;
		}
		Word tg = bs.trgList.get(idx);
		if (usedTrg.contains(tg) || tg.used) {
			return null;
		}
		// remember this trigger and its successor when they share a role
		int next = idx + 1;
		if (next < bs.trgList.size()) {
			Word follower = bs.trgList.get(next);
			if (bs.isSameRole(tg, follower, tokens)) {
				sameRole.add(tg);
				sameRole.add(follower);
			}
		}
		same_role = sameRole.contains(tg);
		int typeIdx = SenSimplifier.hashType.get(tg.type);
		List events;
		if (typeIdx < 5) {
			events = extractSimpleNP(tg, bs, preps);
		} else if (typeIdx == 5) {
			events = extractBindNP(tg, bs, preps);
		} else {
			events = extractRegNP(tg, bs, preps);
		}
		if (events == null) {
			return null;
		}
		if (events.isEmpty()) {
			bs.failed.add(tg);
		} else {
			extractedMap.put(tg, events);
			usedTrg.add(tg);
		}
		return events;
	}

	/**
	 * Collects the modifiers located within the span covered by two chunks.
	 * 
	 * @param c1
	 *            : first chunk
	 * @param c2
	 *            : second chunk
	 * @param ls
	 *            : list of modifiers
	 * @return modifiers whose position lies between the smaller begin and the
	 *         larger end of the two chunks (inclusive)
	 */
	public List getMod(Chunk c1, Chunk c2, List ls) {
		List inSpan = new ArrayList();
		final int from = Math.min(c1.begin, c2.begin);
		final int to = Math.max(c1.end, c2.end);
		for (Word mod : ls) {
			boolean outside = mod.pos < from || mod.pos > to;
			if (!outside) {
				inSpan.add(mod);
			}
		}
		return inSpan;
	}

	/**
	 * Collects the modifiers located within a position range.
	 * 
	 * @param pos1
	 *            : first position of the range (inclusive)
	 * @param pos2
	 *            : last position of the range (inclusive)
	 * @param ls
	 *            : list of modifiers
	 * @return: modifiers whose position lies in the range
	 */
	public List getMod(int pos1, int pos2, List ls) {
		List inRange = new ArrayList();
		for (Word mod : ls) {
			if (mod.pos < pos1 || mod.pos > pos2) {
				continue;
			}
			inRange.add(mod);
		}
		return inRange;
	}

	/**
	 * Collects the modifiers located inside the chunk that contains the
	 * trigger.
	 * 
	 * @param bs
	 *            : BasicChunk
	 * @param tg
	 *            : trg
	 * @param ls
	 *            : list of modifiers
	 * @return: modifiers whose position lies within the trigger's chunk; empty
	 *          if the trigger does not belong to any chunk of bs
	 */
	public List getMod(BasicChunk bs, Word tg, List ls) {
		List list = new ArrayList();
		Chunk c = bs.getChunk(tg.pos);
		if (c == null) {
			// getChunk yields null when the trigger is not part of this basic
			// chunk (other callers in this class check for that); there is no
			// span to search in that case
			return list;
		}
		int pos1 = c.begin, pos2 = c.end;
		for (Word w : ls) {
			if (w.pos >= pos1 && w.pos <= pos2) {
				list.add(w);
			}
		}
		return list;
	}

	/**
	 * Collects the modifiers located between the trigger's chunk and the chunk
	 * of the last protein. If the trigger is not part of the basic chunk, the
	 * chunk of the first protein is used instead of the trigger's chunk.
	 * 
	 * @param bs
	 *            : BasicChunk
	 * @param tg
	 *            : trg
	 * @param pros
	 *            : list of pro
	 * @param ls
	 *            : modifiers
	 * @return: modifiers within the span of the two chunks; empty if one of the
	 *          chunks cannot be resolved in bs
	 */
	public List getMod(BasicChunk bs, Word tg, List pros,
			List ls) {
		int max = 0;
		int min = 1000000;
		for (Word pr : pros) {
			max = Math.max(max, pr.pos);
			min = Math.min(min, pr.pos);
		}
		Chunk c1 = bs.getChunk(tg.pos);
		if (c1 == null) {
			c1 = bs.getChunk(min);
		}
		Chunk c2 = bs.getChunk(max);
		if (c1 == null || c2 == null) {
			// no span can be built (e.g. the proteins are not part of bs);
			// report "no modifiers" instead of failing on a null chunk
			return new ArrayList();
		}
		return getMod(c1, c2, ls);
	}

	/**
	 * Extracts single-theme events for a trigger that is the verb of a clause.
	 * <p>
	 * The trigger word is looked up in {@code dic}; the rule set is stored under
	 * {@code tg.word + idx + tg.pos_tag + "VP"}, where idx is the type index of
	 * the dictionary entry's default type (forced to 0 for keytype 2). Themes
	 * are searched in the object and/or subject of the clause, and one event is
	 * created per protein found. Created events are added to
	 * {@code extractedSet} and the proteins to {@code usedPro} of the chunk they
	 * were searched in.
	 * 
	 * @param tg
	 *            : trigger
	 * @param vc
	 *            : verb chunk (clause) with subject and object
	 * @param mods
	 *            : list of modifiers, used to determine the event type for some
	 *            dictionary entries
	 * @return : list of created events; empty if the trigger is unknown, has no
	 *         usable rule, no theme was found or no type could be determined
	 */
	public List extractSimpleVP(Word tg, VerbChunk vc, List mods) {
		List list = new ArrayList();
		String ev_type;
		KeyData kdt;
		int idx;
		List mod;
		RuleSets rule;
		BasicChunk bc;
		kdt = dic.get(tg.word);
		if (kdt != null) {
			ev_type = kdt.getDefault().type;
			idx = SenSimplifier.hashType.get(ev_type);
			if (kdt.keytype == 2) { // shared trigger, set to Gene_expression
				idx = 0;
			}
			String key = tg.word + idx + tg.pos_tag + "VP";
			rule = rset.get(key);
			if (rule == null || rule.isSkipped()) {
				return list;
			}
			List pros;
			if (vc.verb_type == 1 && tg.pos_tag.equals("VBN")) { // passive
				pros = null;
				bc = vc.object;
				// NOTE(review): when the object has no chunks, the subject is
				// not searched either and pros stays null - confirm intended
				if (bc.chunkList.size() > 0) {
					// give tg with prep higher priority
					String prep = getPrepVerb(bc, rule.prep);
					if (!prep.isEmpty()) {
						pros = findPro(bc, tg, prep, rule.dist1);
					}
					if (pros == null || pros.isEmpty()) {
						// nothing behind a prep in the object: fall back to the
						// subject
						bc = vc.subject;
						pros = findFrontPro(bc, tg, rule.fdist);
					}
				}
			} else { // pro in front of trg
				// pro follows trg
				// check prep -> has prep or empty
				// find pro in chunk with maximum distance
				bc = vc.object;
				pros = findPro(bc, tg, "", rule.dist1);
				// this tg might have passive form
				// NOTE(review): bc is still vc.object here, so the front
				// search also runs on the object - confirm intended
				if (pros.isEmpty() && tg.pos_tag.equals("VBN")
						&& rule.getFrontScore() > 0.1) {
					pros = findFrontPro(bc, tg, rule.fdist);
				}
			}
			if (pros != null && pros.size() > 0) {
				Word tg2 = null;
				if (kdt.keytype == 3 || (kdt.required && kdt.keytype == 1)) {
					// determine type from the modifiers around tg and pros
					mod = getMod(bc, tg, pros, mods);
					ev_type = kdt.getType(mod);
					if (ev_type == null) {
						return list;
					}
					// keytype is 3 or 1 in this branch, so this assigns 1
					tg.keytype = kdt.keytype == 2 ? 2 : 1;
				}
				tg.type = ev_type;
				if (tg.keytype == 20) {
					// shared trigger, clone one trg for Gene_expression
					tg2 = tg; // keep the original for Positive_regulation
					tg = new Word(tg.word, tg.pos, tg.loc);
					tg.locs = tg2.locs;
					tg.type = "Gene_expression";
					tg.keytype = 1;
					tg2.type = "Positive_regulation"; // change type into Pos_Reg
					tg2.keytype = 1;
				}
				tg.TID = getTrgID();
				// one event per theme protein
				for (Word w : pros) {
					PData p = new PData(tg, w, ev_type);
					p.PID = getEventID();
					extractedSet.add(p);
					list.add(p);
					bc.usedPro.add(w);
				}
				if (tg2 != null) {
					// put Gene_expression into extractedMap
					extractedMap.put(tg, list);
					// the original trigger gets one event per event created
					// above, with that event as its argument
					List list1 = new ArrayList();
					tg2.TID = getTrgID();
					for (PData pdt : list) {
						PData p = new PData(tg2, pdt, tg2.type);
						p.PID = getEventID();
						extractedSet.add(p);
						list1.add(p);
					}
					return list1;
				}
			}
		}

		return list;
	}

	/**
	 * Extracts binding events for a trigger that is the verb of a clause, using
	 * the rule set stored under {@code tg.word + 5 + tg.pos_tag + "VP"}.
	 * <p>
	 * Proteins are first searched in the object. If some are found and the rule
	 * statistics allow it, proteins of the subject become theme1 and the object
	 * proteins theme2. If the object yields nothing, the subject alone is
	 * searched. Created events are added to {@code extractedSet}.
	 * 
	 * @param tg
	 *            : binding trigger
	 * @param vc
	 *            : verb chunk (clause) with subject and object
	 * @param preps
	 *            : not used by this method
	 * @return : list of created events; empty if there is no usable rule or no
	 *         theme
	 */
	public List extractBindVP(Word tg, VerbChunk vc, List preps) {
		List events = new ArrayList();
		String ev_type = tg.type;
		RuleSets rule = rset.get(tg.word + 5 + tg.pos_tag + "VP");
		if (rule == null || rule.isSkipped()) {
			return events;
		}
		// find theme1 and theme2 for binding events
		String prep = getPrepVerb(vc.object, rule.prep);
		List theme1 = findPro(vc.object, tg, prep, rule.dist1);
		List theme2 = null;
		if (theme1.size() > 0 && (rule.count[5] * 1f) / rule.total > 0.1) {
			List front = findFrontPro(vc.subject, tg, rule.fdist);
			if (front.size() > 0) {
				// PRO1 - TG - PRO2: subject proteins lead, object proteins
				// follow
				theme2 = theme1;
				theme1 = front;
			}
		}
		if (theme1 == null || theme1.isEmpty()) {
			// NOTE(review): this front search is bounded by dist1 while the
			// one above uses fdist - confirm which distance is intended
			theme1 = findFrontPro(vc.subject, tg, rule.dist1);
		}
		if (theme1 == null || theme1.isEmpty()) {
			return events;
		}
		// forming events from the theme lists
		tg.TID = getTrgID();
		boolean paired = theme2 != null && theme2.size() > 0;
		if (paired) {
			for (Word pr1 : theme1) {
				for (Word pr2 : theme2) {
					PData p = new PData(tg, pr1, pr2, ev_type);
					p.PID = getEventID();
					extractedSet.add(p);
					events.add(p);
				}
			}
		} else {
			for (Word w : theme1) {
				PData p = new PData(tg, w, ev_type);
				p.PID = getEventID();
				extractedSet.add(p);
				events.add(p);
			}
		}
		return events;
	}

	/**
	 * Returns the in-chunk proteins of the chunk that contains the trigger, as
	 * reported by that chunk.
	 * 
	 * @param bs
	 *            : BasicChunk
	 * @param tg
	 *            : trigger
	 * @return : the chunk's in-chunk proteins for tg; a new empty list if the
	 *         trigger does not belong to any chunk of bs
	 */
	public List getInChunkPro(BasicChunk bs, Word tg) {
		Chunk owner = bs.getChunk(tg.pos);
		if (owner == null) {
			return new ArrayList();
		}
		return owner.getInChunkPro(tg, tokens);
	}

	/**
	 * Finds proteins behind a trigger (pattern TG - PRO).
	 * <p>
	 * If the trigger belongs to a chunk of {@code bs} (noun phrase), the search
	 * starts two chunks behind it when a prep is given; without a prep it first
	 * takes the proteins behind the trigger in its own chunk and continues with
	 * the next chunk. If the trigger is not part of {@code bs} (verb phrase),
	 * the search starts at the first chunk, unless the first protein of
	 * {@code bs} comes after its first trigger. Proteins already in
	 * {@code bs.usedPro} are skipped unless {@code same_role} is set. The scan
	 * stops at a VP chunk, at a PP chunk once a protein was found, or after a
	 * chunk with triggers (the first such chunk only clears {@code same_role}
	 * when it is set).
	 * 
	 * @param bs
	 *            : BasicChunk
	 * @param tg
	 *            : trig
	 * @param prep
	 *            : prep following the trigger, empty if there is none
	 * @param len
	 *            : number of chunks to search, capped by noun_len
	 * @return: list of pros
	 */
	public List findPro(BasicChunk bs, Word tg, String prep, int len) {
		List hits = new ArrayList();
		final int span = Math.min(len, noun_len);
		final Chunk home = bs.getChunk(tg.pos);
		final boolean inNounPhrase = home != null;
		int base = 0; // verb phrase: tg is not part of bs, start at chunk 0
		if (inNounPhrase) {
			base = bs.chunkList.indexOf(home);
		} else if (!bs.proList.isEmpty() && !bs.trgList.isEmpty()) {
			Word firstPro = bs.proList.get(0);
			Word firstTrg = bs.trgList.get(0);
			if (firstPro.pos > firstTrg.pos) {
				return hits;
			}
		}
		final int last = Math.min(span + base, bs.chunkList.size() - 1);
		final boolean hasPrep = !prep.isEmpty();
		int first = base;
		if (inNounPhrase) {
			if (hasPrep) {
				first = base + 2;
			} else {
				// proteins behind the trigger inside its own chunk (for tg with
				// JJ and VBZ tag)
				List sameChunk = home.getPro(tg, tokens);
				for (Word w : sameChunk) {
					if (w.pos > tg.pos
							&& (same_role || !bs.usedPro.contains(w))) {
						hits.add(w);
					}
				}
				first = base + 1; // TO DO: check whether to skip this chunk?
			}
		}
		for (int i = first; i <= last; i++) {
			Chunk c = bs.chunkList.get(i);
			if (c.type.equals("VP")) {
				break;
			}
			if (c.type.equals("PP") && !hits.isEmpty()) {
				break;
			}
			List inChunk = c.getPro(tokens);
			for (Word w : inChunk) {
				if (same_role || !bs.usedPro.contains(w)) {
					hits.add(w);
				}
			}
			if (c.trigs.size() > 0) {
				if (!same_role) {
					break;
				}
				same_role = false;
			}
		}
		return hits;
	}

	/**
	 * Determines the chunk index at which a search behind the trigger should
	 * stop: the chunk of the first trigger that is not located before
	 * {@code tg} and does not share its role.
	 * 
	 * @param bs
	 *            : BasicChunk
	 * @param tg
	 *            : trigger
	 * @return : chunk index of that trigger, or the last chunk index if there
	 *         is none
	 */
	private int getStop(BasicChunk bs, Word tg) {
		for (Word other : bs.trgList) {
			boolean skip = other.pos < tg.pos
					|| bs.isSameRole(tg, other, tokens);
			if (!skip) {
				return bs.getChunkPos(other.pos);
			}
		}
		return bs.chunkList.size() - 1;
	}

	/**
	 * Get Pro as cause for : PRO-xx TG PRO/EVENT
	 * 
	 * @param bs
	 *            : BasicChunk
	 * @param tg
	 *            : trigger
	 * @return the {@code cause} list of the chunk containing the trigger; a new
	 *         empty list if the trigger maps to no chunk
	 */
	public List findCausePro(BasicChunk bs, Word tg) {
		final int chunkIdx = bs.getChunkPos(tg.pos);
		if (chunkIdx < 0) {
			return new ArrayList();
		}
		Chunk holder = bs.chunkList.get(chunkIdx);
		return holder.cause;
	}

	/**
	 * Find pro starting at a given chunk index (for use with tg - prep1 -
	 * prep2).
	 * <p>
	 * Proteins already in {@code bs.usedPro} are skipped unless
	 * {@code same_role} is set. After a chunk has been processed the scan stops
	 * if the chunk is a VP, if it is a PP and a protein was found, or if it
	 * contains triggers (the first such chunk only clears {@code same_role}
	 * when it is set).
	 * 
	 * @param bs
	 *            : BasicChunk
	 * @param start
	 *            : chunk index to start at
	 * @param len
	 *            : number of chunks to look at behind start
	 * @return : list of pros if found
	 */
	public List findPro(BasicChunk bs, int start, int len) {
		List hits = new ArrayList();
		final int last = Math.min(len + start, bs.chunkList.size() - 1);
		int i = start;
		while (i <= last) {
			Chunk c = bs.chunkList.get(i);
			List inChunk = c.getPro(tokens);
			for (Word w : inChunk) {
				if (same_role || !bs.usedPro.contains(w)) {
					hits.add(w);
				}
			}
			boolean isVerb = c.type.equals("VP");
			boolean prepAfterHit = c.type.equals("PP") && !hits.isEmpty();
			if (isVerb || prepAfterHit) {
				break;
			}
			if (c.trigs.size() > 0) {
				if (!same_role) {
					break;
				}
				same_role = false;
			}
			i++;
		}
		return hits;
	}

	/**
	 * Finds proteins behind a trigger, bounded by the next trigger of the
	 * chunk.
	 * <p>
	 * With {@code start >= 0} (NP chunk) the proteins behind the trigger in the
	 * chunk at {@code start} are collected first; the search is bounded by the
	 * chunk of the trigger following {@code tg} (or the one after that, if the
	 * follower is in {@code sameRole}). With a negative {@code start} (VP
	 * chunk) the scan begins at chunk 0 and is bounded by the first trigger of
	 * {@code bs}. Proteins already in {@code bs.usedPro} are skipped unless
	 * {@code same_role} is set.
	 * 
	 * @param bs
	 *            : BasicChunk
	 * @param tg
	 *            : trigger
	 * @param start
	 *            : chunk index to start at, negative for the VP case
	 * @param len
	 *            : number of chunks to look at behind the start chunk
	 * @return : list of pros if found
	 */
	public List findRegPro(BasicChunk bs, Word tg, int start, int len) {
		List ls = new ArrayList();
		int pos = start;
		int tg_pos = -1; // token position of the trigger that bounds the search
		boolean found_pro = false;
		int limit = bs.chunkList.size() - 1;
		if (pos >= 0) { // NP chunk
			int pos1 = bs.trgList.indexOf(tg);
			if (pos1 < bs.trgList.size() - 1) {
				Word tg2 = bs.trgList.get(pos1 + 1);
				if (sameRole.contains(tg2)) {
					// the follower shares the role, so the trigger after it
					// (if any) bounds the search
					if (pos1 < bs.trgList.size() - 2) {
						tg_pos = bs.trgList.get(pos1 + 2).pos;
					}
				} else {
					tg_pos = tg2.pos;
				}
			}
			Chunk chunk = bs.chunkList.get(pos);
			// for tg with JJ and VBZ tag
			List temp = chunk.getPro(tg, tokens);
			for (Word w : temp) {
				// only proteins behind the trigger qualify; same_role merely
				// lifts the "already used" restriction, as in findPro
				if (w.pos > tg.pos && (!bs.usedPro.contains(w) || same_role)) {
					ls.add(w);
				}
			}
			if (ls.size() > 0) {
				found_pro = true;
			}
		} else { // VP chunk
			if (bs.trgList.size() > 0) {
				tg_pos = bs.trgList.get(0).pos;
			}
			pos = 0;
		}
		if (tg_pos > 0) { // has_tg
			limit = bs.getChunkPos(tg_pos);
		}
		int idx = pos;
		int stop = Math.min(len + pos, limit);// offset
		Chunk c;
		for (int i = idx; i <= stop; i++) {
			c = bs.chunkList.get(i);
			List temp = c.getPro(tokens);
			for (Word w : temp) {
				if (!bs.usedPro.contains(w) || same_role) {
					ls.add(w);
				}
			}
			if (!ls.isEmpty()) {
				found_pro = true;
			}
			if (c.type.equals("VP") || (c.type.equals("PP") && found_pro)) {
				break;
			} else if (c.trigs.size() > 0 && !same_role) {
				break;
			} else if (c.trigs.size() > 0 && same_role) {
				// the first chunk with triggers is tolerated once
				same_role = false;
			}
		}
		return ls;
	}

	/**
	 * Find pro in front of tg for noun phrase/subject patterns: PRO - TG
	 * <p>
	 * Subject case (the trigger is not part of {@code bs}): chunks are scanned
	 * from the first one to the right, unless the first protein of {@code bs}
	 * comes after its first trigger. Noun phrase case: the in-chunk proteins of
	 * the trigger's chunk are taken (or, if there are none, all proteins of
	 * that chunk), then the chunks to the left are scanned. Proteins already in
	 * {@code bs.usedPro} are skipped unless {@code check_pro} (both cases) or
	 * {@code same_role} (noun phrase case only) is set.
	 * 
	 * @param bs
	 *            : BasicChunk
	 * @param tg
	 *            : trig
	 * @param len
	 *            : number of chunks to search, capped by noun_len
	 * @return: list of pros
	 */
	public List findFrontPro(BasicChunk bs, Word tg, int len) {
		List hits = new ArrayList();
		final int span = Math.min(len, noun_len);
		final Chunk home = bs.getChunk(tg.pos);
		if (home == null) {
			// subject case: bs does not contain the trigger, so start from the
			// beginning and search to the right
			if (!bs.proList.isEmpty() && !bs.trgList.isEmpty()) {
				Word firstPro = bs.proList.get(0);
				Word firstTrg = bs.trgList.get(0);
				if (firstPro.pos > firstTrg.pos) {
					return hits;
				}
			}
			final int last = Math.min(span, bs.chunkList.size() - 1);
			for (int i = 0; i <= last; i++) {
				Chunk c = bs.chunkList.get(i);
				List inChunk = c.getPro(tokens);
				for (Word w : inChunk) {
					if (check_pro || !bs.usedPro.contains(w)) {
						hits.add(w);
					}
				}
				boolean boundary = c.type.equals("VP") || c.type.equals("PP");
				if (boundary && !hits.isEmpty()) {
					break;
				}
				if (c.pros.isEmpty() && c.trigs.size() > 0) {
					break;
				}
			}
			return hits;
		}
		// noun phrase case: start from the trigger's chunk, search to the left
		final int homeIdx = bs.chunkList.indexOf(home);
		final int lowest = Math.max(0, homeIdx - span);
		// pro in the same chunk with trg; fall back to all pros of the chunk
		List local = bs.chunkList.get(homeIdx).getInChunkPro(tg, tokens);
		if (local.isEmpty()) {
			local = home.getPro(tokens);
		}
		for (Word w : local) {
			if (check_pro || same_role || !bs.usedPro.contains(w)) {
				hits.add(w);
			}
		}
		for (int i = homeIdx - 1; i >= lowest; i--) {
			Chunk c = bs.chunkList.get(i);
			List front = c.getProFront();
			for (Word w : front) {
				if (check_pro || same_role || !bs.usedPro.contains(w)) {
					hits.add(w);
				}
			}
			if (c.type.equals("VP")) {
				break;
			}
			if (c.type.equals("PP") && !hits.isEmpty()) {
				break;
			}
			if (c.pros.isEmpty() && c.trigs.size() > 0) {
				break;
			}
		}
		return hits;
	}

	/**
	 * Get preposition that follows a given trg.
	 * <p>
	 * If the trigger lies in a chunk of {@code bs} other than the last one, the
	 * next chunk is checked, unless the next trigger sits directly behind this
	 * one in the same chunk. Otherwise the first chunk of {@code bs} is
	 * checked.
	 * 
	 * @param tg
	 *            : Tg
	 * @param bs
	 *            : basic chunk
	 * @param prep
	 *            : allowed preps
	 * @return: text of the prep if found, empty string otherwise
	 */
	private String getPrep(Word tg, BasicChunk bs, Set prep) {
		final int trgChunk = bs.getChunkPos(tg.pos);
		final int chunkCount = bs.chunkList.size();
		Chunk candidate;
		if (trgChunk >= 0 && trgChunk < chunkCount - 1) { // NP
			int trgIdx = bs.trgList.indexOf(tg);
			if (trgIdx < bs.trgList.size() - 1) {
				Word nextTrg = bs.trgList.get(trgIdx + 1);
				int nextChunk = bs.getChunkPos(nextTrg.pos);
				boolean adjacent = nextTrg.pos - tg.pos == 1;
				if (nextChunk == trgChunk && adjacent) {
					return "";
				}
			}
			candidate = bs.chunkList.get(trgChunk + 1);
		} else if (chunkCount > 0) { // VP
			candidate = bs.chunkList.get(0);
		} else {
			return "";
		}
		if (prep.contains(candidate.txt)) {
			return candidate.txt;
		}
		return "";
	}

	/**
	 * Get the preposition at the start of a basic chunk, skipping leading
	 * chunks whose type starts with "AD".
	 * 
	 * @param bs
	 *            : basic chunk
	 * @param prep
	 *            : allowed preps
	 * @return: text of the prep if found, empty string otherwise
	 */
	private String getPrepVerb(BasicChunk bs, Set prep) {
		for (Chunk c : bs.chunkList) {
			if (prep.contains(c.txt)) {
				return c.txt;
			}
			if (!c.type.startsWith("AD")) {
				// anything but an AD* chunk in front of a prep ends the search
				return "";
			}
		}
		return "";
	}

	/**
	 * Analyzes the document with the given id and resets the ID counters.
	 * 
	 * @param id
	 *            : document id passed to the analyzer
	 */
	public void initSentence(String id) {
		out = analyzer.analyze(id);
		// trigger IDs continue after the protein IDs
		final int proteinCount = analyzer.proList.size();
		trg_ID = proteinCount;
		// start a new event list
		evt_ID = 0;
	}

	/**
	 * Extracts events from one sentence of the analyzed document: the sentence
	 * is split into basic chunks and verb chunks, triggers are evaluated and
	 * events are extracted from noun phrases first and from clauses afterwards.
	 * Sentences without analyzer output are skipped.
	 * 
	 * @param i
	 *            : sentence index
	 */
	public void extractSentence(int i) {
		if (out[i] == null) {
			return;
		}
		curr_senID = i;
		tokens = analyzer.tokenList.get(i);
		List prep = analyzer.getPreps(tokens);
		List mods = analyzer.getModifier(tokens);
		if (debug) {
			System.out.print("----Text: ");
			System.out.println(analyzer.shortsen[i]);
			analyzer.printChunk(out[i]);
		}
		op.curr_text = analyzer.shortsen[i];
		tags = analyzer.tagList.get(i);
		// split sentence into chunks (BasicChunk/VerbChunk)
		op.analyzeChunk(out[i], tags, tokens);
		// stand-alone noun phrases: evaluate triggers, then extract
		for (BasicChunk np : op.bsList) {
			evaluateTrg(np, prep, mods, tokens, analyzer.detectedTrg[i]);
			extractNP(np, prep);
		}
		// clauses: subject, object, then the verb itself
		usedTrg.clear();
		for (VerbChunk clause : op.verbList) {
			cur_sub = null;
			curr_verb = clause.verb;
			curr_verb_type = clause.verb_type;
			check_pro = clause.subject_type == 1;
			evaluateTrg(clause.subject, prep, mods, tokens,
					analyzer.detectedTrg[i]);
			extractNP(clause.subject, prep);
			// the subject becomes available only after it has been processed
			cur_sub = clause.subject;
			evaluateTrg(clause.object, prep, mods, tokens,
					analyzer.detectedTrg[i]);
			extractNP(clause.object, prep);
			extractVP(clause, prep, mods);
		}
	}

	/**
	 * Prints all basic chunks and verb chunks of the current sentence, each
	 * followed by an empty line.
	 */
	public void printChunkList() {
		for (BasicChunk np : op.bsList) {
			np.printChunk();
			System.out.println("");
		}
		for (VerbChunk clause : op.verbList) {
			if (!clause.isQualify()) {
				// filtering of non-qualifying clauses is switched off; they are
				// printed as well
			}
			clause.print();
			System.out.println("");
		}
	}

	/**
	 * Extracts events for every trigger of a clause's verb, dispatching on the
	 * event-type index from the dictionary entry: below 5 simple, 5 binding,
	 * above 5 regulation. Non-empty results are stored in {@code extractedMap}.
	 * Triggers without a dictionary entry are skipped.
	 * 
	 * @param vc
	 *            : verb chunk (clause)
	 * @param preps
	 *            : passed to the binding and regulation extraction
	 * @param mods
	 *            : list of modifiers, passed to the simple extraction
	 */
	public void extractVP(VerbChunk vc, List preps, List mods) {
		for (int i = 0; i < vc.verb.trigs.size(); i++) {
			Word tg = vc.verb.trigs.get(i);
			int nextIdx = i + 1;
			if (nextIdx < vc.verb.trigs.size()) {
				Word tg2 = vc.verb.trigs.get(nextIdx);
				if (vc.verb.isSameRole(tg, tg2, tokens)) {
					sameRole.add(tg2);
				}
			}
			// a trigger marked by its predecessor shares that role; the mark
			// is consumed here
			same_role = sameRole.contains(tg);
			if (same_role) {
				sameRole.clear();
			}
			KeyData kdt = dic.get(tg.word);
			if (kdt == null) {
				continue;
			}
			int idx;
			if (kdt.keytype > 1) {
				String defaultType = kdt.getDefault().type;
				idx = SenSimplifier.hashType.get(defaultType);
				if (idx >= 5) {
					tg.type = defaultType;
				}
			} else {
				tg.type = kdt.type;
				idx = SenSimplifier.hashType.get(tg.type);
			}
			List events;
			if (idx < 5) {
				events = extractSimpleVP(tg, vc, mods);
			} else if (idx == 5) {
				events = extractBindVP(tg, vc, preps);
			} else {
				events = extractRegVP(tg, vc, preps);
			}
			if (events != null && !events.isEmpty()) {
				extractedMap.put(tg, events);
			}
		}
	}

	/**
	 * Advances the trigger counter and returns the new trigger ID.
	 * 
	 * @return : "T" followed by the incremented counter
	 */
	private String getTrgID() {
		trg_ID += 1;
		return "T" + trg_ID;
	}

	/**
	 * Advances the event counter and returns the new event ID.
	 * 
	 * @return : "E" followed by the incremented counter
	 */
	private String getEventID() {
		evt_ID += 1;
		return "E" + evt_ID;
	}

	/**
	 * Extracts events from all loaded documents and writes the result of each
	 * document to the output path. Old files in the output path are removed
	 * first.
	 * 
	 * @param outPath
	 *            : output location passed to removeOldFiles and writeResult
	 * @throws BioSemException
	 *             wrapping any exception raised during processing
	 */
	public void Test(String outPath) {
		try {
			init();
			removeOldFiles(outPath);
			List ids = simp.loadPMIDs();
			log.debug("Loading abstracts: {}", ids.size());
			for (String id : ids) {
				// remember the document being processed
				curr_pmid = id;
				extractEvents(id);
				writeResult(id, outPath);
			}
		} catch (Exception ex) {
			throw new BioSemException(ex);
		}
	}

	/**
	 * Extracts events from all loaded documents without writing results.
	 * <p>
	 * An exception aborts the processing of the remaining documents; it is not
	 * propagated, but reported at error level together with the id of the
	 * document that was being processed, so that the failure is not hidden in
	 * configurations that disable debug output.
	 */
	public void Test() {
		try {
			init();
			List ids = simp.loadPMIDs();
			log.debug("Loading abstracts: {}", ids.size());
			for (String id : ids) {
				// remember the document being processed for the error report
				curr_pmid = id;
				extractEvents(id);
			}
		} catch (Exception ex) {
			// the trailing Throwable is logged as the exception, not as a
			// message argument
			log.error("Caught exception, recognition of events is skipped for current document(s). Error occurred in document {}:", curr_pmid, ex);
		}
	}

	/**
	 * Extracts the events of one abstract/paragraph.
	 * <p>
	 * The results of a previous call are cleared, initSentence(id) is invoked,
	 * extractSentence is run for every index of analyzer.senpos and finally
	 * unifyEvents removes duplicate events. Nothing is returned; the outcome is
	 * held in extractedMap and extractedSet.
	 * 
	 * @param id
	 *            : PMID (abstract/paragraph)
	 */
	public void extractEvents(String id) {
		// start from a clean slate for this document
		extractedMap.clear();
		extractedSet.clear();
		initSentence(id);
		int sentence = 0;
		while (sentence < analyzer.senpos.length) {
			extractSentence(sentence);
			sentence++;
		}
		unifyEvents(id);
	}

	/**
	 * Removes duplicate events from extractedSet.
	 * <p>
	 * Two events are duplicates when they have the same event type and agree on
	 * trigger, both proteins and both nested events (each one either absent on
	 * both sides or carrying the same TID/PID). Within a group of duplicates the
	 * event encountered first is kept as the canonical one, and every
	 * pdata1/pdata2 reference to one of the others is redirected to it.
	 * Redirecting references can make further events identical, so the whole
	 * pass is repeated until it finds no duplicates.
	 * 
	 * @param id
	 *            ID of the current document; not used by this method
	 */
	private void unifyEvents(String id) {
		boolean duplicatesFound;
		do {
			duplicatesFound = false;
			List<PData> events = new ArrayList<>(extractedSet);
			Set<PData> duplicates = new HashSet<>();
			// canonical event -> later events that are equal to it
			Map<PData, List<PData>> eqClasses = new HashMap<>();

			for (int i = 0; i < events.size(); ++i) {
				PData canonical = events.get(i);
				// skip events that have already been declared to equal another
				// event occurring earlier in the list
				if (duplicates.contains(canonical)) {
					continue;
				}
				List<PData> equals = new ArrayList<>();
				for (int j = i + 1; j < events.size(); ++j) {
					PData candidate = events.get(j);
					if (isDuplicateEvent(canonical, candidate)) {
						equals.add(candidate);
						duplicates.add(candidate);
						duplicatesFound = true;
					}
				}
				eqClasses.put(canonical, equals);
			}

			// for each event, check whether it is referring to an element of an
			// equivalence class; if so, replace the reference with the
			// canonical element of that class
			for (Map.Entry<PData, List<PData>> eqClass : eqClasses.entrySet()) {
				PData canonical = eqClass.getKey();
				for (PData referrer : events) {
					for (PData duplicate : eqClass.getValue()) {
						if (referrer.pdata1 != null
								&& referrer.pdata1.PID.equals(duplicate.PID)) {
							referrer.pdata1 = canonical;
						}
						if (referrer.pdata2 != null
								&& referrer.pdata2.PID.equals(duplicate.PID)) {
							referrer.pdata2 = canonical;
						}
					}
				}
			}

			// create a new set of events, filtering out the duplicates
			Set<PData> uniqueEvents = new HashSet<>();
			for (PData event : events) {
				if (!duplicates.contains(event)) {
					uniqueEvents.add(event);
				}
			}
			extractedSet = uniqueEvents;
		} while (duplicatesFound);
	}

	/**
	 * Tells whether two events describe the same thing: same event type and,
	 * for trigger, pro1, pro2, pdata1 and pdata2, either both absent or both
	 * present with equal TID (trigger/proteins) or PID (nested events).
	 */
	private static boolean isDuplicateEvent(PData a, PData b) {
		if (!a.evt_type.equals(b.evt_type)) {
			return false;
		}
		// every argument must be present on both sides or absent on both
		if ((a.trg == null) != (b.trg == null)
				|| (a.pro1 == null) != (b.pro1 == null)
				|| (a.pro2 == null) != (b.pro2 == null)
				|| (a.pdata1 == null) != (b.pdata1 == null)
				|| (a.pdata2 == null) != (b.pdata2 == null)) {
			return false;
		}
		// from here on "a.x != null" implies "b.x != null"
		if (a.trg != null && !a.trg.TID.equals(b.trg.TID)) {
			return false;
		}
		if (a.pro1 != null && !a.pro1.TID.equals(b.pro1.TID)) {
			return false;
		}
		if (a.pro2 != null && !a.pro2.TID.equals(b.pro2.TID)) {
			return false;
		}
		if (a.pdata1 != null && !a.pdata1.PID.equals(b.pdata1.PID)) {
			return false;
		}
		if (a.pdata2 != null && !a.pdata2.PID.equals(b.pdata2.PID)) {
			return false;
		}
		return true;
	}

	/**
	 * Writes the triggers and events extracted for one document to the file
	 * {@code path + "/" + id + ".a2"}, creating the directory first if it does
	 * not exist.
	 * <p>
	 * All keys of extractedMap are written, followed by the events of
	 * extractedSet; an event whose getWriteID() has already been written is
	 * skipped. The writer is closed even when writing fails.
	 * 
	 * @param id
	 *            document ID, used as the file name
	 * @param path
	 *            output directory
	 * @throws RuntimeException
	 *             wrapping any exception raised while creating or writing the
	 *             file
	 */
	private void writeResult(String id, String path) {
		try {
			File dPath = new File(path);
			if (!dPath.exists()) {
				dPath.mkdirs();
			}
			writer = new FileWriter(path + "/" + id + ".a2");
			try {
				for (Word w : extractedMap.keySet()) {
					writer.append(w.toString());
				}
				writeEvent.clear();
				for (PData pdt : extractedSet) {
					String e_id = pdt.getWriteID();
					// add() reports false when this ID has been written before
					if (writeEvent.add(e_id)) {
						writer.append(pdt.toString());
					}
				}
			} finally {
				// release the file handle on failure as well as on success
				closeFile();
			}
		} catch (Exception ex) {
			System.out.println("Loi roi :-(");
			System.out.println(ex.getLocalizedMessage());
			throw new RuntimeException(ex);
		}
	}

	/**
	 * Remove old output files.
	 * <p>
	 * If filename denotes a directory, every entry directly inside it is
	 * deleted (no recursion). Any other path is left untouched. Problems are
	 * reported on standard output and never abort the caller.
	 * 
	 * @param filename
	 *            path of the output directory to empty
	 */
	private void removeOldFiles(String filename) {
		try {
			File dir = new File(filename);
			if (!dir.isDirectory()) {
				return;
			}
			File[] entries = dir.listFiles();
			if (entries == null) {
				// listFiles() returns null when the directory cannot be read
				System.out.println("Could not list files in " + filename);
				return;
			}
			for (File entry : entries) {
				if (!entry.delete()) {
					System.out.println("Could not delete " + entry.getPath());
				}
			}
		} catch (Exception e) {
			System.out.println(e.getLocalizedMessage());
		}
	}

	/**
	 * Closes the current writer, if there is one. A failure while closing is
	 * printed to standard output and otherwise ignored.
	 */
	private void closeFile() {
		if (writer == null) {
			return;
		}
		try {
			writer.close();
		} catch (Exception closeFailure) {
			System.out.println(closeFailure.getLocalizedMessage());
		}
	}

	/**
	 * Available after {@link #Test()} or {@link #Test(String)} has been run.
	 * Contains the words identified as being triggers.
	 * <p>
	 * The result is the key view of the internal trigger map, not a copy: it
	 * is emptied by the next call to extractEvents.
	 * 
	 * @return the trigger words of the document processed last
	 */
	public Set<Word> getExtractedTriggers() {
		return extractedMap.keySet();
	}

	/**
	 * Available after {@link #Test()} or {@link #Test(String)} has been run.
	 * Contains identified events mentions.
	 * <p>
	 * The internal set itself is returned, not a copy: it is cleared by the
	 * next call to extractEvents.
	 * 
	 * @return the events of the document processed last, duplicates removed
	 */
	public Set<PData> getExtractedEvents() {
		return extractedSet;
	}

	/**
	 * Command line entry point: extracts events from a test database using a
	 * trained database and writes the results to an output directory.
	 * 
	 * @param args
	 *            exactly three values: path of the trained database, path of
	 *            the test database, output path. With any other number of
	 *            arguments a usage message is printed and the JVM exits with
	 *            status 1.
	 */
	public static void main(String[] args) {
		if (args.length != 3) {
			System.out.println("Declare the trained database, the test database and the output path.");
			System.exit(1);
			return;
		}
		String sr_path = args[0];
		String dest_path = args[1];
		String outPath = args[2];

		DBUtils sr = new DBUtils();
		sr.openDB(sr_path);
		try {
			DBUtils dest = new DBUtils();
			dest.openDB(dest_path);
			try {
				EventExtraction xtr = new EventExtraction(sr, dest);
				xtr.Test(outPath);
			} finally {
				// the test database used to be left open
				dest.closeDB();
			}
		} finally {
			// close the trained database even if extraction failed
			sr.closeDB();
		}
	}

	/**
	 * Global variables for extracting events
	 */
	// NOTE(review): element/key types of out, skipTrg and usedTrg are not
	// visible from the code around these declarations - left raw, confirm
	// and parameterize.
	List[] out;
	String tokens[]; // tokens of current sentence
	String tags[]; // POS tags of current sentence
	Map skipTrg = new HashMap();
	// trigger word -> events extracted for that trigger
	Map<Word, List<PData>> extractedMap = new HashMap<Word, List<PData>>();
	// events of the current document; replaced by unifyEvents
	Set<PData> extractedSet = new HashSet<PData>();
	// write IDs of the events already written by writeResult
	Set<String> writeEvent = new HashSet<String>();
	Set usedTrg = new HashSet();
	int curr_tg;
	int trg_ID; // trigger ID
	int evt_ID; // Event_ID
	// filled from prepList in the static initializer below
	public static final Set<String> prepSet = new HashSet<String>();
	public static final String ccList[] = { "and", "or", "but", "as well as",
			"but not" };
	public static final String prepList[] = { "by", "after", "through", "via",
			"upon" };
	// filled from ccList in the static initializer below
	public static final Set<String> ccSet = new HashSet<String>();

	static {
		// the sets are declared before the arrays, so they are populated here
		// rather than in their own initializers
		prepSet.addAll(Arrays.asList(prepList));
		ccSet.addAll(Arrays.asList(ccList));
	}

	/**
	 * Hands the given document database to the sentence analyzer.
	 * <p>
	 * Only the analyzer is updated; the db field of this extractor is not
	 * touched by this method.
	 * 
	 * @param docDb
	 *            database passed on to analyzer.setDB
	 */
	public void setDb(DBUtils docDb) {
		this.analyzer.setDB(docDb);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy