All Downloads are FREE. Search and download functionalities are using the official Maven repository.

application.BioSemApplication Maven / Gradle / Ivy

/**
 * BioSemApplication.java
 *
 * Author: matthies
 *
 * Current version: //TODO insert current version number
 * Since version:   //TODO insert version number of first appearance of this class
 *
 * Creation date: Jun 4, 2015
 *
 * This class is a convenient wrapper for the Event Extraction System of Bui & Sloot
 * see:	Bui, Q.-C., & Sloot, P. M. A. (2012). A robust approach to extract biomedical
 * 		events from literature. Bioinformatics (Oxford, England), 28(20), 2654–2661.
 *		[http://www.ncbi.nlm.nih.gov/pubmed/22859502]
 **/

package application;

import corpora.DataLoader;
import org.apache.commons.cli.*;
import relations.EventExtraction;
import relations.RuleLearner;
import relations.TriggerLearner;
import utils.DBUtils;

import java.util.ArrayList;
//import jargs.gnu.CmdLineParser;
//import jargs.gnu.CmdLineParser.IllegalOptionValueException;
//import jargs.gnu.CmdLineParser.UnknownOptionException;

public class BioSemApplication {
	static final String PATH_DELIM = System.getProperty("file.separator");
	static final String NEWLINE = System.getProperty("line.separator");
	static final ArrayList SET_NAMES = new ArrayList() {
		{
			add("train");
			add("test");
			add("devel");
		}
	};

	private static String input_root;
	private static String db_root;
	private static String modus;
	private static String set;
	private static String destination;

	private void readCommandLineArgs(String[] args) {
		CommandLineParser cmd = new BasicParser();
		CommandLine paramParser = null;
		Options options = new Options();
		options.addOption("i", "input", true, "input folder");
		options.addOption("o", "output", true, "database folder");
		options.addOption("m", "modus", true, "step of BioSEM Extractor");
		options.addOption("s", "set", true, "what set");
		options.addOption("d", "destination", true, "destination");
		options.addOption("x", "mix", false, "whether to use mix set");
		// final CmdLineParser paramParser = new CmdLineParser();
		// final CmdLineParser.Option textFolderOption =
		// paramParser.addStringOption(
		// 'i', "input");
		// final CmdLineParser.Option dbFolderOption =
		// paramParser.addStringOption(
		// 'o', "output");
		// final CmdLineParser.Option modusOption = paramParser.addStringOption(
		// 'm', "modus");
		// final CmdLineParser.Option setOption = paramParser.addStringOption(
		// 's', "set");
		// final CmdLineParser.Option destOption = paramParser.addStringOption(
		// 'd', "destination");
		// final CmdLineParser.Option mixOption = paramParser.addStringOption(
		// 'x', "mix");

		try {
			paramParser = cmd.parse(options, args);
			Object param;

			/** ----- Modus ----- **/
			param = paramParser.getOptionValue("m");
			if (param != null) {
				modus = (String) param;
				if (!(getModus().equalsIgnoreCase("complete") || getModus().equalsIgnoreCase("data-preparation")
						|| getModus().equalsIgnoreCase("trigger-learn") || getModus().equalsIgnoreCase("pattern-learn")
						|| getModus().equalsIgnoreCase("event-extraction"))) {
					System.out.println("Modus \"" + getModus() + "\" is not known. Aborting.");
					System.exit(-1);
				}
				System.out.println("Modus: " + getModus());
			} else {
				modus = "complete";
				System.out.println("No modus specified; using default." + NEWLINE + "Modus: \"complete\".");
			}

			/** ----- Input Folder ----- **/
			param = paramParser.getOptionValue("i");
			if (param != null) {
				input_root = (String) param;
				System.out.println("Input: " + getInput_root());
			} else {
				System.out.println("Specify input directory root (-i). Aborting.");
				System.exit(-1);
			}

			/** ----- DataBase Folder ----- **/
			param = paramParser.getOptionValue("o");
			if (param != null) {
				db_root = (String) param;
				System.out.println("Database-Root: " + getDb_root());
			} else {
				System.out.println("Specify output database directory (-o). Aborting.");
			}

			/** ----- Data Set ----- **/
			param = paramParser.getOptionValue("s");
			if (param != null) {
				set = (String) param;
				if (!(getSet().equalsIgnoreCase("train") || getSet().equalsIgnoreCase("test")
						|| getSet().equalsIgnoreCase("devel") || getSet().equalsIgnoreCase("mix"))) {
					System.out.println(NEWLINE + "A specific set needs to be declared (-s). \"" + getSet()
							+ "\" is no valid set; choose: \"train\",\"test\", \"devel\" or \"mix\". Aborting.");
					System.exit(-1);
				}
				if (getSet().equalsIgnoreCase("mix")) {
					SET_NAMES.add("mix");
					System.out.println(
							NEWLINE + "\"mix\" option was not set but declared in set-option; added automatically.");
				}
			} else {
				if (!getModus().equalsIgnoreCase("complete")) {
					System.out.println(NEWLINE + "A specific set needs to be declared (-s);"
							+ " choose: \"train\",\"test\", \"devel\" or \"mix\". Aborting.");
					System.exit(-1);
				}
			}

			/** ----- Mix Train and Devel ----- **/
			param = paramParser.getOptionValue("x");
			if (param != null) {
				String mix = (String) param;
				if (mix.equalsIgnoreCase("true")) {
					SET_NAMES.add("mix");
				}
			}

			/** ----- Destination ----- **/
			param = paramParser.getOptionValue("d");
			if (param != null) {
				destination = (String) param;
				if (!SET_NAMES.contains(getDestination().toLowerCase())) {
					System.out.println(
							NEWLINE + "\"" + getDestination() + "\" is no valid destination; choose: \"train\","
									+ " \"test\", \"devel\" or \"mix\" (if mix option is set to \"true\"). Aborting.");
					System.exit(-1);
				} else if (getDestination().equalsIgnoreCase("mix")) {
					SET_NAMES.add("mix");
					System.out.println(NEWLINE
							+ "\"mix\" option was not set but declared in destination-option; added automatically.");
				}
			} else {
				if (!getModus().equalsIgnoreCase("data-preparation")) {
					System.out.println(NEWLINE + "For modes other than \"data-preparation\" there needs to be"
							+ " a destination database declared (-d). Aborting.");
					System.exit(-1);
				}
			}
		} catch (ParseException e) {
			System.out.println(e);
		}
	}

	private void loadData(String set) {
		DataLoader data = new DataLoader();
		boolean a2 = true;
		if (set.equalsIgnoreCase("test")) {
			a2 = false;
		}
		data.Txt2Db(getInput_root() + PATH_DELIM + set, getDb_root() + PATH_DELIM + set, a2);
	}

	private void learnTrigger(String src, String dest) {
		// source database
		String dbsr = getDb_root() + PATH_DELIM + src;
		DBUtils dbsrc = new DBUtils();
		dbsrc.openDB(dbsr);

		// destination database
		String dbname = getDb_root() + PATH_DELIM + dest;
		DBUtils dbdest = new DBUtils();
		dbdest.openDB(dbname);

		TriggerLearner learner = new TriggerLearner(dbsrc, dbdest); // store all
																	// triggers
																	// (both
																	// train and
																	// dev) into
																	// train DB
		learner.createTriggers();
		dbdest.shutdownDB();
		if (!src.equals(dest))
			dbsrc.closeDB();
	}

	private void learnRules(String src, String dest) {
		// source database
		String dbsr = getDb_root() + PATH_DELIM + src;
		DBUtils dbsrc = new DBUtils();
		dbsrc.openDB(dbsr);

		// destination database
		String dbname = getDb_root() + PATH_DELIM + dest;
		DBUtils dbdest = new DBUtils();
		dbdest.openDB(dbname);

		RuleLearner learner = new RuleLearner(dbsrc, dbdest);
		learner.LearnData();
		dbsrc.shutdownDB();
		if (!src.equals(dest))
			dbdest.shutdownDB();
	}

	private void extractEvents(String src, String dest) {
		// source database
		String sr_path = getDb_root() + PATH_DELIM + src;
		DBUtils sr = new DBUtils();
		sr.openDB(sr_path);

		// database on which to extract events
		String dest_path = getDb_root() + PATH_DELIM + dest;
		DBUtils des = new DBUtils();
		des.openDB(dest_path);

		EventExtraction xtr = new EventExtraction(sr, des);
		xtr.Test(getDb_root() + PATH_DELIM + "results" + PATH_DELIM + getDestination());
		sr.closeDB();
		if (!src.equals(dest))
			des.closeDB();
	}

	public static void main(String[] args) {
		BioSemApplication app = new BioSemApplication();
		app.readCommandLineArgs(args);

		/** ----- Loading Data ----- **/
		if (modus.equalsIgnoreCase("data-preparation")) {
			app.loadData(set);
		}

		/** ----- Learning Trigger ----- **/
		if (modus.equalsIgnoreCase("trigger-learn")) {
			app.learnTrigger(set, destination);
		}

		/** ----- Learning Pattern ----- **/
		if (modus.equalsIgnoreCase("pattern-learn")) {
			app.learnRules(set, destination);
		}

		/** ----- Extracting Events ----- **/
		if (modus.equalsIgnoreCase("event-extraction")) {
			app.extractEvents(set, destination);
		}

		/** ----- Complete Run ----- **/
		if (modus.equalsIgnoreCase("complete")) {
			for (String s : SET_NAMES) {
				if (s.equalsIgnoreCase(app.getSet()) || s.equalsIgnoreCase(app.getDestination())) {
					System.out.println(NEWLINE + "[Loading " + s + "-set]");
					app.loadData(s);
				}
			}
			System.out.println(NEWLINE);
			app.learnTrigger(set, set);
			System.out.println(NEWLINE);
			app.learnRules(set, set);
			System.out.println(NEWLINE);
			app.extractEvents(set, destination);
		}

		System.out.println(NEWLINE + "DONE PROCESSING");
	}

	public String getInput_root() {
		return input_root;
	}

	public String getDb_root() {
		return db_root;
	}

	public String getModus() {
		return modus;
	}

	public String getSet() {
		return set;
	}

	public String getDestination() {
		return destination;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy