All downloads are FREE. The search and download features use the official Maven repository.

de.citec.scie.analysis.AnalysisAnnotator Maven / Gradle / Ivy

Go to download

Contains the SCIE main application and the CLI interface. This project integrates the named entity recognition (NER), the PDF import and the classification and interfaces with the UIMA framework. The command line interface can be used to produce a set of UIMA XCAS files.

The newest version!
/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package de.citec.scie.analysis;

import de.citec.scie.annotators.AnnotationCounter;
import de.citec.scie.descriptors.Aggregated;
import de.citec.scie.descriptors.Drug;
import de.citec.scie.descriptors.Error;
import de.citec.scie.descriptors.Injury;
import de.citec.scie.descriptors.InjuryType;
import de.citec.scie.descriptors.InvestigationMethods;
import de.citec.scie.descriptors.LabGroup;
import de.citec.scie.descriptors.Organism;
import de.citec.scie.descriptors.Result;
import de.citec.scie.descriptors.Treatment;
import de.citec.scie.util.CachedJCasUtil;
import java.util.Collection;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;

/**
 * Annotator that records which kinds of information appear to be missing from
 * a document.
 *
 * <p>For every type in {@code CORES} it checks whether at least one annotation
 * of that type exists. For every type in {@code RELATIONS} it checks whether
 * any annotation of that type has a probability of at least
 * {@code THRESHOLD * DISTANCE}. The simple class names of all types failing
 * their check are written, each followed by {@code ';'}, into a single
 * {@link Error} annotation covering the whole document text.
 */
public class AnalysisAnnotator extends org.apache.uima.fit.component.JCasAnnotator_ImplBase {

	// TODO: replace this with the learned value once integration is done
	private static final double THRESHOLD = 0.2;
	// A relation counts as found if some probability reaches THRESHOLD * DISTANCE
	private static final double DISTANCE = 1.1;
	// Relation types to check.
	// NOTE(review): the elements stay raw because the signature of
	// CachedJCasUtil.select is not visible here -- tighten once confirmed.
	@SuppressWarnings("rawtypes")
	private static final Class[] RELATIONS = {Treatment.class, Injury.class, Result.class, LabGroup.class};
	// Core types to check (same raw-type caveat as RELATIONS)
	@SuppressWarnings("rawtypes")
	private static final Class[] CORES = {Drug.class, InjuryType.class, InvestigationMethods.class, Organism.class};

	/**
	 * Adds one {@link Error} annotation to the given CAS that lists the core
	 * types without any annotation and the relation types without a
	 * sufficiently probable annotation.
	 *
	 * @param jcas the CAS to inspect and to add the annotation to
	 * @throws AnalysisEngineProcessException declared by the overridden method;
	 *         not thrown directly by this implementation
	 */
	@Override
	public void process(JCas jcas) throws AnalysisEngineProcessException {
		final CachedJCasUtil jcasUtil = CachedJCasUtil.getInstance(jcas);

		// The document text may be unset; fall back to an empty span instead
		// of failing with a NullPointerException.
		final String text = jcas.getDocumentText();
		final int end = (text == null) ? 0 : text.length();

		final Error error = new Error(jcas, 0, end);
		error.setDatabases(findMissingCores(jcasUtil));
		error.setRelations(findWeakRelations(jcasUtil));
		error.setAnnotationId(AnnotationCounter.getUniqueId());
		error.addToIndexes();
	}

	/**
	 * Collects the simple names of all core types for which no annotation was
	 * selected, each followed by {@code ';'}.
	 */
	@SuppressWarnings({"rawtypes", "unchecked"})
	private static String findMissingCores(CachedJCasUtil jcasUtil) {
		final StringBuilder names = new StringBuilder();
		for (Class core : CORES) {
			final Collection<?> found = jcasUtil.select(core);
			if (found.isEmpty()) {
				names.append(core.getSimpleName()).append(';');
			}
		}
		return names.toString();
	}

	/**
	 * Collects the simple names of all relation types whose highest annotation
	 * probability is below {@code THRESHOLD * DISTANCE}, each followed by
	 * {@code ';'}. A relation type without any annotation has a maximum of 0
	 * and is therefore reported as well.
	 */
	@SuppressWarnings({"rawtypes", "unchecked"})
	private static String findWeakRelations(CachedJCasUtil jcasUtil) {
		final double minimum = THRESHOLD * DISTANCE;
		final StringBuilder names = new StringBuilder();
		for (Class relation : RELATIONS) {
			// The element type is needed for the typed loop below; the
			// assignment is unchecked because the class token is raw.
			final Collection<Aggregated> candidates = jcasUtil.select(relation);
			double maxProb = 0;
			for (Aggregated candidate : candidates) {
				final double probability = candidate.getProbability();
				if (probability > maxProb) {
					maxProb = probability;
				}
			}
			if (maxProb < minimum) {
				names.append(relation.getSimpleName()).append(';');
			}
		}
		return names.toString();
	}

}