All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.unistuttgart.quadrama.core.SpeakerAssignmentRules Maven / Gradle / Ivy

The newest version!
package de.unistuttgart.quadrama.core;

import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.IOUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import de.unistuttgart.ims.drama.api.Drama;
import de.unistuttgart.ims.drama.api.Figure;
import de.unistuttgart.ims.drama.api.Speaker;

/**
 * This component reads manually created assignment rules from a CSV file. The
 * assignment rules must contain lines that look like this:
 * 

* * DRAMAID\tSPEAKER\tFIGURE_REFERENCE * * *

* * * * * * * * * * * * * *
DRAMAIDThe document id of the drama. If the textgrid reader has been used, this * is the 6 to 7 character string similar to vndf.0.
SPEAKERThe speaker entry within the drama text without punctuation. In TEI, this * is the string enclosed in <speaker> tags.
FIGURE_REFERENCEThis is the entry from the dramatis personae table, in one of two * variants: *
    *
  1. up to the first punctuation string. E.g., the FIGURE_REFERENCE for * "Romeo, Montagues Sohn" would be "Romeo"
  2. *
  3. The entire string covered by the {@link Figure} annotation.
  4. *
*
* An example for such a speaker assignment file can be found * online or in this package unter * src/test/resources/SpakerAssignmentRules. * * @author reiterns * */ @TypeCapability(inputs = { "de.unistuttgart.quadrama.api.Figure", "de.unistuttgart.quadrama.api.Figure:Reference", "de.unistuttgart.quadrama.api.Speaker" }, outputs = { "de.unistuttgart.quadrama.api.Speaker:Figure" }) public class SpeakerAssignmentRules extends JCasAnnotator_ImplBase { public static final String PARAM_RULE_FILE_URL = "Rule File"; @ConfigurationParameter(name = PARAM_RULE_FILE_URL) String ruleFileUrlString; Map> ruleMap = new HashMap>(); @Override public void initialize(final UimaContext context) throws ResourceInitializationException { super.initialize(context); CSVParser p = null; URL ruleFileUrl = null; try { ruleFileUrl = new URL(ruleFileUrlString); } catch (MalformedURLException e1) { throw new ResourceInitializationException(e1); } try { p = new CSVParser(new InputStreamReader(ruleFileUrl.openStream()), CSVFormat.TDF.withHeader((String) null)); Iterator iter = p.iterator(); while (iter.hasNext()) { CSVRecord rec = iter.next(); if (!ruleMap.containsKey(rec.get(0))) ruleMap.put(rec.get(0), new HashMap()); ruleMap.get(rec.get(0)).put(rec.get(1), rec.get(2)); } } catch (Exception e) { throw new ResourceInitializationException(e); } finally { IOUtils.closeQuietly(p); } } @Override public void process(JCas jcas) throws AnalysisEngineProcessException { Map referenceMap = new HashMap(); for (Figure figure : JCasUtil.select(jcas, Figure.class)) { referenceMap.put(figure.getReference(), figure); referenceMap.put(figure.getCoveredText(), figure); } String tgId = JCasUtil.selectSingle(jcas, Drama.class).getDocumentId(); if (ruleMap.containsKey(tgId)) { Map myMap = ruleMap.get(tgId); for (Speaker speaker : JCasUtil.select(jcas, Speaker.class)) { if (myMap.containsKey(speaker.getCoveredText())) { speaker.setFigure(referenceMap.get(myMap.get(speaker.getCoveredText()))); } } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy