de.unistuttgart.quadrama.core.SpeakerAssignmentRules Maven / Gradle / Ivy
The newest version!
package de.unistuttgart.quadrama.core;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.IOUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import de.unistuttgart.ims.drama.api.Drama;
import de.unistuttgart.ims.drama.api.Figure;
import de.unistuttgart.ims.drama.api.Speaker;
/**
* This component reads manually created assignment rules from a CSV file. The
* assignment rules must contain lines that look like this:
*
*
* DRAMAID\tSPEAKER\tFIGURE_REFERENCE
*
*
*
*
*
* DRAMAID
* The document id of the drama. If the textgrid reader has been used, this
* is the 6 to 7 character string similar to vndf.0
.
*
*
* SPEAKER
* The speaker entry within the drama text without punctuation. In TEI, this
* is the string enclosed in <speaker> tags.
*
*
* FIGURE_REFERENCE
* This is the entry from the dramatis personae table, in one of two
* variants:
*
* - up to the first punctuation string. E.g., the FIGURE_REFERENCE for
* "Romeo, Montagues Sohn" would be "Romeo"
* - The entire string covered by the {@link Figure} annotation.
*
*
*
*
* An example for such a speaker assignment file can be found
* online or in this package unter
* src/test/resources/SpakerAssignmentRules
.
*
* @author reiterns
*
*/
@TypeCapability(inputs = { "de.unistuttgart.quadrama.api.Figure", "de.unistuttgart.quadrama.api.Figure:Reference",
"de.unistuttgart.quadrama.api.Speaker" }, outputs = { "de.unistuttgart.quadrama.api.Speaker:Figure" })
public class SpeakerAssignmentRules extends JCasAnnotator_ImplBase {
public static final String PARAM_RULE_FILE_URL = "Rule File";
@ConfigurationParameter(name = PARAM_RULE_FILE_URL)
String ruleFileUrlString;
Map> ruleMap = new HashMap>();
@Override
public void initialize(final UimaContext context) throws ResourceInitializationException {
super.initialize(context);
CSVParser p = null;
URL ruleFileUrl = null;
try {
ruleFileUrl = new URL(ruleFileUrlString);
} catch (MalformedURLException e1) {
throw new ResourceInitializationException(e1);
}
try {
p = new CSVParser(new InputStreamReader(ruleFileUrl.openStream()), CSVFormat.TDF.withHeader((String) null));
Iterator iter = p.iterator();
while (iter.hasNext()) {
CSVRecord rec = iter.next();
if (!ruleMap.containsKey(rec.get(0)))
ruleMap.put(rec.get(0), new HashMap());
ruleMap.get(rec.get(0)).put(rec.get(1), rec.get(2));
}
} catch (Exception e) {
throw new ResourceInitializationException(e);
} finally {
IOUtils.closeQuietly(p);
}
}
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
Map referenceMap = new HashMap();
for (Figure figure : JCasUtil.select(jcas, Figure.class)) {
referenceMap.put(figure.getReference(), figure);
referenceMap.put(figure.getCoveredText(), figure);
}
String tgId = JCasUtil.selectSingle(jcas, Drama.class).getDocumentId();
if (ruleMap.containsKey(tgId)) {
Map myMap = ruleMap.get(tgId);
for (Speaker speaker : JCasUtil.select(jcas, Speaker.class)) {
if (myMap.containsKey(speaker.getCoveredText())) {
speaker.setFigure(referenceMap.get(myMap.get(speaker.getCoveredText())));
}
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy