All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.unistuttgart.quadrama.core.ReadDlinaMetadata Maven / Gradle / Ivy

The newest version!
package de.unistuttgart.quadrama.core;

import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.util.HashMap;
import java.util.Map;

import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

import de.unistuttgart.ims.drama.api.Date;
import de.unistuttgart.ims.drama.api.DatePremiere;
import de.unistuttgart.ims.drama.api.DatePrint;
import de.unistuttgart.ims.drama.api.DateWritten;
import de.unistuttgart.ims.drama.api.Drama;
import de.unistuttgart.ims.drama.util.DramaUtil;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Elements;

/**
 * Annotator that copies date metadata provided by the dlina project into the
 * CAS.
 *
 * To use it, clone the dlina repository and pass the path of its
 * <code>data/zwischenformat</code> directory as {@link #PARAM_DLINA_DIRECTORY}.
 * During initialization every <code>*.xml</code> file in that directory is
 * parsed and indexed by the id derived from its <code>header/source</code>
 * element. During processing, the <code>date</code> elements of the document
 * matching the drama's document id are copied onto the {@link Drama} annotation
 * and into {@link DatePrint}, {@link DateWritten} and {@link DatePremiere}
 * feature structures.
 *
 * The component has been tested with dlina revision b5c5650. A clone of the
 * dlina project exists in case the original project gets removed.
 *
 * @author reiterns
 *
 */
@TypeCapability(inputs = { "de.unistuttgart.ims.drama.api.Drama" }, outputs = {
		"de.unistuttgart.ims.drama.api.DateWritten", "de.unistuttgart.ims.drama.api.DatePremiere",
		"de.unistuttgart.ims.drama.api.DatePrint" })
public class ReadDlinaMetadata extends JCasAnnotator_ImplBase {

	/** Name of the configuration parameter holding the dlina directory path. */
	public static final String PARAM_DLINA_DIRECTORY = "Dlina Directory";

	/**
	 * Number of leading characters stripped from the source URL to obtain the
	 * document id. NOTE(review): presumably the length of a fixed repository URL
	 * prefix used in the dlina files -- confirm against the data.
	 */
	private static final int SOURCE_ID_OFFSET = 56;

	/** Path of the directory containing the dlina XML files. */
	@ConfigurationParameter(name = PARAM_DLINA_DIRECTORY)
	String dlinaDirectoryName;

	/** The directory denoted by {@link #dlinaDirectoryName}. */
	File dlinaDirectory;

	/** Parsed dlina documents, keyed by the id derived from their source URL. */
	Map<String, Document> fileIndex = new HashMap<String, Document>();

	/** XML namespace of the dlina elements that are read. */
	String namespaceUri = "http://lina.digital";

	/**
	 * Parses all <code>*.xml</code> files in the configured directory and indexes
	 * them by source id.
	 *
	 * @param context
	 *            the UIMA context, passed on to the super class
	 * @throws ResourceInitializationException
	 *             if the configured path is not a directory, the directory
	 *             cannot be listed, or any file cannot be read, parsed or
	 *             indexed
	 */
	@Override
	public void initialize(final UimaContext context) throws ResourceInitializationException {
		super.initialize(context);
		dlinaDirectory = new File(dlinaDirectoryName);
		if (!dlinaDirectory.isDirectory())
			throw new ResourceInitializationException(
					new IllegalArgumentException("Not a directory: " + dlinaDirectoryName));

		File[] xmlFiles = dlinaDirectory.listFiles(new FilenameFilter() {
			@Override
			public boolean accept(File dir, String name) {
				return name.endsWith(".xml");
			}
		});
		// listFiles returns null (rather than an empty array) on an I/O error
		if (xmlFiles == null)
			throw new ResourceInitializationException(
					new IllegalStateException("Cannot list directory: " + dlinaDirectoryName));

		Builder parser = new Builder();
		for (File f : xmlFiles) {
			// try-with-resources so the file handle is released after parsing
			try (FileInputStream in = new FileInputStream(f)) {
				Document doc = parser.build(in);
				String sourceUrl = doc.getRootElement().getFirstChildElement("header", namespaceUri)
						.getFirstChildElement("source", namespaceUri).getValue();
				String sourceId = sourceUrl.substring(SOURCE_ID_OFFSET).replace("/data", "");
				fileIndex.put(sourceId, doc);
			} catch (Exception e) {
				// a missing header/source element or a too-short URL ends up here, too
				throw new ResourceInitializationException(e);
			}
		}
	}

	/**
	 * Looks up the dlina document for the drama in the given CAS and copies its
	 * dates. Does nothing if no dlina document is indexed under the drama's
	 * document id. Only <code>date</code> elements that carry a <code>when</code>
	 * attribute and whose <code>type</code> is <code>print</code>,
	 * <code>written</code> or <code>premiere</code> are used; all others are
	 * skipped.
	 *
	 * @param jcas
	 *            the CAS to process; a single {@link Drama} is selected from it
	 * @throws AnalysisEngineProcessException
	 *             declared by the super class
	 * @throws NumberFormatException
	 *             if the <code>when</code> value of a used date element is not
	 *             an integer
	 */
	@Override
	public void process(JCas jcas) throws AnalysisEngineProcessException {
		Drama d = JCasUtil.selectSingle(jcas, Drama.class);
		Document doc = fileIndex.get(d.getDocumentId());
		if (doc == null)
			return;

		Elements dateElements = doc.getRootElement().getFirstChildElement("header", namespaceUri)
				.getChildElements("date", namespaceUri);
		for (int i = 0; i < dateElements.size(); i++) {
			Element dateElement = dateElements.get(i);
			String whenAttVal = dateElement.getAttributeValue("when");
			if (whenAttVal == null)
				continue;

			// constant-first equals: a date element without a type attribute is skipped
			String type = dateElement.getAttributeValue("type");
			Date date = null;
			int year = 0;
			if ("print".equals(type)) {
				year = Integer.parseInt(whenAttVal);
				d.setDatePrinted(year);
				date = DramaUtil.createFeatureStructure(jcas, DatePrint.class);
			} else if ("written".equals(type)) {
				year = Integer.parseInt(whenAttVal);
				d.setDateWritten(year);
				date = DramaUtil.createFeatureStructure(jcas, DateWritten.class);
			} else if ("premiere".equals(type)) {
				year = Integer.parseInt(whenAttVal);
				d.setDatePremiere(year);
				date = DramaUtil.createFeatureStructure(jcas, DatePremiere.class);
			}
			if (date != null) {
				date.setYear(year);
				date.setSource("dlina");
			}
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy