All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.scie.PDFImporter Maven / Gradle / Ivy

/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package de.citec.scie;

import de.citec.scie.annotators.structure.StructureAnnotator;
import de.citec.scie.pdf.PDFStructuredTextExtractor;
import de.citec.scie.pdf.structure.Document;
import java.io.IOException;
import java.io.InputStream;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;

/**
 * This class is a wrapper for the SCIE PDFStructuredTextExtractor.
 *
 * @author Benjamin Paassen - [email protected]
 *
 */
public class PDFImporter {

	/**
	 * Imports a PDF from the given stream into the given CAS.
	 *
	 * The stream is handed to {@link PDFStructuredTextExtractor}, the
	 * resulting document is rendered to text and stored as the document
	 * text of {@code target}, and a {@link StructureAnnotator} built from
	 * the same document is then run on {@code target}.
	 *
	 * @param input stream that is passed to the PDF extractor
	 * @param target CAS that receives the document text and on which the
	 * structure annotator is run
	 * @throws IOException if the structure annotator fails with an
	 * {@link AnalysisEngineProcessException} (kept as the cause);
	 * presumably also raised by the extraction step itself -- confirm
	 * against PDFStructuredTextExtractor
	 */
	public static void importPdf(InputStream input, JCas target) throws IOException {
		// Let the bundled extractor turn the raw PDF into a structured document.
		final Document parsedPdf = PDFStructuredTextExtractor.importAsDocument(input);

		// Render the document to text, starting at index 0. According to the
		// original authors' note this call also indexes all objects in the
		// hierarchy, so it has to happen before the structure is copied over.
		target.setDocumentText(parsedPdf.indexedToString(0));

		// Mirror the structure information into the UIMA representation.
		annotateStructure(new StructureAnnotator(parsedPdf), target);
	}

	/**
	 * Runs the annotator on the CAS, translating the UIMA failure into an
	 * IOException so that callers only deal with one checked exception type.
	 */
	private static void annotateStructure(StructureAnnotator annotator, JCas cas)
			throws IOException {
		try {
			annotator.process(cas);
		} catch (AnalysisEngineProcessException cause) {
			throw new IOException(cause);
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy