All downloads are free. The search and download functionality uses the official Maven repository.

org.rdfhdt.hdt.rdf.parsers.RDFParserTar Maven / Gradle / Ivy

package org.rdfhdt.hdt.rdf.parsers;

import java.io.FileNotFoundException;
import java.io.InputStream;

import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.rdfhdt.hdt.enums.RDFNotation;
import org.rdfhdt.hdt.exceptions.ParserException;
import org.rdfhdt.hdt.rdf.RDFParserCallback;
import org.rdfhdt.hdt.rdf.RDFParserFactory;
import org.rdfhdt.hdt.util.io.IOUtil;
import org.rdfhdt.hdt.util.io.NonCloseInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Parses a tar file (optionally .tgz or .tar.gz or .tar.bz2) directly, processing each file that contains RDF separately.
 *
 * This class itself only reads the tar format; any decompression of .tgz / .tar.gz / .tar.bz2 input is
 * presumably performed by IOUtil.getFileInputStream() before the stream reaches the tar reader (confirm there).
 *
 * It uses RDFNotation.guess() on the name of each entry to choose the format of that specific file. If guessing
 * or parsing an entry fails with an IllegalArgumentException or a ParserException, the failure is logged and that
 * entry is ignored so that the remaining entries are still processed.
 */
public class RDFParserTar implements RDFParserCallback {
	private static final Logger log = LoggerFactory.getLogger(RDFParserTar.class);

	/**
	 * Opens the given tar file and parses every RDF entry it contains.
	 *
	 * @param fileName path of the tar file, opened through IOUtil.getFileInputStream()
	 * @param baseUri base URI handed unchanged to the parser of each entry
	 * @param notation forwarded to the stream overload, which ignores it (the notation is guessed per entry)
	 * @param callback receiver handed unchanged to the parser of each entry
	 * @throws ParserException if the file cannot be opened or read; the original failure is attached as the cause
	 *
	 * @see RDFParserCallback#doParse(String, String, RDFNotation, RDFCallback)
	 */
	@Override
	public void doParse(String fileName, String baseUri, RDFNotation notation, RDFCallback callback) throws ParserException {
		// try-with-resources guarantees the file stream is closed even when parsing fails.
		try (InputStream input = IOUtil.getFileInputStream(fileName)) {
			this.doParse(input, baseUri, notation, callback);
		} catch (Exception e) {
			log.error("Unexpected exception parsing file: {}", fileName, e);
			throw wrap(e);
		}
	}

	/**
	 * Reads the given stream as a tar archive and parses every regular-file entry with the parser that matches
	 * the notation guessed from the entry name. Entries whose name contains "DS_Store" are skipped.
	 *
	 * The stream is not closed by this method; it remains owned by the caller.
	 *
	 * @param input stream positioned at the start of an (uncompressed) tar archive
	 * @param baseUri base URI handed unchanged to the parser of each entry
	 * @param notation ignored; the notation is guessed separately for each entry
	 * @param callback receiver handed unchanged to the parser of each entry
	 * @throws ParserException if the archive itself cannot be read; the original failure is attached as the cause
	 */
	@Override
	public void doParse(InputStream input, String baseUri, RDFNotation notation, RDFCallback callback) throws ParserException {
		try {
			// Not closed here on purpose: the underlying stream belongs to the caller.
			final TarArchiveInputStream tarIn = (TarArchiveInputStream) new ArchiveStreamFactory().createArchiveInputStream("tar", input);

			// Make sure that the parser does not close the Tar Stream so we can read the rest of the files.
			NonCloseInputStream nonCloseIn = new NonCloseInputStream(tarIn);

			TarArchiveEntry entry;
			while ((entry = (TarArchiveEntry) tarIn.getNextEntry()) != null) {
				if (!entry.isFile() || entry.getName().contains("DS_Store")) {
					continue;
				}

				try {
					RDFNotation guessnot = RDFNotation.guess(entry.getName());
					log.info("Parse from tar: {} as {}", entry.getName(), guessnot);
					RDFParserCallback parser = RDFParserFactory.getParserCallback(guessnot);

					parser.doParse(nonCloseIn, baseUri, guessnot, callback);
				} catch (IllegalArgumentException | ParserException e1) {
					// Best effort: a bad or unrecognised entry must not abort the rest of the archive.
					log.error("Unexpected exception.", e1);
				}
			}
		} catch (Exception e) {
			log.error("Unexpected exception parsing tar stream", e);
			throw wrap(e);
		}
	}

	/**
	 * Builds a ParserException that carries the given failure as its cause.
	 */
	private static ParserException wrap(Throwable cause) {
		ParserException wrapped = new ParserException();
		// initCause is used because only the no-argument constructor is known to exist from this file.
		wrapped.initCause(cause);
		return wrapped;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy