![JAR search and dependency download from the Maven repository](/logo.png)
com.bigdata.rdf.load.SingleResourceReaderTask Maven / Gradle / Ivy
package com.bigdata.rdf.load;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.log4j.Logger;
import org.openrdf.model.Statement;
import org.openrdf.rio.RDFFormat;
import com.bigdata.rdf.rio.IStatementBuffer;
import com.bigdata.rdf.rio.LoadStats;
import com.bigdata.rdf.rio.PresortRioLoader;
import com.bigdata.rdf.rio.RDFParserOptions;
/**
* Task that either loads an RDF resource or verifies that the told triples found
* in that resource are present in the database. The difference between data
* load and data verify is just the behavior of the {@link IStatementBuffer}.
*
* @author Bryan Thompson
* @version $Id$
*/
public class SingleResourceReaderTask implements Runnable {

    protected static final Logger log = Logger.getLogger(SingleResourceReaderTask.class);

    /**
     * The resource to be loaded. It is opened as a local file.
     */
    final String resource;

    /**
     * The base URL for that resource.
     */
    final String baseURL;

    /**
     * The RDF interchange syntax that the file uses.
     */
    final RDFFormat rdfFormat;

    /**
     * The parser options handed through to the loader when the resource is
     * parsed.
     */
    final RDFParserOptions parserOptions;

    /**
     * Delete files after they have been successfully loaded when
     * <code>true</code>.
     */
    final boolean deleteAfter;

    /**
     * Supplies the {@link IStatementBuffer} into which the parsed statements
     * are written. The buffer decides whether the task loads or verifies.
     */
    final IStatementBufferFactory bufferFactory;

    /**
     * Shared counter which is incremented by the #of told triples read by
     * this task, but only once the task has succeeded.
     */
    final AtomicLong toldTriples;

    /**
     * The time when the task was first created.
     */
    final long createTime;

    /**
     * Reports the resource and the time elapsed since the task was created.
     */
    @Override
    public String toString() {
        return "LoadTask"//
                + "{ resource=" + resource
                + ", elapsed=" + (System.currentTimeMillis() - createTime)//
                + "}"//
        ;
    }

    /**
     * Note: Updates to <i>toldTriples</i> MUST NOT occur unless the task
     * succeeds, otherwise tasks which error and then retry will cause
     * double-counting.
     *
     * @param resource
     *            The resource (a file name) to be read.
     * @param baseURL
     *            The base URL for that resource.
     * @param rdfFormat
     *            The RDF interchange syntax that the file uses.
     * @param parserOptions
     *            The options passed to the RDF parser.
     * @param deleteAfter
     *            When <code>true</code> the file is deleted once it has been
     *            successfully read.
     * @param bufferFactory
     *            The factory for the statement buffer used by the task.
     * @param toldTriples
     *            The counter to be incremented when the task succeeds.
     *
     * @throws IllegalArgumentException
     *             if any of the reference arguments is <code>null</code>.
     */
    public SingleResourceReaderTask(String resource, String baseURL, RDFFormat rdfFormat,
            final RDFParserOptions parserOptions, final boolean deleteAfter,
            IStatementBufferFactory bufferFactory, AtomicLong toldTriples) {

        // Name the offending argument so that a misconfiguration is easy to diagnose.
        if (resource == null)
            throw new IllegalArgumentException("resource");
        if (baseURL == null)
            throw new IllegalArgumentException("baseURL");
        if (rdfFormat == null)
            throw new IllegalArgumentException("rdfFormat");
        if (parserOptions == null)
            throw new IllegalArgumentException("parserOptions");
        if (bufferFactory == null)
            throw new IllegalArgumentException("bufferFactory");
        if (toldTriples == null)
            throw new IllegalArgumentException("toldTriples");

        this.resource = resource;
        this.baseURL = baseURL;
        this.rdfFormat = rdfFormat;
        this.parserOptions = parserOptions;
        this.deleteAfter = deleteAfter;
        this.bufferFactory = bufferFactory;
        this.toldTriples = toldTriples;
        this.createTime = System.currentTimeMillis();
    }

    /**
     * Reads the resource and, iff that succeeds, adds the #of told triples to
     * the shared counter.
     *
     * @throws RuntimeException
     *             wrapping whatever {@link #readData()} threw.
     */
    @Override
    public void run() {
        final LoadStats loadStats;
        try {
            loadStats = readData();
        } catch (Exception e) {
            /*
             * Note: no stack trace and only a warning - we will either
             * retry or declare the input as failed.
             */
            log.warn("resource=" + resource + ", error=" + e);
            throw new RuntimeException("resource=" + resource + " : " + e, e);
        }
        // Note: IFF the task succeeds!
        toldTriples.addAndGet(loadStats.toldTriples.get());
    }

    /**
     * Reads an RDF resource and either loads it into the database or
     * verifies that the triples in the resource are found in the database.
     * <p>
     * If the load fails, the buffer is reset and the original exception is
     * thrown. A secondary failure while closing the file is only logged in
     * that case so that it can not hide the root cause. If the load succeeded
     * but the file could not be closed, the close failure is thrown and the
     * file is not deleted.
     *
     * @return The statistics (told triples, elapsed time) for this resource.
     *
     * @throws Exception
     *             if the resource can not be opened, parsed or closed.
     */
    protected LoadStats readData() throws Exception {

        final long begin = System.currentTimeMillis();

        // get buffer - determines data load vs database validate.
        final IStatementBuffer buffer = bufferFactory.newStatementBuffer();

        // make sure that the buffer is empty.
        buffer.reset();

        if (log.isInfoEnabled())
            log.info("loading: " + resource);

        final PresortRioLoader loader = new PresortRioLoader(buffer);

        // open reader on the file.
        final InputStream rdfStream = new FileInputStream(resource);

        /*
         * Obtain a buffered reader on the input stream.
         *
         * NOTE(review): InputStreamReader decodes with the platform default
         * charset here - confirm that this is intended for the RDF formats
         * being loaded.
         */
        final Reader reader = new BufferedReader(new InputStreamReader(
                rdfStream));

        boolean success = false;

        try {

            final LoadStats stats = new LoadStats();

            // run the parser.
            // @todo reuse the same underlying parser instance?
            loader.loadRdf(reader, baseURL, rdfFormat, null, parserOptions);

            success = true;

            final long nstmts = loader.getStatementsAdded();

            final long now = System.currentTimeMillis();

            stats.toldTriples.set(nstmts);
            stats.totalTime.set(now - begin);
            stats.loadTime.set(now - begin);

            /*
             * This reports the load rate for the file, but this will only
             * be representative of the real throughput if autoFlush is
             * enabled (that is, if the statements for each file are flushed
             * through to the database when that file is processed rather
             * than being accumulated in a thread-local buffer).
             */
            if (log.isInfoEnabled())
                log.info(stats.toString());

            return stats;

        } catch (Exception ex) {

            /*
             * Note: discard anything in the buffer. This prevents the
             * buffer from retaining data after a failed load operation.
             */
            buffer.reset();

            // rethrow the exception.
            throw ex;

        } finally {

            try {
                try {
                    reader.close();
                } finally {
                    // Always attempt to release the file handle, even if
                    // closing the reader failed.
                    rdfStream.close();
                }
            } catch (IOException closeEx) {
                if (success) {
                    // The load itself was fine, so the close failure is the
                    // error to report (and the file is left in place).
                    throw closeEx;
                }
                // The load already failed: do not let the close failure
                // replace the exception which is propagating.
                log.warn("Could not close: " + resource + ", error=" + closeEx);
            }

            if (deleteAfter && success) {
                if (!new File(resource).delete()) {
                    log.warn("Could not delete: " + resource);
                }
            }

        }

    }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy