// com.bigdata.rdf.sail.ExportKB Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Jul 26, 2011
*/
package com.bigdata.rdf.sail;
import info.aduna.iteration.CloseableIteration;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.zip.GZIPOutputStream;
import org.apache.log4j.Logger;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.RDFWriterRegistry;
import org.openrdf.sail.SailConnection;
import org.openrdf.sail.SailException;
import com.bigdata.Banner;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.Journal;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.relation.RelationSchema;
import com.bigdata.relation.locator.ILocatableResource;
import com.bigdata.sparse.ITPS;
/**
 * Utility class for exporting the configuration properties and data associated
 * with one or more KBs on a {@link Journal}.
 *
 * @see "Data Migration"
 *
 * @author Bryan Thompson
 * @version $Id$
 */
public class ExportKB {
/**
 * Logger for this class.
 */
private static final Logger log = Logger.getLogger(ExportKB.class);
/**
 * The KB to be exported.
 */
private final AbstractTripleStore kb;
/**
 * The namespace associated with that KB.
 */
private final String namespace;
/**
 * The directory into which the KB properties and data will be written.
 */
private final File kbdir;
/**
 * The {@link RDFFormat} which will be used when the data are exported.
 */
private final RDFFormat format;
/**
 * When <code>true</code>, inferences and axioms will also be exported.
 * Otherwise just the explicitly given (aka told) triples/quads will be
 * exported.
 */
private final boolean includeInferred;
/**
 * Create an exporter for a single KB instance. The namespace is taken from
 * the KB itself.
 *
 * @param kb
 *            The KB instance.
 * @param kbdir
 *            The directory into which the exported properties and RDF data
 *            will be written.
 * @param format
 *            The {@link RDFFormat} to use when exporting the data.
 * @param includeInferred
 *            When <code>true</code>, inferences and axioms will also be
 *            exported. Otherwise just the explicitly given (aka told)
 *            triples/quads will be exported.
 *
 * @throws IllegalArgumentException
 *             if any of <i>kb</i>, <i>kbdir</i> or <i>format</i> is
 *             <code>null</code>; if the KB uses statement identifiers and
 *             the format is not RDF/XML; or if the KB is in quads mode and
 *             the format does not support contexts.
 */
public ExportKB(final AbstractTripleStore kb, final File kbdir,
        final RDFFormat format, final boolean includeInferred) {

    // Reject missing arguments first.
    if (kb == null) {
        throw new IllegalArgumentException("KB not specified.");
    }
    if (kbdir == null) {
        throw new IllegalArgumentException(
                "Output directory not specified.");
    }
    if (format == null) {
        throw new IllegalArgumentException("RDFFormat not specified.");
    }

    // Then make sure the format can represent what the KB stores.
    if (kb.isStatementIdentifiers()) {
        if (!RDFFormat.RDFXML.equals(format)) {
            throw new IllegalArgumentException(
                    "SIDs mode requires RDF/XML interchange.");
        }
    }
    if (kb.isQuads()) {
        if (!format.supportsContexts()) {
            throw new IllegalArgumentException(
                    "RDFFormat does not support quads: " + format);
        }
    }

    this.kb = kb;
    this.namespace = kb.getNamespace();
    this.kbdir = kbdir;
    this.format = format;
    this.includeInferred = includeInferred;
}
/**
 * Munge a name so that it is suitable for use in a file system. Every
 * character matched by the regular expression <code>\W</code> is replaced
 * by an underscore character ("_"). This gets rid of all punctuation
 * characters and whitespace in the name. Since the pattern is compiled
 * without any flags, only ASCII letters, ASCII digits and the underscore
 * count as word characters, so other characters (including non-ASCII
 * letters) are replaced by an underscore as well.
 *
 * @param s
 *            The name to be munged (here, the namespace of a KB).
 *
 * @return A string suitable for inclusion in a filename.
 */
static private String munge(final String s) {
    final java.util.regex.Matcher nonWordChars = java.util.regex.Pattern
            .compile("[\\W]").matcher(s);
    return nonWordChars.replaceAll("_");
}
/**
 * Export both the configuration properties and the data for the KB. The
 * output directory is reported on stdout and created if necessary, then
 * the properties are written, followed by the data.
 *
 * @throws IOException
 * @throws SailException
 * @throws RDFHandlerException
 */
public void export() throws IOException, SailException, RDFHandlerException {

    final String where = "Effective output directory: " + kbdir;

    System.out.println(where);

    // Make sure the output directory exists before anything is written.
    prepare();

    // First the configuration, then the statements.
    exportProperties();

    exportData();

}
/**
 * Ensure that the output directory exists, creating it (and any missing
 * parent directories) when necessary.
 *
 * @throws IOException
 *             if the output path exists but is not a directory, or if the
 *             directory could not be created.
 */
public void prepare() throws IOException {

    if (kbdir.isDirectory()) {
        // Nothing to do.
        return;
    }

    if (kbdir.exists()) {
        // Fail fast rather than with an obscure error when the first file
        // is opened underneath a path which is not a directory.
        throw new IOException("Not a directory: " + kbdir);
    }

    // mkdirs() also returns false when the directory was created by someone
    // else in the meantime, so re-check before reporting a failure.
    if (!kbdir.mkdirs() && !kbdir.isDirectory()) {
        throw new IOException("Could not create directory: " + kbdir);
    }

}
/**
 * Export the configuration properties for the KB into the file
 * <code>kb.properties</code> in the output directory. The properties are
 * flattened first so that inherited defaults are written out as well. When
 * the KB is backed by an {@link IRawStore}, the comment block at the head
 * of the file also records the backing file, the namespace, the timestamp
 * of the KB view and some counts.
 *
 * @throws IOException
 */
public void exportProperties() throws IOException {

    prepare();

    // Comment block written at the head of the properties file.
    final StringBuilder header = new StringBuilder();

    header.append("Configuration properties.\n");

    if (kb.getIndexManager() instanceof IRawStore) {

        final IRawStore store = (IRawStore) kb.getIndexManager();

        header.append("source=").append(store.getFile()).append("\n");

        header.append("namespace=").append(namespace).append("\n");

        // The timestamp of the KB view.
        header.append("timestamp=").append(kb.getTimestamp()).append("\n");

        // Note: no explicit export date is added here since
        // Properties#store() writes the current date itself.

        // The approximate #of statements (includes axioms, inferences, and
        // deleted statements).
        header.append("fastStatementCount=")
                .append(kb.getStatementCount(false/* exact */))
                .append("\n");

        // The #of URIs in the lexicon indices.
        header.append("uriCount=").append(kb.getURICount()).append("\n");

        // The #of Literals in the lexicon indices.
        header.append("literalCount=").append(kb.getLiteralCount())
                .append("\n");

        // The #of blank nodes in the lexicon indices.
        header.append("bnodeCount=").append(kb.getBNodeCount())
                .append("\n");

    }

    // Flatten so that inherited defaults are written out as well.
    final Properties flattened = flatCopy(kb.getProperties());

    final File target = new File(kbdir, "kb.properties");

    System.out.println("Writing " + target);

    final OutputStream out = new BufferedOutputStream(
            new FileOutputStream(target));

    try {

        flattened.store(out, header.toString());

    } finally {

        out.close();

    }

}
/**
* Exports all told statements associated with the last commit point for the
* KB.
*
* @throws IOException
* @throws SailException
* @throws RDFHandlerException
*/
public void exportData() throws IOException, SailException,
RDFHandlerException {
prepare();
final BigdataSail sail = new BigdataSail(kb);
try {
sail.initialize();
final SailConnection conn = sail.getReadOnlyConnection();
try {
final CloseableIteration extends Statement, SailException> itr = conn
.getStatements(null/* s */, null/* p */, null/* o */,
includeInferred, new Resource[] {}/* contexts */);
try {
final File file = new File(kbdir, "data."
+ format.getDefaultFileExtension()+".gz");
System.out.println("Writing " + file);
final OutputStream os = new GZIPOutputStream(
new FileOutputStream(file));
try {
final RDFWriter writer = RDFWriterRegistry
.getInstance().get(format).getWriter(os);
writer.startRDF();
while (itr.hasNext()) {
final Statement stmt = itr.next();
writer.handleStatement(stmt);
}
writer.endRDF();
} finally {
os.close();
}
} finally {
itr.close();
}
} finally {
conn.close();
}
} finally {
sail.shutDown();
}
}
/**
* Return a list of the namespaces for the {@link AbstractTripleStore}s
* registered against the bigdata instance.
*/
static List getNamespaces(final IIndexManager indexManager) {
// the triple store namespaces.
final List namespaces = new LinkedList();
// scan the relation schema in the global row store.
@SuppressWarnings("unchecked")
final Iterator itr = (Iterator) indexManager
.getGlobalRowStore().rangeIterator(RelationSchema.INSTANCE);
while (itr.hasNext()) {
// A timestamped property value set is a logical row with
// timestamped property values.
final ITPS tps = itr.next();
// If you want to see what is in the TPS, uncomment this.
// System.err.println(tps.toString());
// The namespace is the primary key of the logical row for the
// relation schema.
final String namespace = (String) tps.getPrimaryKey();
// Get the name of the implementation class
// (AbstractTripleStore, SPORelation, LexiconRelation, etc.)
final String className = (String) tps.get(RelationSchema.CLASS)
.getValue();
try {
final Class> cls = Class.forName(className);
if (AbstractTripleStore.class.isAssignableFrom(cls)) {
// this is a triple store (vs something else).
namespaces.add(namespace);
}
} catch (ClassNotFoundException e) {
log.error(e,e);
}
}
return namespaces;
}
/**
 * Read a {@link Properties} object from a file. The file is closed before
 * this method returns, whether or not it could be parsed.
 *
 * @param file
 *            The property file.
 *
 * @return The {@link Properties} read from that file.
 *
 * @throws IOException
 */
static private Properties loadProperties(final File file)
        throws IOException {

    final InputStream in = new BufferedInputStream(new FileInputStream(
            file));

    final Properties loaded = new Properties();

    try {

        loaded.load(in);

        return loaded;

    } finally {

        in.close();

    }

}
static public Properties flatCopy(final Properties props) {
final Properties tmp = new Properties();
tmp.putAll(flatten(props));
return tmp;
}
private static Map flatten(final Properties properties) {
if (properties == null) {
throw new IllegalArgumentException();
}
final Map out = new LinkedHashMap();
final Enumeration> e = properties.propertyNames();
while (e.hasMoreElements()) {
final String property = (String) e.nextElement();
final String propertyValue = properties.getProperty(property);
if (propertyValue != null)
out.put(property, propertyValue);
}
return out;
}
/**
* Export one or more KBs from a Journal. The only required argument is the
* name of the properties file for the Journal. By default all KB instances
* found on the journal will be exported into the current working directory.
* Each KB will be written into a subdirectory based on the namespace of the
* KB.
*
* @param args
* [options] propertyFile namespace*
where
* options is any of:
*
* - -outdir
* - The output directory (default is the current working
* directory)
* - -format
* - The {@link RDFFormat} which will be used to export the
* data. If not specified then an appropriate format will be
* selected based on the KB configuration. The default for
* triples or SIDs is {@link RDFFormat#RDFXML}. The default for
* quads is {@link RDFFormat#TRIX}.
* - -includeInferred
* - Normally only the told triples/quads will be exported.
* This option may be given to export the axioms and inferences
* as well as the told triples/quads.
* - -n
* - Do nothing, but show the KBs which would be exported.
* - -help
* - Display the usage message and exit.
*
* where propertyFile is the properties file for the
* Journal.
* where namespace is zero or more namespaces of KBs to
* export from the Journal. If no namespace is given, then all
* KBs on the Journal will be exported.
*
* @throws Exception
*/
public static void main(final String[] args) throws Exception {
Banner.banner();
/*
* Defaults for options.
*/
boolean nothing = false;
boolean includeInferred = false;
RDFFormat format = null;
File propertyFile = null;
File outdir = new File(".");
final List namespaces = new LinkedList();
// Parse options.
int i = 0;
for (; i < args.length; ) {
final String s = args[i];
if (!s.startsWith("-")) {
// end of options.
break;
}
i++;
if(s.equals("-n")) {
nothing = true;
} else if(s.equals("-help")) {
usage();
System.exit(0);
} else if(s.equals("-format")) {
format = RDFFormat.valueOf(args[i++]);
} else if(s.equals("-includeInferred")) {
includeInferred = true;
} else if (s.equals("-outdir")) {
outdir = new File(args[i++]);
} else {
System.err.println("Unknown option: " + s);
usage();
System.exit(1);
}
}
// properties file.
if (i == args.length) {
usage();
System.exit(1);
} else {
propertyFile = new File(args[i++]);
if (!propertyFile.exists()) {
System.err.println("No such file: " + propertyFile);
System.exit(1);
}
}
// Load the properties from the file.
final Properties properties = loadProperties(propertyFile);
/*
* Allow override of select options.
*/
{
final String[] overrides = new String[] {
// Journal options.
com.bigdata.journal.Options.FILE,
};
for (String s : overrides) {
if (System.getProperty(s) != null) {
// Override/set from the environment.
final String v = System.getProperty(s);
System.out.println("Using: " + s + "=" + v);
properties.setProperty(s, v);
}
}
}
// Open the journal.
final Journal indexManager = new Journal(properties);
try {
// The last commit time on the store.
final long commitTime = indexManager.getLastCommitTime();
if (i == args.length) {
// Use all namespaces.
namespaces.addAll(getNamespaces(indexManager));
} else {
// use just the given namespace(s).
for (; i < args.length;) {
final String namespace = args[i++];
// Verify that the KB exists.
final ILocatableResource> kb = indexManager
.getResourceLocator().locate(namespace, commitTime);
if (kb == null) {
throw new RuntimeException("No such namespace: "
+ namespace);
}
if (!(kb instanceof AbstractTripleStore)) {
throw new RuntimeException("Not a KB: " + namespace);
}
namespaces.add(namespace);
}
}
for (String namespace : namespaces) {
// Get KB view.
final AbstractTripleStore kb = (AbstractTripleStore) indexManager
.getResourceLocator().locate(namespace, commitTime);
// The name of the subdirectory on which the properties and RDF
// data will be written.
final File kbdir = new File(outdir, munge(namespace));
// Choose an appropriate RDFFormat.
RDFFormat fmt = format;
if (fmt == null) {
// Choose an appropriate format.
if (kb.isStatementIdentifiers()) {
fmt = RDFFormat.RDFXML;
} else if (kb.isQuads()) {
fmt = RDFFormat.TRIX;
} else {
fmt = RDFFormat.RDFXML;
}
}
System.out.println("Exporting " + namespace + " as "
+ fmt.getName() + " on " + kbdir);
if (!nothing) {
// Export KB.
new ExportKB(kb, kbdir, fmt, includeInferred).export();
}
}
// Success.
System.out.println("Done");
} finally {
indexManager.close();
}
}
/**
 * Write the one-line command line synopsis on stderr.
 */
private static void usage() {

    final String synopsis = "usage: [options] propertyFile namespace*";

    System.err.println(synopsis);

}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy