All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.rdf.sail.ExportKB Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     [email protected]

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Jul 26, 2011
 */

package com.bigdata.rdf.sail;

import info.aduna.iteration.CloseableIteration;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.zip.GZIPOutputStream;

import org.apache.log4j.Logger;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.RDFWriterRegistry;
import org.openrdf.sail.SailConnection;
import org.openrdf.sail.SailException;

import com.bigdata.Banner;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.Journal;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.relation.RelationSchema;
import com.bigdata.relation.locator.ILocatableResource;
import com.bigdata.sparse.ITPS;

/**
 * Utility class for exporting the configuration properties and data associated
 * with one or more KBs on a {@link Journal}.
 * <p>
 * NOTE(review): the original {@code @see} reference here was a link whose text
 * read "Data Migration"; the link target was lost when this file was archived
 * and should be restored if known.
 * 
 * @author Bryan Thompson
 * @version $Id$
 */
public class ExportKB {

    private static final Logger log = Logger.getLogger(ExportKB.class);

    /**
     * The KB view to be exported (bound in the constructor; never null).
     */
    private final AbstractTripleStore kb;

    /**
     * The namespace associated with that KB (from {@code kb.getNamespace()}).
     */
    private final String namespace;

    /**
     * The directory into which the KB properties and data will be written.
     * Created on demand by {@code prepare()}.
     */
    private final File kbdir;

    /**
     * The {@link RDFFormat} which will be used when the data are exported.
     */
    private final RDFFormat format;

    /**
     * When true inferences and axioms will also be exported.
     * Otherwise just the explicitly given (aka told) triples/quads will be
     * exported.
     */
    private final boolean includeInferred;
    
    /**
     * Create an exporter for a single KB instance.
     * 
     * @param kb
     *            The KB instance (required).
     * @param kbdir
     *            The directory into which the exported properties and RDF data
     *            will be written (required).
     * @param format
     *            The {@link RDFFormat} to use when exporting the data
     *            (required).
     * @param includeInferred
     *            When {@code true}, axioms and inferences will be exported in
     *            addition to the explicitly given (aka told) triples/quads.
     */
    public ExportKB(final AbstractTripleStore kb, final File kbdir,
            final RDFFormat format, final boolean includeInferred) {

        // Validate the required arguments.
        if (kb == null) {
            throw new IllegalArgumentException("KB not specified.");
        }

        if (kbdir == null) {
            throw new IllegalArgumentException(
                    "Output directory not specified.");
        }

        if (format == null) {
            throw new IllegalArgumentException("RDFFormat not specified.");
        }

        // Statement identifiers (SIDs) can only be interchanged as RDF/XML.
        if (kb.isStatementIdentifiers() && !RDFFormat.RDFXML.equals(format)) {
            throw new IllegalArgumentException(
                    "SIDs mode requires RDF/XML interchange.");
        }

        // A quads KB requires a context-aware interchange syntax.
        if (kb.isQuads() && !format.supportsContexts()) {
            throw new IllegalArgumentException(
                    "RDFFormat does not support quads: " + format);
        }

        this.kb = kb;
        this.kbdir = kbdir;
        this.format = format;
        this.includeInferred = includeInferred;
        this.namespace = kb.getNamespace();

    }

    /**
     * Munge a name index so that it is suitable for use in a filesystem: any
     * non-word character (anything outside {@code [a-zA-Z0-9_]}, i.e.
     * punctuation and whitespace) is replaced by an underscore ("_"). Unicode
     * word characters are not translated.
     * 
     * @param s
     *            The name of the scale-out index.
     * 
     * @return A string suitable for inclusion in a filename.
     */
    static private String munge(final String s) {

        // "\W" is equivalent to the single-member character class "[\W]".
        return s.replaceAll("\\W", "_");

    }

    /**
     * Export the properties and data for the KB: ensures the output directory
     * exists, writes the {@code kb.properties} file, then writes the gzipped
     * RDF data file. (The three steps run in that fixed order.)
     * 
     * @throws IOException
     *             If the output directory or files can not be written.
     * @throws SailException
     *             If reading from the KB fails.
     * @throws RDFHandlerException
     *             If the RDF writer fails.
     */
    public void export() throws IOException, SailException, RDFHandlerException {

        System.out.println("Effective output directory: " + kbdir);

        prepare();

        exportProperties();

        exportData();

    }

    /**
     * Ensure that the output directory exists.
     * <p>
     * Note: {@link File#mkdirs()} returns {@code false} both on failure and
     * when the directory already exists (including when it is created
     * concurrently by another thread or process), so failure is only reported
     * when the directory is still absent afterwards. Unlike the previous
     * {@code exists()} pre-check, this also rejects the case where
     * {@code kbdir} exists but is a regular file rather than a directory.
     * 
     * @throws IOException
     *             If the directory does not exist and could not be created.
     */
    public void prepare() throws IOException {
        if (!kbdir.mkdirs() && !kbdir.isDirectory()) {
            throw new IOException("Could not create directory: " + kbdir);
        }
    }

    /**
     * Export the configuration properties for the kb as a java properties file
     * ({@code kb.properties}) in the output directory. Inherited property
     * defaults are flattened so that they are written out explicitly. When the
     * backing index manager is an {@link IRawStore}, provenance metadata
     * (source file, namespace, timestamp, and approximate index counts) is
     * written into the properties-file comment block.
     * 
     * @throws IOException
     *             If the directory or file can not be written.
     */
    public void exportProperties() throws IOException {
        // Make sure the output directory exists.
        prepare();
        // Prepare a comment block for the properties file.
        final StringBuilder comments = new StringBuilder(
                "Configuration properties.\n");
        if (kb.getIndexManager() instanceof IRawStore) {
            // Record where the data came from.
            comments.append("source="
                    + ((IRawStore) kb.getIndexManager()).getFile() + "\n");
            comments.append("namespace=" + namespace + "\n");
            // The timestamp of the KB view.
            comments.append("timestamp=" + kb.getTimestamp() + "\n");
            // The date and time when the KB export began. (Automatically added by Java).
//            comments.append("exportDate=" + new Date() + "\n");
            // The approximate #of statements (includes axioms, inferences, and
            // deleted statements).
            comments.append("fastStatementCount="
                    + kb.getStatementCount(false/* exact */) + "\n");
            // The #of URIs in the lexicon indices.
            comments.append("uriCount=" + kb.getURICount() + "\n");
            // The #of Literals in the lexicon indices.
            comments.append("literalCount=" + kb.getLiteralCount() + "\n");
            // The #of blank nodes in the lexicon indices.
            comments.append("bnodeCount=" + kb.getBNodeCount() + "\n");
        }
        // Flatten the properties so inherited defaults will also be written
        // out.
        final Properties properties = flatCopy(kb.getProperties());
        // Write the properties file.
        final File file = new File(kbdir, "kb.properties");
        System.out.println("Writing " + file);
        final OutputStream os = new BufferedOutputStream(new FileOutputStream(
                file));
        try {
            properties.store(os, comments.toString());
        } finally {
            // Always release the file handle, even if store() fails.
            os.close();
        }
    }

    /**
     * Exports all told statements associated with the last commit point for the
     * KB. The statements are written gzip-compressed as
     * {@code data.<ext>.gz} in the configured {@link RDFFormat}.
     * <p>
     * Note: the generic type parameters on the statement iterator were
     * evidently stripped from the archived copy of this file (leaving a raw
     * {@code CloseableIteration}, under which {@code itr.next()} would return
     * {@code Object} and not compile); they are restored here.
     * 
     * @throws IOException
     *             If the data file can not be written.
     * @throws SailException
     *             If reading from the KB fails.
     * @throws RDFHandlerException
     *             If the RDF writer fails.
     */
    public void exportData() throws IOException, SailException,
            RDFHandlerException {
        // Make sure the output directory exists.
        prepare();
        final BigdataSail sail = new BigdataSail(kb);
        try {
            sail.initialize();
            final SailConnection conn = sail.getReadOnlyConnection();
            try {
                // Visit all statements. An empty contexts array imposes no
                // context filter.
                final CloseableIteration<? extends Statement, SailException> itr = conn
                        .getStatements(null/* s */, null/* p */, null/* o */,
                                includeInferred, new Resource[] {}/* contexts */);
                try {
                    final File file = new File(kbdir, "data."
                            + format.getDefaultFileExtension() + ".gz");
                    System.out.println("Writing " + file);
                    final OutputStream os = new GZIPOutputStream(
                            new FileOutputStream(file));
                    try {
                        final RDFWriter writer = RDFWriterRegistry
                                .getInstance().get(format).getWriter(os);
                        writer.startRDF();
                        while (itr.hasNext()) {
                            final Statement stmt = itr.next();
                            writer.handleStatement(stmt);
                        }
                        writer.endRDF();
                    } finally {
                        // close() finishes the gzip trailer and releases the
                        // file handle.
                        os.close();
                    }
                } finally {
                    itr.close();
                }
            } finally {
                conn.close();
            }
        } finally {
            sail.shutDown();
        }

    }

    /**
     * Return a list of the namespaces for the {@link AbstractTripleStore}s
     * registered against the bigdata instance.
     * <p>
     * Note: the generic type parameters in this method were evidently stripped
     * from the archived copy of this file (the retained
     * {@code @SuppressWarnings("unchecked")} shows they were present); they
     * are restored here.
     * 
     * @param indexManager
     *            The index manager whose global row store is scanned.
     * 
     * @return The namespaces of the registered triple/quad store instances.
     */
    static List<String> getNamespaces(final IIndexManager indexManager) {

        // the triple store namespaces.
        final List<String> namespaces = new LinkedList<String>();

        // scan the relation schema in the global row store.
        @SuppressWarnings("unchecked")
        final Iterator<ITPS> itr = (Iterator<ITPS>) indexManager
                .getGlobalRowStore().rangeIterator(RelationSchema.INSTANCE);

        while (itr.hasNext()) {

            // A timestamped property value set is a logical row with
            // timestamped property values.
            final ITPS tps = itr.next();

            // If you want to see what is in the TPS, uncomment this.
//          System.err.println(tps.toString());

            // The namespace is the primary key of the logical row for the
            // relation schema.
            final String namespace = (String) tps.getPrimaryKey();

            // Get the name of the implementation class
            // (AbstractTripleStore, SPORelation, LexiconRelation, etc.)
            final String className = (String) tps.get(RelationSchema.CLASS)
                    .getValue();

            try {
                final Class<?> cls = Class.forName(className);
                if (AbstractTripleStore.class.isAssignableFrom(cls)) {
                    // this is a triple store (vs something else).
                    namespaces.add(namespace);
                }
            } catch (ClassNotFoundException e) {
                // Log and skip rows whose class is not on the classpath.
                log.error(e, e);
            }

        }

        return namespaces;

    }
    
    /**
     * Load a {@link Properties} object from a file.
     * 
     * @param file
     *            The property file.
     * 
     * @return The loaded {@link Properties}.
     * 
     * @throws IOException
     *             If the file can not be read.
     */
    static private Properties loadProperties(final File file)
            throws IOException {

        final Properties props = new Properties();

        final InputStream in = new BufferedInputStream(new FileInputStream(
                file));
        try {
            props.load(in);
        } finally {
            // Always release the file handle.
            in.close();
        }

        return props;

    }

    /**
     * Return a copy of the given properties in which all inherited defaults
     * have been flattened into directly set properties.
     * 
     * @param props
     *            The source properties (required).
     * 
     * @return A new {@link Properties} object without a defaults chain.
     */
    static public Properties flatCopy(final Properties props) {

        final Properties tmp = new Properties();

        tmp.putAll(flatten(props));

        return tmp;

    }

    /**
     * Flatten a {@link Properties} object, including any inherited defaults,
     * into a plain map. {@link Properties#propertyNames()} enumerates the
     * names of inherited default properties as well as directly set ones.
     * <p>
     * Note: the generic type parameters in this method were evidently stripped
     * from the archived copy of this file; they are restored here.
     * 
     * @param properties
     *            The properties (required).
     * 
     * @return An insertion-ordered map of the property values.
     */
    private static Map<String, String> flatten(final Properties properties) {

        if (properties == null) {

            throw new IllegalArgumentException();

        }

        final Map<String, String> out = new LinkedHashMap<String, String>();

        final Enumeration<?> e = properties.propertyNames();

        while (e.hasMoreElements()) {

            final String property = (String) e.nextElement();

            // getProperty() resolves through the defaults chain; names whose
            // value is not a String resolve to null and are skipped.
            final String propertyValue = properties.getProperty(property);

            if (propertyValue != null)
                out.put(property, propertyValue);

        }

        return out;

    }
    
    /**
     * Export one or more KBs from a Journal. The only required argument is the
     * name of the properties file for the Journal. By default all KB instances
     * found on the journal will be exported into the current working directory.
     * Each KB will be written into a subdirectory based on the namespace of the
     * KB.
     * 
     * @param args
     *            [options] propertyFile namespace* where
     *            options is any of:
     *            
*
-outdir
*
The output directory (default is the current working * directory)
*
-format
*
The {@link RDFFormat} which will be used to export the * data. If not specified then an appropriate format will be * selected based on the KB configuration. The default for * triples or SIDs is {@link RDFFormat#RDFXML}. The default for * quads is {@link RDFFormat#TRIX}.
*
-includeInferred
*
Normally only the told triples/quads will be exported. * This option may be given to export the axioms and inferences * as well as the told triples/quads.
*
-n
*
Do nothing, but show the KBs which would be exported.
*
-help
*
Display the usage message and exit.
*
* where propertyFile is the properties file for the * Journal.
* where namespace is zero or more namespaces of KBs to * export from the Journal. If no namespace is given, then all * KBs on the Journal will be exported. * * @throws Exception */ public static void main(final String[] args) throws Exception { Banner.banner(); /* * Defaults for options. */ boolean nothing = false; boolean includeInferred = false; RDFFormat format = null; File propertyFile = null; File outdir = new File("."); final List namespaces = new LinkedList(); // Parse options. int i = 0; for (; i < args.length; ) { final String s = args[i]; if (!s.startsWith("-")) { // end of options. break; } i++; if(s.equals("-n")) { nothing = true; } else if(s.equals("-help")) { usage(); System.exit(0); } else if(s.equals("-format")) { format = RDFFormat.valueOf(args[i++]); } else if(s.equals("-includeInferred")) { includeInferred = true; } else if (s.equals("-outdir")) { outdir = new File(args[i++]); } else { System.err.println("Unknown option: " + s); usage(); System.exit(1); } } // properties file. if (i == args.length) { usage(); System.exit(1); } else { propertyFile = new File(args[i++]); if (!propertyFile.exists()) { System.err.println("No such file: " + propertyFile); System.exit(1); } } // Load the properties from the file. final Properties properties = loadProperties(propertyFile); /* * Allow override of select options. */ { final String[] overrides = new String[] { // Journal options. com.bigdata.journal.Options.FILE, }; for (String s : overrides) { if (System.getProperty(s) != null) { // Override/set from the environment. final String v = System.getProperty(s); System.out.println("Using: " + s + "=" + v); properties.setProperty(s, v); } } } // Open the journal. final Journal indexManager = new Journal(properties); try { // The last commit time on the store. final long commitTime = indexManager.getLastCommitTime(); if (i == args.length) { // Use all namespaces. namespaces.addAll(getNamespaces(indexManager)); } else { // use just the given namespace(s). 
for (; i < args.length;) { final String namespace = args[i++]; // Verify that the KB exists. final ILocatableResource kb = indexManager .getResourceLocator().locate(namespace, commitTime); if (kb == null) { throw new RuntimeException("No such namespace: " + namespace); } if (!(kb instanceof AbstractTripleStore)) { throw new RuntimeException("Not a KB: " + namespace); } namespaces.add(namespace); } } for (String namespace : namespaces) { // Get KB view. final AbstractTripleStore kb = (AbstractTripleStore) indexManager .getResourceLocator().locate(namespace, commitTime); // The name of the subdirectory on which the properties and RDF // data will be written. final File kbdir = new File(outdir, munge(namespace)); // Choose an appropriate RDFFormat. RDFFormat fmt = format; if (fmt == null) { // Choose an appropriate format. if (kb.isStatementIdentifiers()) { fmt = RDFFormat.RDFXML; } else if (kb.isQuads()) { fmt = RDFFormat.TRIX; } else { fmt = RDFFormat.RDFXML; } } System.out.println("Exporting " + namespace + " as " + fmt.getName() + " on " + kbdir); if (!nothing) { // Export KB. new ExportKB(kb, kbdir, fmt, includeInferred).export(); } } // Success. System.out.println("Done"); } finally { indexManager.close(); } } private static void usage() { System.err.println("usage: [options] propertyFile namespace*"); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy