All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.rdf.sail.BigdataSailHelper Maven / Gradle / Ivy

Go to download

Blazegraph(TM) DB Core Platform. It contains all Blazegraph DB dependencies other than Blueprints.

There is a newer version: 2.1.4
Show newest version
/*
 * Created on Sep 29, 2008
 */

package com.bigdata.rdf.sail;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.TreeMap;

import org.openrdf.sail.SailException;

import com.bigdata.btree.IIndex;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.config.ConfigurationException;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.IJournal;
import com.bigdata.journal.ITx;
import com.bigdata.journal.Journal;
import com.bigdata.rdf.axioms.NoAxioms;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.sail.BigdataSail.Options;
import com.bigdata.rdf.spo.SPORelation;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.DataLoader;
import com.bigdata.rdf.store.LocalTripleStore;
import com.bigdata.rdf.store.ScaleOutTripleStore;
import com.bigdata.relation.RelationSchema;
import com.bigdata.service.AbstractFederation;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.service.ScaleOutClientFactory;
import com.bigdata.util.Bytes;

/**
 * Class provides guidance on parameter setup for a data set and its queries.
 * 
 * @author Bryan Thompson
 * 
 * @deprecated The workbench provides configuration guidance. This class also
 *             provides some support for people who want to modify some
 *             properties after a namespace has been created. This is in fact
 *             possible for some kinds of properties but not for others, but
 *             there is very little documentation about when this is and is not
 *             possible. You need to actually understand and reason about
 *             whether the property controls the manner in which the data is
 *             stored on the disk or whether it simply controls the runtime
 *             behavior in a manner that does not impact the disk storage.
 */
public class BigdataSailHelper {

    /**
     * Open a SAIL over the triple store registered under <i>namespace</i> in
     * an {@link IBigdataFederation}, creating that triple store when it does
     * not exist and the {@link ITx#UNISOLATED} view was requested.
     * <p>
     * When a new triple store is created it is configured from
     * {@link #getProperties()} and a message is written on stdout.
     * 
     * @param fed
     *            The federation.
     * @param namespace
     *            The namespace of the triple store.
     * @param timestamp
     *            The timestamp of the view.
     * 
     * @return The SAIL wrapping the located (or newly created) triple store.
     * 
     * @throws RuntimeException
     *             if the triple store could not be located and
     *             <i>timestamp</i> is not {@link ITx#UNISOLATED}.
     */
    public BigdataSail getSail(final IBigdataFederation fed,
            final String namespace, final long timestamp) {

        final ScaleOutTripleStore located = (ScaleOutTripleStore) fed
                .getResourceLocator().locate(namespace, timestamp);

        if (located != null) {

            // The triple store already exists: just wrap it.
            return new BigdataSail(located);

        }

        if (timestamp != ITx.UNISOLATED) {

            // Only the unisolated view may be used to create a triple store.
            throw new RuntimeException("No such triple store: namespace="
                    + namespace + ", timestamp=" + timestamp);

        }

        System.out.println("Creating tripleStore: namespace=" + namespace);

        final ScaleOutTripleStore created = new ScaleOutTripleStore(fed,
                namespace, timestamp, getProperties());

        created.create();

        return new BigdataSail(created);

    }
    
    /**
     * (Re-)open a SAIL backed by persistent data stored on a {@link Journal}.
     * <p>
     * A {@link Journal} is opened on <i>filename</i> and its create time and
     * last commit time are written on stderr. If the triple store cannot be
     * located and the {@link ITx#UNISOLATED} view was requested, then a new
     * {@link LocalTripleStore} is created using {@link #getProperties()}.
     * <p>
     * Note: The {@link Journal} opened by this method is closed again if the
     * method does not return normally, so a failed call does not leave the
     * backing file open.
     * 
     * @param filename
     *            The name of the backing file for the {@link Journal}.
     * @param namespace
     *            The namespace of the triple store.
     * @param timestamp
     *            The timestamp of the view.
     * 
     * @return The SAIL.
     * 
     * @throws RuntimeException
     *             if the triple store could not be located and
     *             <i>timestamp</i> is not {@link ITx#UNISOLATED}.
     */
    public BigdataSail getSail(final String filename, final String namespace,
            final long timestamp) {

        final Properties properties = new Properties();

        properties.setProperty(Options.FILE, filename);

        final Journal journal = new Journal(properties);

        // Set iff the SAIL was handed back to the caller.
        boolean success = false;

        try {

            System.err.println("createTime="+journal.getRootBlockView().getCreateTime());
            System.err.println("lastCommitTime="+journal.getLastCommitTime());

            LocalTripleStore tripleStore = (LocalTripleStore) journal
                    .getResourceLocator().locate(namespace, timestamp);

            if (tripleStore == null) {

                if (timestamp != ITx.UNISOLATED) {

                    throw new RuntimeException(
                            "No such triple store: namespace=" + namespace
                                    + ", timestamp=" + timestamp);

                }

                // create a new triple store.

                System.out.println("Creating tripleStore: namespace="
                        + namespace);

                tripleStore = new LocalTripleStore(journal, namespace,
                        timestamp, getProperties());

                tripleStore.create();

            }

            final BigdataSail sail = new BigdataSail(tripleStore);

            success = true;

            return sail;

        } finally {

            if (!success) {

                // Do not leak the journal when we could not open the SAIL.
                journal.close();

            }

        }

    }
    
    /**
     * Return the {@link Properties} that will be used to configure a new
     * {@link AbstractTripleStore} instance. The {@link AbstractTripleStore}
     * will remember the properties with which it was created and use those
     * values each time it is re-opened. The properties are stored in the global
     * row store for the backing {@link IIndexManager}.
     * 

* Note: You need to edit this code to correspond to your application * requirements. Currently the code reflects an application using a triple * store without inference, without statement level provenance, and with the * full text index enabled. Another common configuration is a triple store * with RDFS++ inference, which can be realized by changing the * {@link AbstractTripleStore.Options#AXIOMS_CLASS} property value and * possibly enabling {@link Options#TRUTH_MAINTENANCE} depending on whether * or not you will be incrementally or bulk loading data. * * @todo Bundle some sample properties files which will make it easier for * people to configure a {@link BigdataSail} and make that a command * line option. The jini configuration approach is nice since it * allows us to use symbolic constants in the configuration files. */ public Properties getProperties() { final Properties properties = new Properties(); /* * Override the initial and maximum extent so that they are more suited * to large data sets. */ properties.setProperty(Options.INITIAL_EXTENT,""+200*Bytes.megabyte); properties.setProperty(Options.MAXIMUM_EXTENT,""+200*Bytes.megabyte); /* * Turn off truth maintenance since there are no entailments for the * ontology. */ properties.setProperty(BigdataSail.Options.TRUTH_MAINTENANCE, "false"); /* * Disable RDFS++ inference since there are no entailments for the * ontology. */ properties.setProperty(Options.AXIOMS_CLASS,NoAxioms.class.getName()); // /* // * Enable rewrites of high-level queries into native rules (native JOIN // * execution). // */ // properties.setProperty(BigdataSail.Options.NATIVE_JOINS, "true"); /* * Maximum #of subqueries to evaluate concurrently for the 1st join * dimension for native rules. Zero disables the use of an executor * service. One forces a single thread, but runs the subquery on the * executor service. N>1 is concurrent subquery evaluation. 
*/ properties.setProperty(Options.MAX_PARALLEL_SUBQUERIES, "0"); /* * The #of elements that will be materialized at a time from an access * path. The default is 20,000. However, the relatively selective joins, * small result sets for the queries, and the use of LIMIT with a FILTER * (a regex in this case) outside of the LIMIT means that a smaller * chunk size will cause the JOIN evaluation to do MUCH less work. If * the default chunk size is used, then queries with LIMIT or OFFSET can * take much longer to evaluate since they are computing a large #of * solutions beyond those required to satisify the limit (after applying * the regex FILTER). */ properties.setProperty(Options.CHUNK_CAPACITY, "100"); // properties.setProperty(Options.FULLY_BUFFERED_READ_THRESHOLD, "10000"); /* * Turn off incremental closure in the DataLoader object. */ properties.setProperty( com.bigdata.rdf.store.DataLoader.Options.CLOSURE, DataLoader.ClosureEnum.None.toString()); /* * Turn off commit in the DataLoader object. We do not need to commit * anything until we have loaded all the data and computed the closure * over the database. */ properties.setProperty( com.bigdata.rdf.store.DataLoader.Options.COMMIT, DataLoader.CommitEnum.None.toString()); /* * Turn off Unicode support for index keys (this is a big win for load * rates since LUBM does not use Unicode data, but it has very little * effect on query rates since the only time we generate Unicode sort * keys is when resolving the Values in the queries to term identifiers * in the database). */ // properties.setProperty(Options.COLLATOR, CollatorEnum.ASCII.toString()); /* * Leave the full text index enabled since it will be used to answer * the search queries. */ // /* // * Turn off the full text index. // */ // properties.setProperty(Options.TEXT_INDEX, "false"); /* * Turn off statement identifiers (provenance mode). */ properties.setProperty(Options.STATEMENT_IDENTIFIERS, "false"); // Triples only. 
properties.setProperty(Options.QUADS, "false"); /* * Turn off justifications (impacts only the load performance, but it is * a big impact and only required if you will be doing TM). (Actually, * since there are no entailments this will have no effect). */ properties.setProperty(Options.JUSTIFY, "false"); return properties; } /** * Return the properties associated with the {@link AbstractTripleStore} * backing the {@link BigdataSail}. * * @param sail * The sail. * * @return The persistent properties. */ public Properties getProperties(final BigdataSail sail) { return getProperties(sail.getDatabase().getIndexManager(), sail .getDatabase().getNamespace()); } /** * Return the properties associated with the given namespace. * * @param indexManager * Use {@link BigdataSail#getDatabase()} and then * {@link AbstractTripleStore#getIndexManager()}. * @param namespace * The namespace of a locatable resource such as an * {@link AbstractTripleStore}, {@link SPORelation} or * {@link LexiconRelation}. * * @return The persistent properties. */ protected Properties getProperties(IIndexManager indexManager, String namespace) { Map map = indexManager.getGlobalRowStore().read( RelationSchema.INSTANCE, namespace); Properties properties = new Properties(); properties.putAll(map); return properties; } /** * Update properties for the SAIL. This will overwrite any properties having * the same name with their new values. Properties that are not overwritten * will remain visible. A property can be deleted by specifying a * null value. *

* Note: this changes the persistent property values associated with the * SAIL. It DOES NOT change the properties associated with the given * instance. You MUST re-open the SAIL in order for the new properties to be * in effect. *

* Note: While many property values can be changed dynamically, some may * not. In particular, the properties that effect the way in which the keys * for the indices are generated as stored within the indices themselves. * Among other things this ensures that Unicode configuration options are * applied uniformly when an is accessed by any host in a federation. * * @param sail * The SAIL. * @param properties * The properties. * * @return The post-modification properties. */ public Properties setProperties(final BigdataSail sail, final Properties properties) { return setProperties(// sail.getDatabase().getIndexManager(), // sail.getDatabase().getNamespace(), // properties// ); } /** * * @param indexManager * @param namespace * @param properties * @return The post-modification properties. */ protected Properties setProperties(final IIndexManager indexManager, final String namespace, final Properties properties) { /* * Convert the Properties to a Map. */ final Map map = new HashMap(); { // set the namespace (primary key). map.put(RelationSchema.NAMESPACE, namespace); final Enumeration e = properties.propertyNames(); while (e.hasMoreElements()) { final Object key = e.nextElement(); final String name = (String) key; map.put(name, properties.getProperty(name)); } } /* * Write the map on the row store. This will overwrite any entries for * the same properties. Properties that are not overwritten will remain * visible. */ final Properties p2 = new Properties(); p2.putAll(indexManager.getGlobalRowStore().write( RelationSchema.INSTANCE, map)); if (indexManager instanceof IJournal) { // make the changes restart safe (not required for federation). ((Journal) indexManager).commit(); } // return the post-modification properties. return p2; } protected static void showProperties(final Properties p) { // sorted collection. final TreeMap map = new TreeMap(); // put into alpha order. 
for(Map.Entry entry : p.entrySet()) { map.put(entry.getKey().toString(), entry.getValue()); } for (Map.Entry entry : map.entrySet()) { System.out.println(entry.getKey() + "=" + entry.getValue()); } } /** * Shows some interesting details about the terms index. * * @param sail */ public static void showLexiconIndexDetails(BigdataSail sail) { IIndex ndx = sail.getDatabase().getLexiconRelation().getBlobsIndex(); IndexMetadata md = ndx.getIndexMetadata(); System.out.println("Lexicon:"); System.out.println(md.toString()); System.out.println(md.getTupleSerializer().toString()); } /** * Shows some interesting details about the primary index for the {@link SPORelation}. * * @param sail */ public static void showSPOIndexDetails(final BigdataSail sail) { final IIndex ndx = sail.getDatabase().getSPORelation() .getPrimaryIndex(); final IndexMetadata md = ndx.getIndexMetadata(); System.out.println(md.getName()+":"); System.out.println(md.toString()); System.out.println(md.getTupleSerializer().toString()); } /** * Typesafe enumeration of the deployment models. * * @author Bryan Thompson * @version $Id$ */ private static enum FederationEnum { LTS, // LDS, // EDS, JDS; } /** * Utility class. *

* Note: The LTS (local triple store) mode is inferred when the filename is * a .properties file. The JiniFederation (JDS) mode is * inferred when the filename is a .config file. If neither of * those file extensions is used, then you must specify the either LTS or * JDS explicitly. *

* Note: The namespace identifies which triple store you are * accessing and defaults to kb. *

* Note: The timestamp identifies which commit point you are * accessing and defaults to the {@link ITx#UNISOLATED} view, which can also * be specified as {@value#ITx#UNISOLATED}). *

* Note: The properties is a file containing property overrides to be * applied to the kb. * * @param args * filename ((LTS|JDS ((namespace * (timestamp)))properties) * * @throws SailException * @throws ConfigurationException * @throws IOException */ public static void main(final String[] args) throws SailException, IOException { if (args.length == 0) { System.err.println("usage: filename (LTS|JDS (namespace (timestamp)))"); System.exit(1); } final String filename = args[0]; final File file = new File(filename); final FederationEnum fedType; if (args.length > 1) { fedType = FederationEnum.valueOf(args[1]); } else if (filename.endsWith(".properties")) { fedType = FederationEnum.LTS; } else if (filename.endsWith(".config")) { fedType = FederationEnum.JDS; } else { fedType = null; System.err.println("Must specify the federation type: " + filename); System.exit(1); } switch(fedType) { case LTS: case JDS: if (file.isDirectory()) { System.err.println(fedType + " requires plain file, not a directory: dir=" + filename); System.exit(1); } break; // case LDS: // case EDS: // if (!file.isDirectory()) { // // System.err.println(fedType // + " requires a directory, not a plain file: file=" // + filename); // // System.exit(1); // // } // break; default: throw new AssertionError(); } final String namespace = args.length > 2 ? args[2] : "kb"; final long timestamp = args.length > 3 ? Long.valueOf(args[3]) : ITx.UNISOLATED; final File propertyFile = args.length > 4 ? new File(args[4]) : null; if (propertyFile != null && !propertyFile.exists()) { System.err.println("No such file: "+propertyFile); System.exit(1); } final BigdataSailHelper helper = new BigdataSailHelper(); System.out.println("filename: " + filename); final BigdataSail sail; // Note: iff we need to shutdown the federation in finally{} final AbstractFederation fed; // // Note: iff JDS. 
// final JiniServicesHelper jiniServicesHelper; switch (fedType) { case LTS: sail = helper.getSail(filename, namespace, timestamp); fed = null; // jiniServicesHelper = null; break; // case LDS: { // //// jiniServicesHelper = null; // // final Properties properties = new Properties(); // // properties.setProperty( // com.bigdata.service.LocalDataServiceClient.Options.DATA_DIR, // filename); // // // disable platform statistics collection. // properties.setProperty( // LocalDataServiceClient.Options.COLLECT_PLATFORM_STATISTICS, "false"); // // fed = new LocalDataServiceClient(properties).connect(); // // sail = helper.getSail(fed, namespace, timestamp); // // break; // // } // case EDS: { // //// jiniServicesHelper = null; // // final Properties properties = new Properties(); // // properties.setProperty( // com.bigdata.service.EmbeddedClient.Options.DATA_DIR, // filename); // // // disable platform statistics collection. // properties.setProperty( // EmbeddedClient.Options.COLLECT_PLATFORM_STATISTICS, "false"); // // fed = new EmbeddedClient(properties).connect(); // // sail = helper.getSail(fed, namespace, timestamp); // // break; // // } case JDS: // Should be a jini configuration file. fed = (AbstractFederation) ScaleOutClientFactory.getJiniClient(new String[] { args[0] }).connect(); sail = helper.getSail(fed, namespace, timestamp); break; default: throw new AssertionError(); } try { sail.initialize(); System.out.println("\npre-modification properties::"); showProperties(helper.getProperties(sail)); showLexiconIndexDetails(sail); showSPOIndexDetails(sail); // change some property values. 
if(true) { final Properties p = new Properties(); if (propertyFile != null) { System.out.println("reading new properties from file::"); final InputStream is = new BufferedInputStream( new FileInputStream(propertyFile)); try { p.load(is); } finally { is.close(); } p.store(System.out, "Will apply properties::"); } else { System.out.println("reading new properties from stdin::"); p.load(System.in); } // p.setProperty(Options.NESTED_SUBQUERY, "false"); // p.setProperty(Options.CHUNK_CAPACITY, "100"); // p.setProperty(Options.FULLY_BUFFERED_READ_THRESHOLD, "1000"); // p.setProperty(Options.MAX_PARALLEL_SUBQUERIES, "0"); // p.setProperty(Options.INCLUDE_INFERRED, "true"); // p.setProperty(Options.QUERY_TIME_EXPANDER, "false"); System.out.println("\npost-modification properties::"); showProperties(helper.setProperties(sail, p)); } } finally { sail.shutDown(); if( fed != null) { fed.shutdownNow(); } // if (jiniServicesHelper != null) { // // jiniServicesHelper.shutdown(); // // } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy