
com.bigdata.rdf.sail.BigdataSailHelper Maven / Gradle / Ivy
Show all versions of bigdata-core Show documentation
/*
* Created on Sep 29, 2008
*/
package com.bigdata.rdf.sail;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.TreeMap;
import org.openrdf.sail.SailException;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.config.ConfigurationException;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.IJournal;
import com.bigdata.journal.ITx;
import com.bigdata.journal.Journal;
import com.bigdata.rdf.axioms.NoAxioms;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.sail.BigdataSail.Options;
import com.bigdata.rdf.spo.SPORelation;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.rdf.store.DataLoader;
import com.bigdata.rdf.store.LocalTripleStore;
import com.bigdata.rdf.store.ScaleOutTripleStore;
import com.bigdata.relation.RelationSchema;
import com.bigdata.service.AbstractFederation;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.service.ScaleOutClientFactory;
import com.bigdata.util.Bytes;
/**
* Class provides guidance on parameter setup a data set and queries.
*
* @author Bryan Thompson
*
* @deprecated The workbench provides configuration guidance. This class also
* provides some support for people who want to modify some
* properties after a namespace has been created. This is in fact
* possible for some kinds of properties but not for others, but
* there is very little documentation about when this is and is not
* possible. You need to actually understand and reason about
* whether the property controls the manner in which the data is
* stored on the disk or whether it simply controls the runtime
* behavior in a manner that does not impact the disk storage.
*/
public class BigdataSailHelper {
/**
* (Re-)open a SAIL backed by persistent data stored in an
* {@link IBigdataFederation}.
*
* @param fed
* The federation.
* @param namespace
* The namespace of the triple store.
* @param timestamp
* The timestamp of the view.
*
* @return The SAIL.
*/
public BigdataSail getSail(final IBigdataFederation fed,
final String namespace, final long timestamp) {
ScaleOutTripleStore tripleStore = (ScaleOutTripleStore) fed
.getResourceLocator().locate(namespace, timestamp);
if (tripleStore == null) {
if (timestamp == ITx.UNISOLATED) {
// create a new triple store.
System.out.println("Creating tripleStore: namespace="
+ namespace);
tripleStore = new ScaleOutTripleStore(fed, namespace,
timestamp, getProperties());
tripleStore.create();
} else {
throw new RuntimeException("No such triple store: namespace="
+ namespace + ", timestamp=" + timestamp);
}
}
return new BigdataSail(tripleStore);
}
/**
* (Re-)open a SAIL backed by persistent data stored on a {@link Journal}.
*
* @param filename
* The name of the backing file for the {@link Journal}.
* @param namespace
* The namespace of the triple store.
* @param timestamp
* The timestamp of the view.
*
* @return The SAIL.
*/
public BigdataSail getSail(final String filename, final String namespace,
final long timestamp) {
final Properties properties = new Properties();
properties.setProperty(Options.FILE, filename);
final Journal journal = new Journal(properties);
System.err.println("createTime="+journal.getRootBlockView().getCreateTime());
System.err.println("lastCommitTime="+journal.getLastCommitTime());
LocalTripleStore tripleStore = (LocalTripleStore) journal
.getResourceLocator().locate(namespace, timestamp);
if (tripleStore == null) {
if (timestamp == ITx.UNISOLATED) {
// create a new triple store.
System.out.println("Creating tripleStore: namespace="
+ namespace);
tripleStore = new LocalTripleStore(journal, namespace,
timestamp, getProperties());
tripleStore.create();
} else {
throw new RuntimeException("No such triple store: namespace="
+ namespace + ", timestamp=" + timestamp);
}
}
return new BigdataSail(tripleStore);
}
/**
* Return the {@link Properties} that will be used to configure a new
* {@link AbstractTripleStore} instance. The {@link AbstractTripleStore}
* will remember the properties with which it was created and use those
* values each time it is re-opened. The properties are stored in the global
* row store for the backing {@link IIndexManager}.
*
* Note: You need to edit this code to correspond to your application
* requirements. Currently the code reflects an application using a triple
* store without inference, without statement level provenance, and with the
* full text index enabled. Another common configuration is a triple store
* with RDFS++ inference, which can be realized by changing the
* {@link AbstractTripleStore.Options#AXIOMS_CLASS} property value and
* possibly enabling {@link Options#TRUTH_MAINTENANCE} depending on whether
* or not you will be incrementally or bulk loading data.
*
* @todo Bundle some sample properties files which will make it easier for
* people to configure a {@link BigdataSail} and make that a command
* line option. The jini configuration approach is nice since it
* allows us to use symbolic constants in the configuration files.
*/
public Properties getProperties() {
final Properties properties = new Properties();
/*
* Override the initial and maximum extent so that they are more suited
* to large data sets.
*/
properties.setProperty(Options.INITIAL_EXTENT,""+200*Bytes.megabyte);
properties.setProperty(Options.MAXIMUM_EXTENT,""+200*Bytes.megabyte);
/*
* Turn off truth maintenance since there are no entailments for the
* ontology.
*/
properties.setProperty(BigdataSail.Options.TRUTH_MAINTENANCE, "false");
/*
* Disable RDFS++ inference since there are no entailments for the
* ontology.
*/
properties.setProperty(Options.AXIOMS_CLASS,NoAxioms.class.getName());
// /*
// * Enable rewrites of high-level queries into native rules (native JOIN
// * execution).
// */
// properties.setProperty(BigdataSail.Options.NATIVE_JOINS, "true");
/*
* Maximum #of subqueries to evaluate concurrently for the 1st join
* dimension for native rules. Zero disables the use of an executor
* service. One forces a single thread, but runs the subquery on the
* executor service. N>1 is concurrent subquery evaluation.
*/
properties.setProperty(Options.MAX_PARALLEL_SUBQUERIES, "0");
/*
* The #of elements that will be materialized at a time from an access
* path. The default is 20,000. However, the relatively selective joins,
* small result sets for the queries, and the use of LIMIT with a FILTER
* (a regex in this case) outside of the LIMIT means that a smaller
* chunk size will cause the JOIN evaluation to do MUCH less work. If
* the default chunk size is used, then queries with LIMIT or OFFSET can
* take much longer to evaluate since they are computing a large #of
* solutions beyond those required to satisify the limit (after applying
* the regex FILTER).
*/
properties.setProperty(Options.CHUNK_CAPACITY, "100");
// properties.setProperty(Options.FULLY_BUFFERED_READ_THRESHOLD, "10000");
/*
* Turn off incremental closure in the DataLoader object.
*/
properties.setProperty(
com.bigdata.rdf.store.DataLoader.Options.CLOSURE,
DataLoader.ClosureEnum.None.toString());
/*
* Turn off commit in the DataLoader object. We do not need to commit
* anything until we have loaded all the data and computed the closure
* over the database.
*/
properties.setProperty(
com.bigdata.rdf.store.DataLoader.Options.COMMIT,
DataLoader.CommitEnum.None.toString());
/*
* Turn off Unicode support for index keys (this is a big win for load
* rates since LUBM does not use Unicode data, but it has very little
* effect on query rates since the only time we generate Unicode sort
* keys is when resolving the Values in the queries to term identifiers
* in the database).
*/
// properties.setProperty(Options.COLLATOR, CollatorEnum.ASCII.toString());
/*
* Leave the full text index enabled since it will be used to answer
* the search queries.
*/
// /*
// * Turn off the full text index.
// */
// properties.setProperty(Options.TEXT_INDEX, "false");
/*
* Turn off statement identifiers (provenance mode).
*/
properties.setProperty(Options.STATEMENT_IDENTIFIERS, "false");
// Triples only.
properties.setProperty(Options.QUADS, "false");
/*
* Turn off justifications (impacts only the load performance, but it is
* a big impact and only required if you will be doing TM). (Actually,
* since there are no entailments this will have no effect).
*/
properties.setProperty(Options.JUSTIFY, "false");
return properties;
}
/**
* Return the properties associated with the {@link AbstractTripleStore}
* backing the {@link BigdataSail}.
*
* @param sail
* The sail.
*
* @return The persistent properties.
*/
public Properties getProperties(final BigdataSail sail) {
return getProperties(sail.getDatabase().getIndexManager(), sail
.getDatabase().getNamespace());
}
/**
* Return the properties associated with the given namespace.
*
* @param indexManager
* Use {@link BigdataSail#getDatabase()} and then
* {@link AbstractTripleStore#getIndexManager()}.
* @param namespace
* The namespace of a locatable resource such as an
* {@link AbstractTripleStore}, {@link SPORelation} or
* {@link LexiconRelation}.
*
* @return The persistent properties.
*/
protected Properties getProperties(IIndexManager indexManager,
String namespace) {
Map map = indexManager.getGlobalRowStore().read(
RelationSchema.INSTANCE, namespace);
Properties properties = new Properties();
properties.putAll(map);
return properties;
}
/**
* Update properties for the SAIL. This will overwrite any properties having
* the same name with their new values. Properties that are not overwritten
* will remain visible. A property can be deleted by specifying a
* null
value.
*
* Note: this changes the persistent property values associated with the
* SAIL. It DOES NOT change the properties associated with the given
* instance. You MUST re-open the SAIL in order for the new properties to be
* in effect.
*
* Note: While many property values can be changed dynamically, some may
* not. In particular, the properties that effect the way in which the keys
* for the indices are generated as stored within the indices themselves.
* Among other things this ensures that Unicode configuration options are
* applied uniformly when an is accessed by any host in a federation.
*
* @param sail
* The SAIL.
* @param properties
* The properties.
*
* @return The post-modification properties.
*/
public Properties setProperties(final BigdataSail sail,
final Properties properties) {
return setProperties(//
sail.getDatabase().getIndexManager(), //
sail.getDatabase().getNamespace(), //
properties//
);
}
/**
*
* @param indexManager
* @param namespace
* @param properties
* @return The post-modification properties.
*/
protected Properties setProperties(final IIndexManager indexManager,
final String namespace, final Properties properties) {
/*
* Convert the Properties to a Map.
*/
final Map map = new HashMap();
{
// set the namespace (primary key).
map.put(RelationSchema.NAMESPACE, namespace);
final Enumeration e = properties.propertyNames();
while (e.hasMoreElements()) {
final Object key = e.nextElement();
final String name = (String) key;
map.put(name, properties.getProperty(name));
}
}
/*
* Write the map on the row store. This will overwrite any entries for
* the same properties. Properties that are not overwritten will remain
* visible.
*/
final Properties p2 = new Properties();
p2.putAll(indexManager.getGlobalRowStore().write(
RelationSchema.INSTANCE, map));
if (indexManager instanceof IJournal) {
// make the changes restart safe (not required for federation).
((Journal) indexManager).commit();
}
// return the post-modification properties.
return p2;
}
protected static void showProperties(final Properties p) {
// sorted collection.
final TreeMap map = new TreeMap();
// put into alpha order.
for(Map.Entry
* Note: The LTS (local triple store) mode is inferred when the filename is
* a .properties
file. The JiniFederation (JDS) mode is
* inferred when the filename is a .config
file. If neither of
* those file extensions is used, then you must specify the either LTS or
* JDS explicitly.
*
* Note: The namespace identifies which triple store you are
* accessing and defaults to kb
.
*
* Note: The timestamp identifies which commit point you are
* accessing and defaults to the {@link ITx#UNISOLATED} view, which can also
* be specified as {@value#ITx#UNISOLATED}).
*
* Note: The properties is a file containing property overrides to be
* applied to the kb.
*
* @param args
* filename ((LTS|JDS ((namespace
* (timestamp)))properties)
*
* @throws SailException
* @throws ConfigurationException
* @throws IOException
*/
public static void main(final String[] args) throws SailException, IOException {
if (args.length == 0) {
System.err.println("usage: filename (LTS|JDS (namespace (timestamp)))");
System.exit(1);
}
final String filename = args[0];
final File file = new File(filename);
final FederationEnum fedType;
if (args.length > 1) {
fedType = FederationEnum.valueOf(args[1]);
} else if (filename.endsWith(".properties")) {
fedType = FederationEnum.LTS;
} else if (filename.endsWith(".config")) {
fedType = FederationEnum.JDS;
} else {
fedType = null;
System.err.println("Must specify the federation type: " + filename);
System.exit(1);
}
switch(fedType) {
case LTS:
case JDS:
if (file.isDirectory()) {
System.err.println(fedType
+ " requires plain file, not a directory: dir="
+ filename);
System.exit(1);
}
break;
// case LDS:
// case EDS:
// if (!file.isDirectory()) {
//
// System.err.println(fedType
// + " requires a directory, not a plain file: file="
// + filename);
//
// System.exit(1);
//
// }
// break;
default:
throw new AssertionError();
}
final String namespace = args.length > 2 ? args[2] : "kb";
final long timestamp = args.length > 3 ? Long.valueOf(args[3])
: ITx.UNISOLATED;
final File propertyFile = args.length > 4 ? new File(args[4]) : null;
if (propertyFile != null && !propertyFile.exists()) {
System.err.println("No such file: "+propertyFile);
System.exit(1);
}
final BigdataSailHelper helper = new BigdataSailHelper();
System.out.println("filename: " + filename);
final BigdataSail sail;
// Note: iff we need to shutdown the federation in finally{}
final AbstractFederation fed;
// // Note: iff JDS.
// final JiniServicesHelper jiniServicesHelper;
switch (fedType) {
case LTS:
sail = helper.getSail(filename, namespace, timestamp);
fed = null;
// jiniServicesHelper = null;
break;
// case LDS: {
//
//// jiniServicesHelper = null;
//
// final Properties properties = new Properties();
//
// properties.setProperty(
// com.bigdata.service.LocalDataServiceClient.Options.DATA_DIR,
// filename);
//
// // disable platform statistics collection.
// properties.setProperty(
// LocalDataServiceClient.Options.COLLECT_PLATFORM_STATISTICS, "false");
//
// fed = new LocalDataServiceClient(properties).connect();
//
// sail = helper.getSail(fed, namespace, timestamp);
//
// break;
//
// }
// case EDS: {
//
//// jiniServicesHelper = null;
//
// final Properties properties = new Properties();
//
// properties.setProperty(
// com.bigdata.service.EmbeddedClient.Options.DATA_DIR,
// filename);
//
// // disable platform statistics collection.
// properties.setProperty(
// EmbeddedClient.Options.COLLECT_PLATFORM_STATISTICS, "false");
//
// fed = new EmbeddedClient(properties).connect();
//
// sail = helper.getSail(fed, namespace, timestamp);
//
// break;
//
// }
case JDS:
// Should be a jini configuration file.
fed = (AbstractFederation) ScaleOutClientFactory.getJiniClient(new String[] { args[0] }).connect();
sail = helper.getSail(fed, namespace, timestamp);
break;
default:
throw new AssertionError();
}
try {
sail.initialize();
System.out.println("\npre-modification properties::");
showProperties(helper.getProperties(sail));
showLexiconIndexDetails(sail);
showSPOIndexDetails(sail);
// change some property values.
if(true) {
final Properties p = new Properties();
if (propertyFile != null) {
System.out.println("reading new properties from file::");
final InputStream is = new BufferedInputStream(
new FileInputStream(propertyFile));
try {
p.load(is);
} finally {
is.close();
}
p.store(System.out, "Will apply properties::");
} else {
System.out.println("reading new properties from stdin::");
p.load(System.in);
}
// p.setProperty(Options.NESTED_SUBQUERY, "false");
// p.setProperty(Options.CHUNK_CAPACITY, "100");
// p.setProperty(Options.FULLY_BUFFERED_READ_THRESHOLD, "1000");
// p.setProperty(Options.MAX_PARALLEL_SUBQUERIES, "0");
// p.setProperty(Options.INCLUDE_INFERRED, "true");
// p.setProperty(Options.QUERY_TIME_EXPANDER, "false");
System.out.println("\npost-modification properties::");
showProperties(helper.setProperties(sail, p));
}
} finally {
sail.shutDown();
if( fed != null) {
fed.shutdownNow();
}
// if (jiniServicesHelper != null) {
//
// jiniServicesHelper.shutdown();
//
// }
}
}
}