/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.rdf.sail.webapp;

import java.io.IOException;
import java.io.OutputStream;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.UUID;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.log4j.Logger;
import org.openrdf.model.Graph;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.impl.GraphImpl;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFWriter;
import org.openrdf.rio.RDFWriterRegistry;

import com.bigdata.bop.engine.QueryEngine;
import com.bigdata.bop.fed.QueryEngineFactory;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.rdf.sparql.ast.cache.CacheConnectionFactory;
import com.bigdata.rdf.sparql.ast.cache.ICacheConnection;
import com.bigdata.rdf.sparql.ast.cache.IDescribeCache;
import com.bigdata.rdf.store.AbstractTripleStore;

/**
 * A maintained cache for DESCRIBE of URIs.
 * <p>
 * In general, URIs may be identified either by a bare URI (in which case the
 * URI must be some extension of the SPARQL endpoint path) or by a SPARQL
 * DESCRIBE query (<code>endpoint?query=DESCRIBE uri(,uri)*</code>).
 * <p>
 * The {@link DescribeCacheServlet} will recognize and perform the DESCRIBE of
 * cached resources where those resources are attached to a request attribute.
 * 
 * TODO Http cache control. Different strategies can make sense depending on
 * the scalability of the application and the tolerance for stale data. We can
 * use an expire for improved scalability with caching into the network, but
 * invalidation notices can not propagate beyond the DESCRIBE cache to the
 * network. We can use E-Tags with Must-Validate to provide timely
 * invalidation. If we support faceting (by schema or source) then we need to
 * provide E-Tags for each schema/source so the client can inspect/require
 * certain facets.
 * 
 * TODO VoID for forward/reverse link set and attribute sketch.
 * 
 * TODO Conneg for the actual representation, but internally use an efficient
 * representation that can provide summaries (a SKETCH) through a jump table
 * with statistics for each predicate type.
 * 
 * TODO Options for DESCRIBE filters by schema to get only properties and
 * links for a given schema namespace?
 * 
 * TODO Describe caching in an open web context requires maintaining metadata
 * about the web resources that have been queried for a given URI. This should
 * include at least the URI itself and could include well known aggregators
 * that might have data for that URI.
 * 
 * TODO Take advantage of the known materialization performed by a DESCRIBE
 * query when running queries (materialized star-join). Also, store IVs in the
 * Graph as well as Values. We need both to do efficient star-joins (enter by
 * IV and have IV on output).
 * 
 * TODO Hash partitioned DESCRIBE fabric. The partitioned map is easy enough
 * and could be installed at each DS, CS, etc. node. However, the distributed
 * invalidation scheme is slightly trickier. We would need to install this
 * servlet at each endpoint exposed to mutation, which is not actually all
 * that difficult. There needs to be configuration information for either
 * each namespace or for the webapp that specifies how to locate and maintain
 * the cache.
 * 
 * TODO Expose (and declare through VoID) a simple URI lookup service that is
 * powered by this cache and turns into a DESCRIBE query if there is a cache
 * miss (or turn it into a DESCRIBE query and let that route to the cache
 * first). VoID has the concept of this kind of "lookup" service.
 * 
 * @see DESCRIBE CACHE
 * 
 * @author Bryan Thompson
 */
public class DescribeCacheServlet extends BigdataRDFServlet {

    static private final transient Logger log = Logger
            .getLogger(DescribeCacheServlet.class);

    private static final long serialVersionUID = 1L;

    /**
     * The name of a request attribute whose bound value is a {@link Set} of
     * {@link URI}s to be described by the {@link DescribeCacheServlet}.
     */
    static final transient String ATTR_DESCRIBE_URIS = "describeUris";

    public DescribeCacheServlet() {

    }

    /**
     * GET returns the DESCRIBE of the resource.
     * 
     * FIXME DESCRIBE: TX ISOLATION for the request, but ensure that the cache
     * is not negatively affected by that isolation (i.e., how does the cache
     * index based on the time/tx view).
     */
    @Override
    protected void doGet(final HttpServletRequest req,
            final HttpServletResponse resp) throws IOException {

        /*
         * 1. Check the request path for a linked data GET on a resource. If
         * found, then add that URI to the request attribute.
         * 
         * 2. Else, if the request is a SPARQL DESCRIBE, then extract the URIs
         * to be described and attach them as request attributes (on a set). A
         * single Graph will be returned in this case. The client will have to
         * inspect the Graph to decide which URIs were found and which were
         * not.
         * 
         * 3. Check the request attribute for a set of URIs to be DESCRIBEd.
         */

        @SuppressWarnings("unchecked")
        final Set<URI> externalURIs = (Set<URI>) req
                .getAttribute(ATTR_DESCRIBE_URIS);

        if (externalURIs == null) {

            buildAndCommitResponse(resp, HTTP_BADREQUEST, MIME_TEXT_PLAIN,
                    "Request attribute not found: " + ATTR_DESCRIBE_URIS);

            return;

        }

        final int nvalues = externalURIs.size();

        if (nvalues == 0) {

            buildAndCommitResponse(resp, HTTP_BADREQUEST, MIME_TEXT_PLAIN,
                    "No URIs");

            return;

        }

        final BigdataRDFContext context = getBigdataRDFContext();

        final QueryEngine queryEngine = QueryEngineFactory.getInstance()
                .getQueryController(context.getIndexManager());

        // Iff enabled.
        final ICacheConnection cacheConn = CacheConnectionFactory
                .getExistingCacheConnection(queryEngine);

        final String namespace = getNamespace(req);

        final long timestamp = getTimestamp(req);

        final IDescribeCache describeCache = cacheConn == null ? null
                : cacheConn.getDescribeCache(namespace, timestamp);

        if (describeCache == null) {

            /*
             * DESCRIBE cache is not enabled.
             * 
             * Note: DO NOT commit the response. The DESCRIBE of the resource
             * can be generated by running a SPARQL query.
             */

            resp.setStatus(HTTP_NOTFOUND);

            return;

        }

        final AbstractTripleStore tripleStore = context.getTripleStore(
                namespace, timestamp);

        if (tripleStore == null) {

            // There is no such triple/quad store instance.
            buildAndCommitNamespaceNotFoundResponse(req, resp);

            return;

        }

        /*
         * Ensure that the URIs are BigdataURIs for this namespace.
         */
        final Set<BigdataURI> internalURIs = new LinkedHashSet<BigdataURI>();
        {

            final BigdataValueFactory valueFactory = tripleStore
                    .getValueFactory();

            for (URI uri : externalURIs) {

                internalURIs.add(valueFactory.asValue(uri));

            }

        }

        /*
         * Resolve URIs to IVs.
         */
        {

            final BigdataValue[] values = internalURIs
                    .toArray(new BigdataValue[nvalues]);

            final long numNotFound = tripleStore.getLexiconRelation()
                    .addTerms(values, nvalues, true/* readOnly */);

            if (log.isInfoEnabled())
                log.info("Not found: " + numNotFound + " out of "
                        + values.length);

        }

        /*
         * Build up the response graph.
         * 
         * TODO If the describe would be very large, then provide the summary
         * rather than delivering all the data. This will require a blobs
         * aware handling of the Values in the HTree.
         * 
         * TODO Support SKETCH (VoID request) option here.
         */
        Graph g = null;
        {

            for (BigdataURI uri : internalURIs) {

                final IV<?, ?> iv = uri.getIV();

                if (iv == null) {

                    // Unknown to the lexicon, hence not in the cache.
                    continue;

                }

                final Graph x = describeCache.lookup(iv);

                if (x == null) {

                    // No cache entry for this resource.
                    continue;

                }

                if (g == null) {

                    if (nvalues == 1) {

                        // Only describing ONE (1) resource.
                        g = x;

                        continue;

                    }

                    // Collect the DESCRIBE of all graphs.
                    g = new GraphImpl();

                }

                // Combine the resource descriptions together.
                g.addAll(x);

            }

            if (g == null) {

                /*
                 * None of the URIs was found.
                 * 
                 * Note: We can only send the NOT_FOUND status and commit the
                 * response if the cache is complete. Otherwise, we might set
                 * the status code but we SHOULD NOT commit the response since
                 * the DESCRIBE of the resource can be generated by running a
                 * SPARQL query.
                 */

                // Not in the cache. Note: Response is NOT committed.
                resp.setStatus(HTTP_NOTFOUND);
                // buildResponse(resp, HTTP_NOTFOUND, MIME_TEXT_PLAIN);

                return;

            }

        }

        /*
         * CONNEG
         */
        final RDFFormat format;
        {

            /*
             * CONNEG for the MIME type.
             * 
             * Note: An attempt to CONNEG for a MIME type which can not be
             * used with a given type of query will result in a response
             * using a default MIME Type for that query.
             */

            final String acceptStr = req.getHeader("Accept");

            final ConnegUtil util = new ConnegUtil(acceptStr);

            format = util.getRDFFormat(RDFFormat.RDFXML);

        }

        /*
         * Generate the response.
         */
        try {

            final String mimeType = format.getDefaultMIMEType();

            resp.setContentType(mimeType);

            if (isAttachment(mimeType)) {
                /*
                 * Mark this as an attachment (rather than inline). This is
                 * just a hint to the user agent. How the user agent handles
                 * this hint is up to it.
                 */
                resp.setHeader("Content-disposition",
                        "attachment; filename=query" + UUID.randomUUID()
                                + "." + format.getDefaultFileExtension());
            }

            if (format.hasCharset()) {

                // Note: Binary encodings do not specify a charset.
                resp.setCharacterEncoding(format.getCharset().name());

            }

            final OutputStream os = resp.getOutputStream();

            final RDFWriter w = RDFWriterRegistry.getInstance().get(format)
                    .getWriter(os);

            w.startRDF();

            for (Statement s : g)
                w.handleStatement(s);

            w.endRDF();

            os.flush();

        } catch (Throwable e) {

            BigdataRDFServlet.launderThrowable(e, resp, "DESCRIBE: uris="
                    + internalURIs);

        }

    }

}
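The servlet is driven entirely by the ATTR_DESCRIBE_URIS request attribute, so it is normally reached by an internal forward rather than a direct client request. The sketch below illustrates that wiring under stated assumptions: the DescribeFrontEndServlet class, its "uri" query parameter, and the "/describe" servlet mapping are invented for illustration and are not part of the Blazegraph API; the attribute name appears as a string literal because ATTR_DESCRIBE_URIS is package-private.

// Hypothetical front end: collects URIs and forwards to the cache servlet.
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Set;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.openrdf.model.URI;
import org.openrdf.model.impl.URIImpl;

public class DescribeFrontEndServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;

    @Override
    protected void doGet(final HttpServletRequest req,
            final HttpServletResponse resp) throws ServletException,
            IOException {

        // Assumed convention: one or more "uri" query parameters name the
        // resources to be described.
        final String[] vals = req.getParameterValues("uri");

        if (vals == null || vals.length == 0) {
            resp.sendError(HttpServletResponse.SC_BAD_REQUEST, "No URIs");
            return;
        }

        final Set<URI> uris = new LinkedHashSet<URI>();

        for (String s : vals) {
            uris.add(new URIImpl(s));
        }

        // The attribute read by DescribeCacheServlet (ATTR_DESCRIBE_URIS).
        req.setAttribute("describeUris", uris);

        // Forward to the cache servlet; the "/describe" mapping is assumed.
        req.getRequestDispatcher("/describe").forward(req, resp);

    }

}

Using a forward (rather than an include) lets the cache servlet set the status code and content type itself; since it only sets 404 without committing the response on a cache miss, a front end like this could still fall back to running a SPARQL DESCRIBE afterwards.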
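Because the servlet returns one combined graph even when several URIs were requested, and answers a cache miss with an uncommitted 404, a client must check the status code and then inspect the graph for each requested subject. Below is a minimal client sketch using the openrdf Rio parser; the endpoint URL, port, and query parameter are placeholders, not documented Blazegraph paths.

// Hypothetical client: fetches a cached DESCRIBE and inspects the graph.
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

import org.openrdf.model.Graph;
import org.openrdf.model.impl.GraphImpl;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.Rio;
import org.openrdf.rio.helpers.StatementCollector;

public class DescribeCacheClient {

    public static void main(final String[] args) throws Exception {

        // Placeholder endpoint; the actual mapping is deployment specific.
        final URL url = new URL(
                "http://localhost:9999/bigdata/describe?uri=http://example.org/a");

        final HttpURLConnection conn = (HttpURLConnection) url
                .openConnection();

        // CONNEG: ask for RDF/XML, the servlet's default format.
        conn.setRequestProperty("Accept",
                RDFFormat.RDFXML.getDefaultMIMEType());

        if (conn.getResponseCode() == HttpURLConnection.HTTP_NOT_FOUND) {
            // Cache miss or cache disabled: fall back to a SPARQL DESCRIBE.
            return;
        }

        // Collect the response statements into a Graph.
        final Graph g = new GraphImpl();

        final InputStream is = conn.getInputStream();
        try {
            final RDFParser parser = Rio.createParser(RDFFormat.RDFXML);
            parser.setRDFHandler(new StatementCollector(g));
            parser.parse(is, url.toString());
        } finally {
            is.close();
        }

        // Inspect the graph to decide which URIs were actually described.
        final boolean found = g.match(new URIImpl("http://example.org/a"),
                null, null).hasNext();

        System.out.println("described: " + found);

    }

}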