// com.bigdata.service.fts.FTS (Maven / Gradle / Ivy)
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.service.fts;
import org.openrdf.model.URI;
import org.openrdf.model.impl.URIImpl;
import com.bigdata.rdf.store.BDS;
/**
*
* A vocabulary for the bigdata external full text search facility.
* The FTS search may be used to combine text search and graph search,
* just like the {@link BDS} - the basic difference is that this search is
* going against an *external* service, whereas {@link BDS} is querying the
* internal fulltext index (which is kept in synch along the lines). Note that,
* in contrast to the {@link BDS}, such an external search service is not
* necessarily tightly coupled/in synch with the underlying RDF store. On the
* contrary, it may be a loose, uncoupled document collection that is queried
* as part of a SPARQL query (implementing a hybrid search paradigm) and returns
* arbitrary results. These results may be valid URIs (that could be joined
* with parts of the document later on), but as well simple literals or the
* score value, to be displayed as part of the underlying query.
*
*
* Note that the interface is generic in the sense that we could add generic full
* text search services, by just appending {@link EndpointType}s and adding
* implementations for them. For the beginning, we start with a Solr index.
*
*
*
* Low-latency, user facing search applications may be created by slicing the
* external full text search results and feeding them incrementally into
* SPARQL queries. This approach allows the application to manage the cost
* of the SPARQL query by bounding the input. If necessary, additional results
* can be fed into the query.
*
*
*
* Example:
*
*
* PREFIX fts: &lt;http://www.bigdata.com/rdf/fts#&gt;
* SELECT ?res ?score ?snippet WHERE {
* ?res fts:search "blue !red".
* ?res fts:endpoint "http://my.external.solr.endpoint:5656" .
* ?res fts:endpointType "Solr" .
* ?res fts:params "defType=dismax&bf=uses^50" .
* ?res fts:searchResultType "URI" .
* ?res fts:score ?score .
* ?res fts:snippet ?snippet .
* }
*
*
* The query returns the result matches (converted to URIs), including the
* score and sample snippets for the matches.
*
*
*
* Here's another example using a SERVICE keyword directly:
*
*
* PREFIX fts: &lt;http://www.bigdata.com/rdf/fts#&gt;
* SELECT *
* WHERE {
* ?res rdfs:label ?label .
* SERVICE &lt;http://www.bigdata.com/rdf/fts#search&gt; {
* ?res fts:search "blue !red".
* ?res fts:endpoint "http://my.external.solr.endpoint:5656" .
* }
* hint:Prior hint:runLast "true" .
* }
*
*
*
* @author Michael Schmidt
* @version $Id$
*/
public interface FTS {

    /**
     * Property names (and their system defaults) that configure the external
     * full text search. Every property name is derived from the fully
     * qualified name of {@link FTS} plus a suffix such as
     * <code>.defaultEndpoint</code>.
     */
    interface Options {

        /**
         * Option that may be set to specify a default for {@link FTS#ENDPOINT},
         * to be used in fulltext search whenever the {@link FTS#ENDPOINT} is
         * left unspecified. When not set, the {@link FTS#ENDPOINT} is
         * mandatory in FTS SERVICE queries; when set, the magic vocabulary
         * {@link FTS#ENDPOINT} may be used to override the default.
         */
        String FTS_ENDPOINT = FTS.class.getName() + ".defaultEndpoint";

        /**
         * Option that may be set to specify a default for
         * {@link FTS#ENDPOINT_TYPE}, to be used in fulltext search whenever
         * the {@link FTS#ENDPOINT_TYPE} is left unspecified. When set, the
         * magic vocabulary {@link FTS#ENDPOINT_TYPE} may be used to override
         * the default.
         */
        String FTS_ENDPOINT_TYPE = FTS.class.getName() + ".defaultEndpointType";

        /**
         * System default for {@link #FTS_ENDPOINT_TYPE}:
         * {@link EndpointType#SOLR}.
         */
        EndpointType DEFAULT_ENDPOINT_TYPE = EndpointType.SOLR;

        /**
         * Option that may be set to specify a default for
         * {@link FTS#SEARCH_RESULT_TYPE}, to be used in fulltext search
         * whenever the {@link FTS#SEARCH_RESULT_TYPE} is left unspecified.
         * When set, the magic vocabulary {@link FTS#SEARCH_RESULT_TYPE} may be
         * used to override the default.
         */
        String FTS_SEARCH_RESULT_TYPE = FTS.class.getName() + ".defaultSearchResultType";

        /**
         * System default for {@link #FTS_SEARCH_RESULT_TYPE}:
         * {@link SearchResultType#LITERAL}.
         */
        SearchResultType DEFAULT_SEARCH_RESULT_TYPE = SearchResultType.LITERAL;

        /**
         * Option that may be set to specify a default for {@link FTS#TIMEOUT},
         * to be used in fulltext search whenever the {@link FTS#TIMEOUT} is
         * left unspecified. When set, the magic vocabulary
         * {@link FTS#TIMEOUT} may be used to override the default.
         */
        String FTS_TIMEOUT = FTS.class.getName() + ".defaultTimeout";

        /**
         * System default for {@link #FTS_TIMEOUT}:
         * {@link Integer#MAX_VALUE} ({@value}).
         */
        int DEFAULT_TIMEOUT = Integer.MAX_VALUE;

        /**
         * Option that may be set to specify a default for {@link FTS#PARAMS},
         * to be used in fulltext search whenever the {@link FTS#PARAMS} is
         * left unspecified. When set, the vocabulary
         * {@link FTS#PARAMS} may be used to override the default.
         */
        String FTS_PARAMS = FTS.class.getName() + ".defaultParams";

        /**
         * System default for {@link #FTS_PARAMS}: the empty string.
         */
        String DEFAULT_PARAMS = "";

        /**
         * Option that may be set to specify a default for
         * {@link FTS#SEARCH_FIELD}, to be used in fulltext search whenever the
         * {@link FTS#SEARCH_FIELD} is left unspecified. When set, the magic
         * vocabulary {@link FTS#SEARCH_FIELD} may be used to override the
         * default.
         */
        String FTS_SEARCH_FIELD = FTS.class.getName() + ".defaultSearchField";

        /**
         * System default for {@link #FTS_SEARCH_FIELD}: {@value}, which is
         * the Solr default.
         */
        String DEFAULT_SEARCH_FIELD = "id";

        /**
         * Option that may be set to specify a default for
         * {@link FTS#SCORE_FIELD}, to be used in fulltext search whenever the
         * {@link FTS#SCORE_FIELD} is left unspecified. When set, the magic
         * vocabulary {@link FTS#SCORE_FIELD} may be used to override the
         * default.
         */
        String FTS_SCORE_FIELD = FTS.class.getName() + ".defaultScoreField";

        /**
         * There is no system default for {@link #FTS_SCORE_FIELD}; this
         * constant is <code>null</code>.
         */
        String DEFAULT_SCORE_FIELD = null;

        /**
         * Option that may be set to specify a default for
         * {@link FTS#SNIPPET_FIELD}, to be used in fulltext search whenever
         * the {@link FTS#SNIPPET_FIELD} is left unspecified. When set, the
         * magic vocabulary {@link FTS#SNIPPET_FIELD} may be used to override
         * the default.
         */
        String FTS_SNIPPET_FIELD = FTS.class.getName() + ".defaultSnippetField";

        /**
         * There is no system default for {@link #FTS_SNIPPET_FIELD}; this
         * constant is <code>null</code>.
         */
        String DEFAULT_SNIPPET_FIELD = null;
    }

    /**
     * The namespace used for magic search predicates.
     */
    String NAMESPACE = "http://www.bigdata.com/rdf/fts#";

    /**
     * The type of the FTS. For now, only Solr is implemented/supported.
     */
    enum EndpointType {
        /** A Solr endpoint. */
        SOLR
    }

    /**
     * Target type for extracted entities, determining whether they are
     * parsed into a literal or interpreted as a URI.
     */
    enum SearchResultType {
        /** Extracted values are interpreted as URIs. */
        URI,
        /** Extracted values are parsed into literals. */
        LITERAL
    }

    /**
     * The name of a magic predicate recognized in SPARQL queries when it occurs
     * in statement patterns such as:
     *
     * <pre>
     * ( s?, fts:search, "scale-out RDF triplestore" )
     * </pre>
     *
     * The value MUST be bound and MUST be a literal; it points to the Solr
     * search string.
     * <p>
     * The subject MUST NOT be bound.
     * <p>
     * This expression will evaluate to a set of bindings for the subject
     * position corresponding to the indexed literals matching any of the terms
     * obtained when the literal was tokenized.
     * <p>
     * You may want to use {@link FTS#SEARCH_FIELD} to fix the result field
     * that is mapped to the result variable.
     */
    URI SEARCH = new URIImpl(NAMESPACE + "search");

    /**
     * Magic predicate used to specify the Solr endpoint to be queried. If not
     * provided, the default endpoint as specified in the configuration
     * (see {@link Options#FTS_ENDPOINT}) is used.
     *
     * <pre>
     * select ?s
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:endpoint "http://my.solr.endpoint:1012/solrIndex/" .
     * }
     * </pre>
     *
     * The endpoint must be provided as a literal, including protocol, IP or
     * hostname, and port to be queried.
     */
    URI ENDPOINT = new URIImpl(NAMESPACE + "endpoint");

    /**
     * Magic predicate used to specify the endpoint type, such as a Solr
     * endpoint or any other external full text search service.
     *
     * <pre>
     * select ?s
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:endpoint "http://my.solr.endpoint:1012/solrIndex/" .
     *   ?s fts:endpointType "SOLR" .
     * }
     * </pre>
     *
     * The endpoint type must be provided as a literal, according to the
     * {@link EndpointType} enum values.
     */
    URI ENDPOINT_TYPE = new URIImpl(NAMESPACE + "endpointType");

    /**
     * Magic predicate used to specify full text search parameters to be
     * applied when executing the search.
     *
     * <pre>
     * select ?s
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:params "defType=dismax&amp;bf=uses^5" .
     * }
     * </pre>
     *
     * The params need to be a correct string according to Solr specifications
     * and it must be provided as a literal.
     */
    URI PARAMS = new URIImpl(NAMESPACE + "params");

    /**
     * Magic predicate used to specify the type of the values stored in the Solr
     * field or fields from which data is extracted (the latter being
     * specified as part of the {@link #PARAMS} predicate). If there are
     * multiple output fields, all of them will be included and the type
     * specified refers to all of them. With type "URI" the field value is
     * converted into a URI; if conversion fails, the value is ignored.
     *
     * <pre>
     * select ?s
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:searchResultType "LITERAL" .
     * }
     * </pre>
     *
     * Allowed values are "URI" and "LITERAL" (see {@link SearchResultType});
     * if none of these values is provided, the
     * {@link Options#DEFAULT_SEARCH_RESULT_TYPE} will be used.
     */
    URI SEARCH_RESULT_TYPE = new URIImpl(NAMESPACE + "searchResultType");

    /**
     * Magic predicate used to set a deadline in milliseconds on the external
     * full text search (default: {@value Options#DEFAULT_TIMEOUT}). Use in
     * conjunction with {@link #SEARCH} as follows:
     *
     * <pre>
     * select ?s
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:timeout "5000" .
     * }
     * </pre>
     *
     * Timeout specified in milliseconds, as literal. If not specified or not
     * a valid integer, the {@link Options#DEFAULT_TIMEOUT}
     * ({@value Options#DEFAULT_TIMEOUT}) is used.
     */
    URI TIMEOUT = new URIImpl(NAMESPACE + "timeout");

    /**
     * Magic predicate to indicate the output variable in which the score
     * of matches will be saved.
     *
     * <pre>
     * select ?s ?score
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:score ?score .
     * }
     * </pre>
     *
     * The referenced variable must not be used somewhere else in the scope.
     * It will be bound to an xsd:double typed literal indicating the score
     * for the match.
     * <p>
     * In order to use this, you also need to set {@link FTS#SCORE_FIELD}
     * to fix the result field that is mapped to the score variable.
     */
    URI SCORE = new URIImpl(NAMESPACE + "score");

    /**
     * Magic predicate to indicate the output variable in which a sample
     * snippet for matches will be saved.
     *
     * <pre>
     * select ?s ?snippet
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:snippet ?snippet .
     * }
     * </pre>
     *
     * The referenced variable must not be used somewhere else in the scope.
     * It will be bound to an untyped (text) literal.
     * <p>
     * In order to use this, you need to set {@link FTS#SNIPPET_FIELD} to fix
     * the result field that is mapped to the snippet variable.
     */
    URI SNIPPET = new URIImpl(NAMESPACE + "snippet");

    /**
     * Magic predicate to indicate the external search service result field
     * which will be stored in the result variable.
     *
     * <pre>
     * select ?s
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:searchField "id" .
     * }
     * </pre>
     */
    URI SEARCH_FIELD = new URIImpl(NAMESPACE + "searchField");

    /**
     * Magic predicate to indicate the external search service field whose
     * value will be bound to the snippet variable (see {@link FTS#SNIPPET}).
     *
     * <pre>
     * select ?s
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:snippetField "value" .
     * }
     * </pre>
     */
    URI SNIPPET_FIELD = new URIImpl(NAMESPACE + "snippetField");

    /**
     * Magic predicate to indicate the external search service field whose
     * value will be bound to the score variable (see {@link FTS#SCORE}).
     *
     * <pre>
     * select ?s
     * where {
     *   ?s fts:search "scale-out RDF triplestore" .
     *   ?s fts:scoreField "score" .
     * }
     * </pre>
     */
    URI SCORE_FIELD = new URIImpl(NAMESPACE + "scoreField");
}