All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.service.fts.FTS Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/
package com.bigdata.service.fts;

import org.openrdf.model.URI;
import org.openrdf.model.impl.URIImpl;

import com.bigdata.rdf.store.BDS;


/**
 * 

* A vocabulary for the bigdata external full text search facility. * The FTS search may be used to combine text search and graph search, * just like the {@link BDS} - the basic difference is that this search is * going against an *external* service, whereas {@link BDS} is querying the * internal fulltext index (which is kept in synch along the lines). Note that, * in contrast to the {@link BDS}, such an external search service is not * necessarily tightly coupled/in synch with the underlying RDF store. In * contrary, it may be a loose, uncoupled document collection that is queried * as part of a SPARQL query (implementing a hybrid search paradigm) and returns * arbitrary results. These results may be valid URIs (that could be joined * with parts of the document later on), but as well simple literals or the * score value, to be displayed as part of the underlying query. * *

* Note that interface is generic in the sense that we could add generic full * text search services, by just appending {@link EndpointType}s and adding * implementations for them. For the beginning, we start with a Solr index. *

* *

* Low-latency, user facing search applications may be created by slicing the * external full text search results and feeding them incrementally into * SPARQL queries. This approach allows the application to manage the cost * of the SPARQL query by bounding the input. If necessary, additional results * can be feed into the query. *

* *

* Example: * *

 * PREFIX fts: 
 * SELECT ?res ?score ?snippet WHERE {
 *   ?res fts:search "blue !red".
 *   ?res fts:endpoint  "http://my.external.solr.endpoint:5656" .
 *   ?res fts:endpointType  "Solr" .
 *   ?res fts:params "defType=dismax&bf=uses^50" .
 *   ?res fts:searchResultType "URI" .
 *   ?res fts:score ?score .
 *   ?res fts:snippet ?snippet . 
 * }
 * 
* * The query returns the result matches (converted to URIs), including the * score and sample snippets for the matches. *

* *

* Here's another example using a SERVICE keyword directly: * *

 * PREFIX fts: 
 * SELECT *
 * WHERE {
 *   ?res rdfs:label ?label .
 *   SERVICE  {
 *     ?res fts:search "blue !red".
 *     ?res fts:endpoint  "http://my.external.solr.endpoint:5656" .
 *   }
 *   hint:Prior hint:runLast "true" .
 * }
 * 

* * * @author Michael Schmidt * @version $Id$ */ public interface FTS { public interface Options { /** * Option that may be set to specify a default for {@link FTS#ENDPOINT}, * to be used in fulltext search whenever the {@link FTS#ENDPOINT} is * left unspecified. When not set, the {@link FTS#ENDPOINT} is * mandatory in FTS SERVICE queries; when set, the magic vocabulary * {@link FTS#ENDPOINT} may be used to override the default. */ String FTS_ENDPOINT = FTS.class.getName() + ".defaultEndpoint"; /** * Option that may be set to specify a default for {@link FTS#ENDPOINT_TYPE}, * to be used in fulltext search whenever the {@link FTS#ENDPOINT_TYPE} is * left unspecified. When set, the magic vocabulary * {@link FTS#ENDPOINT_TYPE} may be used to override the default. */ String FTS_ENDPOINT_TYPE = FTS.class.getName() + ".defaultEndpointType"; EndpointType DEFAULT_ENDPOINT_TYPE = EndpointType.SOLR; /** * Option that may be set to specify a default for {@link FTS#SEARCH_RESULT_TYPE}, * to be used in fulltext search whenever the {@link FTS#SEARCH_RESULT_TYPE} is * left unspecified. When set, the magic vocabulary * {@link FTS#SEARCH_RESULT_TYPE} may be used to override the default. */ String FTS_SEARCH_RESULT_TYPE = FTS.class.getName() + ".defaultSearchResultType"; SearchResultType DEFAULT_SEARCH_RESULT_TYPE = SearchResultType.LITERAL; /** * Option that may be set to specify a default for {@link FTS#FTS_TIMEOUT}, * to be used in fulltext search whenever the {@link FTS#FTS_TIMEOUT} is * left unspecified. When set, the magic vocabulary * {@link FTS#FTS_TIMEOUT} may be used to override the default. */ String FTS_TIMEOUT = FTS.class.getName() + ".defaultTimeout"; final int DEFAULT_TIMEOUT = Integer.MAX_VALUE; /** * Option that may be set to specify a default for {@link FTS#PARAMS}, * to be used in fulltext search whenever the {@link FTS#PARAMS} is * left unspecified. When set, the vocabulary * {@link FTS#PARAMS} may be used to override the default. 
*/ String FTS_PARAMS = FTS.class.getName() + ".defaultParams"; final String DEFAULT_PARAMS = ""; /** * Option that may be set to specify a default for {@link FTS#SEARCH_FIELD}, * to be used in fulltext search whenever the {@link FTS#SEARCH_FIELD} is * left unspecified. When set, the magic vocabulary * {@link FTS#SEARCH_FIELD} may be used to override the default. */ String FTS_SEARCH_FIELD = FTS.class.getName() + ".defaultSearchField"; final String DEFAULT_SEARCH_FIELD = "id"; // this is the Solr default /** * Option that may be set to specify a default for {@link FTS#SCORE_FIELD}, * to be used in fulltext search whenever the {@link FTS#SCORE_FIELD} is * left unspecified. When set, the magic vocabulary * {@link FTS#SCORE_FIELD} may be used to override the default. */ String FTS_SCORE_FIELD = FTS.class.getName() + ".defaultScoreField"; final String DEFAULT_SCORE_FIELD = null; // no system default /** * Option that may be set to specify a default for {@link FTS#SNIPPET_FIELD}, * to be used in fulltext search whenever the {@link FTS#SNIPPET_FIELD} is * left unspecified. When set, the magic vocabulary * {@link FTS#SNIPPET_FIELD} may be used to override the default. */ String FTS_SNIPPET_FIELD = FTS.class.getName() + ".defaultSnippetField"; final String DEFAULT_SNIPPET_FIELD = null; // no system default } /** * The namespace used for magic search predicates. */ final String NAMESPACE = "http://www.bigdata.com/rdf/fts#"; /** * The type of the FTS. For now, only Solr is implemented/supported. */ public static enum EndpointType { SOLR } /** * Target type for extracted entities, determining whether they are * parsed into a literal or interpreted as a URI. */ public static enum SearchResultType { URI, LITERAL } /** * The name of a magic predicate recognized in SPARQL queries when it occurs * in statement patterns such as: * *
    * 
    * ( s?, fts:search, "scale-out RDF triplestore" )
    * 
    * 
* * The value MUST be bound and MUST be a literal, it points to the Solr * search string. * *

* The subject MUST NOT be bound. *

* * This expression will evaluate to a set of bindings for the subject * position corresponding to the indexed literals matching any of the terms * obtained when the literal was tokenized. * * You may want to use {@link FTS#RESULT_FIELD} to fix the result field * that is mapped to the result variable. */ final URI SEARCH = new URIImpl(NAMESPACE + "search"); /** * Magic predicate used to specify the Solr endpoint to be queried. If not * provided, the default endpoint as specified in the configuration is used. *

* *

    * 
    * select ?s
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s fts:endpoint "http://my.solr.endpoint:1012/solrIndex/" .
    * }
    * 
    * 
* * The endpoint must be provided as a literal, including protocol, IP or * hostname, and port to be queried. */ final URI ENDPOINT = new URIImpl(NAMESPACE + "endpoint"); /** * Magic predicate used to specify the endpoint type, such as a Solr * endpoint or any other external full text search service. * *
    * 
    * select ?s
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s fts:endpoint "http://my.solr.endpoint:1012/solrIndex/" .
    *   ?s fts:endpointType "SOLR" .
    * }
    * 
    * 
* * The endpoint must be provided as a literal, according to the * {@link EndpointType} enum values. */ final URI ENDPOINT_TYPE = new URIImpl(NAMESPACE + "endpointType"); /** * Magic predicate used to specify full text search parameters to be * applied when executing the search. * *
    * 
    * select ?s
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s fts:params "defType=dismax&bf=uses^5" .
    * }
    * 
    * 
* * The params need to be a correct string according to Solr specifications * and it must be provided as a literal. */ final URI PARAMS = new URIImpl(NAMESPACE + "params"); /** * Magic predicate used to specify the type of the values stored in the Solr * field or fields from which data is extracted (the latter one being * specified as part of the PARAMS predicate above. If there are multiple * output fields, all of the will be included and the type specified refers * to all of them. Default is URI, which converts the field into a URI; if * conversion fails, the value is ignored. *

* *

    * 
    * select ?s
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s searchResultType "LITERAL" .
    * }
    * 
    * 
* * Allowed values are "URI" and "LITERAL"; if none of these values is * provided, the {@value #DEFAULT_SEARCH_RESULT_TYPE} will be used. */ final URI SEARCH_RESULT_TYPE = new URIImpl(NAMESPACE + "searchResultType"); /** * Magic predicate used to query for free text search metadata to set a * deadline in milliseconds on the full text index search ( * {@value #DEFAULT_TIMEOUT}). Use in conjunction with {@link #SEARCH} as * follows: *

* *

    * 
    * select ?s
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s fts:timeout "5000" .
    * }
    * 
    * 
*

* * Timeout specified in milliseconds, as literal. If not specified or not * a valid integer, the {@value #DEFAULT_TIMEOUT} is used. */ final URI TIMEOUT = new URIImpl(NAMESPACE + "timeout"); /** * Magic predicate to indicate the output variable in which the score * of matches will be saved. * *

    * 
    * select ?s ?score
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s fts:score ?score .
    * }
    * 
    * 
* * The referenced variable must not be used somewhere else in the scope. * It will be bound to an xsd:double typed literal indicating the score * for the match. * * In order to use this, you also need to set {@link FTS#SCORE_FIELD} * to fix the result field that is mapped to the score variable. * */ final URI SCORE = new URIImpl(NAMESPACE + "score"); /** * Magic predicate to indicate the output variable in which a sample * snippet for matches will be saved. * *
    * 
    * select ?s ?snippet
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s fts:snippet ?snippet .
    * }
    * 
    * 
* * The referenced variable must not be used somewhere else in the scope. * It will be bound to an untyped (text) literal. * * In order to use this, you need to set {@link FTS#SNIPPET_FIELD} to fix * the result field that is mapped to the snippet variable. */ final URI SNIPPET = new URIImpl(NAMESPACE + "snippet"); /** * Magic predicate to indicate the external search service result field * which will be stored in the result variable. * *
    * 
    * select ?s
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s fts:searchField "id" .
    * }
    * 
    * 
*/ final URI SEARCH_FIELD = new URIImpl(NAMESPACE + "searchField"); /** * Magic predicate to indicate the external search service field whose * value will be bound to the snippet variable (see {@link FTS#SNIPPET}). * *
    * 
    * select ?s
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s fts:snippetField "value" .
    * }
    * 
    * 
*/ final URI SNIPPET_FIELD = new URIImpl(NAMESPACE + "snippetField"); /** * Magic predicate to indicate the external search service field whose * value will be bound to the score variable (see {@link FTS#SCORE}). * *
    * 
    * select ?s
    * where {
    *   ?s fts:search "scale-out RDF triplestore" .
    *   ?s fts:scoreField "score" .
    * }
    * 
    * 
*/ final URI SCORE_FIELD = new URIImpl(NAMESPACE + "scoreField"); }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy