All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openrdf.sail.solr.SolrIndex Maven / Gradle / Ivy

Go to download

StackableSail implementation offering full-text search on literals, based on Solr.

There is a newer version: 4.1.2
Show newest version
/* 
 * Licensed to Aduna under one or more contributor license agreements.  
 * See the NOTICE.txt file distributed with this work for additional 
 * information regarding copyright ownership. 
 *
 * Aduna licenses this file to you under the terms of the Aduna BSD 
 * License (the "License"); you may not use this file except in compliance 
 * with the License. See the LICENSE.txt file distributed with this work 
 * for the full License.
 *
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing permissions
 * and limitations under the License.
 */
package org.openrdf.sail.solr;

import java.io.IOException;
import java.text.ParseException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SpatialParams;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.vocabulary.GEOF;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.algebra.Var;
import org.openrdf.sail.SailException;
import org.openrdf.sail.lucene.AbstractSearchIndex;
import org.openrdf.sail.lucene.BulkUpdater;
import org.openrdf.sail.lucene.DocumentDistance;
import org.openrdf.sail.lucene.DocumentResult;
import org.openrdf.sail.lucene.DocumentScore;
import org.openrdf.sail.lucene.LuceneSail;
import org.openrdf.sail.lucene.SearchDocument;
import org.openrdf.sail.lucene.SearchFields;
import org.openrdf.sail.lucene.SearchQuery;
import org.openrdf.sail.lucene.util.GeoUnits;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Iterables;
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.context.SpatialContextFactory;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.SpatialRelation;

/**
 * @see LuceneSail
 */
public class SolrIndex extends AbstractSearchIndex {

	/** Name of the configuration parameter that holds the Solr server specification. */
	public static final String SERVER_KEY = "server";

	/** Name under which the computed distance is requested as a result field in geo queries. */
	public static final String DISTANCE_FIELD = "_dist";

	private final Logger logger = LoggerFactory.getLogger(getClass());

	/** Client used to talk to Solr; assigned in initialize() and reset to null in shutDown(). */
	private SolrClient client;

	/**
	 * Maps a property name to the spatial context to use for it. The type
	 * arguments matter: with a raw Function, apply() yields Object, which
	 * cannot be returned from getSpatialContext(String).
	 */
	private Function<? super String, ? extends SpatialContext> geoContextMapper;

	/**
	 * Initializes this index: runs the superclass initialization, creates the
	 * spatial context mapper and creates the Solr client for the server named
	 * by the {@link #SERVER_KEY} parameter.
	 * <p>
	 * The text before the first ':' of the server parameter is taken as the
	 * scheme and selects the client factory class
	 * {@code org.openrdf.sail.solr.client.<scheme>.Factory}.
	 * 
	 * @param parameters
	 *        the configuration parameters
	 * @throws SailException
	 *         if the server parameter is missing or contains no ':'
	 * @throws Exception
	 *         if the superclass initialization fails or the client factory
	 *         cannot be loaded or instantiated
	 */
	@Override
	public void initialize(Properties parameters)
		throws Exception
	{
		super.initialize(parameters);
		// slightly hacky cast to cope with the fact that Properties is
		// Map<Object,Object> even though it is effectively Map<String,String>
		@SuppressWarnings("unchecked")
		Map<String, String> geoParameters = (Map<String, String>)(Map<?, ?>)parameters;
		this.geoContextMapper = createSpatialContextMapper(geoParameters);

		String server = parameters.getProperty(SERVER_KEY);
		if (server == null) {
			throw new SailException("Missing " + SERVER_KEY + " parameter");
		}
		int pos = server.indexOf(':');
		if (pos == -1) {
			throw new SailException("Missing scheme in " + SERVER_KEY + " parameter: " + server);
		}
		String scheme = server.substring(0, pos);
		Class<?> clientFactoryCls = Class.forName("org.openrdf.sail.solr.client." + scheme + ".Factory");
		// Class.newInstance() is deprecated and propagates checked constructor
		// exceptions undeclared; go through the no-arg constructor instead.
		SolrClientFactory clientFactory = (SolrClientFactory)clientFactoryCls.getDeclaredConstructor().newInstance();
		client = clientFactory.create(server);
	}

	/**
	 * Builds the function that supplies the spatial context for a property.
	 * This implementation creates a single context from the given parameters
	 * and returns it for every input.
	 * 
	 * @param parameters
	 *        settings handed to the spatial context factory
	 * @return a function that always returns the one context created here
	 */
	protected Function createSpatialContextMapper(Map parameters) {
		// this should really be based on the schema
		SpatialContext sharedContext = SpatialContextFactory.makeSpatialContext(parameters,
				Thread.currentThread().getContextClassLoader());
		return Functions.constant(sharedContext);
	}

	/**
	 * Gives access to the Solr client used by this index.
	 * 
	 * @return the current client, or null when none is set (the field is
	 *         reset to null by shutDown())
	 */
	public SolrClient getClient() {
		return this.client;
	}

	/**
	 * Looks up the spatial context for a property by applying the mapper
	 * created during initialization.
	 * 
	 * @param property
	 *        the property name passed to the mapper
	 * @return the spatial context the mapper yields for that property
	 */
	@Override
	protected SpatialContext getSpatialContext(String property) {
		return this.geoContextMapper.apply(property);
	}

	/**
	 * Closes the Solr client, if there is one, and drops the reference to it.
	 * 
	 * @throws IOException
	 *         if closing the client fails
	 */
	@Override
	public void shutDown()
		throws IOException
	{
		if (client == null) {
			// nothing to release
			return;
		}
		client.close();
		client = null;
	}

	// //////////////////////////////// Methods for updating the index

	/**
	 * Fetches the document with the given document ID (combination of resource
	 * and context) through the "/get" request handler.
	 * 
	 * @param id
	 *        the document ID to look up
	 * @return the matching document, or null when the response holds no "doc"
	 *         entry
	 * @throws IOException
	 *         if the request fails; a SolrServerException is wrapped as the
	 *         cause
	 */
	@Override
	protected SearchDocument getDocument(String id)
		throws IOException
	{
		SolrQuery lookup = new SolrQuery();
		lookup.setRequestHandler("/get");
		lookup.set(SearchFields.ID_FIELD_NAME, id);

		SolrDocument found;
		try {
			found = (SolrDocument)client.query(lookup).getResponse().get("doc");
		}
		catch (SolrServerException e) {
			throw new IOException(e);
		}
		if (found == null) {
			return null;
		}
		return new SolrSearchDocument(found);
	}

	/**
	 * Returns the documents whose URI field matches the given resource ID,
	 * each wrapped as a SolrSearchDocument.
	 * 
	 * @param resourceId
	 *        the resource ID to search for
	 * @return the matching documents
	 * @throws IOException
	 *         if the search fails; a SolrServerException is wrapped as the
	 *         cause
	 */
	@Override
	protected Iterable getDocuments(String resourceId)
		throws IOException
	{
		SolrQuery query = new SolrQuery(termQuery(SearchFields.URI_FIELD_NAME, resourceId));
		SolrDocumentList docs;
		try {
			docs = getDocuments(query);
		}
		catch (SolrServerException e) {
			throw new IOException(e);
		}
		// The type arguments are required: against a raw Function the typed
		// apply(SolrDocument) below would not override apply(Object).
		return Iterables.transform(docs, new Function<SolrDocument, SearchDocument>() {

			@Override
			public SearchDocument apply(SolrDocument hit) {
				return new SolrSearchDocument(hit);
			}
		});
	}

	/**
	 * Creates a new SolrSearchDocument from the given identifiers.
	 * 
	 * @param id
	 *        the document ID
	 * @param resourceId
	 *        the resource ID
	 * @param context
	 *        the context ID
	 * @return the newly created document
	 */
	@Override
	protected SearchDocument newDocument(String id, String resourceId, String context) {
		SearchDocument created = new SolrSearchDocument(id, resourceId, context);
		return created;
	}

	/**
	 * Copies a document by putting all entries of its underlying SolrDocument
	 * into a fresh SolrDocument.
	 * 
	 * @param doc
	 *        the document to copy; must be a SolrSearchDocument
	 * @return a new SolrSearchDocument wrapping the copy
	 */
	@Override
	protected SearchDocument copyDocument(SearchDocument doc) {
		SolrDocument duplicate = new SolrDocument();
		duplicate.putAll(((SolrSearchDocument)doc).getDocument());
		return new SolrSearchDocument(duplicate);
	}

	/**
	 * Sends the given document to Solr after converting it to an input
	 * document.
	 * 
	 * @param doc
	 *        the document to add; must be a SolrSearchDocument
	 * @throws IOException
	 *         if the request fails; a SolrServerException is wrapped as the
	 *         cause
	 */
	@Override
	protected void addDocument(SearchDocument doc)
		throws IOException
	{
		try {
			client.add(ClientUtils.toSolrInputDocument(((SolrSearchDocument)doc).getDocument()));
		}
		catch (SolrServerException cause) {
			throw new IOException(cause);
		}
	}

	/**
	 * Updates a document by adding it again.
	 * 
	 * @param doc
	 *        the document to update
	 * @throws IOException
	 *         if adding the document fails
	 */
	@Override
	protected void updateDocument(SearchDocument doc)
		throws IOException
	{
		// NOTE(review): presumably Solr replaces an existing document with the
		// same ID on add - confirm against the schema's unique key.
		this.addDocument(doc);
	}

	/**
	 * Deletes the given document from Solr by its ID.
	 * 
	 * @param doc
	 *        the document to delete
	 * @throws IOException
	 *         if the request fails; a SolrServerException is wrapped as the
	 *         cause
	 */
	@Override
	protected void deleteDocument(SearchDocument doc)
		throws IOException
	{
		String documentId = doc.getId();
		try {
			client.deleteById(documentId);
		}
		catch (SolrServerException cause) {
			throw new IOException(cause);
		}
	}

	/**
	 * Creates a bulk updater that works on this index's Solr client.
	 * 
	 * @return a new SolrBulkUpdater
	 */
	@Override
	protected BulkUpdater newBulkUpdate() {
		BulkUpdater updater = new SolrBulkUpdater(client);
		return updater;
	}

	/**
	 * Builds a query clause of the form {@code field:"value"}.
	 * <p>
	 * Backslashes and double quotes in the value are escaped with a backslash
	 * so that the value cannot terminate the quoted phrase early and alter the
	 * rest of the query. Values without those characters produce exactly the
	 * same clause as before.
	 * 
	 * @param field
	 *        the field name, used as is
	 * @param value
	 *        the value to match; null is rendered as the text "null"
	 * @return the query clause
	 */
	static String termQuery(String field, String value) {
		// String.valueOf keeps the previous rendering of a null value
		String text = String.valueOf(value);
		// escape the backslash first so the escapes added for quotes are not doubled
		String escaped = text.replace("\\", "\\\\").replace("\"", "\\\"");
		return field + ":\"" + escaped + "\"";
	}

	/**
	 * Runs the given query through {@link #search(SolrQuery)} and returns the
	 * result list of the response.
	 * 
	 * @param query
	 *        the query to run
	 * @return the documents in the response
	 * @throws SolrServerException
	 *         if the search fails
	 * @throws IOException
	 *         if the search fails
	 */
	private SolrDocumentList getDocuments(SolrQuery query)
		throws SolrServerException, IOException
	{
		QueryResponse response = search(query);
		return response.getResults();
	}

	/**
	 * Returns the document for the given subject and context combination, or
	 * null when no such document exists yet.
	 * 
	 * @param subject
	 *        the resource the document describes
	 * @param context
	 *        the context the document belongs to
	 * @return the document, or null if there is none
	 * @throws IOException
	 *         if the lookup fails
	 */
	public SearchDocument getDocument(Resource subject, Resource context)
		throws IOException
	{
		// the document ID is formed from the resource ID and the context ID
		String documentId = SearchFields.formIdString(SearchFields.getResourceID(subject),
				SearchFields.getContextID(context));
		return getDocument(documentId);
	}

	/**
	 * Returns the documents representing the given resource. Each document
	 * represents a set of statements with that resource as subject, stored in
	 * one specific context.
	 * 
	 * @param subject
	 *        the resource to look up
	 * @return the documents found for the resource
	 * @throws IOException
	 *         if the lookup fails
	 */
	public Iterable getDocuments(Resource subject)
		throws IOException
	{
		return getDocuments(SearchFields.getResourceID(subject));
	}

	/**
	 * Filters the given set of field names, retaining only those that
	 * SearchFields.isPropertyField accepts.
	 * 
	 * @param fields
	 *        the field names to filter; not modified
	 * @return a new set holding the property fields
	 */
	public static Set<String> getPropertyFields(Set<String> fields) {
		// Element type restored: a raw Set cannot be iterated with a String
		// loop variable.
		Set<String> result = new HashSet<String>(fields.size());
		for (String field : fields) {
			if (SearchFields.isPropertyField(field)) {
				result.add(field);
			}
		}
		return result;
	}

	/**
	 * Does nothing in this implementation.
	 */
	@Override
	public void begin()
		throws IOException
	{
		// intentionally empty
	}

	/**
	 * Calls commit on the Solr client.
	 * 
	 * @throws IOException
	 *         if the commit fails; a SolrServerException is wrapped as the
	 *         cause
	 */
	@Override
	public void commit()
		throws IOException
	{
		try {
			this.client.commit();
		}
		catch (SolrServerException cause) {
			throw new IOException(cause);
		}
	}

	/**
	 * Calls rollback on the Solr client.
	 * 
	 * @throws IOException
	 *         if the rollback fails; a SolrServerException is wrapped as the
	 *         cause
	 */
	@Override
	public void rollback()
		throws IOException
	{
		try {
			this.client.rollback();
		}
		catch (SolrServerException cause) {
			throw new IOException(cause);
		}
	}

	/**
	 * Does nothing in this implementation.
	 */
	@Override
	public void beginReading()
		throws IOException
	{
		// intentionally empty
	}

	/**
	 * Does nothing in this implementation.
	 */
	@Override
	public void endReading()
		throws IOException
	{
		// intentionally empty
	}

	// //////////////////////////////// Methods for querying the index

	/**
	 * Wraps the given query string in a SolrSearchQuery whose default field is
	 * chosen from the property URI. To be removed, no longer used.
	 * 
	 * @param query
	 *        the query string
	 * @param propertyURI
	 *        the property to use as default field, or null for the text field
	 * @return the prepared query
	 * @throws MalformedQueryException
	 *         declared by the signature; not thrown by the code in this method
	 */
	@Override
	@Deprecated
	protected SearchQuery parseQuery(String query, URI propertyURI) throws MalformedQueryException
	{
		return new SolrSearchQuery(prepareQuery(propertyURI, new SolrQuery(query)), this);
	}

	/**
	 * Parse the passed query.
	 * 
	 * @param query
	 *        string
	 * @return the parsed query
	 * @throws ParseException
	 *         when the parsing brakes
	 */
	@Override
	protected Iterable query(Resource subject, String query, URI propertyURI,
			boolean highlight)
		throws MalformedQueryException, IOException
	{
		SolrQuery q = prepareQuery(propertyURI, new SolrQuery(query));
		if (highlight) {
			q.setHighlight(true);
			String field = (propertyURI != null) ? SearchFields.getPropertyField(propertyURI) : "*";
			q.addHighlightField(field);
			q.setHighlightSimplePre(SearchFields.HIGHLIGHTER_PRE_TAG);
			q.setHighlightSimplePost(SearchFields.HIGHLIGHTER_POST_TAG);
			q.setHighlightSnippets(2);
		}

		QueryResponse response;
		if (q.getHighlight()) {
			q.addField("*");
		}
		else {
			q.addField(SearchFields.URI_FIELD_NAME);
		}
		q.addField("score");
		try {
			if (subject != null) {
				response = search(subject, q);
			}
			else {
				response = search(q);
			}
		}
		catch (SolrServerException e) {
			throw new IOException(e);
		}
		SolrDocumentList results = response.getResults();
		final Map>> highlighting = response.getHighlighting();
		return Iterables.transform(results, new Function() {

			@Override
			public DocumentScore apply(SolrDocument document) {
				SolrSearchDocument doc = new SolrSearchDocument(document);
				Map> docHighlighting = (highlighting != null) ? highlighting.get(doc.getId())
						: null;
				return new SolrDocumentScore(doc, docHighlighting);
			}
		});
	}

	// /**
	// * Parses an id-string used for a context filed (a serialized resource)
	// back to a resource.
	// * CAN RETURN NULL
	// * Inverse method of {@link #getResourceID(Resource)}
	// * @param idString
	// * @return null if the passed idString was the {@link #CONTEXT_NULL}
	// constant
	// */
	// private Resource getContextResource(String idString) {
	// if (CONTEXT_NULL.equals(idString))
	// return null;
	// else
	// return getResource(idString);
	// }

	/**
	 * Evaluates the given query only for the given resource.
	 * <p>
	 * The query string of the passed query object is rewritten in place: the
	 * original text is put in parentheses and combined with a clause on the
	 * URI field. The parentheses keep an OR in the original query from
	 * escaping the resource restriction.
	 * 
	 * @param resource
	 *        the resource the results must belong to
	 * @param query
	 *        the query to restrict and run; modified by this call
	 * @return the response of the restricted query
	 * @throws SolrServerException
	 *         if the search fails
	 * @throws IOException
	 *         if the search fails
	 */
	public QueryResponse search(Resource resource, SolrQuery query)
		throws SolrServerException, IOException
	{
		// rewrite the query
		String idQuery = termQuery(SearchFields.URI_FIELD_NAME, SearchFields.getResourceID(resource));
		query.setQuery("(" + query.getQuery() + ") AND " + idQuery);
		return search(query);
	}

	/**
	 * Finds documents whose geo property lies within a distance of a point,
	 * using a "geofilt" query scored by reciprocal distance.
	 * <p>
	 * When a context variable is given, the query is restricted to its value;
	 * when that value is null, documents carrying the ID of the null context
	 * are excluded instead, and the context field is added to the result
	 * fields.
	 * 
	 * @param geoProperty
	 *        the property holding the geometry
	 * @param p
	 *        the centre point; sent to Solr as "y,x"
	 * @param units
	 *        the unit of the distance, also handed to each result
	 * @param distance
	 *        the maximum distance, converted to kilometres for Solr
	 * @param distanceVar
	 *        not used by this implementation
	 * @param contextVar
	 *        the context variable, or null for no context handling
	 * @return the matching documents wrapped as distance results
	 * @throws MalformedQueryException
	 *         declared by the signature; not thrown by the code in this method
	 * @throws IOException
	 *         if the search fails; a SolrServerException is wrapped as the
	 *         cause
	 */
	@Override
	protected Iterable geoQuery(URI geoProperty, Point p,
			final URI units, double distance, String distanceVar, Var contextVar)
		throws MalformedQueryException, IOException
	{
		double kms = GeoUnits.toKilometres(distance, units);

		String qstr = "{!geofilt score=recipDistance}";
		if (contextVar != null) {
			Resource ctx = (Resource)contextVar.getValue();
			String tq = termQuery(SearchFields.CONTEXT_FIELD_NAME, SearchFields.getContextID(ctx));
			if (ctx != null) {
				qstr = tq + " AND " + qstr;
			}
			else {
				// unbound context variable: exclude the null context
				qstr = "-" + tq + " AND " + qstr;
			}
		}
		SolrQuery q = new SolrQuery(qstr);
		q.set(SpatialParams.FIELD, SearchFields.getPropertyField(geoProperty));
		q.set(SpatialParams.POINT, p.getY() + "," + p.getX());
		q.set(SpatialParams.DISTANCE, Double.toString(kms));
		q.addField(SearchFields.URI_FIELD_NAME);
		// ':' is part of the fl parameter syntax so we can't use the full
		// property field name
		// instead we use wildcard + local part of the property URI
		q.addField("*" + geoProperty.getLocalName());
		// always include the distance - needed for sanity checking
		q.addField(DISTANCE_FIELD + ":geodist()");
		boolean requireContext = (contextVar != null && !contextVar.hasValue());
		if (requireContext) {
			q.addField(SearchFields.CONTEXT_FIELD_NAME);
		}

		QueryResponse response;
		try {
			response = search(q);
		}
		catch (SolrServerException e) {
			throw new IOException(e);
		}

		SolrDocumentList results = response.getResults();
		// The type arguments are required: against a raw Function the typed
		// apply(SolrDocument) below would not override apply(Object).
		return Iterables.transform(results, new Function<SolrDocument, DocumentDistance>() {

			@Override
			public DocumentDistance apply(SolrDocument document) {
				SolrSearchDocument doc = new SolrSearchDocument(document);
				return new SolrDocumentDistance(doc, units);
			}
		});
	}

	/**
	 * Finds documents whose geo property stands in the given spatial relation
	 * to a shape.
	 * <p>
	 * The relation is translated by toSpatialOp; the query text is the quoted
	 * operation applied to the shape's WKT string, evaluated against the
	 * property field as default field. Context handling is the same as in
	 * geoQuery: restrict to the variable's value, or exclude the null context
	 * when the value is null.
	 * 
	 * @param relation
	 *        the relation function, as a string
	 * @param geoProperty
	 *        the property holding the geometry
	 * @param shape
	 *        the query shape; must be one produced by parseQueryShape so that
	 *        its WKT string is available
	 * @param contextVar
	 *        the context variable, or null for no context handling
	 * @return the matching documents, or null when the relation is not one
	 *         that toSpatialOp translates
	 * @throws MalformedQueryException
	 *         declared by the signature; not thrown by the code in this method
	 * @throws IOException
	 *         if the search fails; a SolrServerException is wrapped as the
	 *         cause
	 */
	@Override
	protected Iterable geoRelationQuery(String relation,
			URI geoProperty, Shape shape, Var contextVar)
		throws MalformedQueryException, IOException
	{
		String spatialOp = toSpatialOp(relation);
		if (spatialOp == null) {
			return null;
		}
		String wkt = toWkt(shape);
		String qstr = "\"" + spatialOp + "(" + wkt + ")\"";
		if (contextVar != null) {
			Resource ctx = (Resource)contextVar.getValue();
			String tq = termQuery(SearchFields.CONTEXT_FIELD_NAME, SearchFields.getContextID(ctx));
			if (ctx != null) {
				qstr = tq + " AND " + qstr;
			}
			else {
				// unbound context variable: exclude the null context
				qstr = "-" + tq + " AND " + qstr;
			}
		}
		SolrQuery q = new SolrQuery(qstr);
		q.set(CommonParams.DF, SearchFields.getPropertyField(geoProperty));
		q.addField(SearchFields.URI_FIELD_NAME);
		// ':' is part of the fl parameter syntax so we can't use the full
		// property field name
		// instead we use wildcard + local part of the property URI
		q.addField("*" + geoProperty.getLocalName());
		boolean requireContext = (contextVar != null && !contextVar.hasValue());
		if (requireContext) {
			q.addField(SearchFields.CONTEXT_FIELD_NAME);
		}

		QueryResponse response;
		try {
			response = search(q);
		}
		catch (SolrServerException e) {
			throw new IOException(e);
		}

		SolrDocumentList results = response.getResults();
		// The type arguments are required: against a raw Function the typed
		// apply(SolrDocument) below would not override apply(Object).
		return Iterables.transform(results, new Function<SolrDocument, DocumentResult>() {

			@Override
			public DocumentResult apply(SolrDocument document) {
				SolrSearchDocument doc = new SolrSearchDocument(document);
				return new SolrDocumentResult(doc);
			}
		});
	}

	/**
	 * Translates a relation function into the name of the spatial operation
	 * used in the query text.
	 * 
	 * @param relation
	 *        the relation function, as a string
	 * @return "Intersects", "IsDisjointTo" or "IsWithin", or null when the
	 *         relation is none of the three handled here
	 */
	private String toSpatialOp(String relation) {
		final String operation;
		if (GEOF.SF_INTERSECTS.stringValue().equals(relation)) {
			operation = "Intersects";
		}
		else if (GEOF.SF_DISJOINT.stringValue().equals(relation)) {
			operation = "IsDisjointTo";
		}
		else if (GEOF.EH_COVERED_BY.stringValue().equals(relation)) {
			operation = "IsWithin";
		}
		else {
			operation = null;
		}
		return operation;
	}

	/**
	 * Parses a query shape with the superclass and wraps the result so that
	 * the original WKT string stays available alongside the parsed shape.
	 * 
	 * @param property
	 *        the property the shape is parsed for
	 * @param value
	 *        the WKT string to parse
	 * @return a WktPoint when the parsed shape is a Point, otherwise a
	 *         WktShape
	 * @throws ParseException
	 *         if the superclass cannot parse the value
	 */
	@Override
	protected Shape parseQueryShape(String property, String value) throws ParseException {
		Shape parsed = super.parseQueryShape(property, value);
		// workaround to preserve WKT string
		if (parsed instanceof Point) {
			return new WktPoint((Point)parsed, value);
		}
		return new WktShape(parsed, value);
	}

	/**
	 * Returns the WKT string stored with a wrapped shape.
	 * 
	 * @param s
	 *        the shape; must be a WktShape, otherwise the cast fails
	 * @return the WKT string the wrapper carries
	 */
	protected String toWkt(Shape s) {
		WktShape wrapper = (WktShape)s;
		return wrapper.wkt;
	}

	/**
	 * Shape wrapper that carries the WKT string alongside the shape. Every
	 * Shape method is forwarded to the wrapped shape.
	 * 
	 * @param <S>
	 *        the type of the wrapped shape
	 */
	private static class WktShape<S extends Shape> implements Shape {

		/** The wrapped shape all calls are forwarded to. */
		final S s;

		/** The WKT string stored with the shape. */
		final String wkt;

		WktShape(S s, String wkt) {
			this.s = s;
			this.wkt = wkt;
		}

		@Override
		public SpatialRelation relate(Shape other) {
			return s.relate(other);
		}

		@Override
		public Rectangle getBoundingBox() {
			return s.getBoundingBox();
		}

		@Override
		public boolean hasArea() {
			return s.hasArea();
		}

		@Override
		public double getArea(SpatialContext ctx) {
			return s.getArea(ctx);
		}

		@Override
		public Point getCenter() {
			return s.getCenter();
		}

		@Override
		public Shape getBuffered(double distance, SpatialContext ctx) {
			return s.getBuffered(distance, ctx);
		}

		@Override
		public boolean isEmpty() {
			return s.isEmpty();
		}

		/**
		 * Delegates to the wrapped shape; the WKT string takes no part in the
		 * comparison.
		 */
		@Override
		public boolean equals(Object other) {
			return s.equals(other);
		}

		/**
		 * Delegates to the wrapped shape, mirroring equals, which was
		 * previously overridden without a matching hashCode.
		 */
		@Override
		public int hashCode() {
			return s.hashCode();
		}
	}

	/**
	 * WktShape for points, so that the wrapper can still be used where a
	 * Point is expected.
	 */
	private static class WktPoint extends WktShape<Point> implements Point {

		WktPoint(Point p, String wkt) {
			super(p, wkt);
		}

		@Override
		public void reset(double x, double y) {
			s.reset(x, y);
		}

		@Override
		public double getX() {
			return s.getX();
		}

		@Override
		public double getY() {
			return s.getY();
		}

	}

	/**
	 * Evaluates the given query and returns the response.
	 * <p>
	 * The number of rows requested is maxDocs when that is positive.
	 * Otherwise the query is first run with zero rows to learn the number of
	 * matches, and then run again asking for that many rows (capped at
	 * Integer.MAX_VALUE, and at least one). The row setting of the passed
	 * query object is changed by this call.
	 * 
	 * @param query
	 *        the query to run; its rows setting is overwritten
	 * @return the response of the final request
	 * @throws SolrServerException
	 *         if a request fails
	 * @throws IOException
	 *         if a request fails
	 */
	public QueryResponse search(SolrQuery query)
		throws SolrServerException, IOException
	{
		int rows = maxDocs;
		if (rows <= 0) {
			// no limit configured: count the matches first
			long found = client.query(query.setRows(0)).getResults().getNumFound();
			rows = Math.max((int)Math.min(found, Integer.MAX_VALUE), 1);
		}
		return client.query(query.setRows(rows));
	}

	/**
	 * Sets the default field of the given query: the text field when no
	 * property is given, otherwise the field of that property.
	 * 
	 * @param propertyURI
	 *        the property to search in, or null
	 * @param query
	 *        the query to configure; modified and returned
	 * @return the same query object
	 */
	private SolrQuery prepareQuery(URI propertyURI, SolrQuery query) {
		String defaultField = (propertyURI == null) ? SearchFields.TEXT_FIELD_NAME
				: SearchFields.getPropertyField(propertyURI);
		query.set(CommonParams.DF, defaultField);
		return query;
	}

	/**
	 * Deletes, for each given context, all documents whose context field
	 * matches that context's ID.
	 * 
	 * @param contexts
	 *        the contexts to clear; an entry may be null
	 * @throws IOException
	 *         if a delete request fails; a SolrServerException is wrapped as
	 *         the cause
	 */
	@Override
	public synchronized void clearContexts(Resource... contexts)
		throws IOException
	{
		logger.debug("deleting contexts: {}", Arrays.toString(contexts));

		try {
			for (int i = 0; i < contexts.length; i++) {
				// attention: the entry can be null
				String contextId = SearchFields.getContextID(contexts[i]);
				client.deleteByQuery(termQuery(SearchFields.CONTEXT_FIELD_NAME, contextId));
			}
		}
		catch (SolrServerException cause) {
			throw new IOException(cause);
		}
	}

	/**
	 * Deletes every document by issuing a delete for the query "*:*".
	 * 
	 * @throws IOException
	 *         if the delete request fails; a SolrServerException is wrapped
	 *         as the cause
	 */
	@Override
	public synchronized void clear()
		throws IOException
	{
		try {
			this.client.deleteByQuery("*:*");
		}
		catch (SolrServerException cause) {
			throw new IOException(cause);
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy