All downloads are free. Search and download functionality uses the official Maven repository.

at.newmedialab.ldclient.service.LDClient Maven / Gradle / Ivy

/*
 * Copyright (c) 2011 Salzburg Research.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package at.newmedialab.ldclient.service;

import at.newmedialab.ldclient.exception.LDClientException;
import at.newmedialab.ldclient.model.ClientResponse;
import at.newmedialab.ldclient.model.Endpoint;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.http.*;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpHead;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.impl.cookie.DateParseException;
import org.apache.http.impl.cookie.DateUtils;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.params.HttpParams;
import org.apache.http.protocol.HttpContext;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.query.BindingSet;
import org.openrdf.query.TupleQueryResultHandler;
import org.openrdf.query.TupleQueryResultHandlerException;
import org.openrdf.query.resultio.QueryResultIO;
import org.openrdf.query.resultio.QueryResultParseException;
import org.openrdf.query.resultio.TupleQueryResultFormat;
import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.event.InterceptingRepositoryConnection;
import org.openrdf.repository.event.base.InterceptingRepositoryConnectionWrapper;
import org.openrdf.repository.event.base.RepositoryConnectionInterceptorAdapter;
import org.openrdf.repository.sail.SailRepository;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParserRegistry;
import org.openrdf.sail.memory.MemoryStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.net.URLEncoder;
import java.util.Date;
import java.util.List;

/**
 * Add file description here!
 * 

* Author: Sebastian Schaffert */ public class LDClient { private Logger log = LoggerFactory.getLogger(LDClient.class); private HttpParams httpParams; private LDEndpoints ldEndpoints; private Configuration config; public LDClient() { log.info("Initialising Linked Data Client Service ..."); ldEndpoints = new LDEndpoints(); try { config = new PropertiesConfiguration("ldclient.properties"); } catch (ConfigurationException e) { log.warn("could not load configuration file ldclient.properties from current directory, home directory, or classpath"); } httpParams = new BasicHttpParams(); httpParams.setParameter(CoreProtocolPNames.USER_AGENT, "Salzburg NewMediaLab Linked Data Client"); httpParams.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, config.getInt("so_timeout", 60000)); httpParams.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, config.getInt("connection_timeout", 10000)); httpParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS,true); httpParams.setIntParameter(ClientPNames.MAX_REDIRECTS,3); } /** * Retrieve all triples for this resource from the Linked Data Cloud. Retrieval will be carried out according * to the endpoint definition that matches this resource. 
In case no endpoint definition is found, the method * will try an "default" Linked Data retrieval if the configuration option "ldcache.fallback" is set to true * * @param resource the URI resource for which to retrieve the triples * @return a Sesame in-memory repository containing the triples for this resource */ public ClientResponse retrieveResource(URI resource) throws LDClientException { HttpResponse response = null; try { Endpoint endpoint = ldEndpoints.getEndpoint(resource); if(endpoint != null && endpoint.getType() == Endpoint.EndpointType.NONE) { return null; } else if(endpoint != null) { response = retrieveFromEndpoint(resource,endpoint); return parseResponse(resource,response,endpoint); } else if(config.getBoolean("fallback",true)) { response = retrieveFromLDEndpoint(resource,null); return parseResponse(resource,response,endpoint); } } catch(TupleQueryResultHandlerException ex) { log.error("repository error while trying to retrieve resource "+resource.stringValue(),ex); throw new LDClientException("repository error while trying to retrieve resource "+resource.stringValue(),ex); } catch(QueryResultParseException ex) { log.error("SPARQL query result parsing error while trying to retrieve resource "+resource.stringValue(),ex); throw new LDClientException("SPARQL query result parsing error while trying to retrieve resource "+resource.stringValue(),ex); } catch(RDFParseException ex) { log.error("RDF parsing error while trying to retrieve resource "+resource.stringValue(),ex); throw new LDClientException("RDF parsing error while trying to retrieve resource "+resource.stringValue(),ex); } catch(RepositoryException ex) { log.error("repository error while trying to retrieve resource "+resource.stringValue(),ex); throw new LDClientException("repository error while trying to retrieve resource "+resource.stringValue(),ex); } catch(IOException ex) { log.error("I/O error while trying to retrieve resource {}: {}",resource.stringValue(),ex.getMessage()); throw new 
LDClientException("I/O error while trying to retrieve resource "+resource.stringValue(),ex); } finally { } return null; } private HttpResponse retrieveFromEndpoint(URI resource, Endpoint endpoint) throws IOException, LDClientException { switch (endpoint.getType()) { case SPARQL: return retrieveFromSparqlEndpoint(resource, endpoint); case CACHE: return retrieveFromCacheEndpoint(resource,endpoint); case LINKEDDATA: return retrieveFromLDEndpoint(resource,endpoint); } return null; } private HttpResponse retrieveFromCacheEndpoint(URI resource, Endpoint endpoint) throws IOException { // TODO: use last-modified header HttpClient httpClient = createClient(); String contentType = endpoint.getContentType(); HttpGet get = new HttpGet(endpoint.getEndpointUrl().replace("{uri}", URLEncoder.encode(resource.stringValue(), "UTF-8"))); get.setHeader("Accept",contentType); get.setHeader("Accept-Language", "*"); // PoolParty compatibility log.info("retrieving resource {} from cache endpoint; request URI is {}", resource.stringValue(), get.getURI()); return httpClient.execute(get); } private HttpResponse retrieveFromLDEndpoint(URI resource, Endpoint endpoint) throws IOException, LDClientException { // TODO: use last-modified header HttpClient httpClient = createClient(); String contentType = "application/rdf+xml, text/rdf+n3;q=0.8, text/turtle;q=0.6"; if(endpoint != null) { contentType = endpoint.getContentType(); } HttpGet get = new HttpGet(resource.stringValue()); get.setHeader("Accept", contentType); get.setHeader("Accept-Language", "*"); // PoolParty compatibility log.info("retrieving resource {} as Linked Data; request URI is {}",resource.stringValue(),get.getURI()); HttpResponse result = httpClient.execute(get); if(result.getEntity() == null || RDFParserRegistry.getInstance().getFileFormatForMIMEType(result.getEntity().getContentType().getValue().split(";")[0]) == null) { get.abort(); if(result.getEntity() != null) { throw new LDClientException("invalid content returned by Linked 
Data resource "+resource.stringValue()+": "+result.getEntity().getContentType().getValue()); } else { throw new LDClientException("no content returned by Linked Data resource "+resource.stringValue()); } } return result; } private HttpResponse retrieveFromSparqlEndpoint(URI resource, Endpoint endpoint) throws IOException, LDClientException { String contentType = "application/sparql-results+xml"; if(endpoint.getContentType() != null) { contentType = endpoint.getContentType(); } String query = "SELECT ?p ?o WHERE { <{uri}> ?p ?o }"; String url = endpoint.getEndpointUrl() .replace("{query}", URLEncoder.encode(query.replace("{uri}",resource.stringValue()),"UTF-8")) .replace("{contenttype}", URLEncoder.encode(contentType,"UTF-8")); HttpClient httpClient = createClient(); HttpGet get = new HttpGet(url); get.setHeader("Accept",contentType); get.setHeader("Accept-Language", "*"); // PoolParty compatibility log.info("retrieving resource {} from SPARQL endpoint; request URI is {}", resource.stringValue(), get.getURI()); return httpClient.execute(get); } private ClientResponse parseResponse(URI resource, HttpResponse response, Endpoint endpoint) throws LDClientException, IOException, RepositoryException, RDFParseException, QueryResultParseException, TupleQueryResultHandlerException { if(response.getStatusLine().getStatusCode() >= 200 && response.getStatusLine().getStatusCode() < 400) { HttpEntity entity = response.getEntity(); if(entity != null) { String contentType = "application/rdf+xml"; long defaultExpires = config.getInt("expiry", 86400); if(endpoint != null && endpoint.getContentType() != null) { contentType = endpoint.getContentType(); } else if(endpoint != null && endpoint.getType() == Endpoint.EndpointType.SPARQL) { contentType = "application/sparql-results+xml"; } else if(entity.getContentType() != null) { contentType = entity.getContentType().getValue().split(";")[0]; } if(endpoint != null && endpoint.getDefaultExpiry() != null) { defaultExpires = 
endpoint.getDefaultExpiry(); } InputStream in = entity.getContent(); try { Repository triples; if(endpoint != null) { switch (endpoint.getType()) { case SPARQL: triples = parseSparqlResponse(resource,in,contentType); break; default: triples = parseRDFResponse(resource,in,contentType); } } else { triples = parseRDFResponse(resource,in,contentType); } ClientResponse result = new ClientResponse(triples); Header expires = response.getFirstHeader("Expires"); if(expires != null) { try { Date expiresDate = DateUtils.parseDate(expires.getValue()); result.setExpires(expiresDate); } catch (DateParseException e) { log.warn("could not parse Expires: header, using default expiry settings"); Date expiresDate = new Date(System.currentTimeMillis()+defaultExpires*1000); result.setExpires(expiresDate); } } else { Date expiresDate = new Date(System.currentTimeMillis()+defaultExpires*1000); result.setExpires(expiresDate); } if(log.isInfoEnabled()) { RepositoryConnection con = triples.getConnection(); log.info("retrieved {} triples for resource {}; expiry date: {}",new Object[] {con.size(),resource.stringValue(),result.getExpires()}); con.close(); } return result; } finally { in.close(); } } else { throw new IOException("the HTTP request did not contain any data"); } } else { log.error("the HTTP request failed (status: {})",response.getStatusLine()); throw new LDClientException("the HTTP request failed (status: "+response.getStatusLine()+")"); } } private Repository parseRDFResponse(final URI resource, InputStream in, String contentType) throws RepositoryException, IOException, RDFParseException { RDFFormat format = RDFParserRegistry.getInstance().getFileFormatForMIMEType(contentType, RDFFormat.RDFXML); Repository triples = new SailRepository(new MemoryStore()); triples.initialize(); InterceptingRepositoryConnection con = new InterceptingRepositoryConnectionWrapper(triples,triples.getConnection()); con.addRepositoryConnectionInterceptor(new RepositoryConnectionInterceptorAdapter() { 
@Override public boolean add(RepositoryConnection conn, Resource s, org.openrdf.model.URI p, Value o, Resource... contexts) { if(s instanceof org.openrdf.model.URI) { // if s is a URI and subject a KiWiUriResource, return true if they are different return !((org.openrdf.model.URI)s).stringValue().equals(resource.stringValue()); } else { // in all other cases, return true to filter out the triple return true; } }; @Override public boolean remove(RepositoryConnection conn, Resource s, org.openrdf.model.URI p, Value o, Resource... contexts) { if(s instanceof org.openrdf.model.URI) { // if s is a URI and subject a KiWiUriResource, return true if they are different return !((org.openrdf.model.URI)s).stringValue().equals(resource.stringValue()); } else { // in all other cases, return true to filter out the triple return true; } } }); con.add(in,resource.stringValue(),format); con.commit(); con.close(); return triples; } private Repository parseSparqlResponse(final URI resource, InputStream in, String contentType) throws RepositoryException, IOException, QueryResultParseException, TupleQueryResultHandlerException { TupleQueryResultFormat format = QueryResultIO.getParserFormatForMIMEType(contentType, TupleQueryResultFormat.SPARQL); final Repository triples = new SailRepository(new MemoryStore()); triples.initialize(); QueryResultIO.parse(in,format, new TupleQueryResultHandler() { RepositoryConnection con; URI subject; @Override public void startQueryResult(List bindingNames) throws TupleQueryResultHandlerException { subject = triples.getValueFactory().createURI(resource.stringValue()); try { con = triples.getConnection(); } catch (RepositoryException e) { throw new TupleQueryResultHandlerException("error while creating repository connection",e); } } @Override public void endQueryResult() throws TupleQueryResultHandlerException { try { con.commit(); con.close(); } catch (RepositoryException e) { throw new TupleQueryResultHandlerException("error while closing repository 
connection",e); } } @Override public void handleSolution(BindingSet bindingSet) throws TupleQueryResultHandlerException { try { Value predicate = bindingSet.getValue("p"); Value object = bindingSet.getValue("o"); if(predicate instanceof URI) { con.add(triples.getValueFactory().createStatement(subject,(URI)predicate,object)); } else { log.error("ignoring binding as predicate {} is not a URI",predicate); } } catch (RepositoryException e) { throw new TupleQueryResultHandlerException("error while adding triple to repository connection",e); } } }, triples.getValueFactory()); return triples; } private HttpClient createClient() { DefaultHttpClient client = new DefaultHttpClient(httpParams); client.setRedirectStrategy(new LMFRedirectStrategy()); client.setHttpRequestRetryHandler(new LMFHttpRequestRetryHandler()); return client; } private class LMFRedirectStrategy extends DefaultRedirectStrategy { @Override public boolean isRedirected(HttpRequest request, HttpResponse response, HttpContext context) throws ProtocolException { if (response == null) { throw new IllegalArgumentException("HTTP response may not be null"); } int statusCode = response.getStatusLine().getStatusCode(); String method = request.getRequestLine().getMethod(); Header locationHeader = response.getFirstHeader("location"); switch (statusCode) { case HttpStatus.SC_MOVED_TEMPORARILY: return (method.equalsIgnoreCase(HttpGet.METHOD_NAME) || method.equalsIgnoreCase(HttpHead.METHOD_NAME)) && locationHeader != null; case HttpStatus.SC_MOVED_PERMANENTLY: case HttpStatus.SC_TEMPORARY_REDIRECT: return method.equalsIgnoreCase(HttpGet.METHOD_NAME) || method.equalsIgnoreCase(HttpHead.METHOD_NAME); case HttpStatus.SC_SEE_OTHER: return true; case HttpStatus.SC_MULTIPLE_CHOICES: return true; default: return false; } //end of switch } } private class LMFHttpRequestRetryHandler implements HttpRequestRetryHandler { /** * Determines if a method should be retried after an IOException * occurs during execution. 
* * @param exception the exception that occurred * @param executionCount the number of times this method has been * unsuccessfully executed * @param context the context for the request execution * @return true if the method should be retried, false * otherwise */ @Override public boolean retryRequest(IOException exception, int executionCount, HttpContext context) { return false; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy