All downloads are FREE. The search and download functionality uses the official Maven repository.

org.dataconservancy.pass.client.elasticsearch.ElasticsearchPassClient Maven / Gradle / Ivy

/*
 * Copyright 2018 Johns Hopkins University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.dataconservancy.pass.client.elasticsearch;

import static java.lang.String.format;
import static java.lang.String.join;
import static java.util.stream.Collectors.toList;

import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.http.HttpHost;

import org.dataconservancy.pass.model.PassEntity;
import org.dataconservancy.pass.model.PassEntityType;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Communicates with elasticsearch.
 * <p>
 * Builds Lucene query-string queries of the form {@code (@type:X AND field:"value")} from a model class
 * and one or more attribute/value pairs, runs them against the configured indexer host(s) and returns
 * the {@code @id} of each matching document as a {@link URI}.
 * <p>
 * A new {@link RestHighLevelClient} is opened and closed for every search.
 *
 * @author Karen Hanson
 */
public class ElasticsearchPassClient {

    private static final Logger LOG = LoggerFactory.getLogger(ElasticsearchPassClient.class);

    /**
     * Template for a search attribute e.g. AND fldname:"something"
     */
    private static final String QS_ATTRIB_TEMPLATE  = "AND %s:\"%s\"";

    /**
     * Template for a query string e.g. (@type:Submission AND fldname:"something")
     * where the second %s could be one or more QS_ATTRIB_TEMPLATES
     */
    private static final String QS_TEMPLATE = "(@type:%s %s)";

    /**
     * Template for a clause matching documents in which the field is present e.g. _exists_:fldname
     */
    private static final String EXISTS_TEMPLATE = "_exists_:%s";

    /**
     * Template for a clause matching documents in which the field is absent e.g. -_exists_:fldname
     */
    private static final String NOT_EXISTS_TEMPLATE = "-" + EXISTS_TEMPLATE;

    /**
     * Template for a "field is absent" search attribute e.g. AND -_exists_:fldname
     */
    private static final String QS_ATTRIB_NOT_EXISTS_TEMPLATE = "AND " + NOT_EXISTS_TEMPLATE;

    /**
     * Name of the field in the indexed document that holds the entity's URI
     */
    private static final String ID_FIELDNAME = "@id";

    /**
     * URL(s) of indexer
     */
    private final HttpHost[] hosts;

    /**
     * Default constructor for PASS client. Reads the indexer URL(s) from {@code ElasticsearchConfig}
     * and converts each into an {@link HttpHost}.
     */
    public ElasticsearchPassClient() {
        Set<URL> indexerUrls = ElasticsearchConfig.getIndexerHostUrl();
        hosts = new HttpHost[indexerUrls.size()];
        int count = 0;
        for (URL url : indexerUrls) {
            LOG.info("Connecting to index at {}", url);
            hosts[count++] = new HttpHost(url.getHost(), url.getPort(), url.getProtocol());
        }
    }

    /**
     * Finds the single entity of the given type whose attribute matches the value. A {@code null}
     * value matches entities in which the attribute is absent.
     *
     * @see org.dataconservancy.pass.client.PassClient#findByAttribute(Class, String, Object)
     *
     * @param modelClass modelClass
     * @param attribute attribute
     * @param value value
     * @return URI of the matching entity, or {@code null} if nothing matched
     * @param <T> PASS entity type
     * @throws IllegalArgumentException if a parameter fails validation
     * @throws RuntimeException if more than one entity matched, or the search failed
     */
    public <T extends PassEntity> URI findByAttribute(Class<T> modelClass, String attribute, Object value) {
        validateModelParam(modelClass);
        validateAttribValParams(attribute, value, true);

        String querystring = String.format(QS_TEMPLATE, resolveIndexType(modelClass),
                                           attributeClause(attribute, value));

        Set<URI> passEntityUris = getIndexerResults(querystring, 2, 0); //get 2 so we can check only one result matched
        if (passEntityUris.size() > 1) {
            throw new RuntimeException(
                    format("More than one results was returned by this query (%s = %s). " +
                            "findByAttribute() searches should match only one result.  Instead found:\n %s",
                            attribute, value,
                            join("\n", passEntityUris.stream().map(URI::toString).collect(toList()))));
        }
        return passEntityUris.isEmpty() ? null : passEntityUris.iterator().next();
    }

    /**
     * Finds all entities of the given type whose attribute matches the value, using the configured
     * indexer limit and an offset of 0.
     *
     * @see org.dataconservancy.pass.client.PassClient#findAllByAttribute(Class, String, Object)
     *
     * @param modelClass modelClass
     * @param attribute attribute
     * @param value value
     * @return Set of URI
     * @param <T> PASS entity type
     */
    public <T extends PassEntity> Set<URI> findAllByAttribute(Class<T> modelClass, String attribute, Object value) {
        return findAllByAttribute(modelClass, attribute, value, ElasticsearchConfig.getIndexerLimit(), 0);
    }

    /**
     * Finds all entities of the given type whose attribute matches the value. A {@code null} value
     * matches entities in which the attribute is absent.
     *
     * @see org.dataconservancy.pass.client.PassClient#findAllByAttribute(Class, String, Object, int, int)
     *
     * @param modelClass modelClass
     * @param attribute attribute
     * @param value value
     * @param limit limit
     * @param offset offset
     * @return Set of URI
     * @param <T> PASS entity type
     * @throws IllegalArgumentException if a parameter fails validation
     */
    public <T extends PassEntity> Set<URI> findAllByAttribute(Class<T> modelClass, String attribute, Object value,
                                                              int limit, int offset) {
        validateModelParam(modelClass);
        validateAttribValParams(attribute, value, true);
        validLimitOffsetParams(limit, offset);

        String querystring = String.format(QS_TEMPLATE, resolveIndexType(modelClass),
                                           attributeClause(attribute, value));
        return getIndexerResults(querystring, limit, offset);
    }

    /**
     * Finds all entities of the given type that match every attribute/value pair in the map, using
     * the configured indexer limit and an offset of 0.
     *
     * @see org.dataconservancy.pass.client.PassClient#findAllByAttributes(Class, Map)
     *
     * @param modelClass modelClass
     * @param valueAttributesMap valueAttributesMap
     * @return Set of URI
     * @param <T> PASS entity type
     */
    public <T extends PassEntity> Set<URI> findAllByAttributes(Class<T> modelClass,
                                                               Map<String, Object> valueAttributesMap) {
        return findAllByAttributes(modelClass, valueAttributesMap, ElasticsearchConfig.getIndexerLimit(), 0);
    }

    /**
     * Finds all entities of the given type that match every attribute/value pair in the map. A
     * {@code null} map value matches entities in which that attribute is absent.
     *
     * @see org.dataconservancy.pass.client.PassClient#findAllByAttributes(Class, Map, int, int)
     *
     * @param modelClass modelClass
     * @param valueAttributesMap valueAttributesMap
     * @param limit limit
     * @param offset offset
     * @return Set of URI
     * @param <T> PASS entity type
     * @throws IllegalArgumentException if a parameter fails validation
     */
    public <T extends PassEntity> Set<URI> findAllByAttributes(Class<T> modelClass,
                                                               Map<String, Object> valueAttributesMap,
                                                               int limit, int offset) {
        validateModelParam(modelClass);
        validateAttribMapParam(valueAttributesMap);
        validLimitOffsetParams(limit, offset);

        LOG.debug("Searching for {} using multiple filters", modelClass.getSimpleName());

        StringBuilder attribs = new StringBuilder();
        for (Entry<String, Object> attr : valueAttributesMap.entrySet()) {
            // Clauses must be whitespace-separated; otherwise an unquoted clause such as
            // -_exists_:foo runs into the following "AND" and is parsed as field "fooAND".
            if (attribs.length() > 0) {
                attribs.append(' ');
            }
            attribs.append(attributeClause(attr.getKey(), attr.getValue()));
        }
        String querystring = String.format(QS_TEMPLATE, resolveIndexType(modelClass), attribs);

        return getIndexerResults(querystring, limit, offset);
    }

    /**
     * Looks up the index type name for a model class via its simple name.
     *
     * @param modelClass the model class, already validated as non-null
     * @return the type name, or {@code null} when {@code PassEntityType} has no entry for the class;
     *         a {@code null} is rendered as the literal "null" in the query string
     */
    private static String resolveIndexType(Class<?> modelClass) {
        PassEntityType type = PassEntityType.getTypeByName(modelClass.getSimpleName());
        return type != null ? type.getName() : null;
    }

    /**
     * Builds a single "AND ..." clause for one attribute.
     *
     * @param attribute field name
     * @param value value to match, or {@code null} to match documents in which the field is absent
     * @return the clause, without leading or trailing whitespace
     */
    private static String attributeClause(String attribute, Object value) {
        if (value == null) {
            return String.format(QS_ATTRIB_NOT_EXISTS_TEMPLATE, attribute);
        }
        return String.format(QS_ATTRIB_TEMPLATE, attribute, escapeQuotedValue(value.toString()));
    }

    /**
     * Escapes a value for use inside a double-quoted phrase of a query string. Backslashes are
     * escaped first so that the backslashes added for quotes are not themselves doubled.
     *
     * @param raw the raw value
     * @return the value with {@code \} and {@code "} backslash-escaped
     */
    private static String escapeQuotedValue(String raw) {
        return raw.replace("\\", "\\\\").replace("\"", "\\\"");
    }

    /**
     * Retrieve search results from elasticsearch
     *
     * @param querystring query-string query to run
     * @param limit maximum number of hits to request
     * @param offset index of the first hit to request
     * @return the {@code @id} of each hit, as a URI
     * @throws RuntimeException if a hit's ID is not a valid URI, or anything else fails during the search
     */
    private Set<URI> getIndexerResults(String querystring, int limit, int offset) {

        Set<URI> passEntityUris = new HashSet<>();

        try (RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(hosts))) {

            SearchRequest searchRequest = new SearchRequest();
            SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
            sourceBuilder.from(offset);
            sourceBuilder.size(limit);

            LOG.debug("Searching index using querystring: {}, with limit {} and offset {}", querystring,  limit, offset);
            //(content:this OR name:this)
            QueryStringQueryBuilder matchQueryBuilder = new QueryStringQueryBuilder(querystring);

            matchQueryBuilder.defaultOperator(Operator.AND);
            sourceBuilder.query(matchQueryBuilder);
            searchRequest.source(sourceBuilder);
            SearchResponse searchResponse = client.search(searchRequest);
            SearchHits hits = searchResponse.getHits();
            Iterator<SearchHit> hitsIt = hits.iterator();

            while (hitsIt.hasNext()) {
                String idField = hitsIt.next().getSourceAsMap().get(ID_FIELDNAME).toString();
                passEntityUris.add(new URI(idField));
            }
        } catch (URISyntaxException e) {
            throw new RuntimeException("Something was wrong with the record returned from the indexer. The ID could not be recognized as a URI", e);
        } catch (Exception e) {
            throw new RuntimeException(String.format("An error occurred while processing the query: %s", querystring), e);
        }

        return passEntityUris;
    }

    /**
     * Checks that the attribute map is non-empty and that every entry is a valid attribute/value pair.
     */
    private void validateAttribMapParam(Map<String, Object> valueAttributesMap) {
        if (valueAttributesMap == null || valueAttributesMap.isEmpty()) {
            throw new IllegalArgumentException("valueAttributesMap cannot be empty");
        }
        for (Entry<String, Object> entry : valueAttributesMap.entrySet()) {
            validateAttribValParams(entry.getKey(), entry.getValue(), true);
        }
    }

    /**
     * Checks that the model class is non-null and is not {@code PassEntity} itself.
     */
    private void validateModelParam(Class<?> modelClass) {
        if (modelClass == null) {
            throw new IllegalArgumentException("modelClass cannot be null");
        }
        if (modelClass == PassEntity.class) {
            throw new IllegalArgumentException("modelClass cannot be the abstract class 'PassEntity.class'");
        }
    }

    /**
     * Checks that neither the limit nor the offset is negative.
     */
    private void validLimitOffsetParams(int limit, int offset) {
        if (offset < 0) {
            throw new IllegalArgumentException("The offset value cannot be less than 0");
        }
        if (limit < 0) {
            throw new IllegalArgumentException("The limit value cannot be less than 0");
        }
    }

    /**
     * Checks that the attribute name is non-empty, the value is not a Collection, and the value is
     * non-null unless {@code allowNullValues} is set.
     */
    private void validateAttribValParams(String attribute, Object value, boolean allowNullValues) {
        if (attribute == null || attribute.length() == 0) {
            throw new IllegalArgumentException("attribute cannot be null or empty");
        }
        if (value instanceof Collection) {
            throw new IllegalArgumentException("Value for attribute " + attribute + " cannot be a Collection");
        }
        if (value == null && !allowNullValues) {
            throw new IllegalArgumentException("Value cannot be null or empty");
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy