All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.sakaiproject.search.elasticsearch.SiteElasticSearchIndexBuilder Maven / Gradle / Ivy

There is a newer version: 23.2
Show newest version
/**********************************************************************************
 * $URL$
 * $Id$
 ***********************************************************************************
 *
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008 The Sakai Foundation
 *
 * Licensed under the Educational Community License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.osedu.org/licenses/ECL-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **********************************************************************************/

package org.sakaiproject.search.elasticsearch;

import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.termsQuery;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TimerTask;
import java.util.stream.Collectors;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.xcontent.XContentBuilder;
import org.sakaiproject.entity.api.Entity;
import org.sakaiproject.event.api.Event;
import org.sakaiproject.exception.IdUnusedException;
import org.sakaiproject.search.api.EntityContentProducer;
import org.sakaiproject.search.api.SearchService;
import org.sakaiproject.search.api.SiteSearchIndexBuilder;
import org.sakaiproject.search.model.SearchBuilderItem;
import org.sakaiproject.site.api.Site;
import org.sakaiproject.site.api.SiteService;
import org.sakaiproject.site.api.ToolConfiguration;
import org.sakaiproject.user.api.User;
import org.sakaiproject.user.api.UserDirectoryService;
import org.slf4j.Logger;

import com.google.common.collect.Maps;
import lombok.extern.slf4j.Slf4j;

@Slf4j
public class SiteElasticSearchIndexBuilder extends BaseElasticSearchIndexBuilder implements SiteSearchIndexBuilder {

    protected static final String SEARCH_TOOL_ID = "sakai.search";
    protected static final String SAKAI_DOC_TYPE = "_doc";

    protected static final String ADD_RESOURCE_VALIDATION_KEY_SITE_ID = "SITE_ID";
    protected static final String DELETE_RESOURCE_KEY_SITE_ID = "SITE_ID";

    private SiteService siteService;
    private UserDirectoryService userDirectoryService;

    private boolean useSiteFilters = false;

    /**
     * set to false if you want to index all content, not just sites that have the search tool placed
     */
    private boolean onlyIndexSearchToolSites = true;

    /**
     * set to false to include user site content in index
     */
    private boolean excludeUserSites = true;

    /**
     * comma separated list of sites to always ignore when indexing.  Defaults to ~admin, !admin, PortfolioAdmin
     * use injection to set this value.
     */
    private String ignoredSites = null;

    /**
     * parsed list of ignoredSites configuration. If you wish to change this use the ignoredSites field not this
     * one to avoid having to change Spring xml
     */
    private List ignoredSitesList = new ArrayList<>();

    public void init() {
        onlyIndexSearchToolSites = serverConfigurationService.getBoolean("search.onlyIndexSearchToolSites", true);
    }

    @Override
    protected void beforeElasticSearchConfigInitialization() {
        if (StringUtils.isEmpty(this.indexedDocumentType)) {
            this.indexedDocumentType = SAKAI_DOC_TYPE;
        }
        if (ArrayUtils.isEmpty(this.suggestionResultFieldNames)) {
            this.suggestionResultFieldNames = new String[] {
                    SearchService.FIELD_TYPE,
                    SearchService.FIELD_REFERENCE,
                    SearchService.FIELD_SITEID,
                    SearchService.FIELD_CREATOR_DISPLAY_NAME,
                    SearchService.FIELD_CREATOR_ID,
                    SearchService.FIELD_CREATOR_USER_NAME,
                    SearchService.FIELD_TITLE
            };
        }
        if ( ArrayUtils.isEmpty(this.searchResultFieldNames)) {
            this.searchResultFieldNames = new String[] {
                    SearchService.FIELD_TYPE,
                    SearchService.FIELD_REFERENCE,
                    SearchService.FIELD_SITEID,
                    SearchService.FIELD_CREATOR_DISPLAY_NAME,
                    SearchService.FIELD_CREATOR_ID,
                    SearchService.FIELD_CREATOR_USER_NAME,
                    SearchService.FIELD_TITLE,
                    SearchService.FIELD_URL,
                    SearchService.FIELD_TOOL
            };
        }
    }

    @Override
    protected void beforeBackgroundSchedulerInitialization() {
        if (ignoredSites != null) {
            ignoredSitesList = Arrays.asList(ignoredSites.split(","));
        } else {
            ignoredSitesList.add("~admin");
            ignoredSitesList.add("!admin");
            ignoredSitesList.add("PortfolioAdmin");
        }
    }

    @Override
    protected void completeAddResourceEventValidations(Event event, Map validationContext)
            throws IllegalArgumentException, IllegalStateException {
        final String resourceName = (String)validationContext.get(ADD_RESOURCE_VALIDATION_KEY_RESOURCE_NAME);
        final EntityContentProducer ecp = (EntityContentProducer)validationContext.get(ADD_RESOURCE_VALIDATION_KEY_CONTENT_PRODUCER);

        String siteId = ecp.getSiteId(resourceName);

        if (onlyIndexSearchToolSites) {
            try {
                Site s = siteService.getSite(siteId);
                ToolConfiguration t = s.getToolForCommonId(SEARCH_TOOL_ID);
                if (t == null) {
                    throw new IllegalArgumentException("Resource name [" + resourceName + "] for event [" + event
                            + "] not indexable because it is not associated with a site that has the search tool");
                }
            } catch (Exception ex) {
                throw new IllegalArgumentException("Event [" + event
                        + "] not indexable because it is not associated with a site");
            }
        }
        validationContext.put(ADD_RESOURCE_VALIDATION_KEY_SITE_ID, siteId);
    }

    @Override
    protected Map extractDeleteDocumentParams(Map validationContext) {
        Map params = super.extractDeleteDocumentParams(validationContext);
        params.put(DELETE_RESOURCE_KEY_SITE_ID, validationContext.get(ADD_RESOURCE_VALIDATION_KEY_SITE_ID));
        return params;
    }

    @Override
    protected Map extractDeleteDocumentParams(NoContentException noContentException) {
        Map params = super.extractDeleteDocumentParams(noContentException);
        params.put(DELETE_RESOURCE_KEY_SITE_ID, noContentException.getSiteId());
        return params;
    }

    @Override
    protected Map extractDeleteDocumentParams(SearchHit searchHit) {
        String siteId = getFieldFromSearchHit(SearchService.FIELD_SITEID, searchHit);
        final Map params = super.extractDeleteDocumentParams(searchHit);
        params.put(DELETE_RESOURCE_KEY_SITE_ID, siteId);
        return params;
    }

    @Override
    protected DeleteRequest completeDeleteRequest(DeleteRequest deleteRequest, Map deleteParams) {
        return deleteRequest.routing((String)deleteParams.get(DELETE_RESOURCE_KEY_SITE_ID));
    }

    protected void deleteDocument(String id, String siteId) {
        final Map params = Maps.newHashMap();
        params.put(DELETE_RESOURCE_KEY_DOCUMENT_ID, id);
        params.put(DELETE_RESOURCE_KEY_SITE_ID, siteId);
        deleteDocumentWithParams(params);
    }

    @Override
    protected XContentBuilder addFields(XContentBuilder contentSourceBuilder, String resourceName,
                                        EntityContentProducer ecp, boolean includeContent) throws IOException {
        return contentSourceBuilder.field(SearchService.FIELD_SITEID, ecp.getSiteId(resourceName))
                .field(SearchService.FIELD_CREATOR_DISPLAY_NAME, ecp.getCreatorDisplayName(resourceName))
                .field(SearchService.FIELD_CREATOR_ID, ecp.getCreatorId(resourceName))
                .field(SearchService.FIELD_CREATOR_USER_NAME, ecp.getCreatorUserName(resourceName))
                .field(SearchService.FIELD_TITLE, ecp.getTitle(resourceName))
                .field(SearchService.FIELD_REFERENCE, resourceName)
                .field(SearchService.FIELD_URL, ecp.getUrl(resourceName, Entity.UrlType.PORTAL))
                //.field(SearchService.FIELD_ID, ecp.getId(resourceName))
                .field(SearchService.FIELD_TOOL, ecp.getTool())
                .field(SearchService.FIELD_CONTAINER, ecp.getContainer(resourceName))
                .field(SearchService.FIELD_TYPE, ecp.getType(resourceName));
                //.field(SearchService.FIELD_SUBTYPE, ecp.getSubType(resourceName));
    }

    @Override
    protected XContentBuilder noContentForIndexRequest(XContentBuilder contentSourceBuilder, String resourceName,
                                                       EntityContentProducer ecp, boolean includeContent)
            throws NoContentException {
        throw new NoContentException(ecp.getId(resourceName), resourceName, ecp.getSiteId(resourceName));
    }

    @Override
    protected void noContentProducerForContentQueueEntry(SearchHit hit, String reference) throws NoContentException {
        final String siteId = getFieldFromSearchHit(SearchService.FIELD_SITEID, hit);
        throw new NoContentException(hit.getId(), reference, siteId);
    }

    @Override
    protected SearchRequest completeFindContentQueueRequest(SearchRequest searchRequest) {
        return searchRequest;
    }

    protected void rebuildSiteIndex(String siteId)  {
        getLog().info("Rebuilding the index for '{}'", siteId);

        try {
            enableAzgSecurityAdvisor();
            deleteAllDocumentForSite(siteId);

            long start = System.currentTimeMillis();
            int numberOfDocs = 0;

            BulkRequest bulkRequest = new BulkRequest();

            for (final EntityContentProducer ecp : producers) {
            	Iterator i = ecp.getSiteContentIterator(siteId);

                while ( i != null && i.hasNext() ) {

                    if (bulkRequest.numberOfActions() < bulkRequestSize) {
                        String reference = i.next();

                        if (StringUtils.isNotBlank(ecp.getContent(reference))) {
                            //updating was causing issues without a _source, so doing delete and re-add
                            try {
                                deleteDocument(ecp.getId(reference), ecp.getSiteId(reference));
                                bulkRequest.add(prepareIndex(reference, ecp, true));
                                numberOfDocs++;
                            } catch (Exception e) {
                                getLog().error(e.getMessage(), e);
                            }
                        }

                    } else {
                        executeBulkRequest(bulkRequest);
                        bulkRequest = new BulkRequest();
                    }
                }

                // execute any remaining bulks requests not executed yet
                if (bulkRequest.numberOfActions() > 0) {
                    executeBulkRequest(bulkRequest);
                }

            }

            getLog().info("Queued " + numberOfDocs + " docs for indexing from site: " + siteId + " in " + (System.currentTimeMillis() - start) + " ms");

        } catch (Exception e) {
            getLog().error("An exception occurred while rebuilding the index of '" + siteId + "'", e);
        } finally {
            disableAzgSecurityAdvisor();
        }
    }

    @Override
    protected void rebuildIndexImmediately() {
        // rebuild index
        for (Site s : siteService.getSites(SiteService.SelectionType.ANY, null, null, null, SiteService.SortType.NONE, null)) {
            if (isSiteIndexable(s)) {
                rebuildSiteIndex(s.getId());
            }
        }
    }

    protected class RebuildSiteTask extends TimerTask {
        private final String siteId;

        public RebuildSiteTask(String siteId) {
            this.siteId = siteId;
        }

        /**
         * Rebuild the index from the entities own stored state {@inheritDoc}, for just
         * the supplied siteId
         */
        public void run() {
            try {
                // let's not hog the whole CPU just in case you have lots of sites with lots of data this could take a bit
                Thread.currentThread().setPriority(Thread.NORM_PRIORITY - 1);
                rebuildSiteIndex(siteId);
            } catch (Exception e) {
                getLog().error("problem queuing content indexing for site: " + siteId + " error: " + e.getMessage());
            }
        }

    }

    /**
     * Check if a site is considered as indexable based on the current server configuration.
     * 

* Not indexable sites are: *

    *
  • Special sites
  • *
  • Sites without the search tool (if the option is enabled)
  • *
  • User sites (if the option is enabled)
  • *
  • Any sites included in the ignoreSitesList (~admin and !admin are the default ignored sites)
  • *
*

* * @param site site which may be indexable * @return true if the site can be index, false otherwise */ protected boolean isSiteIndexable(Site site) { getLog().debug("Check if '" + site + "' is indexable."); return !(siteService.isSpecialSite(site.getId()) || (isOnlyIndexSearchToolSites() && site.getToolForCommonId(SEARCH_TOOL_ID) == null) || (isExcludeUserSites() && siteService.isUserSite(site.getId())) || (ignoredSitesList.contains(site.getId()))); } @Override public List getSiteMasterSearchItems() { return Collections.emptyList(); } /** * Rebuild the index from the entities own stored state {@inheritDoc}, for just * the supplied siteId */ @Override public void rebuildIndex(String siteId) { if (testMode) { rebuildSiteIndex(siteId); return; } backgroundScheduler.schedule(new RebuildSiteTask(siteId), 0); } protected void deleteAllDocumentForSite(String siteId) { getLog().debug("removing all documents from search index for siteId: {}", siteId); // TODO get DeleteByQuery working in embedded ES // DeleteByQueryRequest request = new DeleteByQueryRequest(indexName); // request.setQuery(termQuery(SearchService.FIELD_SITEID, siteId)); // request.setDocTypes(indexedDocumentType); // request.setRefresh(true); // try { // client.deleteByQuery(request, RequestOptions.DEFAULT); // } catch (IOException ioe) { // getLog().warn("Could not delete all documents in index {} for site {}, {}", indexName, siteId, ioe.toString()); // } int maxHits = 999; long hitCount = maxHits + 1; while (hitCount >= maxHits) { SearchResponse response = search(null, null, Collections.singletonList(siteId), null, 0, maxHits); SearchHits hits = response.getHits(); hitCount = hits.getTotalHits().value; getLog().info("Deleting {} docs from site {}", hitCount, siteId); for (SearchHit hit : hits) { deleteDocument(hit); } refreshIndex(); } } /** * Refresh the index for the supplied site. This simply refreshes the docs that ES already knows about. * It does not create any new docs. If you want to reload all site content you need to do a {@see rebuildIndex()} */ @Override public void refreshIndex(String siteId) { getLog().info("Refreshing the index for '{}'", siteId); //Get the currently indexed resources for this site Site site = null; try { site = siteService.getSite(siteId); } catch (IdUnusedException e) { getLog().error("site with siteId=" + siteId + " does not exist can't refresh its index"); return; } if (!isSiteIndexable(site)) { getLog().debug("ignoring request to refreshIndex for site:" + siteId + " as its not indexable"); return; } Collection resourceNames = getResourceNames(siteId); getLog().debug(resourceNames.size() + " elements will be refreshed"); for (String resourceName : resourceNames) { EntityContentProducer entityContentProducer = newEntityContentProducer(resourceName); //If there is no matching entity content producer or no associated site, skip the resource //it is either not available anymore, or the corresponding entityContentProducer doesn't exist anymore if (entityContentProducer == null || entityContentProducer.getSiteId(resourceName) == null) { getLog().warn("Couldn't either find an entityContentProducer or the resource itself for '" + resourceName + "'"); continue; } try { prepareIndexAdd(resourceName, entityContentProducer, false); } catch (NoContentException e) { // ignore we are just queuing here, not looking for content } } } /** * Get all indexed resources for a site * * @param siteId Site containing indexed resources * @return a collection of resource references or an empty collection if no resource was found */ protected Collection getResourceNames(String siteId) { getLog().debug("Obtaining indexed elements for site: '" + siteId + "'"); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder() .query(termQuery(SearchService.FIELD_SITEID, siteId)) .size(Integer.MAX_VALUE) .storedField(SearchService.FIELD_REFERENCE); SearchRequest searchRequest = new SearchRequest(indexName) .searchType(SearchType.QUERY_THEN_FETCH) .types(indexedDocumentType); SearchResponse response; try { response = client.search(searchRequest, RequestOptions.DEFAULT); } catch (IOException ioe) { getLog().warn("Search for resources in site [" + siteId + "] encountered an error, " + ioe); return Collections.emptyList(); } return Arrays.stream(response.getHits().getHits()) .map(h -> getFieldFromSearchHit(SearchService.FIELD_REFERENCE, h)) .collect(Collectors.toList()); } @Override protected IndexRequest completeIndexRequest(IndexRequest indexRequest, String resourceName, EntityContentProducer ecp, boolean includeContent) { return indexRequest.routing(ecp.getSiteId(resourceName)); } @Override protected void addSearchSiteIds(SearchRequest searchRequest, List siteIds) { // if we have sites filter results to include only the sites included if (siteIds != null && !siteIds.isEmpty()) { BoolQueryBuilder queryBuilder = (BoolQueryBuilder) searchRequest.source().query(); // searchRequest.routing(siteIds.toArray(new String[]{})); // creating config whether or not to use filter, there are performance and caching differences that // maybe implementation decisions if (useSiteFilters) { QueryBuilder siteFilter = boolQuery().filter(termsQuery(SearchService.FIELD_SITEID, siteIds)); searchRequest.source().postFilter(siteFilter); } else { queryBuilder.must(termsQuery(SearchService.FIELD_SITEID, siteIds)); } } } @Override protected void completeSearchRequestBuilders(SearchRequest searchRequest, String searchTerms, List references, List siteIds) { } @Override protected void addSearchSuggestionsTerms(SearchRequest searchRequest, String searchString) { // no-op. taken care of in newSearchSuggestionsRequestAndQueryBuilders() because of the // way TermQueryBuilders have to be constructed (no default constructor so have to be // given the search field and term at instantiation) } @Override protected void addSearchSuggestionsSites(SearchRequest searchRequest, String currentSite, boolean allMySites) { String currentUser = ""; User user = userDirectoryService.getCurrentUser(); if (user != null) { currentUser = user.getId(); } String[] sites; if (allMySites || currentSite == null) { sites = getAllUsersSites(currentUser); } else { sites = new String[]{currentSite}; } QueryBuilder siteFilter = boolQuery().filter(termsQuery(SearchService.FIELD_SITEID, sites)); searchRequest.routing(sites).source().postFilter(siteFilter); } /** * Get all the sites a user has access to. * @return An array of site IDs. */ protected String[] getAllUsersSites(String currentUser) { List sites = siteService.getSites( org.sakaiproject.site.api.SiteService.SelectionType.ACCESS, null, null, null, null, null); final List siteIds = sites.stream().map(s -> s.getId()).collect(Collectors.toList()); siteIds.add(siteService.getUserSiteId(currentUser)); return siteIds.toArray(new String[siteIds.size()]); } @Override protected void completeSearchSuggestionsRequestBuilders(SearchRequest searchRequest, String searchString, String currentSite, boolean allMySites) { } /** * @return the onlyIndexSearchToolSites */ @Override public boolean isOnlyIndexSearchToolSites() { return onlyIndexSearchToolSites; } /** * @param onlyIndexSearchToolSites the onlyIndexSearchToolSites to set */ public void setOnlyIndexSearchToolSites(boolean onlyIndexSearchToolSites) { this.onlyIndexSearchToolSites = onlyIndexSearchToolSites; } public void setExcludeUserSites(boolean excludeUserSites) { this.excludeUserSites = excludeUserSites; } @Override public boolean isExcludeUserSites() { return excludeUserSites; } public void setUseSiteFilters(boolean useSiteFilters) { this.useSiteFilters = useSiteFilters; } public void setSiteService(SiteService siteService) { this.siteService = siteService; } public void setIgnoredSites(String ignoredSites) { this.ignoredSites = ignoredSites; } public void setIgnoredSitesList(List ignoredSitesList) { this.ignoredSitesList = ignoredSitesList; } public void setUserDirectoryService(UserDirectoryService userDirectoryService) { this.userDirectoryService = userDirectoryService; } @Override public String getEventResourceFilter() { return "/"; } @Override protected Logger getLog() { return log; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy