// org.sakaiproject.search.elasticsearch.ElasticSearchIndexBuilder
/**********************************************************************************
* $URL$
* $Id$
***********************************************************************************
*
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008 The Sakai Foundation
*
* Licensed under the Educational Community License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.osedu.org/licenses/ECL-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
**********************************************************************************/
package org.sakaiproject.search.elasticsearch;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshResponse;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.action.delete.DeleteRequestBuilder;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.deletebyquery.DeleteByQueryResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.loader.JsonSettingsLoader;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.indices.IndexAlreadyExistsException;
import org.elasticsearch.search.SearchHit;
import org.sakaiproject.authz.api.SecurityAdvisor;
import org.sakaiproject.authz.api.SecurityService;
import org.sakaiproject.component.api.ServerConfigurationService;
import org.sakaiproject.event.api.Event;
import org.sakaiproject.event.api.Notification;
import org.sakaiproject.exception.IdUnusedException;
import org.sakaiproject.exception.PermissionException;
import org.sakaiproject.exception.TypeException;
import org.sakaiproject.search.api.EntityContentProducer;
import org.sakaiproject.search.api.SearchIndexBuilder;
import org.sakaiproject.search.api.SearchService;
import org.sakaiproject.search.model.SearchBuilderItem;
import org.sakaiproject.site.api.Site;
import org.sakaiproject.site.api.SiteService;
import org.sakaiproject.site.api.ToolConfiguration;
import java.io.IOException;
import java.io.StringWriter;
import java.lang.Exception;
import java.lang.String;
import java.lang.System;
import java.util.*;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.FilterBuilders.*;
import static org.elasticsearch.index.query.QueryBuilders.*;
public class ElasticSearchIndexBuilder implements SearchIndexBuilder {
private static Logger log = LoggerFactory.getLogger(ElasticSearchIndexBuilder.class);

/** Tool id whose placement in a site marks the site as searchable. */
public static final String SEARCH_TOOL_ID = "sakai.search";

/** Advisor pushed while indexing so content can be read without extra permissions. */
private final static SecurityAdvisor allowAllAdvisor;

private SiteService siteService;
private SecurityService securityService;
private ServerConfigurationService serverConfigurationService;

// Registered entity content producers (type parameter restored; it had been lost
// in extraction — iteration sites such as newEntityContentProducer assume it).
private List<EntityContentProducer> producers = new ArrayList<EntityContentProducer>();

private Client client;
private String indexName;

/**
 * Number of documents to index at a time for each run of the context indexing task (defaults to 500).
 * Setting this too low will slow things down, setting it too high won't allow all nodes in the cluster
 * to share the load.
 */
private int contentIndexBatchSize = 500;

/**
 * Number of actions to send in one elasticsearch bulk index call, defaults to 10.
 * Setting this to too high a number will have memory implications as you'll be keeping
 * more content in memory until the request is executed.
 */
private int bulkRequestSize = 10;

/**
 * By default the mapping is configured in the mapping.json file. This can be overridden by injecting
 * JSON into this property. See the ElasticSearch mapping reference for the configuration that is
 * available; for example, if you want to change the analyzer config for a particular field this is
 * the place to do it.
 */
private String mapping = null;

/**
 * Expects a JSON string of ElasticSearch index settings. You can set this in your
 * sakai.properties file and inject the value via Spring. By default this value is
 * configured by the indexSettings.json file. See the ElasticSearch index modules
 * reference for more information on the configuration that is available.
 */
private String indexSettings = null;

/** Indexing timer whose tasks load the actual content into the index. */
private Timer contentIndexTimer = new Timer("[elasticsearch content indexer]", true);

/** Number of seconds to wait after startup before starting the BulkContentIndexerTask (defaults to 3 minutes). */
private int delay = 180;

/** How often the BulkContentIndexerTask runs, in seconds (defaults to 1 minute). */
private int period = 60;

/** Set to false if you want to index all content, not just sites that have the search tool placed. */
private boolean onlyIndexSearchToolSites = true;

/** Set to false to include user site content in the index. */
private boolean excludeUserSites = true;

// timestamps (ms) used by processContentQueue to report how long an indexing pass took
private long startTime;
private long lastLoad;

/**
 * Comma separated list of sites to always ignore when indexing. Defaults to ~admin, !admin, PortfolioAdmin.
 * Use injection to set this value.
 */
private String ignoredSites = null;

/**
 * Parsed list of the ignoredSites configuration. If you wish to change this, use the ignoredSites
 * field, not this one, to avoid having to change Spring xml.
 */
private List<String> ignoredSitesList = new ArrayList<String>();

/**
 * This turns off the threads and does indexing inline. DO NOT enable this in prod.
 * It is meant for testing, especially unit tests, only.
 */
private boolean testMode = false;

// Effective index settings: bundled/injected JSON overlaid with any
// "elasticsearch.index.*" Sakai properties (see init()).
private Map<String, String> settings = new HashMap<String, String>();

static {
    // allow-everything advisor used by enableAzgSecurityAdvisor()
    allowAllAdvisor = new SecurityAdvisor() {
        public SecurityAdvice isAllowed(String userId, String function, String reference) {
            return SecurityAdvice.ALLOWED;
        }
    };
}
/**
 * Whether search is enabled for this server.
 *
 * @return the value of the "search.enable" Sakai property, defaulting to false
 */
public boolean isEnabled() {
return serverConfigurationService.getBoolean("search.enable", false);
}
/**
 * Initializes the builder: resolves the ignored-site list, loads the index mapping
 * and index settings (from the bundled JSON resources unless overridden by injection),
 * overlays any "elasticsearch.index.*" Sakai properties, and schedules the background
 * content-indexer task. Does nothing when search is disabled.
 */
public void init() {
if (!isEnabled()) {
log.debug("ElasticSearch is not enabled. Set search.enable=true to change that.");
return;
}
// fall back to the default ignored sites when none were injected
if (ignoredSites != null) {
ignoredSitesList = Arrays.asList(ignoredSites.split(","));
} else {
ignoredSitesList.add("~admin");
ignoredSitesList.add("!admin");
ignoredSitesList.add("PortfolioAdmin");
}
// if there is a value here its been overridden by injection, we will use the overridden configuration
if (org.apache.commons.lang.StringUtils.isEmpty(mapping)) {
try {
StringWriter writer = new StringWriter();
IOUtils.copy(ElasticSearchService.class.getResourceAsStream("/org/sakaiproject/search/elastic/bundle/mapping.json"), writer, "UTF-8");
mapping = writer.toString();
} catch (Exception ex) {
log.error("Failed to load mapping config: " + ex.getMessage(), ex);
}
}
log.debug("ElasticSearch mapping will be configured as follows:" + mapping);
// same pattern for the index settings: an injected value wins, otherwise the bundled default
if (org.apache.commons.lang.StringUtils.isEmpty(indexSettings)) {
try {
StringWriter writer = new StringWriter();
IOUtils.copy(ElasticSearchService.class.getResourceAsStream("/org/sakaiproject/search/elastic/bundle/indexSettings.json"), writer, "UTF-8");
indexSettings = writer.toString();
} catch (Exception ex) {
log.error("Failed to load indexSettings config: " + ex.getMessage(), ex);
}
}
// parse the settings JSON into a flat name->value map
JsonSettingsLoader loader = new JsonSettingsLoader();
try {
settings = loader.load(indexSettings);
} catch (IOException e) {
log.error("problem loading indexSettings:" + e.getMessage(), e);
}
// load anything set into the ServerConfigurationService that starts with "elasticsearch.index." this will
// override anything set in the indexSettings config
for (ServerConfigurationService.ConfigItem configItem : serverConfigurationService.getConfigData().getItems()) {
if (configItem.getName().startsWith(ElasticSearchService.CONFIG_PROPERTY_PREFIX + "index.")) {
String propertyName = configItem.getName().replaceFirst(ElasticSearchService.CONFIG_PROPERTY_PREFIX, "");
settings.put(propertyName, (String) configItem.getValue());
}
}
if (log.isDebugEnabled()) {
for (String name : settings.keySet()) {
log.debug("index property '" + name + "' set to: " + settings.get(name));
}
}
// kick off the periodic content indexer unless we are in inline test mode
if (!testMode) {
contentIndexTimer.schedule(new BulkContentIndexerTask(), (delay * 1000), (period * 1000));
} else {
log.warn("IN TEST MODE. DO NOT enable this in production !!!");
}
}
/**
 * register an entity content producer to provide content to the search
 * engine {@inheritDoc}
 *
 * @param ecp producer to consult when matching events/references for indexing
 */
public void registerEntityContentProducer(EntityContentProducer ecp) {
log.debug("register " + ecp);
producers.add(ecp);
}
/**
 * Add a resource to the indexing queue {@inheritDoc}
 *
 * Resolves the event to a content producer, applies the indexing guards
 * (search enabled, reference length, site has the search tool when required)
 * and dispatches on the producer-reported action (ADD or DELETE).
 *
 * @param notification the notification that carried the event (only logged)
 * @param event the event describing the changed resource
 */
public void addResource(Notification notification, Event event) {
log.debug("Add resource " + notification + "::" + event);
if (!isEnabled()) {
log.debug("ElasticSearch is not enabled. Set search.enable=true to change that.");
return;
}
String resourceName = event.getResource();
if (resourceName == null) {
// default if null
resourceName = "";
}
// guard against over-long references (they are not indexable)
if (resourceName.length() > 255) {
log.warn("Entity Reference is longer than 255 characters, not indexing. Reference="
+ resourceName);
return;
}
EntityContentProducer ecp = newEntityContentProducer(event);
if (ecp == null || ecp.getSiteId(resourceName) == null) {
log.debug("Not indexing " + resourceName + " as it has no context");
return;
}
String siteId = ecp.getSiteId(resourceName);
String id = ecp.getId(resourceName);
// optionally restrict indexing to sites that have placed the search tool
if (onlyIndexSearchToolSites) {
try {
Site s = siteService.getSite(siteId);
ToolConfiguration t = s.getToolForCommonId(SEARCH_TOOL_ID);
if (t == null) {
log.debug("Not indexing " + resourceName
+ " as it has no search tool");
return;
}
} catch (Exception ex) {
log.debug("Not indexing " + resourceName + " as it has no site", ex);
return;
}
}
IndexAction action = IndexAction.getAction(ecp.getAction(event));
log.debug("Action on '" + resourceName + "' detected as " + action.name());
switch (action) {
case ADD:
indexAdd(resourceName, ecp);
break;
case DELETE:
deleteDocument(id, siteId);
break;
default:
// only ADD and DELETE are handled here; anything else is a programming error
throw new UnsupportedOperationException(action + " is not yet supported");
}
}
/**
 * Establish a security advisor to allow the "embedded" work to occur with no need for additional security permissions.
 * Must be paired with {@link #disableAzgSecurityAdvisor()} in a finally block.
 */
protected void enableAzgSecurityAdvisor() {
// put in a security advisor so we can do our work without need of further permissions
securityService.pushAdvisor(allowAllAdvisor);
}
/**
 * Disable the security advisor.
 * Pops our allow-all advisor; if some other code popped it first, or we popped
 * someone else's advisor, the stack is restored as best we can.
 */
protected void disableAzgSecurityAdvisor() {
SecurityAdvisor popped = securityService.popAdvisor(allowAllAdvisor);
if (!allowAllAdvisor.equals(popped)) {
if (popped == null) {
// our advisor was already removed by someone else; nothing to restore
log.debug("Someone has removed our advisor.");
} else {
// we removed a foreign advisor by mistake; push it back on the stack
log.debug("Removed someone elses advisor, adding it back.");
securityService.pushAdvisor(popped);
}
}
}
/**
 * Builds (but does not execute) an index request for a resource, routed by its site id.
 *
 * @param resourceName entity reference to index
 * @param ecp producer supplying the document fields
 * @param includeContent whether to digest the content body into the request now
 * @return the prepared index request builder
 * @throws IOException if building the JSON source fails
 * @throws NoContentException if includeContent is set and the producer has no usable content
 */
protected IndexRequestBuilder prepareIndex(String resourceName, EntityContentProducer ecp, boolean includeContent) throws IOException, NoContentException {
return client.prepareIndex(indexName, ElasticSearchService.SAKAI_DOC_TYPE, ecp.getId(resourceName))
.setSource(buildIndexRequest(ecp, resourceName, includeContent))
.setRouting(ecp.getSiteId(resourceName));
}
/**
 * Synchronously executes an index request for a single resource.
 *
 * @param resourceName entity reference to index
 * @param ecp producer supplying the document fields
 * @param includeContent whether to digest and store the content body now
 * @throws NoContentException when the producer has no usable content for the resource
 */
protected void prepareIndexAdd(String resourceName, EntityContentProducer ecp, boolean includeContent) throws NoContentException {
    try {
        prepareIndex(resourceName, ecp, includeContent).execute().actionGet();
    } catch (NoContentException e) {
        throw e;
    } catch (Exception e) {
        // Narrowed from Throwable: swallowing Errors (e.g. OutOfMemoryError) hid
        // fatal JVM problems; genuine indexing failures are still logged and absorbed.
        log.error("Error: trying to register resource " + resourceName
                + " in search engine: " + e.getMessage(), e);
    }
}
/**
 * schedules content for indexing.
 * Adds the document skeleton (without its content body) to the index; the content
 * itself is digested later by the BulkContentIndexerTask. A resource with no
 * usable content is removed from the index instead.
 *
 * @param resourceName entity reference
 * @param ecp producer for the resource
 */
protected void indexAdd(String resourceName, EntityContentProducer ecp) {
    try {
        prepareIndexAdd(resourceName, ecp, false);
    } catch (NoContentException e) {
        deleteDocument(e);
    } catch (Exception e) {
        // pass the exception to the logger so the stack trace is not lost (was message-only)
        log.error("problem updating content indexing for entity: " + resourceName + " error: " + e.getMessage(), e);
    }
}
/**
* build up the elasticsearch request
* @param ecp
* @param resourceName
* @return
* @throws IOException
*/
protected XContentBuilder buildIndexRequest(EntityContentProducer ecp, String resourceName, boolean includeContent) throws NoContentException, IOException {
XContentBuilder xContentBuilder = jsonBuilder()
.startObject()
.field(SearchService.FIELD_SITEID, ecp.getSiteId(resourceName))
.field(SearchService.FIELD_TITLE, ecp.getTitle(resourceName))
.field(SearchService.FIELD_REFERENCE, resourceName)
.field(SearchService.FIELD_URL, ecp.getUrl(resourceName))
//.field(SearchService.FIELD_ID, ecp.getId(resourceName))
.field(SearchService.FIELD_TOOL, ecp.getTool())
.field(SearchService.FIELD_CONTAINER, ecp.getContainer(resourceName))
.field(SearchService.FIELD_TYPE, ecp.getType(resourceName));
//.field(SearchService.FIELD_SUBTYPE, ecp.getSubType(resourceName));
//Add the custom properties
Map> properties = extractCustomProperties(resourceName, ecp);
for (Map.Entry> entry : properties.entrySet()) {
xContentBuilder.field(entry.getKey(), entry.getValue());
}
if (includeContent || testMode) {
String content = ecp.getContent(resourceName);
// some of the ecp impls produce content with nothing but whitespace, its waste of time to index those
if (StringUtils.isNotBlank(content)) {
xContentBuilder
// cannot rely on ecp for providing something reliable to maintain index state
// indexed indicates if the document was indexed
.field(SearchService.FIELD_INDEXED, true)
.field(SearchService.FIELD_CONTENTS, content);
} else {
throw new NoContentException(ecp.getId(resourceName), resourceName, ecp.getSiteId(resourceName));
}
}
return xContentBuilder.endObject();
}
/** @return timestamp (ms since epoch) when the content indexer last finished a pass */
public long getLastLoad() {
return lastLoad;
}
/**
 * Rebuilds the index for a single site: deletes the site's existing documents and
 * queues every resource the registered producers report for the site, submitted to
 * ES in bulk batches of bulkRequestSize. Runs under the allow-all security advisor.
 *
 * @param siteId site whose index should be rebuilt
 */
protected void rebuildSiteIndex(String siteId) {
    log.info("Rebuilding the index for '" + siteId + "'");
    try {
        enableAzgSecurityAdvisor();
        deleteAllDocumentForSite(siteId);
        long start = System.currentTimeMillis();
        int numberOfDocs = 0;
        BulkRequestBuilder bulkRequest = client.prepareBulk();
        for (final EntityContentProducer ecp : getProducers()) {
            // Iterator<String> restored — the type parameter had been lost in extraction,
            // which made the i.next() assignment below uncompilable
            for (Iterator<String> i = ecp.getSiteContentIterator(siteId); i.hasNext(); ) {
                if (bulkRequest.numberOfActions() < bulkRequestSize) {
                    String reference = i.next();
                    if (StringUtils.isNotBlank(ecp.getContent(reference))) {
                        //updating was causing issues without a _source, so doing delete and re-add
                        try {
                            deleteDocument(ecp.getId(reference), ecp.getSiteId(reference));
                            bulkRequest.add(prepareIndex(reference, ecp, false));
                            numberOfDocs++;
                        } catch (Exception e) {
                            log.error(e.getMessage(), e);
                        }
                    }
                } else {
                    // batch is full: flush it; note i.next() is deliberately NOT called on
                    // this branch, so the current element is retried on the next pass
                    executeBulkRequest(bulkRequest);
                    bulkRequest = client.prepareBulk();
                }
            }
            // execute any remaining bulks requests not executed yet
            if (bulkRequest.numberOfActions() > 0) {
                executeBulkRequest(bulkRequest);
            }
        }
        log.info("Queued " + numberOfDocs + " docs for indexing from site: " + siteId + " in " + (System.currentTimeMillis() - start) + " ms");
        //flushIndex();
        //refreshIndex();
    } catch (Exception e) {
        log.error("An exception occurred while rebuilding the index of '" + siteId + "'", e);
    } finally {
        disableAzgSecurityAdvisor();
    }
}
/**
 * Timer task that rebuilds the whole index (every indexable site) in the background.
 */
protected class RebuildIndexTask extends TimerTask {
public RebuildIndexTask() {
}
/**
 * Rebuild the index from the entities own stored state {@inheritDoc}
 */
public void run() {
// let's not hog the whole CPU just in case you have lots of sites with lots of data this could take a bit
Thread.currentThread().setPriority(Thread.NORM_PRIORITY - 1);
rebuildIndexForAllIndexableSites();
}
}
/**
 * Rebuilds the index for every site that passes {@link #isSiteIndexable(Site)}.
 */
protected void rebuildIndexForAllIndexableSites() {
// rebuild index
for (Site s : siteService.getSites(SiteService.SelectionType.ANY, null, null, null, SiteService.SortType.NONE, null)) {
if (isSiteIndexable(s)) {
rebuildSiteIndex(s.getId());
}
}
}
protected class RebuildSiteTask extends TimerTask {
private final String siteId;
public RebuildSiteTask(String siteId) {
this.siteId = siteId;
}
/**
* Rebuild the index from the entities own stored state {@inheritDoc}, for just
* the supplied siteId
*/
public void run() {
try {
// let's not hog the whole CPU just in case you have lots of sites with lots of data this could take a bit
Thread.currentThread().setPriority(Thread.NORM_PRIORITY - 1);
rebuildSiteIndex(siteId);
} catch (Exception e) {
log.error("problem queuing content indexing for site: " + siteId + " error: " + e.getMessage());
}
}
}
/**
 * This is the task that searches for any docs in the search index that do not have content yet,
 * digests the content and loads it into the index. Any docs with empty content will be removed from
 * the index. This timer task is run by the timer thread based on the period set above
 */
protected class BulkContentIndexerTask extends TimerTask {
public void run() {
try {
log.debug("running content indexing task");
// content digestion needs to read entities, so run under the allow-all advisor
enableAzgSecurityAdvisor();
processContentQueue();
} catch (Exception e) {
log.error("content indexing failure: " + e.getMessage(), e);
} finally {
disableAzgSecurityAdvisor();
}
}
}
/**
 * Searches for any docs in the search index that have not been indexed yet,
 * digests the content and loads it into the index. Any docs with empty content
 * (or with no matching content producer) are removed from the index so they are
 * not examined again on every pass.
 */
public void processContentQueue() {
    startTime = System.currentTimeMillis();
    // If there are a lot of docs queued up this could take awhile we don't want
    // to eat up all the CPU cycles.
    Thread.currentThread().setPriority(Thread.NORM_PRIORITY - 1);
    if (getPendingDocuments() == 0) {
        log.trace("no pending docs.");
        return;
    }
    // pending docs are those whose FIELD_INDEXED is missing or false
    SearchResponse response = client.prepareSearch(indexName)
            .setQuery(matchAllQuery())
            .setTypes(ElasticSearchService.SAKAI_DOC_TYPE)
            .setPostFilter(orFilter(
                    missingFilter(SearchService.FIELD_INDEXED),
                    termFilter(SearchService.FIELD_INDEXED, false)))
            .setSize(contentIndexBatchSize)
            .addFields(SearchService.FIELD_REFERENCE, SearchService.FIELD_SITEID)
            .execute().actionGet();
    SearchHit[] hits = response.getHits().hits();
    List<NoContentException> noContentExceptions = new ArrayList<NoContentException>();
    log.debug(getPendingDocuments() + " pending docs.");
    BulkRequestBuilder bulkRequest = client.prepareBulk();
    for (SearchHit hit : hits) {
        // Flush a full batch BEFORE handling the current hit. The previous code
        // flushed in an else-branch of the for-each loop, which silently skipped
        // the hit that triggered the flush (one dropped doc per batch).
        if (bulkRequest.numberOfActions() >= bulkRequestSize) {
            executeBulkRequest(bulkRequest);
            bulkRequest = client.prepareBulk();
        }
        String reference = getFieldFromSearchHit(SearchService.FIELD_REFERENCE, hit);
        String siteId = getFieldFromSearchHit(SearchService.FIELD_SITEID, hit);
        EntityContentProducer ecp = getContentProducerForReference(reference);
        if (ecp != null) {
            //updating was causing issues without a _source, so doing delete and re-add
            try {
                deleteDocument(hit.getId(), siteId);
                bulkRequest.add(prepareIndex(reference, ecp, true));
            } catch (NoContentException e) {
                noContentExceptions.add(new NoContentException(hit.getId(), reference, siteId));
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        } else {
            // if there is no content to index remove the doc, its pointless to have it included in the index
            // and we will just waste cycles looking at it again everytime this thread runs, and will probably
            // never finish because of it.
            noContentExceptions.add(new NoContentException(hit.getId(), reference, siteId));
        }
    }
    // execute any remaining bulks requests not executed yet
    if (bulkRequest.numberOfActions() > 0) {
        executeBulkRequest(bulkRequest);
    }
    // remove any docs without content, so we don't try to index them again
    for (NoContentException noContentException : noContentExceptions) {
        deleteDocument(noContentException);
    }
    lastLoad = System.currentTimeMillis();
    if (hits.length > 0) {
        log.info("Finished indexing " + hits.length + " docs in " +
                ((lastLoad - startTime)) + " ms");
    }
}
/**
 * Deletes a single document from the index by id, routed by its site id.
 *
 * @param id document id to remove
 * @param siteId routing value (the site the doc belongs to)
 */
public void deleteDocument(String id, String siteId) {
    DeleteResponse response = prepareDelete(id, siteId).execute().actionGet();
    if (log.isDebugEnabled()) {
        if (response.isFound()) {
            log.debug("ES deleted a doc with id: " + response.getId());
        } else {
            log.debug("could not delete doc with by id: " + id + " it wasn't found");
        }
    }
}
/** Convenience overload: delete the doc described by a NoContentException. */
private void deleteDocument(NoContentException noContentException) {
deleteDocument(noContentException.getId(), noContentException.getSiteId());
}
/**
 * Executes a bulk request synchronously and logs the per-item outcome for
 * both delete and index responses.
 *
 * @param bulkRequest the batch of actions to submit
 */
protected void executeBulkRequest(BulkRequestBuilder bulkRequest) {
    BulkResponse bulkResponse = bulkRequest.execute().actionGet();
    log.info("bulk request of batch size: " + bulkRequest.numberOfActions() + " took " + bulkResponse.getTookInMillis() + " ms");
    for (BulkItemResponse item : bulkResponse.getItems()) {
        Object itemResponse = item.getResponse();
        if (itemResponse instanceof DeleteResponse) {
            DeleteResponse deleteResponse = (DeleteResponse) itemResponse;
            if (item.isFailed()) {
                log.error("problem deleting doc: " + item.getId() + " error: " + item.getFailureMessage());
            } else if (!deleteResponse.isFound()) {
                log.debug("ES could not find a doc with id: " + deleteResponse.getId() + " to delete.");
            } else {
                log.debug("ES deleted a doc with id: " + deleteResponse.getId());
            }
        }
        if (itemResponse instanceof IndexResponse) {
            IndexResponse indexResponse = (IndexResponse) itemResponse;
            if (item.isFailed()) {
                log.error("problem updating content for doc: " + item.getId() + " error: " + item.getFailureMessage());
            } else {
                log.debug("ES indexed content for doc with id: " + indexResponse.getId());
            }
        }
    }
}
/**
* Extract properties from the {@link EntityContentProducer}
*
* The {@link EntityContentProducer#getCustomProperties(String)} method returns a map of different kind of elements.
* To avoid casting and calls to {@code instanceof}, extractCustomProperties does all the work and returns a formated
* map containing only {@link Collection}.
*
*
* @param resourceName affected resource
* @param contentProducer producer providing properties for the given resource
* @return a formated map of {@link Collection}
*/
private Map> extractCustomProperties(String resourceName, EntityContentProducer contentProducer) {
Map m = contentProducer.getCustomProperties(resourceName);
if (m == null)
return Collections.emptyMap();
Map> properties = new HashMap>(m.size());
for (Map.Entry propertyEntry : m.entrySet()) {
String propertyName = propertyEntry.getKey();
Object propertyValue = propertyEntry.getValue();
Collection values;
//Check for basic data type that could be provided by the EntityContentProducer
//If the data type can't be defined, nothing is stored. The toString method could be called, but some values
//could be not meant to be indexed.
if (propertyValue instanceof String)
values = Collections.singleton((String) propertyValue);
else if (propertyValue instanceof String[])
values = Arrays.asList((String[]) propertyValue);
else if (propertyValue instanceof Collection)
values = (Collection) propertyValue;
else {
if (propertyValue != null)
log.warn("Couldn't find what the value for '" + propertyName + "' was. It has been ignored. " + propertyName.getClass());
values = Collections.emptyList();
}
//If this property was already present there (this shouldn't happen, but if it does everything must be stored
if (properties.containsKey(propertyName)) {
log.warn("Two properties had a really similar name and were merged. This shouldn't happen! " + propertyName);
log.debug("Merged values '" + properties.get(propertyName) + "' with '" + values);
values = new ArrayList(values);
values.addAll(properties.get(propertyName));
}
properties.put(propertyName, values);
}
return properties;
}
/**
 * refresh the index from the current stored state {@inheritDoc}
 */
public void refreshIndex() {
    // the response carries no information we act on; actionGet() blocks until the refresh completes
    client.admin().indices().refresh(new RefreshRequest(indexName)).actionGet();
}
/** Spring lifecycle hook; nothing to clean up here (the timer is a daemon thread). */
public void destroy() {
}
/**
 * Counts docs whose content has not been digested yet (FIELD_INDEXED missing or false).
 *
 * @return the number of pending documents, or 0 when the count fails
 */
@Override
public int getPendingDocuments() {
    try {
        CountResponse response = client.prepareCount(indexName)
                .setQuery(filteredQuery(matchAllQuery(), orFilter(
                        missingFilter(SearchService.FIELD_INDEXED),
                        termFilter(SearchService.FIELD_INDEXED, false))))
                .execute()
                .actionGet();
        return (int) response.getCount();
    } catch (Exception e) {
        // pass the exception to the logger so the stack trace is retained (was message-only)
        log.error("problem getting pending docs: " + e.getMessage(), e);
    }
    return 0;
}
/**
 * creates a new index if one does not exist
 */
public void assureIndex() {
IndicesExistsResponse response = client.admin().indices().exists(new IndicesExistsRequest(indexName)).actionGet();
if (!response.isExists()) {
createIndex();
}
}
/**
 * creates a new index, does not check if the index already exists.
 * Applies the configured settings and the SAKAI_DOC_TYPE mapping; a concurrent
 * creation race is tolerated (IndexAlreadyExistsException is only warned about).
 */
public void createIndex() {
try {
CreateIndexResponse createResponse = client.admin().indices().create(new CreateIndexRequest(indexName).settings(settings).mapping(ElasticSearchService.SAKAI_DOC_TYPE, mapping)).actionGet();
if (!createResponse.isAcknowledged()) {
log.error("Index wasn't created, can't rebuild");
}
} catch (IndexAlreadyExistsException e) {
log.warn("Index already created.");
}
// client.admin().cluster().health(new ClusterHealthRequest(indexName).waitForYellowStatus()).actionGet();
}
/**
 * Removes any existing index and creates a fresh, empty one in its place.
 */
public void recreateIndex() {
    IndicesExistsResponse exists = client.admin().indices().exists(new IndicesExistsRequest(indexName)).actionGet();
    if (exists.isExists()) {
        // drop the old index before recreating it
        client.admin().indices().delete(new DeleteIndexRequest(indexName)).actionGet();
        // client.admin().cluster().health(new ClusterHealthRequest(indexName).waitForYellowStatus()).actionGet();
    }
    createIndex();
}
/**
 * Removes any existing index, creates a new index, and rebuilds the index from the entities own stored state {@inheritDoc}
 */
public void rebuildIndex() {
recreateIndex();
// in test mode the rebuild runs inline instead of on the timer thread
if (testMode) {
rebuildIndexForAllIndexableSites();
return;
}
contentIndexTimer.schedule(new RebuildIndexTask(), 0);
}
/**
 * causes elasticsearch write any in memory index changes to storage
 */
protected void flushIndex() {
//flush
client.admin().indices().flush(new FlushRequest(indexName)).actionGet();
}
/**
 * Check if a site is considered indexable based on the current server configuration.
 *
 * Not indexable sites are:
 * - Special sites
 * - Sites without the search tool (if the option is enabled)
 * - User sites (if the option is enabled)
 * - Any sites included in the ignoredSitesList (~admin and !admin are the default ignored sites)
 *
 * @param site site which may be indexable
 * @return true if the site can be indexed, false otherwise
 */
private boolean isSiteIndexable(Site site) {
    log.debug("Check if '" + site + "' is indexable.");
    String siteId = site.getId();
    if (siteService.isSpecialSite(siteId)) {
        return false;
    }
    if (isOnlyIndexSearchToolSites() && site.getToolForCommonId(SEARCH_TOOL_ID) == null) {
        return false;
    }
    if (isExcludeUserSites() && siteService.isUserSite(siteId)) {
        return false;
    }
    return !ignoredSitesList.contains(siteId);
}
/** @return true when no documents are waiting for content indexing */
@Override
public boolean isBuildQueueEmpty() {
return getPendingDocuments() == 0;
}
/**
 * Generates a SearchableEntityProducer
 *
 * @param ref entity reference to match
 * @return the first registered producer that matches the reference, or null when none does
 */
public EntityContentProducer newEntityContentProducer(String ref) {
    log.debug(" new entitycontent producer");
    // enhanced-for restores compilability: the raw Iterator had lost its type
    // parameter in extraction, breaking the element assignment
    for (EntityContentProducer ecp : producers) {
        if (ecp.matches(ref)) {
            return ecp;
        }
    }
    return null;
}
/** Master site search items are not tracked by this builder; always empty. */
@Override
public List getSiteMasterSearchItems() {
return Collections.emptyList();
}
/** Global master search items are not tracked by this builder; always empty. */
@Override
public List getGlobalMasterSearchItems() {
return Collections.emptyList();
}
/**
 * get hold of an entity content producer using the event
 *
 * @param event event to match against registered producers
 * @return the first producer that matches the event, or null when none does
 */
public EntityContentProducer newEntityContentProducer(Event event) {
    log.debug(" new entitycontent producer");
    // enhanced-for restores compilability: the raw Iterator had lost its type
    // parameter in extraction, breaking the element assignment
    for (EntityContentProducer ecp : producers) {
        if (ecp.matches(event)) {
            log.debug(" Matched Entity Content Producer for event " + event
                    + " with " + ecp);
            return ecp;
        } else {
            log.debug("Skipped ECP " + ecp);
        }
    }
    log.debug("Failed to match any Entity Content Producer for event " + event);
    return null;
}
/**
 * Finds the registered producer that matches the given entity reference.
 *
 * @param ref entity reference
 * @return the matching producer, or null when none matches
 */
protected EntityContentProducer getContentProducerForReference(String ref) {
for (EntityContentProducer ecp : producers) {
if (ecp.matches(ref)) {
return ecp;
}
}
return null;
}
/**
 * get all the producers registered, as a clone to avoid concurrent
 * modification exceptions
 *
 * @return a defensive copy of the registered producers
 */
public List<EntityContentProducer> getContentProducers() {
    // type parameters restored (they had been lost in extraction)
    return new ArrayList<EntityContentProducer>(producers);
}
/**
 * Rebuild the index from the entities own stored state {@inheritDoc}, for just
 * the supplied siteId
 */
public void rebuildIndex(String siteId) {
// in test mode the rebuild runs inline instead of on the timer thread
if (testMode) {
rebuildSiteIndex(siteId);
return;
}
contentIndexTimer.schedule(new RebuildSiteTask(siteId), 0);
}
/**
 * Removes every indexed document belonging to the given site.
 *
 * @param siteId site whose documents are purged from the index
 */
protected void deleteAllDocumentForSite(String siteId) {
    log.debug("removing all documents from search index for siteId: " + siteId);
    // the response carries no information we act on; actionGet() blocks until completion
    client.prepareDeleteByQuery(indexName)
            .setQuery(termQuery(SearchService.FIELD_SITEID, siteId))
            .setTypes(ElasticSearchService.SAKAI_DOC_TYPE)
            .execute()
            .actionGet();
}
/**
 * Builds (but does not execute) a delete request for a document, routed by its site id.
 *
 * @param id document id to delete
 * @param siteId routing value
 * @return the prepared delete request builder
 */
protected DeleteRequestBuilder prepareDelete(String id, String siteId) {
return client.prepareDelete(indexName, ElasticSearchService.SAKAI_DOC_TYPE, id).setRouting(siteId);
}
/**
 * Refresh the index for the supplied site. This simply refreshes the docs that ES already knows about.
 * It does not create any new docs. If you want to reload all site content you need to do a {@see rebuildIndex()}
 *
 * @param siteId site whose known documents should be re-queued
 */
public void refreshIndex(String siteId) {
    log.info("Refreshing the index for '" + siteId + "'");
    //Get the currently indexed resources for this site
    Site site = null;
    try {
        site = siteService.getSite(siteId);
    } catch (IdUnusedException e) {
        // include the exception so the stack trace is logged (was message-only)
        log.error("site with siteId=" + siteId + " does not exist can't refresh its index", e);
        return;
    }
    if (!isSiteIndexable(site)) {
        log.debug("ignoring request to refreshIndex for site:" + siteId + " as its not indexable");
        return;
    }
    // Collection<String> restored — the raw Collection had lost its type parameter
    // in extraction, breaking the for-each below
    Collection<String> resourceNames = getResourceNames(siteId);
    log.debug(resourceNames.size() + " elements will be refreshed");
    for (String resourceName : resourceNames) {
        EntityContentProducer entityContentProducer = getContentProducerForReference(resourceName);
        //If there is no matching entity content producer or no associated site, skip the resource
        //it is either not available anymore, or the corresponding entityContentProducer doesn't exist anymore
        if (entityContentProducer == null || entityContentProducer.getSiteId(resourceName) == null) {
            log.warn("Couldn't either find an entityContentProducer or the resource itself for '" + resourceName + "'");
            continue;
        }
        try {
            prepareIndexAdd(resourceName, entityContentProducer, false);
        } catch (NoContentException e) {
            // ignore we are just queuing here, not looking for content
        }
    }
}
/**
 * Get all indexed resources for a site
 *
 * @param siteId Site containing indexed resources
 * @return a collection of resource references or an empty collection if no resource was found
 */
protected Collection<String> getResourceNames(String siteId) {
    log.debug("Obtaining indexed elements for site: '" + siteId + "'");
    // NOTE(review): setSize(Integer.MAX_VALUE) requests every doc in one page — confirm
    // this is acceptable for the site sizes in play
    SearchResponse response = client.prepareSearch(indexName)
            .setSearchType(SearchType.QUERY_THEN_FETCH)
            .setQuery(termQuery(SearchService.FIELD_SITEID, siteId))
            .setTypes(ElasticSearchService.SAKAI_DOC_TYPE)
            .setSize(Integer.MAX_VALUE)
            .addFields(SearchService.FIELD_REFERENCE)
            .execute()
            .actionGet();
    // type parameters restored (they had been lost in extraction)
    Collection<String> resourceNames = new ArrayList<String>();
    for (SearchHit hit : response.getHits().hits()) {
        resourceNames.add(getFieldFromSearchHit(SearchService.FIELD_REFERENCE, hit));
    }
    return resourceNames;
}
/**
 * Loads a single field's value from the SearchHit. Reads from the hit's fields
 * rather than from the source document, since we aren't storing the source.
 *
 * @param field name of the field to read
 * @param hit   the search hit to read from; may be null
 * @return the field's value, or null when the hit, its field map, or the field is absent
 */
static public String getFieldFromSearchHit(String field, SearchHit hit) {
    if (hit == null) {
        return null;
    }
    if (hit.getFields() == null) {
        return null;
    }
    if (hit.getFields().get(field) == null) {
        return null;
    }
    return hit.getFields().get(field).value();
}
/**
 * Not implemented for the Elasticsearch index builder.
 * <p>
 * NOTE(review): always returns null rather than an empty list, so callers must
 * null-check; confirm against the overridden interface's contract before
 * changing this to {@code Collections.emptyList()}.
 */
@Override
public List getAllSearchItems() {
    return null;
}
/**
 * Actions the indexer can perform on a record, each mapped onto the numeric
 * action code declared by the Sakai Search API's {@code SearchBuilderItem}.
 */
public enum IndexAction {
    /**
     * Action Unknown, usually because the record has just been created
     */
    UNKNOWN(SearchBuilderItem.ACTION_UNKNOWN),
    /**
     * Action ADD the record to the search engine, if the doc ID is set, then
     * remove first, if not set, check its not there.
     */
    ADD(SearchBuilderItem.ACTION_ADD),
    /**
     * Action DELETE the record from the search engine, once complete delete the
     * record
     */
    DELETE(SearchBuilderItem.ACTION_DELETE),
    /**
     * The action REBUILD causes the indexer thread to rebuild the index from
     * scratch, re-fetching all entities This should only ever appear on the
     * master record
     */
    REBUILD(SearchBuilderItem.ACTION_REBUILD),
    /**
     * The action REFRESH causes the indexer thread to refresh the search index
     * from the current set of entities. If a Rebuild is in progress, the
     * refresh will not override the rebuild
     */
    REFRESH(SearchBuilderItem.ACTION_REFRESH);

    /** Numeric action code used by the Search API. */
    private final int itemAction;

    // Enum constructors are implicitly private; the redundant modifier is dropped.
    IndexAction(int itemAction) {
        this.itemAction = itemAction;
    }

    /**
     * Generate an IndexAction based on an action ID provided by the Search API
     *
     * @param itemActionId action ID used by the Search API
     * @return IndexAction matching the given ID, null if nothing has been found
     */
    public static IndexAction getAction(int itemActionId) {
        for (IndexAction indexAction : values()) {
            if (indexAction.getItemAction() == itemActionId) {
                return indexAction;
            }
        }
        return null;
    }

    /**
     * @return the Search API action code backing this enum constant
     */
    public int getItemAction() {
        return itemAction;
    }
}
/**
 * @return the onlyIndexSearchToolSites flag; when true, indexing is presumably
 *         restricted to sites that have the Search tool — confirm against usage
 *         elsewhere in this class
 */
public boolean isOnlyIndexSearchToolSites() {
    return onlyIndexSearchToolSites;
}

/**
 * @param onlyIndexSearchToolSites the onlyIndexSearchToolSites flag to set
 */
public void setOnlyIndexSearchToolSites(boolean onlyIndexSearchToolSites) {
    this.onlyIndexSearchToolSites = onlyIndexSearchToolSites;
}
/**
 * @param excludeUserSites when true, user workspace sites are presumably skipped
 *        during indexing (inferred from the name — confirm against usage)
 */
public void setExcludeUserSites(boolean excludeUserSites) {
    this.excludeUserSites = excludeUserSites;
}

/**
 * @return the excludeUserSites flag
 */
public boolean isExcludeUserSites() {
    return excludeUserSites;
}
// --- Dependency-injection setters (presumably wired by Spring — confirm) ---

/** @param client Elasticsearch client used for all index operations */
public void setClient(Client client) {
    this.client = client;
}

/** @param indexName name of the Elasticsearch index this builder operates on */
public void setIndexName(String indexName) {
    this.indexName = indexName;
}

/** @param siteService Sakai site service, used to resolve sites by id */
public void setSiteService(SiteService siteService) {
    this.siteService = siteService;
}

/** @param delay startup delay for scheduled work — units not visible in this chunk; confirm */
public void setDelay(int delay) {
    this.delay = delay;
}

/** @param period interval between scheduled runs — units not visible in this chunk; confirm */
public void setPeriod(int period) {
    this.period = period;
}

/** @param securityService Sakai security service */
public void setSecurityService(SecurityService securityService) {
    this.securityService = securityService;
}
/**
 * @return the start time as a fresh Date wrapping the stored epoch-millis value,
 *         so callers cannot mutate internal state
 */
public Date getStartTime() {
    return new Date(startTime);
}

/** @return the mapping definition (presumably Elasticsearch JSON mapping — confirm) */
public String getMapping() {
    return mapping;
}

/** @param mapping the mapping definition to apply */
public void setMapping(String mapping) {
    this.mapping = mapping;
}

/** @param contentIndexBatchSize number of items handled per content-indexing batch */
public void setContentIndexBatchSize(int contentIndexBatchSize) {
    this.contentIndexBatchSize = contentIndexBatchSize;
}

/**
 * @return the producers list
 *         NOTE(review): exposes the internal list directly (no defensive copy);
 *         callers can mutate it — confirm this is intentional
 */
public List getProducers() {
    return producers;
}
/** @param serverConfigurationService Sakai server configuration service */
public void setServerConfigurationService(ServerConfigurationService serverConfigurationService) {
    this.serverConfigurationService = serverConfigurationService;
}

/** @param bulkRequestSize number of operations batched into one Elasticsearch bulk request */
public void setBulkRequestSize(int bulkRequestSize) {
    this.bulkRequestSize = bulkRequestSize;
}

/** @param ignoredSites sites to skip when indexing — format (likely delimited site ids) not visible here; confirm */
public void setIgnoredSites(String ignoredSites) {
    this.ignoredSites = ignoredSites;
}

/** @param ignoredSitesList list of site ids to skip when indexing */
public void setIgnoredSitesList(List ignoredSitesList) {
    this.ignoredSitesList = ignoredSitesList;
}

/** @param indexSettings settings applied to the index (presumably JSON — confirm) */
public void setIndexSettings(String indexSettings) {
    this.indexSettings = indexSettings;
}

/** @param testMode enables test mode — its effect is not visible in this chunk; confirm */
public void setTestMode(boolean testMode) {
    this.testMode = testMode;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy