
org.molgenis.data.elasticsearch.ElasticsearchService Maven / Gradle / Ivy
The newest version!
package org.molgenis.data.elasticsearch;
import static java.util.Objects.requireNonNull;
import static java.util.stream.StreamSupport.stream;
import static org.molgenis.data.elasticsearch.request.SourceFilteringGenerator.toFetchFields;
import static org.molgenis.data.elasticsearch.util.ElasticsearchEntityUtils.toElasticsearchId;
import static org.molgenis.data.elasticsearch.util.ElasticsearchEntityUtils.toElasticsearchIds;
import static org.molgenis.data.elasticsearch.util.MapperTypeSanitizer.sanitizeMapperType;
import static org.molgenis.data.transaction.MolgenisTransactionManager.TRANSACTION_ID_RESOURCE_NAME;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;
import org.apache.commons.lang3.ArrayUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.exists.types.TypesExistsResponse;
import org.elasticsearch.action.admin.indices.mapping.delete.DeleteMappingResponse;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.admin.indices.optimize.OptimizeResponse;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.deletebyquery.DeleteByQueryResponse;
import org.elasticsearch.action.deletebyquery.IndexDeleteByQueryResponse;
import org.elasticsearch.action.get.GetRequestBuilder;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.get.MultiGetItemResponse;
import org.elasticsearch.action.get.MultiGetRequest.Item;
import org.elasticsearch.action.get.MultiGetRequestBuilder;
import org.elasticsearch.action.get.MultiGetResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.metadata.MappingMetaData;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.fetch.source.FetchSourceContext;
import org.molgenis.data.AggregateQuery;
import org.molgenis.data.AggregateResult;
import org.molgenis.data.AttributeMetaData;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.EntityMetaData;
import org.molgenis.data.EntityStream;
import org.molgenis.data.Fetch;
import org.molgenis.data.MolgenisDataException;
import org.molgenis.data.Query;
import org.molgenis.data.Repository;
import org.molgenis.data.elasticsearch.index.ElasticsearchIndexCreator;
import org.molgenis.data.elasticsearch.index.MappingsBuilder;
import org.molgenis.data.elasticsearch.request.SearchRequestGenerator;
import org.molgenis.data.elasticsearch.response.ResponseParser;
import org.molgenis.data.elasticsearch.util.ElasticsearchUtils;
import org.molgenis.data.elasticsearch.util.SearchRequest;
import org.molgenis.data.elasticsearch.util.SearchResult;
import org.molgenis.data.meta.AttributeMetaDataMetaData;
import org.molgenis.data.meta.EntityMetaDataMetaData;
import org.molgenis.data.meta.PackageImpl;
import org.molgenis.data.support.DefaultEntity;
import org.molgenis.data.support.DefaultEntityMetaData;
import org.molgenis.data.support.QueryImpl;
import org.molgenis.data.support.UuidGenerator;
import org.molgenis.data.transaction.MolgenisTransactionListener;
import org.molgenis.data.transaction.MolgenisTransactionLogEntryMetaData;
import org.molgenis.data.transaction.MolgenisTransactionLogMetaData;
import org.molgenis.util.DependencyResolver;
import org.molgenis.util.EntityUtils;
import org.molgenis.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.transaction.support.TransactionSynchronizationManager;
import com.google.common.base.Function;
import com.google.common.collect.Iterables;
/**
* ElasticSearch implementation of the SearchService interface. TODO use scroll-scan where possible:
* http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-request-scroll.html#scroll-scans
*
* @author erwin
*/
public class ElasticsearchService implements SearchService, MolgenisTransactionListener
{
// Class-wide logger.
private static final Logger LOG = LoggerFactory.getLogger(ElasticsearchService.class);
// NOTE(review): not referenced in the visible part of this file; presumably a batch size used by
// bulk operations further down - confirm.
private static final int BATCH_SIZE = 1000;
// Name of the extra field that is added to documents written to a transaction index; its value is
// the name of the CrudType (ADD, UPDATE or DELETE) of the pending change.
public static final String CRUD_TYPE_FIELD_NAME = "MolgenisCrudType";
// Creates the BulkProcessor used when indexing entities.
// NOTE(review): deliberately left non-final here; presumably replaced in tests - confirm.
private static BulkProcessorFactory BULK_PROCESSOR_FACTORY = new BulkProcessorFactory();
// Names of entities that are always written to and read from the main index, even when a
// transaction id is bound to the current thread.
private static List NON_TRANSACTIONAL_ENTITIES = Arrays.asList(MolgenisTransactionLogMetaData.ENTITY_NAME,
MolgenisTransactionLogEntryMetaData.ENTITY_NAME);
/**
 * Tells the index methods whether the supplied entities are new documents or replacements of
 * documents that were indexed before.
 */
public enum IndexingMode
{
	/** The entities are added to the index. */
	ADD,
	/** The entities replace previously indexed documents. */
	UPDATE
}
/**
 * Kind of change stored with a document; its name is written to the
 * {@code CRUD_TYPE_FIELD_NAME} field of documents in a transaction index.
 */
enum CrudType
{
	/** Document was added. */
	ADD,
	/** Document was updated. */
	UPDATE,
	/** Document was deleted. */
	DELETE
}
// Used to look up entity meta data and, as a fallback, entities that cannot be rebuilt from an
// indexed document source.
private final DataService dataService;
// Converts entities to Elasticsearch document sources and back.
private final ElasticsearchEntityFactory elasticsearchEntityFactory;
// Name of the main (non-transactional) index.
private final String indexName;
// Elasticsearch client through which every request of this service is sent.
private final Client client;
// Turns Elasticsearch search responses into SearchResult objects.
private final ResponseParser responseParser = new ResponseParser();
// Fills a SearchRequestBuilder from a Molgenis query and aggregate settings.
private final SearchRequestGenerator generator = new SearchRequestGenerator();
// Helper for index refreshes and for waiting on bulk processors.
private final ElasticsearchUtils elasticsearchUtils;
/**
 * Creates a service for the given index; delegates to the package-private constructor with
 * {@code createIndexIfNotExists} set to {@code true}.
 *
 * @param client
 *            Elasticsearch client, must not be null
 * @param indexName
 *            name of the main index, must not be null
 * @param dataService
 *            data service, must not be null
 * @param elasticsearchEntityFactory
 *            converter between entities and document sources, must not be null
 */
public ElasticsearchService(Client client, String indexName, DataService dataService,
ElasticsearchEntityFactory elasticsearchEntityFactory)
{
this(client, indexName, dataService, elasticsearchEntityFactory, true);
}
/**
 * Package-private constructor for testability: allows skipping index creation.
 *
 * @param client
 *            Elasticsearch client, must not be null
 * @param indexName
 *            name of the main index, must not be null
 * @param dataService
 *            data service, must not be null
 * @param elasticsearchEntityFactory
 *            converter between entities and document sources, must not be null
 * @param createIndexIfNotExists
 *            when {@code true} the index is created through {@link ElasticsearchIndexCreator} if it
 *            does not exist yet
 * @throws NullPointerException
 *             if one of the object arguments is null
 */
ElasticsearchService(Client client, String indexName, DataService dataService,
ElasticsearchEntityFactory elasticsearchEntityFactory, boolean createIndexIfNotExists)
{
this.client = requireNonNull(client);
this.indexName = requireNonNull(indexName);
this.dataService = requireNonNull(dataService);
this.elasticsearchEntityFactory = requireNonNull(elasticsearchEntityFactory);
this.elasticsearchUtils = new ElasticsearchUtils(client);
// index creation talks to the cluster, which is why tests can switch it off
if (createIndexIfNotExists)
{
new ElasticsearchIndexCreator(client).createIndexIfNotExists(indexName);
}
}
/**
 * Returns the names of the mapping types that are defined in the main index.
 *
 * @return iterable over the mapping type names; every {@code iterator()} call walks the keys of
 *         the mappings that were retrieved by this call
 */
@Override
public Iterable<String> getTypes()
{
	LOG.trace("Retrieving Elasticsearch mappings ...");
	GetMappingsResponse mappingsResponse = client.admin().indices().prepareGetMappings(indexName).get();
	LOG.debug("Retrieved Elasticsearch mappings");

	final ImmutableOpenMap<String, MappingMetaData> indexMappings = mappingsResponse.getMappings()
			.get(indexName);
	// Iterable has a single abstract method, so a lambda replaces the anonymous class; the keys
	// are still only read when iterator() is called
	return () -> indexMappings.keysIt();
}
/**
 * Executes the given search request against the main index using
 * {@link SearchType#QUERY_AND_FETCH}.
 *
 * @param request
 *            search request
 * @return parsed search result
 * @deprecated NOTE(review): no replacement is named in this file - confirm which API callers
 *             should use instead.
 */
@Override
@Deprecated
public SearchResult search(SearchRequest request)
{
return search(SearchType.QUERY_AND_FETCH, request);
}
/**
 * Builds and executes a search request against the main index and parses the response.
 *
 * @param searchType
 *            Elasticsearch search type to use
 * @param request
 *            search request holding document type, query and aggregate settings
 * @return parsed search result
 */
private SearchResult search(SearchType searchType, SearchRequest request)
{
	SearchRequestBuilder builder = client.prepareSearch(indexName);
	String requestDocumentType = request.getDocumentType();

	// TODO : A quick fix now! Need to find a better way to get
	// EntityMetaData in
	// ElasticSearchService, because ElasticSearchService should not be
	// aware of DataService. E.g. Put EntityMetaData in the SearchRequest
	// object
	// (dataService is guaranteed non-null by the constructor, so no null check is needed)
	EntityMetaData entityMetaData = null;
	if (requestDocumentType != null && dataService.hasRepository(requestDocumentType))
	{
		entityMetaData = dataService.getEntityMetaData(requestDocumentType);
	}
	String documentType = requestDocumentType == null ? null : sanitizeMapperType(requestDocumentType);

	generator.buildSearchRequest(builder, documentType, searchType, request.getQuery(),
			request.getAggregateField1(), request.getAggregateField2(), request.getAggregateFieldDistinct(),
			entityMetaData);
	// log the request only after it has been built, otherwise the trace shows an empty request
	if (LOG.isTraceEnabled())
	{
		LOG.trace("*** REQUEST\n" + builder);
	}

	SearchResponse response = builder.get();
	if (LOG.isTraceEnabled())
	{
		LOG.trace("*** RESPONSE\n" + response);
	}
	return responseParser.parseSearchResponse(request, response, entityMetaData, dataService);
}
/**
 * Tells whether the main index contains a mapping for the given entity type.
 *
 * @param entityMetaData
 *            entity meta data whose sanitized name is the mapping type
 * @return {@code true} if the main index has a mapping for the entity type
 */
@Override
public boolean hasMapping(EntityMetaData entityMetaData)
{
	// same lookup as the index-specific overload, applied to the main index
	return hasMapping(indexName, entityMetaData);
}
/**
 * Tells whether the given index contains a mapping for the given entity type.
 *
 * @param index
 *            name of the index to inspect
 * @param entityMetaData
 *            entity meta data whose sanitized name is the mapping type
 * @return {@code true} if the index has a mapping for the entity type, {@code false} if it has
 *         not or if the response holds no mappings under the given index name
 */
public boolean hasMapping(String index, EntityMetaData entityMetaData)
{
	String docType = sanitizeMapperType(entityMetaData.getName());
	GetMappingsResponse getMappingsResponse = client.admin().indices().prepareGetMappings(index).get();
	ImmutableOpenMap<String, ImmutableOpenMap<String, MappingMetaData>> allMappings = getMappingsResponse
			.getMappings();
	ImmutableOpenMap<String, MappingMetaData> indexMappings = allMappings.get(index);
	// no entry for the index name means there is no mapping either; avoid a NullPointerException
	return indexMappings != null && indexMappings.containsKey(docType);
}
/**
 * Creates the mapping for the given entity type in the main index, with norms and the all-index
 * enabled; whether the document source is stored is decided by {@code storeSource(entityMetaData)}.
 *
 * @param entityMetaData
 *            entity meta data to create a mapping for
 */
@Override
public void createMappings(EntityMetaData entityMetaData)
{
	createMappings(entityMetaData, storeSource(entityMetaData), true, true);
}
/**
 * Creates the mapping for the given entity type in the given index, with norms and the all-index
 * enabled; whether the document source is stored is decided by {@code storeSource(entityMetaData)}.
 *
 * @param index
 *            name of the index to create the mapping in
 * @param entityMetaData
 *            entity meta data to create a mapping for
 */
public void createMappings(String index, EntityMetaData entityMetaData)
{
	createMappings(index, entityMetaData, storeSource(entityMetaData), true, true);
}
/**
 * Builds the mapping for the given entity type and puts it in the given index.
 *
 * @param index
 *            name of the index to create the mapping in
 * @param entityMetaData
 *            entity meta data to create a mapping for
 * @param storeSource
 *            passed on to the mappings builder
 * @param enableNorms
 *            passed on to the mappings builder
 * @param createAllIndex
 *            passed on to the mappings builder
 * @throws ElasticsearchException
 *             if the put-mapping request is not acknowledged
 * @throws UncheckedIOException
 *             if building or serializing the mapping fails with an IOException
 */
private void createMappings(String index, EntityMetaData entityMetaData, boolean storeSource, boolean enableNorms,
		boolean createAllIndex)
{
	try
	{
		XContentBuilder mapping = MappingsBuilder.buildMapping(entityMetaData, storeSource, enableNorms,
				createAllIndex);
		// guard kept: serializing the mapping to a string is not free
		if (LOG.isTraceEnabled())
		{
			LOG.trace("Creating Elasticsearch mapping [{}] ...", mapping.string());
		}

		String entityName = entityMetaData.getName();
		String type = sanitizeMapperType(entityName);
		PutMappingResponse putMappingResponse = client.admin().indices().preparePutMapping(index).setType(type)
				.setSource(mapping).get();
		if (!putMappingResponse.isAcknowledged())
		{
			throw new ElasticsearchException("Creation of mapping for documentType [" + entityName
					+ "] failed. Response=" + putMappingResponse);
		}

		if (LOG.isDebugEnabled())
		{
			LOG.debug("Created Elasticsearch mapping [{}]", mapping.string());
		}
	}
	catch (IOException e)
	{
		throw new UncheckedIOException(e);
	}
}
/**
 * Creates the mapping for the given entity type in the main index.
 *
 * @param entityMetaData
 *            entity meta data to create a mapping for
 * @param storeSource
 *            passed on to the mappings builder
 * @param enableNorms
 *            passed on to the mappings builder
 * @param createAllIndex
 *            passed on to the mappings builder
 */
@Override
public void createMappings(EntityMetaData entityMetaData, boolean storeSource, boolean enableNorms,
boolean createAllIndex)
{
createMappings(indexName, entityMetaData, storeSource, enableNorms, createAllIndex);
}
/**
 * Refreshes the index that currently holds changes for the given entity type: the transaction
 * index if a transaction id is bound to the current thread and the entity is transactional, the
 * main index otherwise.
 *
 * @param entityMeta
 *            entity meta data
 */
@Override
public void refresh(EntityMetaData entityMeta)
{
	String transactionId = getCurrentTransactionId();
	boolean useTransactionIndex = transactionId != null
			&& !NON_TRANSACTIONAL_ENTITIES.contains(entityMeta.getName());
	refresh(useTransactionIndex ? transactionId : indexName);
}
/**
 * Refreshes the index with the given name and logs before and after.
 *
 * @param index
 *            name of the index to refresh
 */
private void refresh(String index)
{
	if (LOG.isTraceEnabled())
	{
		LOG.trace("Refreshing Elasticsearch index [{}] ...", index);
	}

	elasticsearchUtils.refreshIndex(index);

	if (LOG.isDebugEnabled())
	{
		LOG.debug("Refreshed Elasticsearch index [{}]", index);
	}
}
/**
 * Counts all documents of the given entity type; delegates to
 * {@code count(Query, EntityMetaData)} with a {@code null} query.
 *
 * @param entityMetaData
 *            entity meta data
 * @return number of documents
 */
@Override
public long count(EntityMetaData entityMetaData)
{
return count(null, entityMetaData);
}
/**
 * Counts the documents of the given entity type that match the query. Inside a transaction (and
 * for transactional entities that have a mapping in the transaction index) the main index count
 * is corrected with the pending changes: documents marked ADD in the transaction index are added,
 * documents marked DELETE are subtracted.
 *
 * @param q
 *            query to match, {@code null} counts all documents of the type
 * @param entityMetaData
 *            entity meta data
 * @return number of matching documents
 * @throws ElasticsearchException
 *             if one of the count requests reports failed shards
 */
@Override
public long count(Query q, EntityMetaData entityMetaData)
{
	String entityName = entityMetaData.getName();
	String type = sanitizeMapperType(entityName);
	if (LOG.isTraceEnabled())
	{
		if (q != null)
		{
			LOG.trace("Counting Elasticsearch [{}] docs using query [{}] ...", type, q);
		}
		else
		{
			LOG.trace("Counting Elasticsearch [{}] docs", type);
		}
	}

	SearchResponse searchResponse = executeCount(indexName, type, q, entityMetaData);
	long count = searchResponse.getHits().totalHits();
	if (LOG.isDebugEnabled())
	{
		long ms = searchResponse.getTookInMillis();
		if (q != null)
		{
			LOG.debug("Counted {} Elasticsearch [{}] docs using query [{}] in {}ms", count, type, q, ms);
		}
		else
		{
			LOG.debug("Counted {} Elasticsearch [{}] docs in {}ms", count, type, ms);
		}
	}

	String transactionId = getCurrentTransactionId();
	if (transactionId != null && !NON_TRANSACTIONAL_ENTITIES.contains(entityName)
			&& hasMapping(transactionId, entityMetaData))
	{
		// correct the main index count with the uncommitted additions and deletions
		long addedCount = countInTransactionIndex(transactionId, type, q, CrudType.ADD, entityMetaData);
		long deletedCount = countInTransactionIndex(transactionId, type, q, CrudType.DELETE, entityMetaData);
		count = count + addedCount - deletedCount;
	}
	return count;
}

/**
 * Executes a count request for the given type in the given index.
 *
 * @return the search response, never one with failed shards
 * @throws ElasticsearchException
 *             if the response reports failed shards
 */
private SearchResponse executeCount(String index, String type, Query q, EntityMetaData entityMetaData)
{
	SearchRequestBuilder searchRequestBuilder = client.prepareSearch(index);
	generator.buildSearchRequest(searchRequestBuilder, type, SearchType.COUNT, q, null, null, null, entityMetaData);
	SearchResponse searchResponse = searchRequestBuilder.get();
	if (searchResponse.getFailedShards() > 0)
	{
		throw new ElasticsearchException("Search failed. Returned headers:" + searchResponse.getHeaders());
	}
	return searchResponse;
}

/**
 * Counts the documents in the transaction index that match the query and carry the given crud type.
 */
private long countInTransactionIndex(String transactionId, String type, Query q, CrudType crudType,
		EntityMetaData entityMetaData)
{
	// work on a copy so the caller's query is not modified
	Query crudTypeQ = q != null ? new QueryImpl(q) : new QueryImpl();
	if (crudTypeQ.getRules() != null && !crudTypeQ.getRules().isEmpty())
	{
		crudTypeQ.and();
	}
	crudTypeQ.eq(CRUD_TYPE_FIELD_NAME, crudType.toString());
	return executeCount(transactionId, type, crudTypeQ, entityMetaData).getHits().totalHits();
}
/**
 * Indexes a single entity; delegates to the private overload with {@code updateIndex} set to
 * {@code true}.
 *
 * @param entity
 *            entity to index
 * @param entityMetaData
 *            meta data of the entity
 * @param indexingMode
 *            whether the entity is added or updated
 */
@Override
public void index(Entity entity, EntityMetaData entityMetaData, IndexingMode indexingMode)
{
index(entity, entityMetaData, indexingMode, true);
}
/**
 * Indexes a single entity in the transaction index if a transaction id is bound to the current
 * thread and the entity is transactional, in the main index otherwise.
 *
 * @param entity
 *            entity to index
 * @param entityMetaData
 *            meta data of the entity
 * @param indexingMode
 *            whether the entity is added or updated
 * @param updateIndex
 *            passed on to the bulk index method
 */
private void index(Entity entity, EntityMetaData entityMetaData, IndexingMode indexingMode, boolean updateIndex)
{
	// the transaction id is only looked up for transactional entities
	String transactionId = NON_TRANSACTIONAL_ENTITIES.contains(entityMetaData.getName()) ? null
			: getCurrentTransactionId();
	String targetIndex = transactionId == null ? indexName : transactionId;

	CrudType crudType;
	if (indexingMode == IndexingMode.ADD)
	{
		crudType = CrudType.ADD;
	}
	else
	{
		crudType = CrudType.UPDATE;
	}
	index(targetIndex, Collections.singleton(entity).iterator(), entityMetaData, crudType, updateIndex);
}
@Override
public long index(Iterable extends Entity> entities, EntityMetaData entityMetaData, IndexingMode indexingMode)
{
String transactionId = null;
if (!NON_TRANSACTIONAL_ENTITIES.contains(entityMetaData.getName()))
{
transactionId = getCurrentTransactionId();
}
String index = transactionId != null ? transactionId : indexName;
CrudType crudType = indexingMode == IndexingMode.ADD ? CrudType.ADD : CrudType.UPDATE;
return index(index, entities.iterator(), entityMetaData, crudType, true);
}
@Override
public long index(Stream extends Entity> entities, EntityMetaData entityMetaData, IndexingMode indexingMode)
{
String transactionId = null;
if (!NON_TRANSACTIONAL_ENTITIES.contains(entityMetaData.getName()))
{
transactionId = getCurrentTransactionId();
}
String index = transactionId != null ? transactionId : indexName;
CrudType crudType = indexingMode == IndexingMode.ADD ? CrudType.ADD : CrudType.UPDATE;
return index(index, entities.iterator(), entityMetaData, crudType, true);
}
/**
 * Looks up the transaction id that is registered with the {@link TransactionSynchronizationManager}
 * under {@code TRANSACTION_ID_RESOURCE_NAME}.
 *
 * @return the transaction id, or {@code null} if no such resource is registered
 */
private String getCurrentTransactionId()
{
	Object transactionIdResource = TransactionSynchronizationManager.getResource(TRANSACTION_ID_RESOURCE_NAME);
	return (String) transactionIdResource;
}
long index(String index, Iterator extends Entity> it, EntityMetaData entityMetaData, CrudType crudType,
boolean updateIndex)
{
String entityName = entityMetaData.getName();
String type = sanitizeMapperType(entityName);
String transactionId = null;
if (!NON_TRANSACTIONAL_ENTITIES.contains(entityMetaData.getName()))
{
transactionId = getCurrentTransactionId();
}
long nrIndexedEntities = 0;
BulkProcessor bulkProcessor = BULK_PROCESSOR_FACTORY.create(client);
try
{
if (transactionId != null)
{
// store entities in the index related to this transaction even
// if the entity should not be stored in
// the index, after transaction commit the transaction index is
// merged with the main index. Based on the
// main index mapping the data is (not) stored. The transaction
// index is removed after transaction
// commit or rollback.
if (!hasMapping(transactionId, entityMetaData))
{
createMappings(transactionId, entityMetaData, true, true, true);
}
}
while (it.hasNext())
{
Entity entity = it.next();
String id = toElasticsearchId(entity, entityMetaData);
Map source = elasticsearchEntityFactory.create(entityMetaData, entity);
if (transactionId != null)
{
if (crudType == CrudType.UPDATE)
{
// updating a document in the transactional index is the same as adding the new updated
// document.
GetResponse response = client.prepareGet(transactionId, type, id).get();
if (LOG.isDebugEnabled())
{
LOG.debug("Retrieved document type [{}] with id [{}] in index [{}]", type, id,
transactionId);
}
if (response.isExists())
{
crudType = CrudType.ADD;
}
}
source.put(CRUD_TYPE_FIELD_NAME, crudType.name());
}
if (LOG.isDebugEnabled())
{
LOG.debug("Indexing [{}] with id [{}] in index [{}] mode [{}] ...", type, id, index, crudType);
}
bulkProcessor.add(new IndexRequest().index(index).type(type).id(id).source(source));
++nrIndexedEntities;
// If not in transaction, update references now, if in transaction the
// references are updated in
// the commitTransaction method
if (updateIndex && (crudType == CrudType.UPDATE) && (transactionId == null))
{
updateReferences(entity, entityMetaData);
}
}
}
finally
{
elasticsearchUtils.waitForCompletion(bulkProcessor);
}
return nrIndexedEntities;
}
/**
 * Deletes the document of the given entity; the document id is derived from the entity.
 *
 * @param entity
 *            entity whose document is deleted
 * @param entityMetaData
 *            meta data of the entity
 */
@Override
public void delete(Entity entity, EntityMetaData entityMetaData)
{
	deleteById(toElasticsearchId(entity, entityMetaData), entityMetaData);
}
/**
 * Deletes the document with the given id. Outside a transaction (or for non-transactional
 * entities) the document is removed from the main index. Inside a transaction a document that
 * exists in the main index is copied to the transaction index marked as DELETE; a document that
 * only exists in the transaction index is removed from that index.
 *
 * @param id
 *            Elasticsearch document id
 * @param entityMetaData
 *            meta data of the entity
 * @throws MolgenisDataException
 *             if {@code canBeDeleted} reports that the entity cannot be deleted
 */
@Override
public void deleteById(String id, EntityMetaData entityMetaData)
{
	if (!canBeDeleted(Arrays.asList(id), entityMetaData))
	{
		throw new MolgenisDataException(
				"Cannot delete entity because there are other entities referencing it. Delete these first.");
	}

	String transactionId = getCurrentTransactionId();
	boolean transactional = transactionId != null
			&& !NON_TRANSACTIONAL_ENTITIES.contains(entityMetaData.getName());
	if (!transactional)
	{
		deleteById(indexName, id, entityMetaData);
		return;
	}

	// Check if delete from main index or if it is delete from entity
	// that is not committed yet and is in the
	// temp index
	String type = sanitizeMapperType(entityMetaData.getName());
	GetResponse mainIndexResponse = client.prepareGet(indexName, type, id).get();
	if (LOG.isDebugEnabled())
	{
		LOG.debug("Retrieved document type [{}] with id [{}] in index [{}]", type, id, indexName);
	}
	if (!mainIndexResponse.isExists())
	{
		deleteById(transactionId, id, entityMetaData);
		return;
	}

	// Copy to temp transaction index and mark as deleted
	Map<String, Object> source = mainIndexResponse.getSource();
	Entity entity = source != null ? elasticsearchEntityFactory.create(entityMetaData, source, null)
			: dataService.findOne(entityMetaData.getName(), id);
	index(transactionId, Collections.singleton(entity).iterator(), entityMetaData, CrudType.DELETE, false);
}
/**
 * Removes the document with the given id from the given index if it exists there.
 *
 * @param index
 *            name of the index to delete from
 * @param id
 *            Elasticsearch document id
 * @param entityMetaData
 *            meta data of the entity, its sanitized name is the document type
 */
private void deleteById(String index, String id, EntityMetaData entityMetaData)
{
	String type = sanitizeMapperType(entityMetaData.getName());
	if (LOG.isTraceEnabled())
	{
		LOG.trace("Deleting Elasticsearch '" + type + "' doc with id [" + id + "] ...");
	}

	GetResponse existingDoc = client.prepareGet(index, type, id).get();
	if (LOG.isDebugEnabled())
	{
		LOG.debug("Retrieved document type [{}] with id [{}] in index [{}]", type, id, index);
	}

	// only issue a delete request for documents that are actually there
	if (existingDoc.isExists())
	{
		client.prepareDelete(index, type, id).get();
	}

	if (LOG.isDebugEnabled())
	{
		LOG.debug("Deleted Elasticsearch '" + type + "' doc with id [" + id + "]");
	}
}
/**
 * Deletes the documents with the given ids one by one through {@code deleteById(String, EntityMetaData)}.
 *
 * @param ids
 *            Elasticsearch document ids
 * @param entityMetaData
 *            meta data of the entities
 */
@Override
public void deleteById(Stream<String> ids, EntityMetaData entityMetaData)
{
	// the element type must be String, otherwise the lambda does not match deleteById(String, ...)
	ids.forEach(id -> deleteById(id, entityMetaData));
}
@Override
public void delete(Iterable extends Entity> entities, EntityMetaData entityMetaData)
{
delete(stream(entities.spliterator(), true), entityMetaData);
}
@Override
public void delete(Stream extends Entity> entities, EntityMetaData entityMetaData)
{
Stream
© 2015 - 2025 Weber Informatics LLC | Privacy Policy