/*
* Hibernate Search, full-text search for your domain model
*
* License: GNU Lesser General Public License (LGPL), version 2.1 or later
* See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
*/
package org.hibernate.search.engine.impl;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.hibernate.search.exception.AssertionFailure;
import org.hibernate.search.exception.SearchException;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.PurgeAllLuceneWork;
import org.hibernate.search.backend.spi.DeleteByQueryLuceneWork;
import org.hibernate.search.backend.spi.DeleteByQueryWork;
import org.hibernate.search.backend.spi.DeletionQuery;
import org.hibernate.search.backend.spi.Work;
import org.hibernate.search.backend.spi.WorkType;
import org.hibernate.search.bridge.spi.ConversionContext;
import org.hibernate.search.bridge.util.impl.ContextualExceptionBridgeHelper;
import org.hibernate.search.engine.integration.impl.ExtendedSearchIntegrator;
import org.hibernate.search.engine.spi.AbstractDocumentBuilder;
import org.hibernate.search.engine.spi.ContainedInRecursionContext;
import org.hibernate.search.engine.spi.DocumentBuilderContainedEntity;
import org.hibernate.search.engine.spi.EntityIndexBinding;
import org.hibernate.search.indexes.interceptor.EntityIndexingInterceptor;
import org.hibernate.search.indexes.interceptor.IndexingOverride;
import org.hibernate.search.spi.IndexedTypeIdentifier;
import org.hibernate.search.spi.InstanceInitializer;
import org.hibernate.search.spi.impl.PojoIndexedTypeIdentifier;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;
import java.lang.invoke.MethodHandles;
/**
* Represents the set of changes going to be applied to the index for the entities. A stream of Work is fed as input, a
* list of LuceneWork is output, and in the process we try to reduce the number of output operations to the minimum
* needed to reach the same final state.
*
* @author Sanne Grinovero
* @author Hardy Ferentschik
* @author Martin Braun
* @since 3.3
*/
@SuppressWarnings( { "rawtypes", "unchecked" })
public class WorkPlan {
private static final Log log = LoggerFactory.make( MethodHandles.lookup() );
/*
* Using a LinkedHashMap to ensure the order will be stable from one run to another.
* This changes everything when debugging...
*/
private final Map<IndexedTypeIdentifier, PerClassWork> byClass = new LinkedHashMap<>();
private final ExtendedSearchIntegrator extendedIntegrator;
private final InstanceInitializer instanceInitializer;
/**
* Most work is split in two, and some other work might cancel one or more existing works;
* we don't track the number accurately as that's not needed.
*/
private int approximateWorkQueueSize = 0;
public WorkPlan(ExtendedSearchIntegrator extendedIntegrator) {
this.extendedIntegrator = extendedIntegrator;
this.instanceInitializer = extendedIntegrator.getInstanceInitializer();
}
/**
* Adds a work to be performed as part of the final plan.
*
* @param work The work instance to add to the work plan
*/
public void addWork(Work work) {
approximateWorkQueueSize++;
IndexedTypeIdentifier typeIdentifier = instanceInitializer.getIndexedTypeIdFromWork( work );
PerClassWork classWork = getClassWork( work.getTenantIdentifier(), typeIdentifier );
classWork.addWork( work );
}
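/*
* Illustrative usage (a sketch, not part of the original source): a caller such as
* a transactional worker would feed Work instances into the plan as entity events
* are collected. The names `integrator`, `entity` and `entityId` are hypothetical
* placeholders; the Work constructor shape mirrors its use elsewhere in this class.
*
*   WorkPlan plan = new WorkPlan( integrator );
*   plan.addWork( new Work( null, entity, entityId, WorkType.UPDATE ) );
*/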
/**
* Removes all scheduled work
*/
public void clear() {
byClass.clear();
approximateWorkQueueSize = 0;
}
/**
* Returns an approximation of the amount of work in the queue.
* This is meant for resource control, to allow auto-flushing of large pending batches.
*
* @return the approximation
* @see org.hibernate.search.cfg.Environment#QUEUEINGPROCESSOR_BATCHSIZE
*/
public int size() {
return approximateWorkQueueSize;
}
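/*
* A minimal auto-flush sketch (assumption: `maxQueueSize` and `flush` are
* caller-defined; `flush` stands for whatever mechanism processes and then
* clears the pending plan):
*
*   if ( plan.size() >= maxQueueSize ) {
*       flush( plan ); // e.g. drain getPlannedLuceneWork(), then plan.clear()
*   }
*/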
/**
* @param tenantId the tenant identifier
* @param typeIdentifier The entity class for which to retrieve the work
*
* @return the {@code PerClassWork} from the {@link #byClass} map, creating it if needed
* @throws SearchException if the specified type is neither @Indexed nor hosts any @ContainedIn
*/
private PerClassWork getClassWork(String tenantId, IndexedTypeIdentifier typeIdentifier) {
PerClassWork classWork = getClassWorkIfConfigured( tenantId, typeIdentifier );
if ( classWork == null ) {
throw new SearchException(
"Unable to perform work. Entity Class is not @Indexed nor hosts @ContainedIn: " + typeIdentifier
);
}
return classWork;
}
/**
* @param tenantId the tenant identifier
* @param typeIdentifier The entity class for which to retrieve the work
*
* @return the {@code PerClassWork} from the {@link #byClass} map, creating it if necessary and if possible
* (i.e. if the type is configured); otherwise {@code null}
*/
private PerClassWork getClassWorkIfConfigured(String tenantId, IndexedTypeIdentifier typeIdentifier) {
PerClassWork classWork = byClass.get( typeIdentifier );
if ( classWork == null ) {
AbstractDocumentBuilder documentBuilder = getDocumentBuilderIfConfigured( extendedIntegrator, typeIdentifier );
if ( documentBuilder == null ) {
return null;
}
else {
classWork = new PerClassWork( tenantId, typeIdentifier, documentBuilder );
byClass.put( typeIdentifier, classWork );
}
}
return classWork;
}
/**
* Makes sure that all additional work needed because of containedIn
* is added to the work plan.
*/
public void processContainedInAndPrepareExecution() {
PerClassWork[] worksFromEvents = byClass.values().toArray( new PerClassWork[byClass.size()] );
// We need to iterate on a "frozen snapshot" of the byClass values
// because of HSEARCH-647. This method is not recursive and is invoked
// only after the current unit of work is complete; all additional
// work added through recursion is already fully processed, so we don't
// need to process the new classes added during this loop again.
for ( PerClassWork perClassWork : worksFromEvents ) {
perClassWork.processContainedInAndPrepareExecution();
}
}
/**
* Used for recursive processing of containedIn
*
* @param <T> the type of the entity
* @param value the entity to be processed
* @param context the validator for the depth constraints
* @param tenantId the tenant identifier. It can be null.
*/
public <T> void recurseContainedIn(T value, ContainedInRecursionContext context, String tenantId) {
Class<T> entityClass = instanceInitializer.getClass( value );
//TODO separate the ContainedIn processing in its own registry of types
PerClassWork classWork = getClassWorkIfConfigured( tenantId, new PojoIndexedTypeIdentifier( entityClass ) );
if ( classWork != null ) {
classWork.recurseContainedIn( value, context );
}
}
/**
* @return the current plan converted to a list of {@code LuceneWork}
*/
public List<LuceneWork> getPlannedLuceneWork() {
List<LuceneWork> luceneQueue = new ArrayList<>();
for ( PerClassWork perClassWork : byClass.values() ) {
perClassWork.enqueueLuceneWork( luceneQueue );
}
return luceneQueue;
}
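/*
* Putting the lifecycle together (sketch; the final hand-off shown as
* `backend.apply( queue )` is a hypothetical stand-in for however the caller
* dispatches the resulting list):
*
*   plan.processContainedInAndPrepareExecution();
*   List<LuceneWork> queue = plan.getPlannedLuceneWork();
*   plan.clear();
*   backend.apply( queue );
*/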
/**
* {@code PerClassWork} organizes work per entity type.
*/
class PerClassWork {
/**
* We further organize work per entity identifier so that we can cancel or adapt work being done
* on the same entities.
*
* This map uses as key what we originally received as {@link Work#getId()} if the type
* is annotated with @ProvidedId; otherwise it uses the value pointed to by
* {@link org.hibernate.search.annotations.DocumentId} or, as a last resort, {@code javax.persistence.Id}.
*
* We use a LinkedHashMap to ensure the order will be stable from one run to another.
* This changes everything when debugging...
*/
private final Map<Serializable, PerEntityWork> entityById = new LinkedHashMap<>();
/**
* When a PurgeAll operation is sent on the type, we can remove all previously scheduled work
* and remember that the first operation on the index is going to be a purge all.
*/
private boolean purgeAll = false;
private List<DeletionQuery> deletionQueries = new ArrayList<>();
/**
* The type of the entities being managed by this instance
*/
private final IndexedTypeIdentifier typeIdentifier;
private final String tenantId;
/**
* The DocumentBuilder relative to the type being managed
*/
private final AbstractDocumentBuilder documentBuilder;
/**
* True if the entity type {@link #typeIdentifier} does not have its own index but is only used in contained-in scenarios
*/
private final boolean containedInOnly;
/**
* @param tenantId the tenant ID
* @param typeIdentifier The type of entities being managed by this instance
* @param documentBuilder The document builder to use for entities managed by this instance
*/
PerClassWork(String tenantId, IndexedTypeIdentifier typeIdentifier, AbstractDocumentBuilder documentBuilder) {
this.typeIdentifier = typeIdentifier;
this.documentBuilder = documentBuilder;
this.containedInOnly = documentBuilder instanceof DocumentBuilderContainedEntity;
this.tenantId = tenantId;
}
/**
* Adds a work to the current plan. The entity class of the work must be of the
* type managed by this instance.
*
* @param work the {@code Work} instance to add to the plan
*/
public void addWork(Work work) {
if ( work.getType() == WorkType.PURGE_ALL ) {
entityById.clear();
this.deletionQueries.clear();
purgeAll = true;
}
else if ( work.getType() == WorkType.DELETE_BY_QUERY ) {
DeleteByQueryWork delWork = (DeleteByQueryWork) work;
this.deletionQueries.add( delWork.getDeleteByQuery() );
}
else {
Serializable id = extractProperId( work );
PerEntityWork entityWork = entityById.get( id );
if ( entityWork == null ) {
entityWork = new PerEntityWork( work );
entityById.put( id, entityWork );
}
entityWork.addWork( work );
}
}
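/*
* Illustrative coalescing sequence for the method above: an UPDATE for entity #1
* followed by a PURGE_ALL leaves only the purge-all marker, since the purge
* would erase the per-entity work anyway.
*
*   addWork( update of #1 );   // entityById = { 1 -> update }
*   addWork( PURGE_ALL );      // entityById = {}, purgeAll = true
*/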
/**
* We need to distinguish which value is used as the identifier,
* depending on the use case and mapping options.
*
* @param work The work instance from which to extract the id
*
* @return the appropriate id to use for this work
*/
private Serializable extractProperId(Work work) {
// see HSEARCH-662
if ( containedInOnly ) {
return work.getId();
}
Object entity = work.getEntity();
// 1) entity is null for purge operations, which requires us to trust the work id
// 2) types mapped with a provided id require the work id as well
// 3) when Hibernate identifier rollback is used and this identifier is our id source, we need to get the value from the work id
if ( entity == null
|| documentBuilder.requiresProvidedId()
|| ( work.isIdentifierWasRolledBack() && documentBuilder.isIdMatchingJpaId() ) ) {
return work.getId();
}
else {
return documentBuilder.getId( entity );
}
}
/**
* Enqueues all work needed to be performed according to current state into
* the LuceneWork queue.
*
* @param luceneQueue work will be appended to this list
*/
public void enqueueLuceneWork(List<LuceneWork> luceneQueue) {
final Set<Entry<Serializable, PerEntityWork>> entityInstances = entityById.entrySet();
final ConversionContext conversionContext = new ContextualExceptionBridgeHelper();
if ( purgeAll ) {
luceneQueue.add( new PurgeAllLuceneWork( tenantId, typeIdentifier ) );
}
for ( DeletionQuery delQuery : this.deletionQueries ) {
luceneQueue.add( new DeleteByQueryLuceneWork( tenantId, typeIdentifier, delQuery ) );
}
for ( Entry<Serializable, PerEntityWork> entry : entityInstances ) {
Serializable indexingId = entry.getKey();
PerEntityWork perEntityWork = entry.getValue();
String tenantIdentifier = perEntityWork.getTenantIdentifier();
perEntityWork.enqueueLuceneWork( tenantIdentifier, typeIdentifier, indexingId, documentBuilder, luceneQueue, conversionContext );
}
}
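/*
* Note the resulting order: [ purge-all, delete-by-query works..., per-entity works... ].
* The purge is enqueued first so that it cannot erase documents (re-)added by the
* per-entity works that follow it.
*/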
/**
* Starts processing the {@code ContainedIn} annotation for all instances stored in
* {@link #entityById}.
*
* This processing must be performed when no more work is being collected by the event
* system. The processing might recursively add more work to the plan.
*/
public void processContainedInAndPrepareExecution() {
Entry<Serializable, PerEntityWork>[] entityInstancesFrozenView = new Entry[entityById.size()];
entityInstancesFrozenView = entityById.entrySet().toArray( entityInstancesFrozenView );
for ( Entry<Serializable, PerEntityWork> entry : entityInstancesFrozenView ) {
PerEntityWork perEntityWork = entry.getValue();
perEntityWork.processContainedIn( documentBuilder, WorkPlan.this );
}
}
/**
* Method to continue the recursion for ContainedIn processing, as started by
* {@link #processContainedInAndPrepareExecution()}.
* Additional work that needs to be processed will be added to this same WorkPlan.
*
* @param value the instance to be processed
* @param context the validator for the depth constraints
*/
void recurseContainedIn(Object value, ContainedInRecursionContext context) {
if ( documentBuilder.requiresProvidedId() ) {
log.containedInPointsToProvidedId( instanceInitializer.getClass( value ) );
}
else {
Serializable extractedId = documentBuilder.getId( value );
if ( extractedId != null ) {
PerEntityWork entityWork = entityById.get( extractedId );
if ( entityWork == null ) {
EntityIndexingInterceptor entityInterceptor = getEntityInterceptor();
IndexingOverride operation;
if ( entityInterceptor != null ) {
operation = entityInterceptor.onUpdate( value );
}
else {
operation = IndexingOverride.APPLY_DEFAULT;
}
//TODO there is a small duplication with some of TransactionalWorker.interceptWork
// but what would be a proper factored solution?
switch ( operation ) {
//we are planning an update by default
case UPDATE:
case APPLY_DEFAULT:
entityWork = new PerEntityWork( tenantId, value );
entityById.put( extractedId, entityWork );
break;
case SKIP:
log.forceSkipIndexOperationViaInterception( this.typeIdentifier, WorkType.UPDATE );
break;
case REMOVE:
log.forceRemoveOnIndexOperationViaInterception( this.typeIdentifier, WorkType.UPDATE );
Work work = new Work( tenantId, value, extractedId, WorkType.DELETE );
entityWork = new PerEntityWork( work );
entityById.put( extractedId, entityWork );
break;
default:
throw new AssertionFailure( "Unknown action type: " + operation );
}
// recursion starts
documentBuilder.appendContainedInWorkForInstance( value, WorkPlan.this, context, tenantId );
}
// else nothing to do as it's being processed already
}
else {
// this branch for @ContainedIn recursive work of non-indexed entities
// as they don't have an indexingId
documentBuilder.appendContainedInWorkForInstance( value, WorkPlan.this, context, tenantId );
}
}
}
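/*
* A sketch of an interceptor honored by the switch above (assumption: a
* user-provided implementation registered for the entity type; the class and
* the isDraft() accessor are hypothetical):
*
*   public class DraftSkippingInterceptor implements EntityIndexingInterceptor<Document> {
*       @Override public IndexingOverride onAdd(Document d) { return d.isDraft() ? IndexingOverride.SKIP : IndexingOverride.APPLY_DEFAULT; }
*       @Override public IndexingOverride onUpdate(Document d) { return d.isDraft() ? IndexingOverride.REMOVE : IndexingOverride.APPLY_DEFAULT; }
*       @Override public IndexingOverride onDelete(Document d) { return IndexingOverride.APPLY_DEFAULT; }
*       @Override public IndexingOverride onCollectionUpdate(Document d) { return onUpdate( d ); }
*   }
*/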
private EntityIndexingInterceptor getEntityInterceptor() {
EntityIndexBinding indexBindingForEntity = extendedIntegrator.getIndexBinding(
this.typeIdentifier
);
return indexBindingForEntity != null ? indexBindingForEntity.getEntityIndexingInterceptor() : null;
}
public String getTenantId() {
return tenantId;
}
}
/**
* Keeps track of what needs to be done Lucene-wise for each entity.
* Each entity might need to be deleted from the index, added to the index,
* or both; in this case delete will be performed first.
*/
private static class PerEntityWork {
private Object entity;
/**
* When true, the Lucene Document representing this entity will be deleted
* from the index.
*/
private boolean delete = false;
/**
* When true, the entity will be converted to a Lucene Document and added
* to the index.
*/
private boolean add = false;
/**
* Needed to stop recursion for processing ContainedIn
* of already processed instances.
*/
private boolean containedInProcessed = false;
private final String tenantId;
/**
* Constructor to force an update of the entity even without
* having a specific Work instance for it.
*
* @param entity the instance which needs to be updated in the index
*/
private PerEntityWork(String tenantId, Object entity) {
// for updates only
this.entity = entity;
this.delete = true;
this.add = true;
this.containedInProcessed = true;
this.tenantId = tenantId;
}
/**
* Prepares the initial state of planned changes according
* to the type of work being fired.
*
* @param work the work instance
*/
private PerEntityWork(Work work) {
entity = work.getEntity();
tenantId = work.getTenantIdentifier();
WorkType type = work.getType();
// sets the initial state:
switch ( type ) {
case ADD:
add = true;
break;
case DELETE:
case PURGE:
delete = true;
break;
case COLLECTION:
case UPDATE:
delete = true;
add = true;
break;
case INDEX:
add = true;
delete = true;
break;
case PURGE_ALL:
// not breaking intentionally: PURGE_ALL should not reach this
// class
case DELETE_BY_QUERY:
// not breaking intentionally: DELETE_BY_QUERY should not reach
// this class
default:
throw new SearchException( "unexpected state: " + type );
}
}
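/*
* Summary of the initial flag state set by the switch above:
*
*   WorkType             delete  add
*   ADD                  false   true
*   DELETE / PURGE       true    false
*   COLLECTION / UPDATE  true    true
*   INDEX                true    true
*/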
/**
* Has different effects depending on the new type of work needed
* and the previously scheduled work.
* This way we never store more than one plan for each entity; the order
* of final execution is irrelevant, what matters is the order in which the
* work is added to the plan.
*
* @param work the work instance to add
*/
public void addWork(Work work) {
entity = work.getEntity();
WorkType type = work.getType();
switch ( type ) {
case INDEX:
case UPDATE:
if ( add && !delete ) {
// noop: the entity was newly created in this same unit of work,
// so it only needs to be added; there is no need to delete
}
else {
add = true;
delete = true;
}
break;
case ADD: // the only operation which doesn't imply a delete-before-add
add = true;
// leave delete flag as-is
break;
case DELETE:
case PURGE:
if ( add && !delete ) {
// the entity was newly created in this same unit of
// work, so the two operations cancel each other out
add = false;
}
else {
add = false;
delete = true;
}
break;
case COLLECTION:
if ( !add && !delete ) {
add = true;
delete = true;
}
// otherwise nothing to do, as other work was already scheduled for this entity
break;
case PURGE_ALL:
case DELETE_BY_QUERY:
default:
throw new SearchException( "unexpected state: " + type );
}
}
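/*
* Illustrative transitions for the method above, for an entity created and then
* modified in the same unit of work:
*
*   ADD    -> add=true, delete=false
*   UPDATE -> unchanged: the entity is new to the index, a plain add suffices
*   DELETE -> add=false, delete=false: the operations cancel out, no Lucene work
*/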
/**
* Adds the needed LuceneWork to the queue for this entity instance
*
* @param tenantIdentifier the tenant identifier
* @param typeIdentifier the type
* @param indexingId identifier of the instance
* @param entityBuilder the DocumentBuilder for this type
* @param luceneQueue the queue collecting all changes
* @param conversionContext the context to use for field bridge conversions
*/
public void enqueueLuceneWork(String tenantIdentifier, IndexedTypeIdentifier typeIdentifier, Serializable indexingId, AbstractDocumentBuilder entityBuilder,
List<LuceneWork> luceneQueue, ConversionContext conversionContext) {
if ( add || delete ) {
entityBuilder.addWorkToQueue( tenantIdentifier, typeIdentifier, entity, indexingId, delete, add, luceneQueue, conversionContext );
}
}
/**
* Works recursively, passing the WorkPlan along so that additional work can be
* planned according to the needs of ContainedIn processing.
*
* @param entityBuilder the DocumentBuilder for this type
* @param workplan the current WorkPlan, used for recursion
*
* @see org.hibernate.search.annotations.ContainedIn
*/
public void processContainedIn(AbstractDocumentBuilder entityBuilder, WorkPlan workplan) {
if ( entity != null && !containedInProcessed ) {
containedInProcessed = true;
if ( add || delete ) {
entityBuilder.appendContainedInWorkForInstance( entity, workplan, null, getTenantIdentifier() );
}
}
}
public String getTenantIdentifier() {
return tenantId;
}
}
/**
* Gets the DocumentBuilder for this type, if one is configured.
*
* @param extendedIntegrator the search factory (implementor)
* @param typeIdentifier the entity type for which to retrieve the document builder
*
* @return the DocumentBuilder for this type, or {@code null} if the type is not configured
*/
private static AbstractDocumentBuilder getDocumentBuilderIfConfigured(ExtendedSearchIntegrator extendedIntegrator,
IndexedTypeIdentifier typeIdentifier) {
EntityIndexBinding entityIndexBinding = extendedIntegrator.getIndexBinding( typeIdentifier );
if ( entityIndexBinding == null ) {
DocumentBuilderContainedEntity entityBuilder = extendedIntegrator.getDocumentBuilderContainedEntity(
typeIdentifier
);
if ( entityBuilder == null ) {
/*
* May happen when @ContainedIn targets a non-configured type (not @Indexed nor with any @ContainedIn),
* which is a legitimate use case when this type has a configured subtype.
* Just return null.
*/
return null;
}
else {
return entityBuilder;
}
}
else {
return entityIndexBinding.getDocumentBuilder();
}
}
}