/*
 * Hibernate Search, full-text search for your domain model
 *
 * License: GNU Lesser General Public License (LGPL), version 2.1 or later
 * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
 */
package org.hibernate.search.engine.impl;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.hibernate.search.exception.AssertionFailure;
import org.hibernate.search.exception.SearchException;
import org.hibernate.search.backend.LuceneWork;
import org.hibernate.search.backend.PurgeAllLuceneWork;
import org.hibernate.search.backend.spi.DeleteByQueryLuceneWork;
import org.hibernate.search.backend.spi.DeleteByQueryWork;
import org.hibernate.search.backend.spi.DeletionQuery;
import org.hibernate.search.backend.spi.Work;
import org.hibernate.search.backend.spi.WorkType;
import org.hibernate.search.bridge.spi.ConversionContext;
import org.hibernate.search.bridge.util.impl.ContextualExceptionBridgeHelper;
import org.hibernate.search.engine.integration.impl.ExtendedSearchIntegrator;
import org.hibernate.search.engine.spi.AbstractDocumentBuilder;
import org.hibernate.search.engine.spi.ContainedInRecursionContext;
import org.hibernate.search.engine.spi.DocumentBuilderContainedEntity;
import org.hibernate.search.engine.spi.EntityIndexBinding;
import org.hibernate.search.indexes.interceptor.EntityIndexingInterceptor;
import org.hibernate.search.indexes.interceptor.IndexingOverride;
import org.hibernate.search.spi.IndexedTypeIdentifier;
import org.hibernate.search.spi.InstanceInitializer;
import org.hibernate.search.spi.impl.PojoIndexedTypeIdentifier;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;
import java.lang.invoke.MethodHandles;

/**
 * Represents the set of changes going to be applied to the index for the entities. A stream of Work is fed as input, a
 * list of LuceneWork is produced as output, and in the process we try to reduce the number of output operations to the
 * minimum needed to reach the same final state.
 *
 * @author Sanne Grinovero
 * @author Hardy Ferentschik
 * @author Martin Braun
 * @since 3.3
 */
@SuppressWarnings( { "rawtypes", "unchecked" })
public class WorkPlan {

	private static final Log log = LoggerFactory.make( MethodHandles.lookup() );

	/*
	 * Using a LinkedHashMap to ensure the order will be stable from one run to another.
	 * This changes everything when debugging...
	 */
	private final Map<IndexedTypeIdentifier, PerClassWork> byClass = new LinkedHashMap<>();

	private final ExtendedSearchIntegrator extendedIntegrator;

	private final InstanceInitializer instanceInitializer;

	/**
	 * Most work is split in two; some other work might cancel one or more existing works.
	 * We don't track the number accurately, as that's not needed.
	 */
	private int approximateWorkQueueSize = 0;

	public WorkPlan(ExtendedSearchIntegrator extendedIntegrator) {
		this.extendedIntegrator = extendedIntegrator;
		this.instanceInitializer = extendedIntegrator.getInstanceInitializer();
	}

	/**
	 * Adds a work to be performed as part of the final plan.
	 *
	 * @param work The work instance to add to the work plan
	 */
	public void addWork(Work work) {
		approximateWorkQueueSize++;
		IndexedTypeIdentifier typeIdentifier = instanceInitializer.getIndexedTypeIdFromWork( work );
		PerClassWork classWork = getClassWork( work.getTenantIdentifier(), typeIdentifier );
		classWork.addWork( work );
	}
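
	/*
	 * Illustrative sketch, not part of the original source: adding several works
	 * for the same entity does not grow the plan linearly, because per-entity
	 * state is merged instead. For example, within a single unit of work:
	 *
	 *   plan.addWork( new Work( entity, entityId, WorkType.ADD ) );     // schedules an add
	 *   plan.addWork( new Work( entity, entityId, WorkType.DELETE ) );  // cancels the add
	 *
	 * leaves nothing to be written for this entity (see PerEntityWork#addWork);
	 * the exact Work constructor shape may vary across versions.
	 */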

	/**
	 * Removes all scheduled work
	 */
	public void clear() {
		byClass.clear();
		approximateWorkQueueSize = 0;
	}

	/**
	 * Returns an approximation of the amount of work in the queue.
	 * This is meant for resource control, enabling auto-flushing of large pending batches.
	 *
	 * @return the approximation
	 * @see org.hibernate.search.cfg.Environment#QUEUEINGPROCESSOR_BATCHSIZE
	 */
	public int size() {
		return approximateWorkQueueSize;
	}
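
	/*
	 * Illustrative sketch, not part of the original source: a queueing processor
	 * can use size() to bound memory usage by flushing once a configured batch
	 * size is exceeded. batchSize and flushToBackend() are hypothetical names:
	 *
	 *   if ( batchSize > 0 && plan.size() >= batchSize ) {
	 *       flushToBackend( plan );   // prepare and perform the queued works
	 *       plan.clear();
	 *   }
	 */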

	/**
	 * @param tenantId the tenant identifier
	 * @param typeIdentifier The entity class for which to retrieve the work
	 *
	 * @return the {@code PerClassWork} from the {@link #byClass} map, creating it if needed
	 * @throws SearchException if the specified type is not @Indexed and does not host any @ContainedIn
	 */
	private PerClassWork getClassWork(String tenantId, IndexedTypeIdentifier typeIdentifier) {
		PerClassWork classWork = getClassWorkIfConfigured( tenantId, typeIdentifier );
		if ( classWork == null ) {
			throw new SearchException(
					"Unable to perform work. Entity Class is not @Indexed nor hosts @ContainedIn: " + typeIdentifier
			);
		}
		return classWork;
	}

	/**
	 * @param tenantId the tenant identifier
	 * @param typeIdentifier The entity class for which to retrieve the work
	 *
	 * @return the {@code PerClassWork} from the {@link #byClass} map, creating it if necessary and if possible
	 * (if the type is configured). Otherwise, return null.
	 */
	private PerClassWork getClassWorkIfConfigured(String tenantId, IndexedTypeIdentifier typeIdentifier) {
		PerClassWork classWork = byClass.get( typeIdentifier );
		if ( classWork == null ) {
			AbstractDocumentBuilder documentBuilder = getDocumentBuilderIfConfigured( extendedIntegrator, typeIdentifier );
			if ( documentBuilder == null ) {
				return null;
			}
			else {
				classWork = new PerClassWork( tenantId, typeIdentifier, documentBuilder );
				byClass.put( typeIdentifier, classWork );
			}
		}
		return classWork;
	}

	/**
	 * Makes sure that all additional work needed because of containedIn
	 * is added to the work plan.
	 */
	public void processContainedInAndPrepareExecution() {
		PerClassWork[] worksFromEvents = byClass.values().toArray( new PerClassWork[byClass.size()] );

		// We need to iterate on a "frozen snapshot" of the byClass values
		// because of HSEARCH-647. This method is not recursive, invoked
		// only after the current unit of work is complete, and all additional
		// work we add through recursion is already complete, so we don't need
		// to process again new classes we add during the process.
		for ( PerClassWork perClassWork : worksFromEvents ) {
			perClassWork.processContainedInAndPrepareExecution();
		}
	}

	/**
	 * Used for recursive processing of containedIn
	 *
	 * @param <T> the type of the entity
	 * @param value the entity to be processed
	 * @param context the validator for the depth constraints
	 * @param tenantId the tenant identifier. It can be null.
	 */
	public <T> void recurseContainedIn(T value, ContainedInRecursionContext context, String tenantId) {
		Class<T> entityClass = instanceInitializer.getClass( value );
		//TODO separate the ContainedIn processing in its own registry of types
		PerClassWork classWork = getClassWorkIfConfigured( tenantId, new PojoIndexedTypeIdentifier( entityClass ) );
		if ( classWork != null ) {
			classWork.recurseContainedIn( value, context );
		}
	}
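
	// Note: AbstractDocumentBuilder#appendContainedInWorkForInstance is expected to
	// call back into this method for entities reachable through @ContainedIn
	// associations, so the recursion alternates between the document builders and
	// this work plan until the ContainedInRecursionContext stops it.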

	/**
	 * @return the current plan converted into a list of {@code LuceneWork}
	 */
	public List<LuceneWork> getPlannedLuceneWork() {
		List<LuceneWork> luceneQueue = new ArrayList<>();
		for ( PerClassWork perClassWork : byClass.values() ) {
			perClassWork.enqueueLuceneWork( luceneQueue );
		}
		return luceneQueue;
	}
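
	/*
	 * Illustrative note, not part of the original source: within each entity type,
	 * works are appended in a fixed relative order by PerClassWork#enqueueLuceneWork:
	 * first an optional PurgeAllLuceneWork, then any DeleteByQueryLuceneWork, then
	 * the per-entity add/update/delete works.
	 */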

	/**
	 * {@code PerClassWork} organizes work per entity type.
	 */
	class PerClassWork {

		/**
		 * We further organize work per entity identifier so that we can cancel or adapt work being done
		 * on the same entities.
		 * <p>
		 * This map uses as key what we originally received as {@link Work#getId()} if the type
		 * is annotated with @ProvidedId, otherwise it uses the value pointed to by
		 * {@link org.hibernate.search.annotations.DocumentId} or as last attempt {@code javax.persistence.Id}.
		 * <p>
		 * We use a LinkedHashMap to ensure the order will be stable from one run to another.
		 * This changes everything when debugging...
		 */
		private final Map<Serializable, PerEntityWork> entityById = new LinkedHashMap<>();

		/**
		 * When a PurgeAll operation is sent on the type, we can remove all previously scheduled work
		 * and remember that the first operation on the index is going to be a purge all.
		 */
		private boolean purgeAll = false;

		private List<DeletionQuery> deletionQueries = new ArrayList<>();

		/**
		 * The type of all classes being managed
		 */
		private final IndexedTypeIdentifier typeIdentifier;

		private final String tenantId;

		/**
		 * The DocumentBuilder relative to the type being managed
		 */
		private final AbstractDocumentBuilder documentBuilder;

		/**
		 * True when the entity type {@link #typeIdentifier} does not have its own index, but is only used in contained scenarios
		 */
		private final boolean containedInOnly;

		/**
		 * @param tenantId the tenant ID
		 * @param typeIdentifier The type of entities being managed by this instance
		 * @param documentBuilder The document builder to use for entities managed by this instance
		 */
		PerClassWork(String tenantId, IndexedTypeIdentifier typeIdentifier, AbstractDocumentBuilder documentBuilder) {
			this.typeIdentifier = typeIdentifier;
			this.documentBuilder = documentBuilder;
			this.containedInOnly = documentBuilder instanceof DocumentBuilderContainedEntity;
			this.tenantId = tenantId;
		}

		/**
		 * Adds a work to the current plan. The entity class of the work must be of the
		 * type managed by this instance.
		 *
		 * @param work the {@code Work} instance to add to the plan
		 */
		public void addWork(Work work) {
			if ( work.getType() == WorkType.PURGE_ALL ) {
				entityById.clear();
				this.deletionQueries.clear();
				purgeAll = true;
			}
			else if ( work.getType() == WorkType.DELETE_BY_QUERY ) {
				DeleteByQueryWork delWork = (DeleteByQueryWork) work;
				this.deletionQueries.add( delWork.getDeleteByQuery() );
			}
			else {
				Serializable id = extractProperId( work );
				PerEntityWork entityWork = entityById.get( id );
				if ( entityWork == null ) {
					entityWork = new PerEntityWork( work );
					entityById.put( id, entityWork );
				}
				entityWork.addWork( work );
			}
		}

		/**
		 * We need to distinguish which value is used as the identifier,
		 * according to the use case and mapping options.
		 *
		 * @param work The work instance from which to extract the id
		 *
		 * @return the appropriate id to use for this work
		 */
		private Serializable extractProperId(Work work) {
			// see HSEARCH-662
			if ( containedInOnly ) {
				return work.getId();
			}

			Object entity = work.getEntity();
			// 1) the entity is null for a purge operation, which requires us to trust the work id
			// 2) types mapped with a provided id require us to use the work id
			// 3) when Hibernate identifier rollback is used and this identifier is our same id source, we need to get the value from the work id
			if ( entity == null
					|| documentBuilder.requiresProvidedId()
					|| ( work.isIdentifierWasRolledBack() && documentBuilder.isIdMatchingJpaId() ) ) {
				return work.getId();
			}
			else {
				return documentBuilder.getId( entity );
			}
		}

		/**
		 * Enqueues all work needed to be performed according to current state into
		 * the LuceneWork queue.
		 *
		 * @param luceneQueue work will be appended to this list
		 */
		public void enqueueLuceneWork(List<LuceneWork> luceneQueue) {
			final Set<Entry<Serializable, PerEntityWork>> entityInstances = entityById.entrySet();
			final ConversionContext conversionContext = new ContextualExceptionBridgeHelper();
			if ( purgeAll ) {
				luceneQueue.add( new PurgeAllLuceneWork( tenantId, typeIdentifier ) );
			}
			for ( DeletionQuery delQuery : this.deletionQueries ) {
				luceneQueue.add( new DeleteByQueryLuceneWork( tenantId, typeIdentifier, delQuery ) );
			}
			for ( Entry<Serializable, PerEntityWork> entry : entityInstances ) {
				Serializable indexingId = entry.getKey();
				PerEntityWork perEntityWork = entry.getValue();
				String tenantIdentifier = perEntityWork.getTenantIdentifier();
				perEntityWork.enqueueLuceneWork( tenantIdentifier, typeIdentifier, indexingId, documentBuilder, luceneQueue, conversionContext );
			}
		}

		/**
		 * Starts processing the {@code ContainedIn} annotation for all instances stored in
		 * {@link #entityById}.
		 * <p>
		 * This processing must be performed when no more work is being collected by the event
		 * system. The processing might recursively add more work to the plan.
		 */
		public void processContainedInAndPrepareExecution() {
			Entry<Serializable, PerEntityWork>[] entityInstancesFrozenView = new Entry[entityById.size()];
			entityInstancesFrozenView = entityById.entrySet().toArray( entityInstancesFrozenView );
			for ( Entry<Serializable, PerEntityWork> entry : entityInstancesFrozenView ) {
				PerEntityWork perEntityWork = entry.getValue();
				perEntityWork.processContainedIn( documentBuilder, WorkPlan.this );
			}
		}

		/**
		 * Method to continue the recursion for ContainedIn processing, as started by
		 * {@link #processContainedInAndPrepareExecution()}.
		 * Additional work that needs to be processed will be added to this same WorkPlan.
		 *
		 * @param value the instance to be processed
		 * @param context the validator for the depth constraints
		 */
		void recurseContainedIn(Object value, ContainedInRecursionContext context) {
			if ( documentBuilder.requiresProvidedId() ) {
				log.containedInPointsToProvidedId( instanceInitializer.getClass( value ) );
			}
			else {
				Serializable extractedId = documentBuilder.getId( value );
				if ( extractedId != null ) {
					PerEntityWork entityWork = entityById.get( extractedId );
					if ( entityWork == null ) {
						EntityIndexingInterceptor entityInterceptor = getEntityInterceptor();
						IndexingOverride operation;
						if ( entityInterceptor != null ) {
							operation = entityInterceptor.onUpdate( value );
						}
						else {
							operation = IndexingOverride.APPLY_DEFAULT;
						}
						//TODO there is a small duplication with some of TransactionalWorker.interceptWork
						// but what would be a proper factored solution?
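						// The interceptor outcome drives the switch below: UPDATE and
						// APPLY_DEFAULT schedule a delete+add pair for the entity, SKIP
						// leaves the index untouched, and REMOVE turns the planned update
						// into a delete.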
						switch ( operation ) {
							// we are planning an update by default
							case UPDATE:
							case APPLY_DEFAULT:
								entityWork = new PerEntityWork( tenantId, value );
								entityById.put( extractedId, entityWork );
								break;
							case SKIP:
								log.forceSkipIndexOperationViaInterception( this.typeIdentifier, WorkType.UPDATE );
								break;
							case REMOVE:
								log.forceRemoveOnIndexOperationViaInterception( this.typeIdentifier, WorkType.UPDATE );
								Work work = new Work( tenantId, value, extractedId, WorkType.DELETE );
								entityWork = new PerEntityWork( work );
								entityById.put( extractedId, entityWork );
								break;
							default:
								throw new AssertionFailure( "Unknown action type: " + operation );
						}
						// recursion starts
						documentBuilder.appendContainedInWorkForInstance( value, WorkPlan.this, context, tenantId );
					}
					// else: nothing to do, as it's being processed already
				}
				else {
					// this branch is for the @ContainedIn recursive work of non-indexed entities,
					// as they don't have an indexingId
					documentBuilder.appendContainedInWorkForInstance( value, WorkPlan.this, context, tenantId );
				}
			}
		}

		private EntityIndexingInterceptor getEntityInterceptor() {
			EntityIndexBinding indexBindingForEntity = extendedIntegrator.getIndexBinding( this.typeIdentifier );
			return indexBindingForEntity != null ? indexBindingForEntity.getEntityIndexingInterceptor() : null;
		}

		public String getTenantId() {
			return tenantId;
		}
	}

	/**
	 * Keeps track of what needs to be done, Lucene-wise, for each entity.
	 * Each entity might need to be deleted from the index, added to the index,
	 * or both; in the latter case the delete will be performed first.
	 */
	private static class PerEntityWork {

		private Object entity;

		/**
		 * When true, the Lucene Document representing this entity will be deleted
		 * from the index.
		 */
		private boolean delete = false;

		/**
		 * When true, the entity will be converted to a Lucene Document and added
		 * to the index.
		 */
		private boolean add = false;

		/**
		 * Needed to stop recursion when processing ContainedIn
		 * of already processed instances.
		 */
		private boolean containedInProcessed = false;

		private final String tenantId;

		/**
		 * Constructor to force an update of the entity even without
		 * having a specific Work instance for it.
		 *
		 * @param tenantId the tenant identifier
		 * @param entity the instance which needs to be updated in the index
		 */
		private PerEntityWork(String tenantId, Object entity) {
			// for updates only
			this.entity = entity;
			this.delete = true;
			this.add = true;
			this.containedInProcessed = true;
			this.tenantId = tenantId;
		}

		/**
		 * Prepares the initial state of planned changes according
		 * to the type of work being fired.
		 *
		 * @param work the work instance
		 */
		private PerEntityWork(Work work) {
			entity = work.getEntity();
			tenantId = work.getTenantIdentifier();
			WorkType type = work.getType();
			// sets the initial state:
			switch ( type ) {
				case ADD:
					add = true;
					break;
				case DELETE:
				case PURGE:
					delete = true;
					break;
				case COLLECTION:
				case UPDATE:
					delete = true;
					add = true;
					break;
				case INDEX:
					add = true;
					delete = true;
					break;
				case PURGE_ALL:
					// not breaking intentionally: PURGE_ALL should not reach this class
				case DELETE_BY_QUERY:
					// not breaking intentionally: DELETE_BY_QUERY should not reach this class
				default:
					throw new SearchException( "unexpected state:" + type );
			}
		}

		/**
		 * Has different effects depending on the new type of work needed
		 * and the previously scheduled work.
		 * This way we never store more than one plan for each entity, and the order
		 * of final execution is irrelevant; what matters is the order in which the
		 * work is added to the plan.
		 *
		 * @param work the work instance to add
		 */
		public void addWork(Work work) {
			entity = work.getEntity();
			WorkType type = work.getType();
			switch ( type ) {
				case INDEX:
				case UPDATE:
					if ( add && !delete ) {
						// noop: the entity was newly created in this same unit of work,
						// so it only needs to be added; no need to delete
					}
					else {
						add = true;
						delete = true;
					}
					break;
				case ADD:
					// ADD is the only operation which doesn't imply a delete-before-add
					add = true;
					// leave the delete flag as-is
					break;
				case DELETE:
				case PURGE:
					if ( add && !delete ) {
						// the entity was newly created in this same unit of work,
						// so the two operations cancel each other
						add = false;
					}
					else {
						add = false;
						delete = true;
					}
					break;
				case COLLECTION:
					if ( !add && !delete ) {
						add = true;
						delete = true;
					}
					// otherwise nothing to do, as something else was already scheduled
					break;
				case PURGE_ALL:
				case DELETE_BY_QUERY:
				default:
					throw new SearchException( "unexpected state:" + type );
			}
		}

		/**
		 * Adds the needed LuceneWork to the queue for this entity instance.
		 *
		 * @param tenantIdentifier the tenant identifier
		 * @param typeIdentifier the type
		 * @param indexingId the identifier of the instance
		 * @param entityBuilder the DocumentBuilder for this type
		 * @param luceneQueue the queue collecting all changes
		 * @param conversionContext the context used to report failures of field bridge conversions
		 */
		public void enqueueLuceneWork(String tenantIdentifier, IndexedTypeIdentifier typeIdentifier, Serializable indexingId,
				AbstractDocumentBuilder entityBuilder, List<LuceneWork> luceneQueue, ConversionContext conversionContext) {
			if ( add || delete ) {
				entityBuilder.addWorkToQueue( tenantIdentifier, typeIdentifier, entity, indexingId, delete, add, luceneQueue, conversionContext );
			}
		}

		/**
		 * Works via recursion, passing the WorkPlan over, so that additional work can be planned
		 * according to the needs of ContainedIn processing.
		 *
		 * @param entityBuilder the DocumentBuilder for this type
		 * @param workplan the current WorkPlan, used for recursion
		 *
		 * @see org.hibernate.search.annotations.ContainedIn
		 */
		public void processContainedIn(AbstractDocumentBuilder entityBuilder, WorkPlan workplan) {
			if ( entity != null && !containedInProcessed ) {
				containedInProcessed = true;
				if ( add || delete ) {
					entityBuilder.appendContainedInWorkForInstance( entity, workplan, null, getTenantIdentifier() );
				}
			}
		}

		public String getTenantIdentifier() {
			return tenantId;
		}
	}

	/**
	 * Gets the DocumentBuilder for this type.
	 *
	 * @param extendedIntegrator the search factory (implementor)
	 * @param typeIdentifier the entity type for which to retrieve the document builder
	 *
	 * @return the DocumentBuilder for this type, or null if the type is not configured
	 */
	private static AbstractDocumentBuilder getDocumentBuilderIfConfigured(ExtendedSearchIntegrator extendedIntegrator, IndexedTypeIdentifier typeIdentifier) {
		EntityIndexBinding entityIndexBinding = extendedIntegrator.getIndexBinding( typeIdentifier );
		if ( entityIndexBinding == null ) {
			DocumentBuilderContainedEntity entityBuilder = extendedIntegrator.getDocumentBuilderContainedEntity( typeIdentifier );
			if ( entityBuilder == null ) {
				/*
				 * May happen when @ContainedIn targets a non-configured type (not @Indexed nor hosting any @ContainedIn),
				 * which is a legitimate use case when this type has a configured subtype.
				 * Just return null.
				 */
				return null;
			}
			else {
				return entityBuilder;
			}
		}
		else {
			return entityIndexBinding.getDocumentBuilder();
		}
	}
}
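
/*
 * Illustrative usage sketch, not part of the original source. It shows the
 * typical lifecycle of a WorkPlan as driven by a worker collecting entity
 * change events; the Work constructor shape may vary across versions, and
 * applyToBackend() is a hypothetical stand-in for the backend hand-off:
 *
 *   WorkPlan plan = new WorkPlan( extendedIntegrator );
 *   plan.addWork( new Work( entity, entityId, WorkType.UPDATE ) );
 *   // ... more events from the same unit of work ...
 *   plan.processContainedInAndPrepareExecution();
 *   List<LuceneWork> queue = plan.getPlannedLuceneWork();
 *   applyToBackend( queue );
 *   plan.clear();
 */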




