All downloads are free. Search and download functionality uses the official Maven repository.

com.yahoo.application.container.DocumentProcessing Maven / Gradle / Ivy

// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.application.container;

import com.yahoo.api.annotations.Beta;
import com.yahoo.component.ComponentSpecification;
import com.yahoo.docproc.impl.DocprocExecutor;
import com.yahoo.docproc.impl.DocprocService;
import com.yahoo.docproc.DocumentProcessor;
import com.yahoo.docproc.jdisc.DocumentProcessingHandler;
import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.document.annotation.AnnotationType;
import com.yahoo.processing.execution.chain.ChainRegistry;

import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * For doing document processing with {@link JDisc}.
 *
 * <p>Wraps a {@link DocumentProcessingHandler} and exposes its docproc chains, document types
 * and annotation types, plus two ways of running a
 * {@link com.yahoo.docproc.Processing Processing} through a named chain.
 *
 * @author Einar M R Rosenvinge
 */
@Beta
public final class DocumentProcessing {

    private final DocumentProcessingHandler handler;

    /** Read-only map from document type name to document type, built once at construction. */
    private final Map<String, DocumentType> documentTypes;

    /**
     * Creates a document processing facade on top of the given handler.
     *
     * @param handler the handler supplying chains, executors and the document type manager
     */
    DocumentProcessing(DocumentProcessingHandler handler) {
        this.handler = handler;
        documentTypes = retrieveDocumentTypes(handler.getDocumentTypeManager());
    }

    /**
     * Copies every document type offered by the manager's iterator into a map keyed by
     * {@link DocumentType#getName()}. The copy is taken when this method is called, so types
     * registered with the manager afterwards are not reflected in the returned map.
     *
     * @param documentTypeManager the manager to read document types from
     * @return an unmodifiable map from document type name to document type
     */
    private static Map<String, DocumentType> retrieveDocumentTypes(DocumentTypeManager documentTypeManager) {
        Map<String, DocumentType> documentTypes = new HashMap<>();
        // A typed iterator is required: with a raw Iterator, next() yields Object
        // and the assignment to DocumentType does not compile.
        Iterator<DocumentType> types = documentTypeManager.documentTypeIterator();
        while (types.hasNext()) {
            DocumentType type = types.next();
            documentTypes.put(type.getName(), type);
        }
        return Collections.unmodifiableMap(documentTypes);
    }

    /**
     * Processes the given Processing through the specified chain. Note that if one
     * {@link com.yahoo.docproc.DocumentProcessor DocumentProcessor} in the
     * chain returns a {@link com.yahoo.docproc.DocumentProcessor.LaterProgress DocumentProcessor.LaterProgress},
     * the calling thread will sleep for the duration
     * specified in {@link com.yahoo.docproc.DocumentProcessor.LaterProgress#getDelay() DocumentProcessor.LaterProgress#getDelay()},
     * and then run again. This method will hence return when a document processor returns
     * {@link com.yahoo.docproc.DocumentProcessor.Progress#DONE DocumentProcessor.Progress#DONE} or
     * {@link com.yahoo.docproc.DocumentProcessor.Progress#FAILED DocumentProcessor.Progress#FAILED}, throws an exception,
     * or if the calling thread is interrupted. This method will never return a
     * {@link com.yahoo.docproc.DocumentProcessor.LaterProgress DocumentProcessor.LaterProgress}.
     *
     * @param chain the specification of the chain to execute
     * @param processing the Processing to process
     * @return Progress.DONE or Progress.FAILED
     * @throws IllegalArgumentException if no chain matches the given specification
     * @throws RuntimeException if one of the document processors in the chain throws, or if the calling thread is interrupted
     */
    public DocumentProcessor.Progress process(ComponentSpecification chain, com.yahoo.docproc.Processing processing) {
        DocprocExecutor executor = getExecutor(chain);
        return executor.processUntilDone(processing);
    }

    /**
     * Processes the given Processing through the specified chain. Note that if one
     * {@link com.yahoo.docproc.DocumentProcessor DocumentProcessor} in the
     * chain returns a {@link com.yahoo.docproc.DocumentProcessor.LaterProgress DocumentProcessor.LaterProgress},
     * it will be returned by this method. This method will hence return whenever a document processor returns any
     * {@link com.yahoo.docproc.DocumentProcessor.Progress DocumentProcessor.Progress}, or
     * throws an exception.
     *
     * @param chain the specification of the chain to execute
     * @param processing the Processing to process
     * @return any Progress
     * @throws IllegalArgumentException if no chain matches the given specification
     * @throws RuntimeException if one of the document processors in the chain throws
     */
    public DocumentProcessor.Progress processOnce(ComponentSpecification chain, com.yahoo.docproc.Processing processing) {
        DocprocExecutor executor = getExecutor(chain);
        return executor.process(processing);
    }

    /**
     * Looks up the docproc service registered for the given chain specification and returns
     * its executor.
     *
     * @param chain the specification of the chain whose executor is wanted
     * @return the executor of the matching docproc service
     * @throws IllegalArgumentException if the registry has no component for the specification
     */
    private DocprocExecutor getExecutor(ComponentSpecification chain) {
        DocprocService service = handler.getDocprocServiceRegistry().getComponent(chain);
        if (service == null) {
            throw new IllegalArgumentException("No such chain: " + chain);
        }
        return service.getExecutor();
    }

    /**
     * Returns a registry of configured docproc chains.
     *
     * @return a registry of configured docproc chains
     */
    public ChainRegistry<DocumentProcessor> getChains() {
        return handler.getChains();
    }

    /**
     * Returns the document types that were known to the handler's document type manager when
     * this object was constructed.
     *
     * @return an unmodifiable map from document type name to document type
     */
    public Map<String, DocumentType> getDocumentTypes() {
        return documentTypes;
    }

    /**
     * Returns the annotation types held by the annotation type registry of the handler's
     * document type manager. The map is passed through exactly as the registry returns it.
     * NOTE(review): presumably keyed by annotation type name, and its mutability is decided by
     * the registry; confirm against AnnotationTypeRegistry.
     *
     * @return the registry's map of annotation types
     */
    public Map<String, AnnotationType> getAnnotationTypes() {
        return handler.getDocumentTypeManager().getAnnotationTypeRegistry().getTypes();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy