All downloads are free. Search and download functionality uses the official Maven repository.

org.carrot2.core.ProcessingComponentSuite Maven / Gradle / Ivy

Go to download

Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).

There is a newer version: 3.16.3
Show newest version

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.core;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.carrot2.util.CloseableUtils;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.resource.ResourceLookup;
import org.carrot2.util.simplexml.PersisterHelpers;
import org.simpleframework.xml.ElementList;
import org.simpleframework.xml.Root;
import org.simpleframework.xml.core.Commit;
import org.simpleframework.xml.core.Persister;
import org.simpleframework.xml.strategy.TreeStrategy;

import org.carrot2.shaded.guava.common.collect.Iterables;
import org.carrot2.shaded.guava.common.collect.Iterators;
import org.carrot2.shaded.guava.common.collect.Lists;

/**
 * A set of {@link IProcessingComponent}s used in Carrot2 applications.
 * <p>
 * A suite keeps three separate lists of component descriptors: document sources,
 * algorithms and "other" components. It can be read from and written to XML, and a
 * suite read from XML may include other suites, whose components are appended to the
 * including suite's lists.
 */
@Root(name = "component-suite")
public class ProcessingComponentSuite
{
    /**
     * References to other suites, read from inline {@code include} elements. Used only
     * during deserialization; {@link #deserialize(IResource, ResourceLookup)} resets
     * this field to {@code null} before returning the suite.
     */
    @ElementList(inline = true, required = false, entry = "include")
    ArrayList<ProcessingComponentSuiteInclude> includes;

    /**
     * Document source descriptors.
     * <p>
     * NOTE(review): the element type is declared as
     * {@link ProcessingComponentDescriptor} because that is the type this class reads
     * sources as. If the project declares a narrower descriptor type for sources,
     * confirm and narrow this field, the constructor and {@link #getSources()}.
     */
    @ElementList(name = "sources", entry = "source", required = false)
    private ArrayList<ProcessingComponentDescriptor> sources;

    /** Algorithm descriptors. */
    @ElementList(name = "algorithms", entry = "algorithm", required = false)
    private ArrayList<ProcessingComponentDescriptor> algorithms;

    /** Descriptors of components that are neither sources nor algorithms. */
    @ElementList(name = "components", entry = "component", required = false)
    private ArrayList<ProcessingComponentDescriptor> otherComponents;

    /**
     * Creates a suite with all lists unset. The lists are replaced with empty ones by
     * the post-deserialization callback when the suite is read from XML.
     */
    public ProcessingComponentSuite()
    {
    }

    /**
     * Creates a suite backed by the provided lists. The lists are stored by reference
     * (not copied); the list of other components starts empty.
     *
     * @param sources document source descriptors
     * @param algorithms algorithm descriptors
     */
    public ProcessingComponentSuite(ArrayList<ProcessingComponentDescriptor> sources,
                                    ArrayList<ProcessingComponentDescriptor> algorithms)
    {
        this.algorithms = algorithms;
        this.sources = sources;
        this.otherComponents = Lists.newArrayList();
    }

    /**
     * Returns the internal list of document sources. Changes to this list will affect the
     * suite.
     */
    public List<ProcessingComponentDescriptor> getSources()
    {
        return sources;
    }

    /**
     * Returns the internal list of algorithms. Changes to this list will affect the
     * suite.
     */
    public List<ProcessingComponentDescriptor> getAlgorithms()
    {
        return algorithms;
    }

    /**
     * Returns the internal list of other components (not algorithms, not sources).
     * Changes to this list will affect the suite.
     */
    public List<ProcessingComponentDescriptor> getOtherComponents()
    {
        return otherComponents;
    }

    /**
     * Returns all components available in this suite, including data sources, algorithms
     * and any other types, in that order. The returned list is a new copy; changes to it
     * do not affect the suite.
     */
    public List<ProcessingComponentDescriptor> getComponents()
    {
        return Lists.newArrayList(Iterables.concat(sources, algorithms, otherComponents));
    }

    /**
     * Replaces missing lists with empty ones, then loads every included suite and
     * appends its sources, algorithms and other components to this suite's lists.
     * <p>
     * NOTE(review): {@code session} is kept as a raw {@link Map} because the parameter
     * type expected by {@code PersisterHelpers.getResourceLookup} is not visible from
     * this file; parameterize it once that signature is confirmed.
     *
     * @param session deserialization session from which the resource lookup is obtained
     * @throws Exception if an included suite cannot be located or deserialized
     */
    @Commit
    private void postDeserialize(Map session) throws Exception
    {
        if (sources == null) sources = new ArrayList<>();
        if (algorithms == null) algorithms = new ArrayList<>();
        if (includes == null) includes = new ArrayList<>();
        if (otherComponents == null) otherComponents = new ArrayList<>();

        // Acquire contextual resource lookup from the session.
        final ResourceLookup resourceLookup = PersisterHelpers.getResourceLookup(session);

        // Load included suites. Currently, we don't check for cycles.
        final List<ProcessingComponentSuite> suites = Lists.newArrayList();

        for (ProcessingComponentSuiteInclude include : includes)
        {
            final IResource resource = resourceLookup.getFirst(include.suite);
            if (resource == null)
            {
                throw new Exception("Could not locate resource: " + include.suite);
            }
            suites.add(deserialize(resource, resourceLookup));
        }

        // Merge components of the included suites, in the order of inclusion.
        for (ProcessingComponentSuite suite : suites)
        {
            sources.addAll(suite.getSources());
            algorithms.addAll(suite.getAlgorithms());
            otherComponents.addAll(suite.getOtherComponents());
        }
    }

    /**
     * Deserializes component suite information from an XML stream.
     * 
     * @param resource The resource to be deserialized (must not be null).
     * @param resourceLookup Resource lookup utilities for potential included resources.
     * @return the deserialized suite, with its include list reset to {@code null}
     * @throws Exception if the resource or its stream is {@code null}, or if reading
     *         the XML fails
     */
    public static ProcessingComponentSuite deserialize(IResource resource,
                                                        ResourceLookup resourceLookup) 
        throws Exception
    {
        if (resource == null)
        {
            throw new IOException("Resource must not be null.");
        }

        final InputStream inputStream = resource.open();
        try
        {
            if (inputStream == null)
            {
                throw new IOException("Input stream must not be null.");
            }
            
            final Persister persister = PersisterHelpers.createPersister(
                resourceLookup, new TreeStrategy());
            final ProcessingComponentSuite suite = persister.read(ProcessingComponentSuite.class, inputStream);
            
            // Clear internals related to deserialization
            suite.includes = null;
            return suite;
        }
        finally
        {
            CloseableUtils.close(inputStream);
        }
    }

    /**
     * Serializes this component suite as an UTF-8 encoded XML.
     *
     * @param stream the stream to write the XML to
     * @throws Exception if writing fails
     */
    public void serialize(OutputStream stream) throws Exception
    {
        new Persister().write(this, stream);
    }

    /**
     * Remove components marked as unavailable from the suite. Only sources and
     * algorithms are examined; the list of other components is left untouched.
     * 
     * @return descriptors that were removed, sources first, then algorithms
     * @see ProcessingComponentDescriptor#isComponentAvailable()
     */
    public List<ProcessingComponentDescriptor> removeUnavailableComponents()
    {
        ArrayList<ProcessingComponentDescriptor> failed = Lists.newArrayList();
        ProcessingComponentDescriptor p;
        for (Iterator<ProcessingComponentDescriptor> i = Iterators.concat(
            sources.iterator(), algorithms.iterator()); i.hasNext();)
        {
            p = i.next();
            if (!p.isComponentAvailable())
            {
                failed.add(p);
                // Removes the descriptor from whichever backing list it came from.
                i.remove();
            }
        }

        return failed;
    }

    /**
     * Returns all processing component configurations available in this suite, one per
     * component and in the order of {@link #getComponents()}.
     * 
     * @see Controller#init(Map, ProcessingComponentConfiguration...)
     */
    public ProcessingComponentConfiguration [] getComponentConfigurations()
    {
        final List<ProcessingComponentDescriptor> components = getComponents();
        final ProcessingComponentConfiguration [] result = 
            new ProcessingComponentConfiguration [components.size()];
        int i = 0;
        for (ProcessingComponentDescriptor processingComponentDescriptor : components)
        {
            result[i++] = processingComponentDescriptor.getComponentConfiguration();
        }
        return result;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy