org.carrot2.core.ProcessingResult Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of carrot2-mini Show documentation
Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).
There is a newer version: 3.16.3
Show newest version

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2013, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.core;

import java.io.*;
import java.util.*;

import org.apache.commons.lang.StringUtils;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.util.MapUtils;
import org.carrot2.util.simplexml.SimpleXmlWrapperValue;
import org.carrot2.util.simplexml.SimpleXmlWrappers;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.util.DefaultPrettyPrinter;
import org.codehaus.jackson.map.ObjectMapper;
import org.simpleframework.xml.*;
import org.simpleframework.xml.core.*;

import com.google.common.collect.*;

/**
 * Encapsulates the results of processing. Provides access to the values of attributes
 * collected after processing and utility methods for obtaining processed documents
 * ({@link #getDocuments()}) and the created clusters ({@link #getClusters()}).
 */
@Root(name = "searchresult", strict = false)
public final class ProcessingResult
{
    /** Attributes collected after processing */
    private Map<String, Object> attributes = Maps.newHashMap();

    /** Read-only view of attributes exposed in {@link #getAttributes()} */
    private Map<String, Object> attributesView;

    /**
     * Query field used during serialization/ deserialization, see
     * {@link #afterDeserialization()} and {@link #beforeSerialization()}
     */
    @Element(required = false)
    private String query;

    /**
     * Documents field used during serialization/ deserialization, see
     * {@link #afterDeserialization()} and {@link #beforeSerialization()}
     */
    @ElementList(inline = true, required = false)
    private List<Document> documents;

    /**
     * Clusters field used during serialization/ deserialization, see
     * {@link #afterDeserialization()} and {@link #beforeSerialization()}
     */
    @ElementList(inline = true, required = false)
    private List<Cluster> clusters;

    /** Attributes of this result for serialization/ deserialization purposes. */
    @ElementMap(entry = "attribute", key = "key", attribute = true, inline = true, required = false)
    private HashMap<String, SimpleXmlWrapperValue> otherAttributesForSerialization;

    /**
     * Parameterless constructor required for XML serialization/ deserialization.
     */
    ProcessingResult()
    {
        this(new HashMap<String, Object>());
    }

    /**
     * Creates a {@link ProcessingResult} with the provided attributes. Assigns
     * identifiers to documents and clusters if they are present in the attributes map
     * (under the keys {@link AttributeNames#DOCUMENTS} and
     * {@link AttributeNames#CLUSTERS}) and replaces both lists with unmodifiable views.
     * <p>
     * The provided map is stored by reference and is modified by this constructor.
     * </p>
     *
     * @param attributes attributes fed-in and collected during processing
     */
    @SuppressWarnings("unchecked")
    ProcessingResult(Map<String, Object> attributes)
    {
        this.attributes = attributes;

        // Replace a modifiable collection of documents with an unmodifiable one
        final List<Document> documents = (List<Document>) attributes
            .get(AttributeNames.DOCUMENTS);
        if (documents != null)
        {
            Document.assignDocumentIds(documents);
            attributes.put(AttributeNames.DOCUMENTS,
                Collections.unmodifiableList(documents));
        }

        // Replace a modifiable collection of clusters with an unmodifiable one
        final List<Cluster> clusters = (List<Cluster>) attributes
            .get(AttributeNames.CLUSTERS);
        if (clusters != null)
        {
            Cluster.assignClusterIds(clusters);
            attributes.put(AttributeNames.CLUSTERS,
                Collections.unmodifiableList(clusters));
        }

        // Store a reference to attributes as an unmodifiable map
        this.attributesView = Collections.unmodifiableMap(attributes);
    }

    /**
     * Returns attributes fed-in and collected during processing. The returned map is
     * unmodifiable.
     *
     * @return attributes fed-in and collected during processing
     */
    public Map<String, Object> getAttributes()
    {
        return attributesView;
    }

    /**
     * Returns a specific attribute of this result set. This method is equivalent to
     * calling {@link #getAttributes()} and then getting the required attribute from the
     * map.
     * <p>
     * The value is cast to the type expected by the caller without any runtime check,
     * so a mismatch surfaces as a {@link ClassCastException} at the call site.
     * </p>
     *
     * @param key key of the attribute to return
     * @return value of the attribute or {@code null} if the attribute is not present
     */
    @SuppressWarnings("unchecked")
    public <T> T getAttribute(String key)
    {
        return (T) attributesView.get(key);
    }

    /**
     * Returns the documents that have been processed. The returned collection is
     * unmodifiable.
     *
     * @return documents that have been processed or {@code null} if no documents are
     *         present in the result.
     */
    @SuppressWarnings("unchecked")
    public List<Document> getDocuments()
    {
        return (List<Document>) attributes.get(AttributeNames.DOCUMENTS);
    }

    /*
     * TODO: Returning a list of clusters instead of a (possibly artificial) cluster with
     * subclusters adds a little complexity to recursive methods operating on clusters (a
     * natural entry point is a method taking one cluster and acting on subclusters
     * recursively). If we have to start with a list of clusters, we have to handle this
     * special case separately...
     */

    /**
     * Returns the clusters that have been created during processing. The returned list is
     * unmodifiable.
     *
     * @return clusters created during processing or {@code null} if no clusters were
     *         present in the result.
     */
    @SuppressWarnings("unchecked")
    public List<Cluster> getClusters()
    {
        return (List<Cluster>) attributes.get(AttributeNames.CLUSTERS);
    }

    /**
     * Extracts the query, document and cluster lists and the remaining attributes into
     * the dedicated serialization fields before serialization.
     */
    @Persist
    @SuppressWarnings("unused")
    private void beforeSerialization()
    {
        /*
         * See http://issues.carrot2.org/browse/CARROT-693; this monitor does not save us
         * in multi-threaded environment anyway. A better solution would be to prepare
         * this eagerly in the constructor, but we try to balance overhead and full
         * correctness here.
         */
        synchronized (this)
        {
            query = (String) attributes.get(AttributeNames.QUERY);

            // Copy the lists so that the serialized fields are plain array lists
            final List<Document> currentDocuments = getDocuments();
            documents = currentDocuments != null
                ? Lists.newArrayList(currentDocuments) : null;

            final List<Cluster> currentClusters = getClusters();
            clusters = currentClusters != null
                ? Lists.newArrayList(currentClusters) : null;

            // Query, clusters and documents have their own fields, everything else
            // goes to the generic attribute map. An empty map is not serialized at all.
            otherAttributesForSerialization = MapUtils.asHashMap(SimpleXmlWrappers
                .wrap(attributes));
            otherAttributesForSerialization.remove(AttributeNames.QUERY);
            otherAttributesForSerialization.remove(AttributeNames.CLUSTERS);
            otherAttributesForSerialization.remove(AttributeNames.DOCUMENTS);
            if (otherAttributesForSerialization.isEmpty())
            {
                otherAttributesForSerialization = null;
            }
        }
    }

    /**
     * Transfers the query, document and cluster lists to the attributes map after
     * deserialization and resolves document references held by clusters.
     */
    @Commit
    @SuppressWarnings("unused")
    private void afterDeserialization() throws Exception
    {
        if (otherAttributesForSerialization != null)
        {
            attributes = SimpleXmlWrappers.unwrap(otherAttributesForSerialization);
        }

        // The attributes map may have just been replaced, so the view must be rebuilt
        attributesView = Collections.unmodifiableMap(attributes);

        attributes.put(AttributeNames.QUERY, query != null ? query.trim() : null);
        attributes.put(AttributeNames.DOCUMENTS, documents);
        attributes.put(AttributeNames.CLUSTERS, clusters);

        // Convert document ids to the actual references
        if (clusters != null && documents != null)
        {
            final Map<String, Document> documentsById = Maps.newHashMap();
            for (Document document : documents)
            {
                documentsById.put(document.getStringId(), document);
            }

            for (Cluster cluster : clusters)
            {
                documentIdToReference(cluster, documentsById);
            }
        }
    }

    /**
     * Replace document refids with the actual references upon deserialization. Recurses
     * into all subclusters of the provided cluster.
     *
     * @param cluster the cluster whose document refids are to be resolved
     * @param documents documents indexed by their string identifiers
     */
    private void documentIdToReference(Cluster cluster, Map<String, Document> documents)
    {
        if (cluster.documentIds != null)
        {
            for (Cluster.DocumentRefid documentRefid : cluster.documentIds)
            {
                // NOTE(review): a refid with no matching document yields null here;
                // confirm how Cluster#addDocuments handles that.
                cluster.addDocuments(documents.get(documentRefid.refid));
            }
        }

        for (Cluster subcluster : cluster.getSubclusters())
        {
            documentIdToReference(subcluster, documents);
        }
    }

    /**
     * Serializes this {@link ProcessingResult} to an XML string.
     *
     * @return XML representation of this result
     * @throws RuntimeException wrapping any exception raised during serialization
     */
    public String serialize()
    {
        try
        {
            final StringWriter sw = new StringWriter();
            new Persister().write(this, sw);
            return sw.toString();
        }
        catch (Exception e)
        {
            throw new RuntimeException(e);
        }
    }

    /**
     * Serializes this {@link ProcessingResult} to an XML stream. The output includes all
     * documents, clusters and other attributes.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param stream the stream to serialize this {@link ProcessingResult} to. The stream
     *            will not be closed.
     * @throws Exception in case of any problems with serialization
     */
    public void serialize(OutputStream stream) throws Exception
    {
        serialize(stream, true, true);
    }

    /**
     * Serializes this {@link ProcessingResult} to a byte stream. Documents and clusters
     * can be included or skipped in the output as requested. Other attributes are always
     * included.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param stream the stream to serialize this {@link ProcessingResult} to. The stream
     *            will not be closed.
     * @param saveDocuments if {@code false}, documents will not be serialized.
     *            Notice that when deserializing XML containing clusters but not
     *            documents, document references in {@link Cluster#getDocuments()} will
     *            not be restored.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @throws Exception in case of any problems with serialization
     */
    public void serialize(OutputStream stream, boolean saveDocuments, boolean saveClusters)
        throws Exception
    {
        serialize(stream, saveDocuments, saveClusters, true);
    }

    /**
     * Serializes this {@link ProcessingResult} to a byte stream. Documents, clusters and
     * other attributes can be included or skipped in the output as requested.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param stream the stream to serialize this {@link ProcessingResult} to. The stream
     *            will not be closed.
     * @param saveDocuments if {@code false}, documents will not be serialized.
     *            Notice that when deserializing XML containing clusters but not
     *            documents, document references in {@link Cluster#getDocuments()} will
     *            not be restored.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @param saveOtherAttributes if {@code false}, other attributes will not be
     *            serialized
     * @throws Exception in case of any problems with serialization
     */
    public void serialize(OutputStream stream, boolean saveDocuments,
        boolean saveClusters, boolean saveOtherAttributes) throws Exception
    {
        final Map<String, Object> backupAttributes = attributes;

        // Temporarily swap in a reduced attribute map; beforeSerialization() reads
        // from the attributes field while the persister writes this object.
        attributes = prepareAttributesForSerialization(saveDocuments, saveClusters,
            saveOtherAttributes);
        try
        {
            new Persister().write(this, stream);
        }
        finally
        {
            // Always restore the full attribute map, also when serialization fails,
            // so that a failed write does not leave this result permanently truncated.
            attributes = backupAttributes;
        }
    }

    /**
     * Deserializes a {@link ProcessingResult} from a character sequence containing XML.
     *
     * @param input the XML to deserialize a {@link ProcessingResult} from
     * @return deserialized {@link ProcessingResult}
     * @throws Exception in case of any problems with deserialization
     */
    public static ProcessingResult deserialize(CharSequence input) throws Exception
    {
        return new Persister().read(ProcessingResult.class, input.toString());
    }

    /**
     * Deserializes a {@link ProcessingResult} from an XML stream.
     *
     * @param input the input XML stream to deserialize a {@link ProcessingResult} from.
     *            The stream will not be closed.
     * @return deserialized {@link ProcessingResult}
     * @throws Exception in case of any problems with deserialization
     */
    public static ProcessingResult deserialize(InputStream input) throws Exception
    {
        return new Persister().read(ProcessingResult.class, input);
    }

    /**
     * Serializes this processing result as JSON to the provided writer. The
     * output includes all documents, clusters and other attributes.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer) throws IOException
    {
        serializeJson(writer, null);
    }

    /**
     * Serializes this processing result as JSON to the provided writer. The
     * output includes all documents, clusters and other attributes.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @param callback JavaScript function name in which to wrap the JSON response or
     *            {@code null}.
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer, String callback) throws IOException
    {
        serializeJson(writer, callback, true, true);
    }

    /**
     * Serializes this processing result as JSON to the provided writer.
     * Documents and clusters can be included or skipped in the output as requested. Other
     * attributes are always included. The output is not pretty-printed.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @param callback JavaScript function name in which to wrap the JSON response or
     *            {@code null}.
     * @param saveDocuments if {@code false}, documents will not be serialized.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer, String callback, boolean saveDocuments,
        boolean saveClusters) throws IOException
    {
        serializeJson(writer, callback, false, saveDocuments, saveClusters);
    }

    /**
     * Serializes this processing result as JSON to the provided writer.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @param callback JavaScript function name in which to wrap the JSON response or
     *            {@code null}.
     * @param indent if {@code true}, the output JSON will be pretty-printed
     * @param saveDocuments if {@code false}, documents will not be serialized.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer, String callback, boolean indent,
        boolean saveDocuments, boolean saveClusters) throws IOException
    {
        serializeJson(writer, callback, indent, saveDocuments, saveClusters, true);
    }

    /**
     * Serializes this processing result as JSON to the provided writer.
     * Documents, clusters and other attributes can be included or skipped in the output
     * as requested.
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @param callback JavaScript function name in which to wrap the JSON response or
     *            {@code null}. A blank name is treated like {@code null}.
     * @param indent if {@code true}, the output JSON will be pretty-printed
     * @param saveDocuments if {@code false}, documents will not be serialized.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @param saveOtherAttributes if {@code false}, other attributes will not be
     *            serialized
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer, String callback, boolean indent,
        boolean saveDocuments, boolean saveClusters, boolean saveOtherAttributes)
        throws IOException
    {
        final ObjectMapper mapper = new ObjectMapper();
        final JsonGenerator generator = new JsonFactory().createJsonGenerator(writer);
        if (indent)
        {
            generator.setPrettyPrinter(new DefaultPrettyPrinter());
        }

        final boolean wrapInCallback = StringUtils.isNotBlank(callback);
        if (wrapInCallback)
        {
            // Written straight to the writer, before the generator emits anything
            writer.write(callback + "(");
        }

        final Map<String, Object> attrs = prepareAttributesForSerialization(
            saveDocuments, saveClusters, saveOtherAttributes);
        mapper.writeValue(generator, attrs);

        // The generator buffers its output; push it to the writer before anything
        // else is appended directly, so the closing suffix cannot overtake the JSON.
        generator.flush();

        if (wrapInCallback)
        {
            writer.write(");");
        }
    }

    /**
     * Prepares a temporary attributes map for serialization purposes. Includes only the
     * requested elements in the map. The query is always included.
     *
     * @param saveDocuments if {@code true}, documents are put in the returned map
     * @param saveClusters if {@code true}, clusters are put in the returned map
     * @param saveOtherAttributes if {@code true}, all attributes other than documents
     *            and clusters are copied to the returned map; otherwise only the query
     *            is copied
     * @return a new map, independent of the attributes map of this result
     */
    private Map<String, Object> prepareAttributesForSerialization(boolean saveDocuments,
        boolean saveClusters, boolean saveOtherAttributes)
    {
        final Map<String, Object> tempAttributes = Maps.newHashMap();

        if (saveOtherAttributes)
        {
            // Start from everything and drop the two lists; they are added back
            // below only when explicitly requested.
            tempAttributes.putAll(attributes);
            tempAttributes.remove(AttributeNames.DOCUMENTS);
            tempAttributes.remove(AttributeNames.CLUSTERS);
        }
        else
        {
            tempAttributes
                .put(AttributeNames.QUERY, attributes.get(AttributeNames.QUERY));
        }

        if (saveDocuments)
        {
            tempAttributes.put(AttributeNames.DOCUMENTS, getDocuments());
        }

        if (saveClusters)
        {
            tempAttributes.put(AttributeNames.CLUSTERS, getClusters());
        }

        return tempAttributes;
    }
}