All downloads are free. Search and download functionality uses the official Maven repository.

org.carrot2.core.ProcessingResult Maven / Gradle / Ivy

Go to download

Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).

There is a newer version: 3.16.3
Show newest version

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2013, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.core;

import java.io.*;
import java.util.*;

import org.apache.commons.lang.StringUtils;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.util.MapUtils;
import org.carrot2.util.simplexml.SimpleXmlWrapperValue;
import org.carrot2.util.simplexml.SimpleXmlWrappers;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.util.DefaultPrettyPrinter;
import org.codehaus.jackson.map.ObjectMapper;
import org.simpleframework.xml.*;
import org.simpleframework.xml.core.*;

import com.google.common.collect.*;

/**
 * Encapsulates the results of processing. Provides access to the values of attributes
 * collected after processing and utility methods for obtaining processed documents
 * ({@link #getDocuments()}) and the created clusters ({@link #getClusters()}).
 * <p>
 * Instances can be serialized to and deserialized from XML, and serialized to JSON.
 * The serialization methods are not thread-safe; see the individual method
 * descriptions.
 * </p>
 */
@Root(name = "searchresult", strict = false)
public final class ProcessingResult
{
    /** Attributes collected after processing */
    private Map<String, Object> attributes = Maps.newHashMap();

    /** Read-only view of attributes exposed in {@link #getAttributes()} */
    private Map<String, Object> attributesView;

    /**
     * Query field used during serialization/ deserialization, see
     * {@link #afterDeserialization()} and {@link #beforeSerialization()}
     */
    @Element(required = false)
    private String query;

    /**
     * Documents field used during serialization/ deserialization, see
     * {@link #afterDeserialization()} and {@link #beforeSerialization()}
     */
    @ElementList(inline = true, required = false)
    private List<Document> documents;

    /**
     * Clusters field used during serialization/ deserialization, see
     * {@link #afterDeserialization()} and {@link #beforeSerialization()}
     */
    @ElementList(inline = true, required = false)
    private List<Cluster> clusters;

    /** Attributes of this result for serialization/ deserialization purposes. */
    @ElementMap(entry = "attribute", key = "key", attribute = true, inline = true, required = false)
    private HashMap<String, SimpleXmlWrapperValue> otherAttributesForSerialization;

    /**
     * Parameterless constructor required for XML serialization/ deserialization.
     */
    ProcessingResult()
    {
        this(new HashMap<String, Object>());
    }

    /**
     * Creates a {@link ProcessingResult} with the provided attributes.
     * Assigns unique document identifiers if documents are present in the
     * attributes map (under the key {@link AttributeNames#DOCUMENTS}) and cluster
     * identifiers if clusters are present (under the key
     * {@link AttributeNames#CLUSTERS}).
     * <p>
     * The provided map is stored by reference and modified in place: its document and
     * cluster lists are replaced with unmodifiable wrappers.
     * </p>
     *
     * @param attributes attributes collected during processing; must be modifiable
     */
    @SuppressWarnings("unchecked")
    ProcessingResult(Map<String, Object> attributes)
    {
        this.attributes = attributes;

        // Replace a modifiable collection of documents with an unmodifiable one
        final List<Document> documents = (List<Document>) attributes
            .get(AttributeNames.DOCUMENTS);
        if (documents != null)
        {
            Document.assignDocumentIds(documents);
            attributes.put(AttributeNames.DOCUMENTS,
                Collections.unmodifiableList(documents));
        }

        // Replace a modifiable collection of clusters with an unmodifiable one
        final List<Cluster> clusters = (List<Cluster>) attributes
            .get(AttributeNames.CLUSTERS);
        if (clusters != null)
        {
            Cluster.assignClusterIds(clusters);
            attributes.put(AttributeNames.CLUSTERS,
                Collections.unmodifiableList(clusters));
        }

        // Store a reference to attributes as an unmodifiable map
        this.attributesView = Collections.unmodifiableMap(attributes);
    }

    /**
     * Returns attributes fed-in and collected during processing. The returned map is
     * unmodifiable.
     *
     * @return attributes fed-in and collected during processing
     */
    public Map<String, Object> getAttributes()
    {
        return attributesView;
    }

    /**
     * Returns a specific attribute of this result set. This method is equivalent to
     * calling {@link #getAttributes()} and then getting the required attribute from the
     * map.
     * <p>
     * The value is cast to the type expected by the caller without any runtime check
     * in this method; a mismatch surfaces as a {@link ClassCastException} at the call
     * site.
     * </p>
     *
     * @param key key of the attribute to return
     * @return value of the attribute, or {@code null} if the map has no value for the key
     */
    @SuppressWarnings("unchecked")
    public <T> T getAttribute(String key)
    {
        return (T) attributesView.get(key);
    }

    /**
     * Returns the documents that have been processed. The returned collection is
     * unmodifiable.
     *
     * @return documents that have been processed or {@code null} if no documents are
     *         present in the result.
     */
    @SuppressWarnings("unchecked")
    public List<Document> getDocuments()
    {
        return (List<Document>) attributes.get(AttributeNames.DOCUMENTS);
    }

    /*
     * TODO: Returning a list of clusters instead of a (possibly artificial) cluster with
     * subclusters adds a little complexity to recursive methods operating on clusters (a
     * natural entry point is a method taking one cluster and acting on subclusters
     * recursively). If we have to start with a list of clusters, we have to handle this
     * special case separately...
     */

    /**
     * Returns the clusters that have been created during processing. The returned list is
     * unmodifiable.
     *
     * @return clusters created during processing or {@code null} if no clusters were
     *         present in the result.
     */
    @SuppressWarnings("unchecked")
    public List<Cluster> getClusters()
    {
        return (List<Cluster>) attributes.get(AttributeNames.CLUSTERS);
    }

    /**
     * Extracts document and cluster lists before serialization.
     */
    @Persist
    @SuppressWarnings("unused")
    private void beforeSerialization()
    {
        /*
         * See http://issues.carrot2.org/browse/CARROT-693; this monitor does not save us
         * in multi-threaded environment anyway. A better solution would be to prepare
         * this eagerly in the constructor, but we try to balance overhead and full
         * correctness here.
         */
        synchronized (this)
        {
            query = (String) attributes.get(AttributeNames.QUERY);

            if (getDocuments() != null)
            {
                documents = Lists.newArrayList(getDocuments());
            }
            else
            {
                documents = null;
            }

            if (getClusters() != null)
            {
                clusters = Lists.newArrayList(getClusters());
            }
            else
            {
                clusters = null;
            }

            // Query, clusters and documents have dedicated fields, so they are
            // removed from the generic attribute map to avoid writing them twice.
            otherAttributesForSerialization = MapUtils.asHashMap(SimpleXmlWrappers
                .wrap(attributes));
            otherAttributesForSerialization.remove(AttributeNames.QUERY);
            otherAttributesForSerialization.remove(AttributeNames.CLUSTERS);
            otherAttributesForSerialization.remove(AttributeNames.DOCUMENTS);
            if (otherAttributesForSerialization.isEmpty())
            {
                otherAttributesForSerialization = null;
            }
        }
    }

    /**
     * Transfers document and cluster lists to the attributes map after deserialization.
     */
    @Commit
    @SuppressWarnings("unused")
    private void afterDeserialization() throws Exception
    {
        if (otherAttributesForSerialization != null)
        {
            attributes = SimpleXmlWrappers.unwrap(otherAttributesForSerialization);
        }

        // The attributes map may have just been replaced, so the view is rebuilt.
        attributesView = Collections.unmodifiableMap(attributes);

        attributes.put(AttributeNames.QUERY, query != null ? query.trim() : null);
        attributes.put(AttributeNames.DOCUMENTS, documents);
        attributes.put(AttributeNames.CLUSTERS, clusters);

        // Convert document ids to the actual references
        if (clusters != null && documents != null)
        {
            final Map<String, Document> documentsById = Maps.newHashMap();
            for (Document document : documents)
            {
                documentsById.put(document.getStringId(), document);
            }

            for (Cluster cluster : clusters)
            {
                documentIdToReference(cluster, documentsById);
            }
        }
    }

    /**
     * Replace document refids with the actual references upon deserialization.
     * Descends recursively into subclusters.
     *
     * @param cluster the cluster whose document references should be restored
     * @param documents documents of this result keyed by their string identifier
     */
    private void documentIdToReference(Cluster cluster, Map<String, Document> documents)
    {
        if (cluster.documentIds != null)
        {
            for (Cluster.DocumentRefid documentRefid : cluster.documentIds)
            {
                // NOTE(review): a refid with no matching document yields a null
                // lookup result that is passed on as-is -- confirm this is intended.
                cluster.addDocuments(documents.get(documentRefid.refid));
            }
        }

        for (Cluster subcluster : cluster.getSubclusters())
        {
            documentIdToReference(subcluster, documents);
        }
    }

    /**
     * Serializes this {@link ProcessingResult} to an XML string.
     *
     * @return XML representation of this result
     * @throws RuntimeException wrapping any exception raised during serialization
     */
    public String serialize()
    {
        try
        {
            final StringWriter sw = new StringWriter();
            new Persister().write(this, sw);
            return sw.toString();
        }
        catch (Exception e)
        {
            throw new RuntimeException(e);
        }
    }

    /**
     * Serializes this {@link ProcessingResult} to an XML stream. The output includes all
     * documents, clusters and other attributes.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param stream the stream to serialize this {@link ProcessingResult} to. The stream
     *            will not be closed.
     * @throws Exception in case of any problems with serialization
     */
    public void serialize(OutputStream stream) throws Exception
    {
        serialize(stream, true, true);
    }

    /**
     * Serializes this {@link ProcessingResult} to a byte stream. Documents and clusters
     * can be included or skipped in the output as requested. Other attributes are always
     * included.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param stream the stream to serialize this {@link ProcessingResult} to. The stream
     *            will not be closed.
     * @param saveDocuments if {@code false}, documents will not be serialized.
     *            Notice that when deserializing XML containing clusters but not
     *            documents, document references in {@link Cluster#getDocuments()} will
     *            not be restored.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @throws Exception in case of any problems with serialization
     */
    public void serialize(OutputStream stream, boolean saveDocuments,
        boolean saveClusters) throws Exception
    {
        serialize(stream, saveDocuments, saveClusters, true);
    }

    /**
     * Serializes this {@link ProcessingResult} to a byte stream. Documents, clusters and
     * other attributes can be included or skipped in the output as requested.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * For the duration of the call the internal attributes map is replaced with a
     * filtered copy; the original map is restored when the call ends, whether or not
     * serialization succeeded.
     * </p>
     *
     * @param stream the stream to serialize this {@link ProcessingResult} to. The stream
     *            will not be closed.
     * @param saveDocuments if {@code false}, documents will not be serialized.
     *            Notice that when deserializing XML containing clusters but not
     *            documents, document references in {@link Cluster#getDocuments()} will
     *            not be restored.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @param saveOtherAttributes if {@code false}, other attributes will not be
     *            serialized
     * @throws Exception in case of any problems with serialization
     */
    public void serialize(OutputStream stream, boolean saveDocuments,
        boolean saveClusters, boolean saveOtherAttributes) throws Exception
    {
        final Map<String, Object> backupAttributes = attributes;
        attributes = prepareAttributesForSerialization(saveDocuments, saveClusters,
            saveOtherAttributes);
        try
        {
            new Persister().write(this, stream);
        }
        finally
        {
            // Restore the full attribute map even when writing fails; otherwise this
            // instance would be left holding only the filtered subset.
            attributes = backupAttributes;
        }
    }

    /**
     * Deserializes a {@link ProcessingResult} from a sequence of characters containing
     * XML.
     *
     * @param input the XML text to deserialize a {@link ProcessingResult} from
     * @return deserialized {@link ProcessingResult}
     * @throws Exception in case of any problems with deserialization
     */
    public static ProcessingResult deserialize(CharSequence input) throws Exception
    {
        return new Persister().read(ProcessingResult.class, input.toString());
    }

    /**
     * Deserializes a {@link ProcessingResult} from an XML stream.
     *
     * @param input the input XML stream to deserialize a {@link ProcessingResult} from.
     *            The stream will not be closed.
     * @return deserialized {@link ProcessingResult}
     * @throws Exception in case of any problems with deserialization
     */
    public static ProcessingResult deserialize(InputStream input) throws Exception
    {
        return new Persister().read(ProcessingResult.class, input);
    }

    /**
     * Serializes this processing result as JSON to the provided writer. The
     * output includes all documents, clusters and other attributes.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer) throws IOException
    {
        serializeJson(writer, null);
    }

    /**
     * Serializes this processing result as JSON to the provided writer. The
     * output includes all documents, clusters and other attributes.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @param callback JavaScript function name in which to wrap the JSON response or
     *            {@code null}.
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer, String callback) throws IOException
    {
        serializeJson(writer, callback, true, true);
    }

    /**
     * Serializes this processing result as JSON to the provided writer.
     * Documents and clusters can be included or skipped in the output as requested. Other
     * attributes are always included. The output is not pretty-printed.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @param callback JavaScript function name in which to wrap the JSON response or
     *            {@code null}.
     * @param saveDocuments if {@code false}, documents will not be serialized.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer, String callback, boolean saveDocuments,
        boolean saveClusters) throws IOException
    {
        serializeJson(writer, callback, false, saveDocuments, saveClusters);
    }

    /**
     * Serializes this processing result as JSON to the provided writer.
     * Documents and clusters can be included or skipped in the output as requested. Other
     * attributes are always included.
     * <p>
     * This method is not thread-safe, external synchronization must be applied if needed.
     * </p>
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @param callback JavaScript function name in which to wrap the JSON response or
     *            {@code null}.
     * @param indent if {@code true}, the output JSON will be pretty-printed
     * @param saveDocuments if {@code false}, documents will not be serialized.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer, String callback, boolean indent,
        boolean saveDocuments, boolean saveClusters) throws IOException
    {
        serializeJson(writer, callback, indent, saveDocuments, saveClusters, true);
    }

    /**
     * Serializes this processing result as JSON to the provided writer.
     * Documents, clusters and other attributes can be included or skipped in the output
     * as requested.
     * <p>
     * When a non-blank {@code callback} is given, the JSON is wrapped as
     * {@code callback(...);}. The callback name is written verbatim, without any
     * validation or escaping.
     * </p>
     *
     * @param writer the writer to serialize this processing result to. The writer will
     *            not be closed.
     * @param callback JavaScript function name in which to wrap the JSON response or
     *            {@code null}.
     * @param indent if {@code true}, the output JSON will be pretty-printed
     * @param saveDocuments if {@code false}, documents will not be serialized.
     * @param saveClusters if {@code false}, clusters will not be serialized
     * @param saveOtherAttributes if {@code false}, other attributes will not be
     *            serialized
     * @throws IOException in case of any problems with serialization
     */
    public void serializeJson(Writer writer, String callback, boolean indent,
        boolean saveDocuments, boolean saveClusters, boolean saveOtherAttributes)
        throws IOException
    {
        final ObjectMapper mapper = new ObjectMapper();
        final JsonGenerator generator = new JsonFactory().createJsonGenerator(writer);
        if (indent)
        {
            generator.setPrettyPrinter(new DefaultPrettyPrinter());
        }

        if (StringUtils.isNotBlank(callback))
        {
            writer.write(callback + "(");
        }

        final Map<String, Object> attrs = prepareAttributesForSerialization(
            saveDocuments, saveClusters, saveOtherAttributes);
        mapper.writeValue(generator, attrs);

        if (StringUtils.isNotBlank(callback))
        {
            writer.write(");");
        }
    }

    /**
     * Prepares a temporary attributes map for serialization purposes. Includes only the
     * requested elements in the map. The query attribute is always included, even when
     * other attributes are skipped.
     *
     * @param saveDocuments if {@code true}, the documents list is put in the result
     * @param saveClusters if {@code true}, the clusters list is put in the result
     * @param saveOtherAttributes if {@code true}, all attributes other than documents
     *            and clusters are copied to the result
     * @return a new map holding only the requested elements
     */
    private Map<String, Object> prepareAttributesForSerialization(
        boolean saveDocuments, boolean saveClusters, boolean saveOtherAttributes)
    {
        final Map<String, Object> tempAttributes = Maps.newHashMap();

        if (saveOtherAttributes)
        {
            tempAttributes.putAll(attributes);
            tempAttributes.remove(AttributeNames.DOCUMENTS);
            tempAttributes.remove(AttributeNames.CLUSTERS);
        }
        else
        {
            tempAttributes
                .put(AttributeNames.QUERY, attributes.get(AttributeNames.QUERY));
        }

        if (saveDocuments)
        {
            tempAttributes.put(AttributeNames.DOCUMENTS, getDocuments());
        }

        if (saveClusters)
        {
            tempAttributes.put(AttributeNames.CLUSTERS, getClusters());
        }

        return tempAttributes;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy