org.carrot2.core.Cluster Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of carrot2-mini Show documentation
Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).
There is a newer version: 3.16.3
Show newest version

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.core;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.carrot2.util.MapUtils;
import org.carrot2.util.StringUtils;
import org.carrot2.util.simplexml.SimpleXmlWrapperValue;
import org.carrot2.util.simplexml.SimpleXmlWrappers;
import org.simpleframework.xml.Attribute;
import org.simpleframework.xml.ElementList;
import org.simpleframework.xml.ElementMap;
import org.simpleframework.xml.Root;
import org.simpleframework.xml.core.Commit;
import org.simpleframework.xml.core.Persist;

import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
import org.carrot2.shaded.guava.common.base.Function;
import org.carrot2.shaded.guava.common.collect.Lists;
import org.carrot2.shaded.guava.common.collect.Maps;
import org.carrot2.shaded.guava.common.collect.Ordering;
import org.carrot2.shaded.guava.common.collect.Sets;

/**
 * A cluster (group) of {@link Document}s. Each cluster has a human-readable label
 * consisting of one or more phrases, a list of documents it contains and a list of its
 * subclusters. Optionally, additional attributes can be associated with a cluster, e.g.
 * {@link #OTHER_TOPICS}. This class is not thread-safe.
 */
@Root(name = "group", strict = false)
@JsonAutoDetect(
    creatorVisibility  = JsonAutoDetect.Visibility.NONE,
    fieldVisibility    = JsonAutoDetect.Visibility.NONE,
    getterVisibility   = JsonAutoDetect.Visibility.NONE,
    isGetterVisibility = JsonAutoDetect.Visibility.NONE,
    setterVisibility   = JsonAutoDetect.Visibility.NONE)
@JsonSerialize()
@JsonInclude(JsonInclude.Include.NON_NULL)
public final class Cluster
{
    /**
     * Key of the attribute indicating that the cluster is an <i>Other Topics</i>
     * cluster. Such a cluster contains documents that remain unclustered at given
     * level of cluster hierarchy.
     * <p>
     * Type of this attribute is {@link Boolean}.
     * </p>
     *
     * @see #setAttribute(String, Object)
     * @see #getAttribute(String)
     */
    public static final String OTHER_TOPICS = "other-topics";

    /**
     * Default label for the Other Topics cluster.
     */
    public static final String OTHER_TOPICS_LABEL = "Other Topics";

    /**
     * Key of the attribute holding the score of this cluster, which indicates the
     * clustering algorithm's beliefs on the quality of this cluster. The exact
     * semantics of the score varies across algorithms.
     * <p>
     * Type of this attribute is {@link Double}.
     * </p>
     *
     * @see #setAttribute(String, Object)
     * @see #getAttribute(String)
     */
    public static final String SCORE = "score";

    /**
     * Identifier of this cluster; <code>null</code> until one is assigned.
     *
     * @see #getId()
     */
    @Attribute(required = false)
    Integer id;

    /** Phrases describing this cluster. */
    @ElementList(required = false, name = "title", entry = "phrase")
    private ArrayList<String> phrases = new ArrayList<String>();

    /** A read-only list of phrases exposed in {@link #getPhrases()}. */
    private List<String> phrasesView = Collections.unmodifiableList(phrases);

    /** Subclusters of this cluster. */
    @ElementList(required = false, inline = true)
    private ArrayList<Cluster> subclusters = new ArrayList<Cluster>();

    /** A read-only list of subclusters exposed in {@link #getSubclusters()}. */
    private List<Cluster> subclustersView = Collections.unmodifiableList(subclusters);

    /** Documents contained in this cluster. */
    private final ArrayList<Document> documents = new ArrayList<Document>();

    /** A read-only list of this cluster's documents exposed in {@link #getDocuments()}. */
    private final List<Document> documentsView = Collections.unmodifiableList(documents);

    /** Attributes of this cluster. */
    private Map<String, Object> attributes = new HashMap<String, Object>();

    /** A read-only view of the attributes of this cluster. */
    private Map<String, Object> attributesView = Collections.unmodifiableMap(attributes);

    /** Cached concatenated label; <code>null</code> when it needs to be rebuilt. */
    private String labelCache = null;

    /**
     * Cached list of documents from this cluster and subclusters; <code>null</code>
     * when it needs to be rebuilt.
     */
    private List<Document> allDocuments;

    /** Attributes of this cluster for serialization/ deserialization purposes. */
    @ElementMap(entry = "attribute", key = "key", attribute = true, inline = true, required = false)
    private HashMap<String, SimpleXmlWrapperValue> otherAttributesForSerialization;

    /**
     * List of document ids used for serialization/ deserialization purposes.
     */
    @ElementList(required = false, inline = true)
    List<DocumentRefid> documentIds;

    /**
     * A helper class for serialization/ deserialization of documents with refids.
     * Each instance carries the string identifier of one referenced document.
     */
    @Root(name = "document")
    static class DocumentRefid
    {
        /** String identifier of the referenced document. */
        @Attribute
        String refid;

        /** Creates an instance with {@link #refid} left unset. */
        DocumentRefid()
        {
        }

        /**
         * Creates a reference to the document with the given identifier.
         *
         * @param refid string identifier of the referenced document
         */
        DocumentRefid(String refid)
        {
            this.refid = refid;
        }
    }

    /**
     * Creates a {@link Cluster} with an empty label, no documents and no subclusters.
     * The identifier is left unassigned (<code>null</code>).
     */
    public Cluster()
    {
    }

    /**
     * Creates a {@link Cluster} labelled with a single phrase and holding the given
     * documents.
     *
     * @param phrase the phrase to form the cluster's label
     * @param documents documents contained in the cluster
     */
    public Cluster(String phrase, Document... documents)
    {
        // Delegate to the regular mutators so that caches are handled in one place.
        this.addPhrases(phrase);
        this.addDocuments(documents);
    }

    /**
     * Same as {@link #Cluster(String,Document...)} but allows specifying
     * cluster identifier.
     *
     * @param id identifier to assign to the cluster
     * @param phrase the phrase to form the cluster's label
     * @param documents documents contained in the cluster
     */
    public Cluster(Integer id, String phrase, Document... documents)
    {
        this(phrase, documents);
        this.id = id;
    }

    /**
     * Returns this cluster's label: all phrases joined with a comma followed by a
     * space, e.g. "Phrase one, Phrase two". The joined label is cached until phrases
     * are added. For custom formatting of multi-phrase labels, use
     * {@link #getPhrases()}.
     *
     * @return formatted label of this cluster
     */
    public String getLabel()
    {
        if (labelCache != null)
        {
            return labelCache;
        }

        // Cache miss: build the label once and remember it.
        labelCache = StringUtils.toString(phrases, ", ");
        return labelCache;
    }

    /**
     * Returns all phrases describing this cluster. The returned list is unmodifiable.
     *
     * @return phrases describing this cluster
     */
    @JsonProperty
    public List<String> getPhrases()
    {
        return phrasesView;
    }

    /**
     * Returns all subclusters of this cluster. The returned list is unmodifiable.
     *
     * @return subclusters of this cluster
     */
    public List<Cluster> getSubclusters()
    {
        return subclustersView;
    }

    /**
     * For JSON serialization only.
     *
     * @return the subclusters, or <code>null</code> when there are none
     */
    @JsonProperty("clusters")
    private List<Cluster> getSubclustersForSerialization()
    {
        return subclustersView.isEmpty() ? null : subclustersView;
    }

    /**
     * Returns all documents contained directly in this cluster (subclusters are not
     * included). The returned list is unmodifiable.
     *
     * @return documents contained in this cluster
     */
    public List<Document> getDocuments()
    {
        return documentsView;
    }

    /**
     * Returns all documents contained in this cluster and (recursively) all documents
     * from this cluster's subclusters. The returned list contains unique documents, i.e.
     * if a document is attached to multiple subclusters of this cluster, the document
     * will appear only once on the list. Documents are enumerated parent-first: first
     * come documents returned by {@link #getDocuments()} and then, for each subcluster
     * in turn, the documents of that subcluster and of its own subclusters.
     * <p>
     * The list is computed lazily and cached; the cache is dropped when documents or
     * subclusters are added to this cluster. The same list instance is returned on
     * subsequent calls, so callers should not modify it.
     * </p>
     *
     * @return all documents from this cluster and its subclusters
     */
    public List<Document> getAllDocuments()
    {
        if (allDocuments == null)
        {
            // The linked set removes duplicates while keeping first-seen order.
            allDocuments = new ArrayList<Document>(collectAllDocuments(this,
                new LinkedHashSet<Document>()));
        }

        return allDocuments;
    }

    /**
     * Returns all documents in this cluster (as in {@link #getAllDocuments()}) ordered
     * according to the provided comparator. A new list is created on each call.
     * See {@link Document} for common comparators.
     *
     * @param comparator ordering to apply to the documents
     * @return a new, sorted list of all documents from this cluster and its subclusters
     */
    public List<Document> getAllDocuments(Comparator<? super Document> comparator)
    {
        // Sort a copy so that the cached list keeps its original order.
        final List<Document> sortedDocuments = new ArrayList<Document>(getAllDocuments());
        Collections.sort(sortedDocuments, comparator);
        return sortedDocuments;
    }

    /**
     * A recursive routine for collecting unique documents from a cluster and its
     * subclusters. Documents of the cluster itself are added before those of its
     * subclusters.
     *
     * @param cluster cluster to collect from; <code>null</code> is ignored
     * @param docs set the documents are added to
     * @return the <code>docs</code> set, for convenience
     */
    private static Set<Document> collectAllDocuments(Cluster cluster, Set<Document> docs)
    {
        if (cluster == null)
        {
            return docs;
        }

        docs.addAll(cluster.getDocuments());

        final List<Cluster> subclusters = cluster.getSubclusters();
        for (final Cluster subcluster : subclusters)
        {
            collectAllDocuments(subcluster, docs);
        }

        return docs;
    }

    /**
     * Appends the given phrases to the description of this cluster and drops the
     * cached label.
     *
     * @param phrases to be added to the description of this cluster
     * @return this cluster for convenience
     */
    public Cluster addPhrases(String... phrases)
    {
        // The label is rebuilt lazily on the next getLabel() call.
        labelCache = null;
        Collections.addAll(this.phrases, phrases);
        return this;
    }

    /**
     * Adds phrases to the description of this cluster and drops the cached label.
     *
     * @param phrases to be added to the description of this cluster
     * @return this cluster for convenience
     */
    public Cluster addPhrases(Iterable<String> phrases)
    {
        labelCache = null;
        for (final String phrase : phrases)
        {
            this.phrases.add(phrase);
        }

        return this;
    }

    /**
     * Appends the given documents to this cluster and drops the cached list of all
     * documents.
     *
     * @param documents to be added to this cluster
     * @return this cluster for convenience
     */
    public Cluster addDocuments(Document... documents)
    {
        Collections.addAll(this.documents, documents);

        // Force getAllDocuments() to recompute on next use.
        allDocuments = null;
        return this;
    }

    /**
     * Single-document variant of {@link #addDocuments(Document...)} that avoids the
     * vararg array.
     *
     * @param document document to be added to this cluster
     * @return this cluster for convenience
     * @see #addDocuments(Document...)
     */
    public Cluster addDocument(Document document)
    {
        documents.add(document);
        // Force getAllDocuments() to recompute on next use.
        this.allDocuments = null;
        return this;
    }


    /**
     * Adds documents to this cluster and drops the cached list of all documents.
     *
     * @param documents to be added to this cluster
     * @return this cluster for convenience
     */
    public Cluster addDocuments(Iterable<Document> documents)
    {
        for (final Document document : documents)
        {
            this.documents.add(document);
        }
        allDocuments = null;

        return this;
    }

    /**
     * Appends the given subclusters to this cluster and drops the cached list of all
     * documents.
     *
     * @param subclusters to be added to this cluster
     * @return this cluster for convenience
     */
    public Cluster addSubclusters(Cluster... subclusters)
    {
        Collections.addAll(this.subclusters, subclusters);

        // New subclusters may contribute documents, so the cache is stale.
        allDocuments = null;
        return this;
    }

    /**
     * Adds a single subcluster to this cluster and drops the cached list of all
     * documents.
     *
     * @param cluster subcluster to be added
     * @return this cluster for convenience
     * @see #addSubclusters(Cluster...)
     */
    public Cluster addSubcluster(Cluster cluster)
    {
        subclusters.add(cluster);
        // The new subcluster may contribute documents, so the cache is stale.
        allDocuments = null;
        return this;
    }

    /**
     * Adds subclusters to this cluster and drops the cached list of all documents.
     *
     * @param clusters to be added to this cluster
     * @return this cluster for convenience
     */
    public Cluster addSubclusters(Iterable<Cluster> clusters)
    {
        for (final Cluster cluster : clusters)
        {
            this.subclusters.add(cluster);
        }
        allDocuments = null;

        return this;
    }

    /**
     * Returns the value stored under this cluster's {@value #SCORE} attribute.
     *
     * @return the score, or <code>null</code> if no score attribute is set
     */
    @JsonProperty
    @Attribute(required = false)
    public Double getScore()
    {
        final Double score = getAttribute(SCORE);
        return score;
    }

    /**
     * Stores the given value under this cluster's {@link #SCORE} attribute.
     *
     * @param score score to set
     * @return this cluster for convenience
     */
    @Attribute(required = false)
    public Cluster setScore(Double score)
    {
        return this.setAttribute(SCORE, score);
    }

    /**
     * Returns the attribute associated with this cluster under the provided
     * <code>key</code>. If there is no attribute under the provided <code>key</code>,
     * <code>null</code> will be returned.
     * <p>
     * The value is cast to the type expected by the caller without any runtime check
     * here, so a mismatch surfaces as a {@link ClassCastException} at the call site.
     * </p>
     *
     * @param key of the attribute
     * @param <T> type the caller expects the attribute value to have
     * @return attribute value or <code>null</code>
     */
    @SuppressWarnings("unchecked")
    public <T> T getAttribute(String key)
    {
        return (T) attributes.get(key);
    }

    /**
     * Associates an attribute with this cluster, replacing any value previously
     * stored under the same key.
     *
     * @param key for the attribute
     * @param value for the attribute
     * @param <T> type of the attribute value
     * @return this cluster for convenience
     */
    public <T> Cluster setAttribute(String key, T value)
    {
        attributes.put(key, value);
        return this;
    }
    
    /**
     * Removes the attribute stored under the given key. Nothing happens when this
     * cluster has no such attribute.
     *
     * @param key of the attribute to remove
     * @return this cluster for convenience
     */
    public Cluster removeAttribute(String key)
    {
        this.attributes.remove(key);
        return this;
    }

    /**
     * Returns all attributes of this cluster. The returned map is unmodifiable.
     *
     * @return all attributes of this cluster
     */
    public Map<String, Object> getAttributes()
    {
        return attributesView;
    }

    /**
     * Returns the size of this cluster, that is the number of entries on the list
     * returned by {@link #getAllDocuments()} (unique documents from this cluster and
     * its subclusters).
     *
     * @return size of the cluster
     */
    public int size()
    {
        return this.getAllDocuments().size();
    }

    /**
     * For serialization only; exposes {@link #size()} as a property.
     *
     * @return size of the cluster
     */
    @JsonProperty
    @Attribute(required = false)
    private int getSize()
    {
        return this.size();
    }

    /**
     * Empty implementation, SimpleXML requires both a getter and a setter.
     *
     * @param size ignored
     */
    @Attribute(required = false)
    private void setSize(int size)
    {
        // We only serialize the size, hence empty implementation
    }

    /**
     * Internal identifier of this cluster within the {@link ProcessingResult}. This
     * identifier is assigned dynamically after clusters are passed to
     * {@link ProcessingResult}.
     *
     * @return the identifier, or <code>null</code> if none has been assigned
     * @see ProcessingResult
     */
    @JsonProperty
    public Integer getId()
    {
        return this.id;
    }

    /**
     * Tells whether this cluster is marked as the {@link #OTHER_TOPICS} cluster.
     *
     * @return <code>true</code> if the {@link #OTHER_TOPICS} attribute is set and true
     */
    public boolean isOtherTopics()
    {
        final Boolean flag = getAttribute(OTHER_TOPICS);
        if (flag == null)
        {
            // Attribute absent: a regular cluster.
            return false;
        }
        return flag.booleanValue();
    }

    /**
     * Marks or unmarks this cluster as an Other Topics cluster. Marking sets the
     * {@link #OTHER_TOPICS} attribute and also sets the score to 0.0; unmarking
     * removes the attribute and leaves the score untouched.
     *
     * @param isOtherTopics if <code>true</code>, this cluster will be marked as an
     *            Other Topics cluster.
     * @return this cluster for convenience
     */
    public Cluster setOtherTopics(boolean isOtherTopics)
    {
        if (!isOtherTopics)
        {
            removeAttribute(OTHER_TOPICS);
            return this;
        }

        setAttribute(OTHER_TOPICS, Boolean.TRUE);
        setScore(0.0);
        return this;
    }

    /**
     * Compares clusters by size as returned by {@link #size()}. Clusters with more
     * documents are larger.
     */
    public static final Comparator<Cluster> BY_SIZE_COMPARATOR = Ordering.natural()
        .nullsFirst().onResultOf(new Function<Cluster, Integer>()
        {
            @Override
            public Integer apply(Cluster cluster)
            {
                return cluster.size();
            }
        });

    /**
     * Compares clusters by score as stored under the {@link #SCORE} attribute.
     * Clusters with larger score are larger; clusters without a score compare as
     * smaller than any cluster that has one.
     */
    public static final Comparator<Cluster> BY_SCORE_COMPARATOR = Ordering.natural()
        .nullsFirst().onResultOf(new Function<Cluster, Double>()
        {
            @Override
            public Double apply(Cluster cluster)
            {
                return cluster.getAttribute(SCORE);
            }
        });

    /**
     * Compares clusters by the natural order of their labels as returned by
     * {@link #getLabel()}.
     */
    public static final Comparator<Cluster> BY_LABEL_COMPARATOR = Ordering.natural()
        .nullsFirst().onResultOf(new Function<Cluster, String>()
        {
            @Override
            public String apply(Cluster cluster)
            {
                return cluster.getLabel();
            }
        });

    /**
     * Compares clusters first by their size as returned by {@link #size()} and labels as
     * returned by {@link #getLabel()}. In case of equal sizes, natural order of the
     * labels decides.
     * <p>
     * Please note: this is a reversed comparator, so "larger" clusters end up
     * nearer the beginning of the list being sorted (which is usually the order in which
     * the applications want to display clusters).
     * </p>
     */
    public static final Comparator<Cluster> BY_REVERSED_SIZE_AND_LABEL_COMPARATOR = Ordering
        .from(Collections.reverseOrder(BY_SIZE_COMPARATOR)).compound(BY_LABEL_COMPARATOR);

    /**
     * Compares clusters first by their score as stored under {@link #SCORE} and labels
     * as returned by {@link #getLabel()}. In case of equal scores, natural order of the
     * labels decides.
     * <p>
     * Please note: this is a reversed comparator, so "larger" clusters end up
     * nearer the beginning of the list being sorted (which is usually the order in which
     * the applications want to display clusters).
     * </p>
     */
    public static final Comparator<Cluster> BY_REVERSED_SCORE_AND_LABEL_COMPARATOR = Ordering
        .from(Collections.reverseOrder(BY_SCORE_COMPARATOR))
        .compound(BY_LABEL_COMPARATOR);

    /**
     * Returns a comparator that compares clusters based on the aggregation of their size
     * and score. If <code>scoreWeight</code> is 0.0, the order depends only on cluster
     * sizes. If <code>scoreWeight</code> is 1.0, the order depends only on cluster
     * scores. For <code>scoreWeight</code> values between 0.0 and 1.0, the higher the
     * <code>scoreWeight</code>, the more contribution of cluster scores to the order. In
     * case of a tie on the aggregated cluster size and score, clusters are compared by
     * the natural order of their labels.
     * <p>
     * A cluster without a {@link #SCORE} attribute is treated as having a score of 0.0.
     * </p>
     * <p>
     * Please note: this is a reversed comparator, so "larger" clusters end up
     * nearer the beginning of the list being sorted (which is usually the order in which
     * the applications want to display clusters).
     * </p>
     *
     * @param scoreWeight weight of the score in the aggregate, between 0.0 and 1.0
     * @return the comparator
     * @throws IllegalArgumentException if <code>scoreWeight</code> is outside the
     *             0.0 to 1.0 range or is not a number
     */
    public static Comparator<Cluster> byReversedWeightedScoreAndSizeComparator(
        final double scoreWeight)
    {
        // Written as a negated range test so that NaN is rejected as well.
        if (!(scoreWeight >= 0 && scoreWeight <= 1))
        {
            throw new IllegalArgumentException(
                "Score weight must be between 0.0 (inclusive) and 1.0 (inclusive) ");
        }

        return Ordering.natural().onResultOf(new Function<Cluster, Double>()
        {
            @Override
            public Double apply(Cluster cluster)
            {
                // A missing score would otherwise fail on unboxing.
                final Double score = cluster.getAttribute(SCORE);
                final double effectiveScore = (score != null) ? score.doubleValue() : 0.0;

                // Negated so that natural ordering puts "larger" clusters first.
                return -Math.pow(cluster.size(), (1 - scoreWeight))
                    * Math.pow(effectiveScore, scoreWeight);
            }
        }).compound(BY_LABEL_COMPARATOR);
    }

    /**
     * A comparator that puts {@link #OTHER_TOPICS} clusters at the end of the list. In
     * other words, to this comparator an {@link #OTHER_TOPICS} cluster is "bigger"
     * than a non-{@link #OTHER_TOPICS} cluster.
     * <p>
     * Note: This comparator is designed for use in combination with
     * other comparators, such as {@link #BY_REVERSED_SIZE_AND_LABEL_COMPARATOR}. If you
     * only need to partition a list of clusters into regular and other topic ones, this
     * is better done in linear time without resorting to {@link Collections#sort(List)}.
     * </p>
     */
    public static final Comparator<Cluster> OTHER_TOPICS_AT_THE_END = Ordering.natural()
        .onResultOf(new Function<Cluster, Double>()
        {
            @Override
            public Double apply(Cluster cluster)
            {
                return cluster.isOtherTopics() ? 1.0 : -1.0;
            }
        });

    /**
     * Assigns sequential identifiers to the provided clusters (and their
     * sub-clusters). If any cluster already has an identifier, no identifiers are
     * changed, but then every cluster must have an identifier and all identifiers
     * must be unique. If no cluster has an identifier, clusters are numbered from 0
     * in the order produced by {@link #flatten(Collection)}.
     *
     * @param clusters Clusters to assign identifiers to.
     * @throws IllegalArgumentException if the provided clusters contain non-unique
     *             identifiers, or a mix of null and non-null identifiers.
     */
    public static void assignClusterIds(Collection<Cluster> clusters)
    {
        final List<Cluster> flattened = flatten(clusters);
        synchronized (clusters)
        {
            // First, check whether any identifiers have already been provided.
            boolean hadIds = false;
            for (final Cluster cluster : flattened)
            {
                if (cluster.id != null)
                {
                    hadIds = true;
                    break;
                }
            }

            if (hadIds)
            {
                final Set<Integer> ids = new HashSet<Integer>();
                for (final Cluster c : flattened)
                {
                    // Check for null before uniqueness, otherwise two clusters
                    // without an identifier would be reported as a duplicate.
                    if (c.id == null)
                    {
                        throw new IllegalArgumentException(
                            "Null cluster identifiers cannot be mixed with existing non-null identifiers.");
                    }
                    if (!ids.add(c.id))
                    {
                        throw new IllegalArgumentException(
                            "Cluster identifiers must be unique, duplicated identifier: " + c.id);
                    }
                }
            }
            else
            {
                // No cluster has an identifier here, assign new ones to all of them.
                int id = 0;
                for (final Cluster c : flattened)
                {
                    c.id = id++;
                }
            }
        }
    }

    /**
     * Flattens a hierarchy of clusters into a flat list. Each cluster is followed by
     * its subclusters (recursively) before the next cluster of the input.
     *
     * @param hierarchical top-level clusters of the hierarchy
     * @return a new list containing the clusters and all their subclusters
     */
    public static List<Cluster> flatten(Collection<Cluster> hierarchical)
    {
        return flatten(hierarchical, new ArrayList<Cluster>());
    }

    /*
     * Recursive descent into subclusters: appends each cluster to the output list,
     * immediately followed by its own subclusters, and returns that list.
     */
    private static List<Cluster> flatten(Collection<Cluster> hierarchical, List<Cluster> flat)
    {
        for (final Cluster c : hierarchical)
        {
            flat.add(c);
            flatten(c.getSubclusters(), flat);
        }
        return flat;
    }

    /**
     * Locate the first cluster that has id equal to <code>id</code>. The search includes
     * all the clusters in the input and their sub-clusters; a cluster is checked before
     * its sub-clusters, and <code>null</code> entries of the input are skipped.
     *
     * @param id identifier to look for
     * @param clusters clusters to search
     * @return the first cluster with a matching identifier, or <code>null</code> if no
     *         such cluster could be found
     */
    public static Cluster find(int id, Collection<Cluster> clusters)
    {
        for (final Cluster c : clusters)
        {
            if (c == null)
            {
                continue;
            }

            if (c.id != null && c.id.intValue() == id)
            {
                return c;
            }

            // Searching an empty subcluster list simply yields null.
            final Cluster sub = find(id, c.getSubclusters());
            if (sub != null)
            {
                return sub;
            }
        }

        return null;
    }

    /**
     * Builds an "Other Topics" cluster that groups those documents from
     * <code>allDocuments</code> that were not referenced in any cluster in
     * <code>clusters</code>. The cluster is labelled with {@link #OTHER_TOPICS_LABEL}.
     *
     * @param allDocuments all documents to check against
     * @param clusters list of clusters with assigned documents
     * @return the "Other Topics" cluster
     */
    public static Cluster buildOtherTopics(List<Document> allDocuments,
        List<Cluster> clusters)
    {
        return buildOtherTopics(allDocuments, clusters, OTHER_TOPICS_LABEL);
    }

    /**
     * Builds an "Other Topics" cluster that groups those documents from
     * <code>allDocuments</code> that were not referenced in any cluster in
     * <code>clusters</code> (subclusters included). The unclustered documents keep the
     * order they have in <code>allDocuments</code>. The returned cluster is marked
     * with {@link #setOtherTopics(boolean)} and may contain no documents.
     *
     * @param allDocuments all documents to check against
     * @param clusters list of clusters with assigned documents
     * @param label label for the "Other Topics" group
     * @return the "Other Topics" cluster
     */
    public static Cluster buildOtherTopics(List<Document> allDocuments,
        List<Cluster> clusters, String label)
    {
        // Linked set: removes duplicates but preserves the input order.
        final Set<Document> unclusteredDocuments = new LinkedHashSet<Document>(allDocuments);
        final Set<Document> assignedDocuments = new HashSet<Document>();

        for (final Cluster cluster : clusters)
        {
            collectAllDocuments(cluster, assignedDocuments);
        }

        unclusteredDocuments.removeAll(assignedDocuments);

        final Cluster otherTopics = new Cluster(label);
        otherTopics.addDocuments(unclusteredDocuments);
        otherTopics.setOtherTopics(true);

        return otherTopics;
    }

    /**
     * If there are unclustered documents, appends the "Other Topics" group, labelled
     * with {@link #OTHER_TOPICS_LABEL}, to the <code>clusters</code>.
     *
     * @param allDocuments all documents to check against
     * @param clusters list of clusters with assigned documents; may be appended to
     * @return the "Other Topics" cluster, whether or not it was appended
     * @see #buildOtherTopics(List, List)
     */
    public static Cluster appendOtherTopics(List<Document> allDocuments,
        List<Cluster> clusters)
    {
        return appendOtherTopics(allDocuments, clusters, OTHER_TOPICS_LABEL);
    }

    /**
     * If there are unclustered documents, appends the "Other Topics" group to the
     * <code>clusters</code>.
     *
     * @param allDocuments all documents to check against
     * @param clusters list of clusters with assigned documents; may be appended to
     * @param label label for the "Other Topics" group
     * @return the "Other Topics" cluster, whether or not it was appended
     * @see #buildOtherTopics(List, List, String)
     */
    public static Cluster appendOtherTopics(List<Document> allDocuments,
        List<Cluster> clusters, String label)
    {
        final Cluster otherTopics = buildOtherTopics(allDocuments, clusters, label);
        if (!otherTopics.getDocuments().isEmpty())
        {
            clusters.add(otherTopics);
        }
        return otherTopics;
    }

    /**
     * An extremely dodgy method that remaps {@link Document} references
     * inside this cluster. This operation is allowed only when the cluster has not been
     * assigned an ID yet (so theoretically before the {@link ProcessingResult} has been
     * published). While there are theoretically other ways to achieve the same result
     * (copying the entire set of clusters) this is the most memory and cpu efficient way.
     * <p>
     * Only documents from this cluster are remapped, subclusters need to be processed
     * separately. Documents with no entry in the mapping are left as they are.
     * </p>
     *
     * @param docMapping maps documents currently in this cluster to their replacements
     * @throws IllegalStateException if this cluster already has an identifier
     */
    public void remapDocumentReferences(IdentityHashMap<Document, Document> docMapping)
    {
        if (this.id != null)
        {
            throw new IllegalStateException(
                "Document references cannot be remapped once the cluster has an identifier.");
        }

        for (int i = documents.size(); --i >= 0;)
        {
            final Document remapped = docMapping.get(documents.get(i));
            if (remapped != null)
            {
                documents.set(i, remapped);
            }
        }

        // Invalidate recursive flattened cache.
        this.allDocuments = null;
    }

    /**
     * Prepares the serialization-only fields: document references built from the
     * string identifiers of this cluster's documents, and a wrapped copy of the
     * attributes without {@link #SCORE}.
     */
    @Persist
    private void beforeSerialization()
    {
        documentIds = Lists.transform(documents, new Function<Document, DocumentRefid>()
        {
            @Override
            public DocumentRefid apply(Document document)
            {
                return new DocumentRefid(document.getStringId());
            }
        });

        // Remove score from attributes for serialization
        otherAttributesForSerialization = MapUtils.asHashMap(SimpleXmlWrappers
            .wrap(attributes));
        otherAttributesForSerialization.remove(SCORE);
        if (otherAttributesForSerialization.isEmpty())
        {
            // Nothing left to write: null the field so that no entries are emitted.
            otherAttributesForSerialization = null;
        }
    }

    /**
     * Restores the state after deserialization: copies the unwrapped serialized
     * attributes into the attribute map and re-creates the read-only views of phrases
     * and subclusters.
     */
    @Commit
    private void afterDeserialization() throws Exception
    {
        final boolean hasSerializedAttributes = (otherAttributesForSerialization != null);
        if (hasSerializedAttributes)
        {
            this.attributes.putAll(SimpleXmlWrappers.unwrap(otherAttributesForSerialization));
        }

        // Re-wrap the lists, the views must point at the current list instances.
        this.phrasesView = Collections.unmodifiableList(this.phrases);
        this.subclustersView = Collections.unmodifiableList(this.subclusters);
        // Documents will be restored on the ProcessingResult level
    }

    /**
     * For JSON serialization only.
     *
     * @return string identifiers of the documents contained directly in this cluster
     */
    @JsonProperty("documents")
    private List<String> getDocumentIds()
    {
        return Lists.transform(documents, DOCUMENT_TO_ID);
    }

    /** Maps a document to its string identifier. */
    private static final Function<Document, String> DOCUMENT_TO_ID = new Function<Document, String>()
    {
        @Override
        public String apply(Document doc)
        {
            return doc.getStringId();
        }
    };
    
    /**
     * For JSON and XML serialization only.
     *
     * @return a copy of this cluster's attributes, or <code>null</code> when there
     *         are none
     */
    @JsonProperty("attributes")
    private Map<String, Object> getOtherAttributes()
    {
        final Map<String, Object> otherAttributes = new HashMap<String, Object>(attributesView);
        return otherAttributes.isEmpty() ? null : otherAttributes;
    }

    /**
     * Returns a short description with this cluster's label, size and the number of
     * direct subclusters.
     */
    @Override
    public String toString()
    {
        final StringBuilder text = new StringBuilder("[Cluster, label: ");
        text.append(getLabel());
        text.append(", docs: ").append(size());
        text.append(", subclusters: ").append(getSubclusters().size());
        return text.append("]").toString();
    }
}