All downloads are free. Search and download functionality uses the official Maven repository.

org.carrot2.text.vsm.VectorSpaceModelContext Maven / Gradle / Ivy

Go to download

Carrot2 search results clustering framework. Minimal functional subset (core algorithms and infrastructure, no document sources).

There is a newer version: 3.16.3
Show newest version

/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.text.vsm;

import org.carrot2.mahout.math.matrix.DoubleMatrix2D;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.PreprocessingContext.AllLabels;
import org.carrot2.text.preprocessing.PreprocessingContext.AllStems;

import com.carrotsearch.hppc.IntIntHashMap;

/**
 * Stores data related to the Vector Space Model of the processed documents.
 */
public class VectorSpaceModelContext
{
    /** The preprocessing context of the documents this model is built from. */
    public final PreprocessingContext preprocessingContext;

    /**
     * Term-document matrix. Each row corresponds to a word stem and each column to a
     * processed document. Use {@link #stemToRowIndex} to map between entries of
     * {@link AllStems} and rows of this matrix.
     * <p>
     * This matrix is produced by
     * {@link TermDocumentMatrixBuilder#buildTermDocumentMatrix(VectorSpaceModelContext)}.
     * </p>
     */
    public DoubleMatrix2D termDocumentMatrix;

    /**
     * Term-document-like matrix for phrases from {@link AllLabels}. When
     * {@link AllLabels} contains no phrases, this matrix is <code>null</code>. Use
     * {@link #stemToRowIndex} to map between entries of {@link AllStems} and rows of
     * this matrix.
     * <p>
     * This matrix is produced by
     * {@link TermDocumentMatrixBuilder#buildTermPhraseMatrix(VectorSpaceModelContext)}.
     * </p>
     */
    public DoubleMatrix2D termPhraseMatrix;

    /**
     * Stem index to row index mapping for {@link #termDocumentMatrix}. Keys are indices
     * of entries in the {@link AllStems} arrays; values are the indices of the
     * {@link #termDocumentMatrix} rows corresponding to those stems. Note that,
     * depending on the limit on the size of the matrix, some stems may have no
     * corresponding matrix row.
     * <p>
     * This object is produced by
     * {@link TermDocumentMatrixBuilder#buildTermDocumentMatrix(VectorSpaceModelContext)}.
     * </p>
     */
    public IntIntHashMap stemToRowIndex;

    /**
     * Creates a vector space model context bound to the given preprocessing context.
     *
     * @param preprocessingContext preprocessing context for the underlying documents;
     *        stored as-is in {@link #preprocessingContext}
     */
    public VectorSpaceModelContext(PreprocessingContext preprocessingContext)
    {
        this.preprocessingContext = preprocessingContext;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy