All downloads are free. The search and download features use the official Maven repository.

org.carrot2.text.preprocessing.Substring Maven / Gradle / Ivy


/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.IntIntHashMap;

/**
 * Represents a general substring. Contains information on the substring's boundaries and
 * absolute frequency.
 */
final class Substring
{
    /** Identifier of this substring. */
    public int id;

    /**
     * Position at which the substring starts. {@link #isEquivalentTo} uses it as the
     * index of the first compared token.
     */
    public int from;

    /**
     * Position at which the substring ends. {@link #isEquivalentTo} compares
     * <code>to - from</code> tokens, so the token at this index is not compared.
     */
    public int to;

    /** Absolute frequency of the substring. */
    public int frequency;

    /**
     * Frequency of this substring across documents. The constructor does not assign
     * this field, so it stays <code>null</code> until set from outside.
     */
    public IntIntHashMap tfByDocument;

    /** Used to properly aggregate phrase frequencies; starts out as -1. */
    public int documentIndexToOffset = -1;

    /**
     * Creates a substring with the given identifier, boundaries and frequency.
     *
     * @param id the substring's id
     * @param from the substring's start position
     * @param to the substring's end position
     * @param frequency the substring's absolute frequency
     */
    public Substring(int id, int from, int to, int frequency)
    {
        this.frequency = frequency;
        this.to = to;
        this.from = from;
        this.id = id;
    }

    /**
     * Checks whether this substring and <code>other</code> have the same length and,
     * position by position, map to the same stem index.
     *
     * @param other the substring to compare with
     * @param tokensWordIndex array indexed by token position; its values are used as
     *        indices into <code>wordsStemIndex</code>
     * @param wordsStemIndex array whose values are compared for equality
     * @return <code>true</code> if both substrings span the same number of positions
     *         and every corresponding pair of positions yields equal values,
     *         <code>false</code> otherwise
     */
    public boolean isEquivalentTo(Substring other,
        int [] tokensWordIndex, int [] wordsStemIndex)
    {
        final int length = to - from;
        if (other.to - other.from != length)
        {
            // Substrings of different lengths can never be equivalent.
            return false;
        }

        int offset = 0;
        while (offset < length)
        {
            final int otherStem = wordsStemIndex[tokensWordIndex[other.from + offset]];
            final int thisStem = wordsStemIndex[tokensWordIndex[from + offset]];
            if (otherStem != thisStem)
            {
                return false;
            }
            offset++;
        }

        return true;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy